xref: /illumos-gate/usr/src/cmd/bhyve/block_if.c (revision 32640292339b07090f10ce34d455f98711077343)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
 * All rights reserved.
 * Copyright 2020 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
294c87aefeSPatrick Mooney 
/*
 * Copyright 2020 Joyent, Inc.
 */
33bf21cd93STycho Nightingale 
34bf21cd93STycho Nightingale #include <sys/cdefs.h>
35bf21cd93STycho Nightingale 
36bf21cd93STycho Nightingale #include <sys/param.h>
374c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
384c87aefeSPatrick Mooney #include <sys/capsicum.h>
394c87aefeSPatrick Mooney #endif
40bf21cd93STycho Nightingale #include <sys/queue.h>
41bf21cd93STycho Nightingale #include <sys/errno.h>
42bf21cd93STycho Nightingale #include <sys/stat.h>
43bf21cd93STycho Nightingale #include <sys/ioctl.h>
44bf21cd93STycho Nightingale #include <sys/disk.h>
454f3f3e9aSAndy Fiddaman #ifndef __FreeBSD__
464c87aefeSPatrick Mooney #include <sys/limits.h>
474c87aefeSPatrick Mooney #include <sys/uio.h>
484c87aefeSPatrick Mooney #include <sys/dkio.h>
494c87aefeSPatrick Mooney #endif
50bf21cd93STycho Nightingale 
51bf21cd93STycho Nightingale #include <assert.h>
524c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
534c87aefeSPatrick Mooney #include <capsicum_helpers.h>
544c87aefeSPatrick Mooney #endif
554c87aefeSPatrick Mooney #include <err.h>
56bf21cd93STycho Nightingale #include <fcntl.h>
57bf21cd93STycho Nightingale #include <stdio.h>
58bf21cd93STycho Nightingale #include <stdlib.h>
59bf21cd93STycho Nightingale #include <string.h>
60bf21cd93STycho Nightingale #include <pthread.h>
61bf21cd93STycho Nightingale #include <pthread_np.h>
62bf21cd93STycho Nightingale #include <signal.h>
634c87aefeSPatrick Mooney #include <sysexits.h>
64bf21cd93STycho Nightingale #include <unistd.h>
65bf21cd93STycho Nightingale 
66bf21cd93STycho Nightingale #include <machine/atomic.h>
67bf21cd93STycho Nightingale 
68bf21cd93STycho Nightingale #include "bhyverun.h"
692b948146SAndy Fiddaman #include "config.h"
70154972afSPatrick Mooney #include "debug.h"
71bf21cd93STycho Nightingale #include "mevent.h"
722b948146SAndy Fiddaman #include "pci_emul.h"
73bf21cd93STycho Nightingale #include "block_if.h"
74bf21cd93STycho Nightingale 
/*
 * Signature constant for a block interface context.
 * NOTE(review): presumably the value kept in blockif_ctxt.bc_magic; the
 * assignment is not visible in this part of the file -- confirm.
 */
#define BLOCKIF_SIG	0xb109b109

/* Size of the per-context worker thread id array (bc_btid). */
#ifdef __FreeBSD__
#define BLOCKIF_NUMTHR	8
#else
/* Enlarge to keep pace with the virtio-block ring size */
#define BLOCKIF_NUMTHR	16
#endif
/* Number of request elements embedded in each context (bc_reqs). */
#define BLOCKIF_MAXREQ	(BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
84bf21cd93STycho Nightingale 
/*
 * Operation carried by a queued request element.  The numeric values follow
 * declaration order; BOP_WRITE_SYNC only exists on non-FreeBSD builds.
 */
enum blockop {
	BOP_READ,	/* read into the request's iovecs */
	BOP_WRITE,	/* write from the request's iovecs */
#ifndef __FreeBSD__
	BOP_WRITE_SYNC,	/* ranged like a write when queued by blockif_enqueue */
#endif
	BOP_FLUSH,	/* flush the backing store (blockif_flush_bc) */
	BOP_DELETE	/* free/deallocate a byte range */
};
94bf21cd93STycho Nightingale 
/*
 * Life-cycle state of a request element.
 */
enum blockstat {
	BST_FREE,	/* on the free queue, not bound to a request */
	BST_BLOCK,	/* on the pending queue, held behind an earlier request */
	BST_PEND,	/* on the pending queue, eligible for a worker */
	BST_BUSY,	/* on the busy queue, owned by a worker thread */
	BST_DONE	/* processed by blockif_proc, awaiting blockif_complete */
};
102bf21cd93STycho Nightingale 
103bf21cd93STycho Nightingale struct blockif_elem {
104bf21cd93STycho Nightingale 	TAILQ_ENTRY(blockif_elem) be_link;
105bf21cd93STycho Nightingale 	struct blockif_req  *be_req;
106bf21cd93STycho Nightingale 	enum blockop	     be_op;
107bf21cd93STycho Nightingale 	enum blockstat	     be_status;
108bf21cd93STycho Nightingale 	pthread_t            be_tid;
1094c87aefeSPatrick Mooney 	off_t		     be_block;
110bf21cd93STycho Nightingale };
111bf21cd93STycho Nightingale 
#ifndef __FreeBSD__
/*
 * Value type of blockif_ctxt.bc_wce.
 * NOTE(review): the names suggest "write cache enable" handled either by an
 * ioctl or an fcntl; the code that uses these values is outside this part of
 * the file -- confirm before relying on that reading.
 */
enum blockif_wce {
	WCE_NONE = 0,
	WCE_IOCTL,
	WCE_FCNTL
};
#endif
1194c87aefeSPatrick Mooney 
120bf21cd93STycho Nightingale struct blockif_ctxt {
12159d65d31SAndy Fiddaman 	unsigned int		bc_magic;
122bf21cd93STycho Nightingale 	int			bc_fd;
1234c87aefeSPatrick Mooney 	int			bc_ischr;
1244c87aefeSPatrick Mooney 	int			bc_isgeom;
1254c87aefeSPatrick Mooney 	int			bc_candelete;
1264c87aefeSPatrick Mooney #ifndef __FreeBSD__
1274c87aefeSPatrick Mooney 	enum blockif_wce	bc_wce;
1284c87aefeSPatrick Mooney #endif
129bf21cd93STycho Nightingale 	int			bc_rdonly;
130bf21cd93STycho Nightingale 	off_t			bc_size;
131bf21cd93STycho Nightingale 	int			bc_sectsz;
1324c87aefeSPatrick Mooney 	int			bc_psectsz;
1334c87aefeSPatrick Mooney 	int			bc_psectoff;
1344c87aefeSPatrick Mooney 	int			bc_closing;
1354c87aefeSPatrick Mooney 	pthread_t		bc_btid[BLOCKIF_NUMTHR];
136bf21cd93STycho Nightingale 	pthread_mutex_t		bc_mtx;
137bf21cd93STycho Nightingale 	pthread_cond_t		bc_cond;
138b0de25cbSAndy Fiddaman 	blockif_resize_cb	*bc_resize_cb;
139b0de25cbSAndy Fiddaman 	void			*bc_resize_cb_arg;
140b0de25cbSAndy Fiddaman 	struct mevent		*bc_resize_event;
141bf21cd93STycho Nightingale 
142bf21cd93STycho Nightingale 	/* Request elements and free/pending/busy queues */
143bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_freeq;
144bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_pendq;
145bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_busyq;
146bf21cd93STycho Nightingale 	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
147*32640292SAndy Fiddaman 	int			bc_bootindex;
148bf21cd93STycho Nightingale };
149bf21cd93STycho Nightingale 
/* Guards the one-time call to blockif_init() made from blockif_open(). */
static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;

/*
 * Node of the singly linked list drained by blockif_sigcont_handler().  For
 * each node popped, the handler clears bse_pending and signals bse_cond
 * while holding bse_mtx.
 */
struct blockif_sig_elem {
	pthread_mutex_t			bse_mtx;
	pthread_cond_t			bse_cond;
	int				bse_pending;
	struct blockif_sig_elem		*bse_next;
};

/* Head of the list above; popped with atomic compare-and-set. */
static struct blockif_sig_elem *blockif_bse_head;
160bf21cd93STycho Nightingale 
161bf21cd93STycho Nightingale static int
blockif_enqueue(struct blockif_ctxt * bc,struct blockif_req * breq,enum blockop op)162bf21cd93STycho Nightingale blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
163bf21cd93STycho Nightingale 		enum blockop op)
164bf21cd93STycho Nightingale {
1654c87aefeSPatrick Mooney 	struct blockif_elem *be, *tbe;
1664c87aefeSPatrick Mooney 	off_t off;
1674c87aefeSPatrick Mooney 	int i;
168bf21cd93STycho Nightingale 
169bf21cd93STycho Nightingale 	be = TAILQ_FIRST(&bc->bc_freeq);
170bf21cd93STycho Nightingale 	assert(be != NULL);
171bf21cd93STycho Nightingale 	assert(be->be_status == BST_FREE);
172bf21cd93STycho Nightingale 	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
173bf21cd93STycho Nightingale 	be->be_req = breq;
174bf21cd93STycho Nightingale 	be->be_op = op;
1754c87aefeSPatrick Mooney 	switch (op) {
1764c87aefeSPatrick Mooney 	case BOP_READ:
1774c87aefeSPatrick Mooney 	case BOP_WRITE:
1784c87aefeSPatrick Mooney #ifndef __FreeBSD__
1794c87aefeSPatrick Mooney 	case BOP_WRITE_SYNC:
1804c87aefeSPatrick Mooney #endif
1814c87aefeSPatrick Mooney 	case BOP_DELETE:
1824c87aefeSPatrick Mooney 		off = breq->br_offset;
1834c87aefeSPatrick Mooney 		for (i = 0; i < breq->br_iovcnt; i++)
1844c87aefeSPatrick Mooney 			off += breq->br_iov[i].iov_len;
1854c87aefeSPatrick Mooney 		break;
1864c87aefeSPatrick Mooney 	default:
1874c87aefeSPatrick Mooney 		off = OFF_MAX;
1884c87aefeSPatrick Mooney 	}
1894c87aefeSPatrick Mooney 	be->be_block = off;
1904c87aefeSPatrick Mooney 	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
1914c87aefeSPatrick Mooney 		if (tbe->be_block == breq->br_offset)
1924c87aefeSPatrick Mooney 			break;
1934c87aefeSPatrick Mooney 	}
1944c87aefeSPatrick Mooney 	if (tbe == NULL) {
1954c87aefeSPatrick Mooney 		TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
1964c87aefeSPatrick Mooney 			if (tbe->be_block == breq->br_offset)
1974c87aefeSPatrick Mooney 				break;
1984c87aefeSPatrick Mooney 		}
1994c87aefeSPatrick Mooney 	}
2004c87aefeSPatrick Mooney 	if (tbe == NULL)
2014c87aefeSPatrick Mooney 		be->be_status = BST_PEND;
2024c87aefeSPatrick Mooney 	else
2034c87aefeSPatrick Mooney 		be->be_status = BST_BLOCK;
204bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
2054c87aefeSPatrick Mooney 	return (be->be_status == BST_PEND);
206bf21cd93STycho Nightingale }
207bf21cd93STycho Nightingale 
208bf21cd93STycho Nightingale static int
blockif_dequeue(struct blockif_ctxt * bc,pthread_t t,struct blockif_elem ** bep)2094c87aefeSPatrick Mooney blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
210bf21cd93STycho Nightingale {
211bf21cd93STycho Nightingale 	struct blockif_elem *be;
212bf21cd93STycho Nightingale 
2134c87aefeSPatrick Mooney 	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
2144c87aefeSPatrick Mooney 		if (be->be_status == BST_PEND)
2154c87aefeSPatrick Mooney 			break;
2164c87aefeSPatrick Mooney 		assert(be->be_status == BST_BLOCK);
2174c87aefeSPatrick Mooney 	}
2184c87aefeSPatrick Mooney 	if (be == NULL)
2194c87aefeSPatrick Mooney 		return (0);
220bf21cd93STycho Nightingale 	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
221bf21cd93STycho Nightingale 	be->be_status = BST_BUSY;
2224c87aefeSPatrick Mooney 	be->be_tid = t;
223bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
224bf21cd93STycho Nightingale 	*bep = be;
2254c87aefeSPatrick Mooney 	return (1);
226bf21cd93STycho Nightingale }
227bf21cd93STycho Nightingale 
228bf21cd93STycho Nightingale static void
blockif_complete(struct blockif_ctxt * bc,struct blockif_elem * be)229bf21cd93STycho Nightingale blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
230bf21cd93STycho Nightingale {
2314c87aefeSPatrick Mooney 	struct blockif_elem *tbe;
232bf21cd93STycho Nightingale 
2334c87aefeSPatrick Mooney 	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
234bf21cd93STycho Nightingale 		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
2354c87aefeSPatrick Mooney 	else
2364c87aefeSPatrick Mooney 		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
2374c87aefeSPatrick Mooney 	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
2384c87aefeSPatrick Mooney 		if (tbe->be_req->br_offset == be->be_block)
2394c87aefeSPatrick Mooney 			tbe->be_status = BST_PEND;
2404c87aefeSPatrick Mooney 	}
241bf21cd93STycho Nightingale 	be->be_tid = 0;
242bf21cd93STycho Nightingale 	be->be_status = BST_FREE;
243bf21cd93STycho Nightingale 	be->be_req = NULL;
244bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
245bf21cd93STycho Nightingale }
246bf21cd93STycho Nightingale 
2474f3f3e9aSAndy Fiddaman static int
blockif_flush_bc(struct blockif_ctxt * bc)2484f3f3e9aSAndy Fiddaman blockif_flush_bc(struct blockif_ctxt *bc)
2494f3f3e9aSAndy Fiddaman {
2504f3f3e9aSAndy Fiddaman #ifdef	__FreeBSD__
2514f3f3e9aSAndy Fiddaman 	if (bc->bc_ischr) {
2524f3f3e9aSAndy Fiddaman 		if (ioctl(bc->bc_fd, DIOCGFLUSH))
2534f3f3e9aSAndy Fiddaman 			return (errno);
2544f3f3e9aSAndy Fiddaman 	} else if (fsync(bc->bc_fd))
2554f3f3e9aSAndy Fiddaman 		return (errno);
2564f3f3e9aSAndy Fiddaman #else
2574f3f3e9aSAndy Fiddaman 	/*
2584f3f3e9aSAndy Fiddaman 	 * This fsync() should be adequate to flush the cache of a file
2594f3f3e9aSAndy Fiddaman 	 * or device.  In VFS, the VOP_SYNC operation is converted to
2604f3f3e9aSAndy Fiddaman 	 * the appropriate ioctl in both sdev (for real devices) and
2614f3f3e9aSAndy Fiddaman 	 * zfs (for zvols).
2624f3f3e9aSAndy Fiddaman 	 */
2634f3f3e9aSAndy Fiddaman 	if (fsync(bc->bc_fd))
2644f3f3e9aSAndy Fiddaman 		return (errno);
2654f3f3e9aSAndy Fiddaman #endif
2664f3f3e9aSAndy Fiddaman 
2674f3f3e9aSAndy Fiddaman 	return (0);
2684f3f3e9aSAndy Fiddaman }
2694f3f3e9aSAndy Fiddaman 
270bf21cd93STycho Nightingale static void
blockif_proc(struct blockif_ctxt * bc,struct blockif_elem * be,uint8_t * buf)2714c87aefeSPatrick Mooney blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
272bf21cd93STycho Nightingale {
27359d65d31SAndy Fiddaman #ifdef	__FreeBSD__
27459d65d31SAndy Fiddaman 	struct spacectl_range range;
27559d65d31SAndy Fiddaman #endif
276bf21cd93STycho Nightingale 	struct blockif_req *br;
2774c87aefeSPatrick Mooney #ifdef	__FreeBSD__
2784c87aefeSPatrick Mooney 	off_t arg[2];
2794c87aefeSPatrick Mooney #endif
28059d65d31SAndy Fiddaman 	ssize_t n;
28159d65d31SAndy Fiddaman 	size_t clen, len, off, boff, voff;
2824c87aefeSPatrick Mooney 	int i, err;
283bf21cd93STycho Nightingale 
284bf21cd93STycho Nightingale 	br = be->be_req;
28559d65d31SAndy Fiddaman 	assert(br->br_resid >= 0);
28659d65d31SAndy Fiddaman 
2874c87aefeSPatrick Mooney 	if (br->br_iovcnt <= 1)
2884c87aefeSPatrick Mooney 		buf = NULL;
289bf21cd93STycho Nightingale 	err = 0;
290bf21cd93STycho Nightingale 	switch (be->be_op) {
291bf21cd93STycho Nightingale 	case BOP_READ:
2924c87aefeSPatrick Mooney 		if (buf == NULL) {
29359d65d31SAndy Fiddaman 			if ((n = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
2944c87aefeSPatrick Mooney 			    br->br_offset)) < 0)
295bf21cd93STycho Nightingale 				err = errno;
2964c87aefeSPatrick Mooney 			else
29759d65d31SAndy Fiddaman 				br->br_resid -= n;
2984c87aefeSPatrick Mooney 			break;
2994c87aefeSPatrick Mooney 		}
3004c87aefeSPatrick Mooney 		i = 0;
3014c87aefeSPatrick Mooney 		off = voff = 0;
3024c87aefeSPatrick Mooney 		while (br->br_resid > 0) {
3034c87aefeSPatrick Mooney 			len = MIN(br->br_resid, MAXPHYS);
30459d65d31SAndy Fiddaman 			n = pread(bc->bc_fd, buf, len, br->br_offset + off);
30559d65d31SAndy Fiddaman 			if (n < 0) {
3064c87aefeSPatrick Mooney 				err = errno;
3074c87aefeSPatrick Mooney 				break;
3084c87aefeSPatrick Mooney 			}
30959d65d31SAndy Fiddaman 			len = (size_t)n;
3104c87aefeSPatrick Mooney 			boff = 0;
3114c87aefeSPatrick Mooney 			do {
3124c87aefeSPatrick Mooney 				clen = MIN(len - boff, br->br_iov[i].iov_len -
3134c87aefeSPatrick Mooney 				    voff);
31459d65d31SAndy Fiddaman 				memcpy((uint8_t *)br->br_iov[i].iov_base + voff,
3154c87aefeSPatrick Mooney 				    buf + boff, clen);
3164c87aefeSPatrick Mooney 				if (clen < br->br_iov[i].iov_len - voff)
3174c87aefeSPatrick Mooney 					voff += clen;
3184c87aefeSPatrick Mooney 				else {
3194c87aefeSPatrick Mooney 					i++;
3204c87aefeSPatrick Mooney 					voff = 0;
3214c87aefeSPatrick Mooney 				}
3224c87aefeSPatrick Mooney 				boff += clen;
3234c87aefeSPatrick Mooney 			} while (boff < len);
3244c87aefeSPatrick Mooney 			off += len;
3254c87aefeSPatrick Mooney 			br->br_resid -= len;
3264c87aefeSPatrick Mooney 		}
327bf21cd93STycho Nightingale 		break;
328bf21cd93STycho Nightingale 	case BOP_WRITE:
3294c87aefeSPatrick Mooney 		if (bc->bc_rdonly) {
330bf21cd93STycho Nightingale 			err = EROFS;
3314c87aefeSPatrick Mooney 			break;
3324c87aefeSPatrick Mooney 		}
3334c87aefeSPatrick Mooney 		if (buf == NULL) {
33459d65d31SAndy Fiddaman 			if ((n = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
3354c87aefeSPatrick Mooney 			    br->br_offset)) < 0)
3364c87aefeSPatrick Mooney 				err = errno;
3374c87aefeSPatrick Mooney 			else
33859d65d31SAndy Fiddaman 				br->br_resid -= n;
3394c87aefeSPatrick Mooney 			break;
3404c87aefeSPatrick Mooney 		}
3414c87aefeSPatrick Mooney 		i = 0;
3424c87aefeSPatrick Mooney 		off = voff = 0;
3434c87aefeSPatrick Mooney 		while (br->br_resid > 0) {
3444c87aefeSPatrick Mooney 			len = MIN(br->br_resid, MAXPHYS);
3454c87aefeSPatrick Mooney 			boff = 0;
3464c87aefeSPatrick Mooney 			do {
3474c87aefeSPatrick Mooney 				clen = MIN(len - boff, br->br_iov[i].iov_len -
3484c87aefeSPatrick Mooney 				    voff);
3494c87aefeSPatrick Mooney 				memcpy(buf + boff,
35059d65d31SAndy Fiddaman 				    (uint8_t *)br->br_iov[i].iov_base + voff,
35159d65d31SAndy Fiddaman 				    clen);
3524c87aefeSPatrick Mooney 				if (clen < br->br_iov[i].iov_len - voff)
3534c87aefeSPatrick Mooney 					voff += clen;
3544c87aefeSPatrick Mooney 				else {
3554c87aefeSPatrick Mooney 					i++;
3564c87aefeSPatrick Mooney 					voff = 0;
3574c87aefeSPatrick Mooney 				}
3584c87aefeSPatrick Mooney 				boff += clen;
3594c87aefeSPatrick Mooney 			} while (boff < len);
36059d65d31SAndy Fiddaman 
36159d65d31SAndy Fiddaman 			n = pwrite(bc->bc_fd, buf, len, br->br_offset + off);
36259d65d31SAndy Fiddaman 			if (n < 0) {
363bf21cd93STycho Nightingale 				err = errno;
364bf21cd93STycho Nightingale 				break;
3654c87aefeSPatrick Mooney 			}
36659d65d31SAndy Fiddaman 			off += n;
36759d65d31SAndy Fiddaman 			br->br_resid -= n;
3684c87aefeSPatrick Mooney 		}
3694c87aefeSPatrick Mooney 		break;
370bf21cd93STycho Nightingale 	case BOP_FLUSH:
3714f3f3e9aSAndy Fiddaman 		err = blockif_flush_bc(bc);
3724c87aefeSPatrick Mooney 		break;
3734c87aefeSPatrick Mooney 	case BOP_DELETE:
3744c87aefeSPatrick Mooney 		if (!bc->bc_candelete)
3754c87aefeSPatrick Mooney 			err = EOPNOTSUPP;
3764c87aefeSPatrick Mooney 		else if (bc->bc_rdonly)
3774c87aefeSPatrick Mooney 			err = EROFS;
3784c87aefeSPatrick Mooney #ifdef	__FreeBSD__
3794c87aefeSPatrick Mooney 		else if (bc->bc_ischr) {
3804c87aefeSPatrick Mooney 			arg[0] = br->br_offset;
3814c87aefeSPatrick Mooney 			arg[1] = br->br_resid;
3824c87aefeSPatrick Mooney 			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
3834c87aefeSPatrick Mooney 				err = errno;
3844c87aefeSPatrick Mooney 			else
3854c87aefeSPatrick Mooney 				br->br_resid = 0;
386b0de25cbSAndy Fiddaman 		} else {
387b0de25cbSAndy Fiddaman 			range.r_offset = br->br_offset;
388b0de25cbSAndy Fiddaman 			range.r_len = br->br_resid;
389b0de25cbSAndy Fiddaman 
390b0de25cbSAndy Fiddaman 			while (range.r_len > 0) {
391b0de25cbSAndy Fiddaman 				if (fspacectl(bc->bc_fd, SPACECTL_DEALLOC,
392b0de25cbSAndy Fiddaman 				    &range, 0, &range) != 0) {
393b0de25cbSAndy Fiddaman 					err = errno;
394b0de25cbSAndy Fiddaman 					break;
3954c87aefeSPatrick Mooney 				}
396b0de25cbSAndy Fiddaman 			}
397b0de25cbSAndy Fiddaman 			if (err == 0)
398b0de25cbSAndy Fiddaman 				br->br_resid = 0;
399b0de25cbSAndy Fiddaman 		}
400282a8ecbSJason King #else
401282a8ecbSJason King 		else if (bc->bc_ischr) {
402282a8ecbSJason King 			dkioc_free_list_t dfl = {
403282a8ecbSJason King 				.dfl_num_exts = 1,
404282a8ecbSJason King 				.dfl_offset = 0,
405282a8ecbSJason King 				.dfl_flags = 0,
406282a8ecbSJason King 				.dfl_exts = {
407282a8ecbSJason King 					{
408282a8ecbSJason King 						.dfle_start = br->br_offset,
409282a8ecbSJason King 						.dfle_length = br->br_resid
410282a8ecbSJason King 					}
411282a8ecbSJason King 				}
412282a8ecbSJason King 			};
413282a8ecbSJason King 
414282a8ecbSJason King 			if (ioctl(bc->bc_fd, DKIOCFREE, &dfl))
415282a8ecbSJason King 				err = errno;
416282a8ecbSJason King 			else
417282a8ecbSJason King 				br->br_resid = 0;
418282a8ecbSJason King 		} else {
419282a8ecbSJason King 			struct flock fl = {
420282a8ecbSJason King 				.l_whence = 0,
421282a8ecbSJason King 				.l_type = F_WRLCK,
422282a8ecbSJason King 				.l_start = br->br_offset,
423282a8ecbSJason King 				.l_len = br->br_resid
424282a8ecbSJason King 			};
425282a8ecbSJason King 
426282a8ecbSJason King 			if (fcntl(bc->bc_fd, F_FREESP, &fl))
427282a8ecbSJason King 				err = errno;
428282a8ecbSJason King 			else
429282a8ecbSJason King 				br->br_resid = 0;
430282a8ecbSJason King 		}
431282a8ecbSJason King #endif
432bf21cd93STycho Nightingale 		break;
433bf21cd93STycho Nightingale 	default:
434bf21cd93STycho Nightingale 		err = EINVAL;
435bf21cd93STycho Nightingale 		break;
436bf21cd93STycho Nightingale 	}
437bf21cd93STycho Nightingale 
438bf21cd93STycho Nightingale 	be->be_status = BST_DONE;
439bf21cd93STycho Nightingale 
440bf21cd93STycho Nightingale 	(*br->br_callback)(br, err);
441bf21cd93STycho Nightingale }
442bf21cd93STycho Nightingale 
4434f3f3e9aSAndy Fiddaman static inline bool
blockif_empty(const struct blockif_ctxt * bc)4444f3f3e9aSAndy Fiddaman blockif_empty(const struct blockif_ctxt *bc)
4454f3f3e9aSAndy Fiddaman {
4464f3f3e9aSAndy Fiddaman 	return (TAILQ_EMPTY(&bc->bc_pendq) && TAILQ_EMPTY(&bc->bc_busyq));
4474f3f3e9aSAndy Fiddaman }
4484f3f3e9aSAndy Fiddaman 
449bf21cd93STycho Nightingale static void *
blockif_thr(void * arg)450bf21cd93STycho Nightingale blockif_thr(void *arg)
451bf21cd93STycho Nightingale {
452bf21cd93STycho Nightingale 	struct blockif_ctxt *bc;
453bf21cd93STycho Nightingale 	struct blockif_elem *be;
4544c87aefeSPatrick Mooney 	pthread_t t;
4554c87aefeSPatrick Mooney 	uint8_t *buf;
456bf21cd93STycho Nightingale 
457bf21cd93STycho Nightingale 	bc = arg;
4584c87aefeSPatrick Mooney 	if (bc->bc_isgeom)
4594c87aefeSPatrick Mooney 		buf = malloc(MAXPHYS);
4604c87aefeSPatrick Mooney 	else
4614c87aefeSPatrick Mooney 		buf = NULL;
4624c87aefeSPatrick Mooney 	t = pthread_self();
463bf21cd93STycho Nightingale 
464bf21cd93STycho Nightingale 	pthread_mutex_lock(&bc->bc_mtx);
4654c87aefeSPatrick Mooney 	for (;;) {
4664c87aefeSPatrick Mooney 		while (blockif_dequeue(bc, t, &be)) {
467bf21cd93STycho Nightingale 			pthread_mutex_unlock(&bc->bc_mtx);
4684c87aefeSPatrick Mooney 			blockif_proc(bc, be, buf);
469bf21cd93STycho Nightingale 			pthread_mutex_lock(&bc->bc_mtx);
470bf21cd93STycho Nightingale 			blockif_complete(bc, be);
471bf21cd93STycho Nightingale 		}
4724c87aefeSPatrick Mooney 		/* Check ctxt status here to see if exit requested */
4734c87aefeSPatrick Mooney 		if (bc->bc_closing)
4744c87aefeSPatrick Mooney 			break;
4754f3f3e9aSAndy Fiddaman 
476bf21cd93STycho Nightingale 		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
4774c87aefeSPatrick Mooney 	}
478bf21cd93STycho Nightingale 	pthread_mutex_unlock(&bc->bc_mtx);
479bf21cd93STycho Nightingale 
4804c87aefeSPatrick Mooney 	if (buf)
4814c87aefeSPatrick Mooney 		free(buf);
482bf21cd93STycho Nightingale 	pthread_exit(NULL);
483bf21cd93STycho Nightingale 	return (NULL);
484bf21cd93STycho Nightingale }
485bf21cd93STycho Nightingale 
486bf21cd93STycho Nightingale #ifdef	__FreeBSD__
487bf21cd93STycho Nightingale static void
blockif_sigcont_handler(int signal __unused,enum ev_type type __unused,void * arg __unused)48859d65d31SAndy Fiddaman blockif_sigcont_handler(int signal __unused, enum ev_type type __unused,
48959d65d31SAndy Fiddaman     void *arg __unused)
490bf21cd93STycho Nightingale #else
491bf21cd93STycho Nightingale static void
49259d65d31SAndy Fiddaman blockif_sigcont_handler(int signal __unused)
493bf21cd93STycho Nightingale #endif
494bf21cd93STycho Nightingale {
495bf21cd93STycho Nightingale 	struct blockif_sig_elem *bse;
496bf21cd93STycho Nightingale 
497bf21cd93STycho Nightingale 	for (;;) {
498bf21cd93STycho Nightingale 		/*
499bf21cd93STycho Nightingale 		 * Process the entire list even if not intended for
500bf21cd93STycho Nightingale 		 * this thread.
501bf21cd93STycho Nightingale 		 */
502bf21cd93STycho Nightingale 		do {
503bf21cd93STycho Nightingale 			bse = blockif_bse_head;
504bf21cd93STycho Nightingale 			if (bse == NULL)
505bf21cd93STycho Nightingale 				return;
506bf21cd93STycho Nightingale 		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
507bf21cd93STycho Nightingale 					    (uintptr_t)bse,
508bf21cd93STycho Nightingale 					    (uintptr_t)bse->bse_next));
509bf21cd93STycho Nightingale 
510bf21cd93STycho Nightingale 		pthread_mutex_lock(&bse->bse_mtx);
511bf21cd93STycho Nightingale 		bse->bse_pending = 0;
512bf21cd93STycho Nightingale 		pthread_cond_signal(&bse->bse_cond);
513bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bse->bse_mtx);
514bf21cd93STycho Nightingale 	}
515bf21cd93STycho Nightingale }
516bf21cd93STycho Nightingale 
517bf21cd93STycho Nightingale static void
blockif_init(void)518bf21cd93STycho Nightingale blockif_init(void)
519bf21cd93STycho Nightingale {
520bf21cd93STycho Nightingale #ifdef	__FreeBSD__
521bf21cd93STycho Nightingale 	mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
522bf21cd93STycho Nightingale 	(void) signal(SIGCONT, SIG_IGN);
523bf21cd93STycho Nightingale #else
524bf21cd93STycho Nightingale 	(void) sigset(SIGCONT, blockif_sigcont_handler);
525bf21cd93STycho Nightingale #endif
526bf21cd93STycho Nightingale }
527bf21cd93STycho Nightingale 
5282b948146SAndy Fiddaman int
blockif_legacy_config(nvlist_t * nvl,const char * opts)5292b948146SAndy Fiddaman blockif_legacy_config(nvlist_t *nvl, const char *opts)
5302b948146SAndy Fiddaman {
5312b948146SAndy Fiddaman 	char *cp, *path;
5322b948146SAndy Fiddaman 
5332b948146SAndy Fiddaman 	if (opts == NULL)
5342b948146SAndy Fiddaman 		return (0);
5352b948146SAndy Fiddaman 
5362b948146SAndy Fiddaman 	cp = strchr(opts, ',');
5372b948146SAndy Fiddaman 	if (cp == NULL) {
5382b948146SAndy Fiddaman 		set_config_value_node(nvl, "path", opts);
5392b948146SAndy Fiddaman 		return (0);
5402b948146SAndy Fiddaman 	}
5412b948146SAndy Fiddaman 	path = strndup(opts, cp - opts);
5422b948146SAndy Fiddaman 	set_config_value_node(nvl, "path", path);
5432b948146SAndy Fiddaman 	free(path);
5442b948146SAndy Fiddaman 	return (pci_parse_legacy_config(nvl, cp + 1));
5452b948146SAndy Fiddaman }
5462b948146SAndy Fiddaman 
547*32640292SAndy Fiddaman int
blockif_add_boot_device(struct pci_devinst * const pi,struct blockif_ctxt * const bc)548*32640292SAndy Fiddaman blockif_add_boot_device(struct pci_devinst *const pi,
549*32640292SAndy Fiddaman     struct blockif_ctxt *const bc)
550*32640292SAndy Fiddaman {
551*32640292SAndy Fiddaman 	if (bc->bc_bootindex < 0)
552*32640292SAndy Fiddaman 		return (0);
553*32640292SAndy Fiddaman 
554*32640292SAndy Fiddaman 	return (pci_emul_add_boot_device(pi, bc->bc_bootindex));
555*32640292SAndy Fiddaman }
556*32640292SAndy Fiddaman 
557bf21cd93STycho Nightingale struct blockif_ctxt *
blockif_open(nvlist_t * nvl,const char * ident)5582b948146SAndy Fiddaman blockif_open(nvlist_t *nvl, const char *ident)
559bf21cd93STycho Nightingale {
560bf21cd93STycho Nightingale 	char tname[MAXCOMLEN + 1];
5614c87aefeSPatrick Mooney #ifdef	__FreeBSD__
5624c87aefeSPatrick Mooney 	char name[MAXPATHLEN];
5634c87aefeSPatrick Mooney #endif
564*32640292SAndy Fiddaman 	const char *path, *pssval, *ssval, *bootindex_val;
5652b948146SAndy Fiddaman 	char *cp;
566bf21cd93STycho Nightingale 	struct blockif_ctxt *bc;
567bf21cd93STycho Nightingale 	struct stat sbuf;
5684c87aefeSPatrick Mooney #ifdef	__FreeBSD__
5694c87aefeSPatrick Mooney 	struct diocgattr_arg arg;
5704c87aefeSPatrick Mooney #else
5714c87aefeSPatrick Mooney 	enum blockif_wce wce = WCE_NONE;
5724c87aefeSPatrick Mooney #endif
5734c87aefeSPatrick Mooney 	off_t size, psectsz, psectoff;
574bf21cd93STycho Nightingale 	int extra, fd, i, sectsz;
5752b948146SAndy Fiddaman 	int ro, candelete, geom, ssopt, pssopt;
576282a8ecbSJason King 	int nodelete;
577*32640292SAndy Fiddaman 	int bootindex;
578282a8ecbSJason King 
5794c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
5804c87aefeSPatrick Mooney 	cap_rights_t rights;
581d7b72f7bSAndy Fiddaman 	cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE, DIOCGMEDIASIZE };
5824c87aefeSPatrick Mooney #endif
583bf21cd93STycho Nightingale 
584bf21cd93STycho Nightingale 	pthread_once(&blockif_once, blockif_init);
585bf21cd93STycho Nightingale 
5864c87aefeSPatrick Mooney 	fd = -1;
5872b948146SAndy Fiddaman 	extra = 0;
5884c87aefeSPatrick Mooney 	ssopt = 0;
5892b948146SAndy Fiddaman #ifndef __FreeBSD__
5902b948146SAndy Fiddaman 	pssopt = 0;
5912b948146SAndy Fiddaman #endif
592bf21cd93STycho Nightingale 	ro = 0;
593282a8ecbSJason King 	nodelete = 0;
594*32640292SAndy Fiddaman 	bootindex = -1;
595bf21cd93STycho Nightingale 
5962b948146SAndy Fiddaman 	if (get_config_bool_node_default(nvl, "nocache", false))
5972b948146SAndy Fiddaman 		extra |= O_DIRECT;
5982b948146SAndy Fiddaman 	if (get_config_bool_node_default(nvl, "nodelete", false))
599282a8ecbSJason King 		nodelete = 1;
6002b948146SAndy Fiddaman 	if (get_config_bool_node_default(nvl, "sync", false) ||
6012b948146SAndy Fiddaman 	    get_config_bool_node_default(nvl, "direct", false))
6022b948146SAndy Fiddaman 		extra |= O_SYNC;
6032b948146SAndy Fiddaman 	if (get_config_bool_node_default(nvl, "ro", false))
604bf21cd93STycho Nightingale 		ro = 1;
6052b948146SAndy Fiddaman 	ssval = get_config_value_node(nvl, "sectorsize");
6062b948146SAndy Fiddaman 	if (ssval != NULL) {
6072b948146SAndy Fiddaman 		ssopt = strtol(ssval, &cp, 10);
6082b948146SAndy Fiddaman 		if (cp == ssval) {
6092b948146SAndy Fiddaman 			EPRINTLN("Invalid sector size \"%s\"", ssval);
6102b948146SAndy Fiddaman 			goto err;
6112b948146SAndy Fiddaman 		}
6122b948146SAndy Fiddaman 		if (*cp == '\0') {
6134c87aefeSPatrick Mooney 			pssopt = ssopt;
6142b948146SAndy Fiddaman 		} else if (*cp == '/') {
6152b948146SAndy Fiddaman 			pssval = cp + 1;
6162b948146SAndy Fiddaman 			pssopt = strtol(pssval, &cp, 10);
6172b948146SAndy Fiddaman 			if (cp == pssval || *cp != '\0') {
6182b948146SAndy Fiddaman 				EPRINTLN("Invalid sector size \"%s\"", ssval);
6192b948146SAndy Fiddaman 				goto err;
6202b948146SAndy Fiddaman 			}
6212b948146SAndy Fiddaman 		} else {
6222b948146SAndy Fiddaman 			EPRINTLN("Invalid sector size \"%s\"", ssval);
6234c87aefeSPatrick Mooney 			goto err;
6244c87aefeSPatrick Mooney 		}
625bf21cd93STycho Nightingale 	}
626bf21cd93STycho Nightingale 
627*32640292SAndy Fiddaman 	bootindex_val = get_config_value_node(nvl, "bootindex");
628*32640292SAndy Fiddaman 	if (bootindex_val != NULL) {
629*32640292SAndy Fiddaman 		bootindex = atoi(bootindex_val);
630*32640292SAndy Fiddaman 	}
631*32640292SAndy Fiddaman 
6322b948146SAndy Fiddaman 	path = get_config_value_node(nvl, "path");
6332b948146SAndy Fiddaman 	if (path == NULL) {
6342b948146SAndy Fiddaman 		EPRINTLN("Missing \"path\" for block device.");
6352b948146SAndy Fiddaman 		goto err;
6362b948146SAndy Fiddaman 	}
637bf21cd93STycho Nightingale 
6382b948146SAndy Fiddaman 	fd = open(path, (ro ? O_RDONLY : O_RDWR) | extra);
639bf21cd93STycho Nightingale 	if (fd < 0 && !ro) {
640bf21cd93STycho Nightingale 		/* Attempt a r/w fail with a r/o open */
6412b948146SAndy Fiddaman 		fd = open(path, O_RDONLY | extra);
642bf21cd93STycho Nightingale 		ro = 1;
643bf21cd93STycho Nightingale 	}
644bf21cd93STycho Nightingale 
645bf21cd93STycho Nightingale 	if (fd < 0) {
6462b948146SAndy Fiddaman 		warn("Could not open backing file: %s", path);
6474c87aefeSPatrick Mooney 		goto err;
648bf21cd93STycho Nightingale 	}
649bf21cd93STycho Nightingale 
650bf21cd93STycho Nightingale         if (fstat(fd, &sbuf) < 0) {
6512b948146SAndy Fiddaman 		warn("Could not stat backing file %s", path);
6524c87aefeSPatrick Mooney 		goto err;
653bf21cd93STycho Nightingale         }
654bf21cd93STycho Nightingale 
6554c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
6564c87aefeSPatrick Mooney 	cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK,
6576dc98349SAndy Fiddaman 	    CAP_WRITE, CAP_FSTAT, CAP_EVENT, CAP_FPATHCONF);
6584c87aefeSPatrick Mooney 	if (ro)
6594c87aefeSPatrick Mooney 		cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE);
6604c87aefeSPatrick Mooney 
6614c87aefeSPatrick Mooney 	if (caph_rights_limit(fd, &rights) == -1)
6624c87aefeSPatrick Mooney 		errx(EX_OSERR, "Unable to apply rights for sandbox");
6634c87aefeSPatrick Mooney #endif
6644c87aefeSPatrick Mooney 
665bf21cd93STycho Nightingale         /*
666bf21cd93STycho Nightingale 	 * Deal with raw devices
667bf21cd93STycho Nightingale 	 */
668bf21cd93STycho Nightingale         size = sbuf.st_size;
669bf21cd93STycho Nightingale 	sectsz = DEV_BSIZE;
6704c87aefeSPatrick Mooney 	psectsz = psectoff = 0;
6714c87aefeSPatrick Mooney 	candelete = geom = 0;
672bf21cd93STycho Nightingale #ifdef	__FreeBSD__
673bf21cd93STycho Nightingale 	if (S_ISCHR(sbuf.st_mode)) {
674bf21cd93STycho Nightingale 		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
675bf21cd93STycho Nightingale 		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
676bf21cd93STycho Nightingale 			perror("Could not fetch dev blk/sector size");
6774c87aefeSPatrick Mooney 			goto err;
678bf21cd93STycho Nightingale 		}
679bf21cd93STycho Nightingale 		assert(size != 0);
680bf21cd93STycho Nightingale 		assert(sectsz != 0);
6814c87aefeSPatrick Mooney 		if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
6824c87aefeSPatrick Mooney 			ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
6834c87aefeSPatrick Mooney 		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
6844c87aefeSPatrick Mooney 		arg.len = sizeof(arg.value.i);
685282a8ecbSJason King 		if (nodelete == 0 && ioctl(fd, DIOCGATTR, &arg) == 0)
6864c87aefeSPatrick Mooney 			candelete = arg.value.i;
6874c87aefeSPatrick Mooney 		if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
6884c87aefeSPatrick Mooney 			geom = 1;
6894c87aefeSPatrick Mooney 	} else {
6904c87aefeSPatrick Mooney 		psectsz = sbuf.st_blksize;
691b0de25cbSAndy Fiddaman 		/* Avoid fallback implementation */
692b0de25cbSAndy Fiddaman 		candelete = fpathconf(fd, _PC_DEALLOC_PRESENT) == 1;
6934c87aefeSPatrick Mooney 	}
6944c87aefeSPatrick Mooney #else
6954c87aefeSPatrick Mooney 	psectsz = sbuf.st_blksize;
6964c87aefeSPatrick Mooney 	if (S_ISCHR(sbuf.st_mode)) {
6974c87aefeSPatrick Mooney 		struct dk_minfo_ext dkmext;
6984c87aefeSPatrick Mooney 		int wce_val;
6994c87aefeSPatrick Mooney 
700d7b72f7bSAndy Fiddaman 		/* Look for a more accurate physical block/media size */
7014c87aefeSPatrick Mooney 		if (ioctl(fd, DKIOCGMEDIAINFOEXT, &dkmext) == 0) {
7024c87aefeSPatrick Mooney 			psectsz = dkmext.dki_pbsize;
703d7b72f7bSAndy Fiddaman 			size = dkmext.dki_lbsize * dkmext.dki_capacity;
7044c87aefeSPatrick Mooney 		}
7054c87aefeSPatrick Mooney 		/* See if a configurable write cache is present and working */
7064c87aefeSPatrick Mooney 		if (ioctl(fd, DKIOCGETWCE, &wce_val) == 0) {
7074c87aefeSPatrick Mooney 			/*
7084c87aefeSPatrick Mooney 			 * If WCE is already active, disable it until the
7094c87aefeSPatrick Mooney 			 * specific device driver calls for its return.  If it
7104c87aefeSPatrick Mooney 			 * is not active, toggle it on and off to verify that
7114c87aefeSPatrick Mooney 			 * such actions are possible.
7124c87aefeSPatrick Mooney 			 */
7134c87aefeSPatrick Mooney 			if (wce_val != 0) {
7144c87aefeSPatrick Mooney 				wce_val = 0;
7154c87aefeSPatrick Mooney 				/*
7164c87aefeSPatrick Mooney 				 * Inability to disable the cache is a threat
7174c87aefeSPatrick Mooney 				 * to data durability.
7184c87aefeSPatrick Mooney 				 */
7194c87aefeSPatrick Mooney 				assert(ioctl(fd, DKIOCSETWCE, &wce_val) == 0);
7204c87aefeSPatrick Mooney 				wce = WCE_IOCTL;
7214c87aefeSPatrick Mooney 			} else {
7224c87aefeSPatrick Mooney 				int r1, r2;
7234c87aefeSPatrick Mooney 
7244c87aefeSPatrick Mooney 				wce_val = 1;
7254c87aefeSPatrick Mooney 				r1 = ioctl(fd, DKIOCSETWCE, &wce_val);
7264c87aefeSPatrick Mooney 				wce_val = 0;
7274c87aefeSPatrick Mooney 				r2 = ioctl(fd, DKIOCSETWCE, &wce_val);
7284c87aefeSPatrick Mooney 
7294c87aefeSPatrick Mooney 				if (r1 == 0 && r2 == 0) {
7304c87aefeSPatrick Mooney 					wce = WCE_IOCTL;
7314c87aefeSPatrick Mooney 				} else {
7324c87aefeSPatrick Mooney 					/*
7334c87aefeSPatrick Mooney 					 * If the cache toggle was not
7344c87aefeSPatrick Mooney 					 * successful, ensure that the cache
7354c87aefeSPatrick Mooney 					 * was not left enabled.
7364c87aefeSPatrick Mooney 					 */
7374c87aefeSPatrick Mooney 					assert(r1 != 0);
7384c87aefeSPatrick Mooney 				}
7394c87aefeSPatrick Mooney 			}
7404c87aefeSPatrick Mooney 		}
741282a8ecbSJason King 
742282a8ecbSJason King 		if (nodelete == 0 && ioctl(fd, DKIOC_CANFREE, &candelete))
743282a8ecbSJason King 			candelete = 0;
744282a8ecbSJason King 
7454c87aefeSPatrick Mooney 	} else {
7464c87aefeSPatrick Mooney 		int flags;
7474c87aefeSPatrick Mooney 
7484c87aefeSPatrick Mooney 		if ((flags = fcntl(fd, F_GETFL)) >= 0) {
7494c87aefeSPatrick Mooney 			flags |= O_DSYNC;
7504c87aefeSPatrick Mooney 			if (fcntl(fd, F_SETFL, flags) != -1) {
7514c87aefeSPatrick Mooney 				wce = WCE_FCNTL;
7524c87aefeSPatrick Mooney 			}
7534c87aefeSPatrick Mooney 		}
754282a8ecbSJason King 
755282a8ecbSJason King 		/*
756282a8ecbSJason King 		 * We don't have a way to discover if a file supports the
757282a8ecbSJason King 		 * FREESP fcntl cmd (other than trying it).  However,
758282a8ecbSJason King 		 * zfs, ufs, tmpfs, and udfs all support the FREESP fcntl cmd.
759282a8ecbSJason King 		 * NFSv3 and NFSv4 also forward the FREESP request
760282a8ecbSJason King 		 * to the server, so we always enable it for file based
761282a8ecbSJason King 		 * volumes. Anyone trying to run volumes on an unsupported
762282a8ecbSJason King 		 * configuration is on their own, and should be prepared
763282a8ecbSJason King 		 * for the requests to fail.
764282a8ecbSJason King 		 */
765282a8ecbSJason King 		if (nodelete == 0)
766282a8ecbSJason King 			candelete = 1;
767bf21cd93STycho Nightingale 	}
768bf21cd93STycho Nightingale #endif
769bf21cd93STycho Nightingale 
7704c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
7714c87aefeSPatrick Mooney 	if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1)
7724c87aefeSPatrick Mooney 		errx(EX_OSERR, "Unable to apply rights for sandbox");
7734c87aefeSPatrick Mooney #endif
7744c87aefeSPatrick Mooney 
7754c87aefeSPatrick Mooney 	if (ssopt != 0) {
7764c87aefeSPatrick Mooney 		if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
7774c87aefeSPatrick Mooney 		    ssopt > pssopt) {
778154972afSPatrick Mooney 			EPRINTLN("Invalid sector size %d/%d",
7794c87aefeSPatrick Mooney 			    ssopt, pssopt);
7804c87aefeSPatrick Mooney 			goto err;
7814c87aefeSPatrick Mooney 		}
7824c87aefeSPatrick Mooney 
7834c87aefeSPatrick Mooney 		/*
7844c87aefeSPatrick Mooney 		 * Some backend drivers (e.g. cd0, ada0) require that the I/O
7854c87aefeSPatrick Mooney 		 * size be a multiple of the device's sector size.
7864c87aefeSPatrick Mooney 		 *
7874c87aefeSPatrick Mooney 		 * Validate that the emulated sector size complies with this
7884c87aefeSPatrick Mooney 		 * requirement.
7894c87aefeSPatrick Mooney 		 */
7904c87aefeSPatrick Mooney 		if (S_ISCHR(sbuf.st_mode)) {
7914c87aefeSPatrick Mooney 			if (ssopt < sectsz || (ssopt % sectsz) != 0) {
792154972afSPatrick Mooney 				EPRINTLN("Sector size %d incompatible "
793154972afSPatrick Mooney 				    "with underlying device sector size %d",
7944c87aefeSPatrick Mooney 				    ssopt, sectsz);
7954c87aefeSPatrick Mooney 				goto err;
7964c87aefeSPatrick Mooney 			}
7974c87aefeSPatrick Mooney 		}
7984c87aefeSPatrick Mooney 
7994c87aefeSPatrick Mooney 		sectsz = ssopt;
8004c87aefeSPatrick Mooney 		psectsz = pssopt;
8014c87aefeSPatrick Mooney 		psectoff = 0;
8024c87aefeSPatrick Mooney 	}
8034c87aefeSPatrick Mooney 
804bf21cd93STycho Nightingale 	bc = calloc(1, sizeof(struct blockif_ctxt));
805bf21cd93STycho Nightingale 	if (bc == NULL) {
8064c87aefeSPatrick Mooney 		perror("calloc");
8074c87aefeSPatrick Mooney 		goto err;
808bf21cd93STycho Nightingale 	}
809bf21cd93STycho Nightingale 
810bf21cd93STycho Nightingale 	bc->bc_magic = BLOCKIF_SIG;
811bf21cd93STycho Nightingale 	bc->bc_fd = fd;
8124c87aefeSPatrick Mooney 	bc->bc_ischr = S_ISCHR(sbuf.st_mode);
8134c87aefeSPatrick Mooney 	bc->bc_isgeom = geom;
8144c87aefeSPatrick Mooney 	bc->bc_candelete = candelete;
8154c87aefeSPatrick Mooney #ifndef __FreeBSD__
8164c87aefeSPatrick Mooney 	bc->bc_wce = wce;
8174c87aefeSPatrick Mooney #endif
818bf21cd93STycho Nightingale 	bc->bc_rdonly = ro;
819bf21cd93STycho Nightingale 	bc->bc_size = size;
820bf21cd93STycho Nightingale 	bc->bc_sectsz = sectsz;
8214c87aefeSPatrick Mooney 	bc->bc_psectsz = psectsz;
8224c87aefeSPatrick Mooney 	bc->bc_psectoff = psectoff;
823bf21cd93STycho Nightingale 	pthread_mutex_init(&bc->bc_mtx, NULL);
824bf21cd93STycho Nightingale 	pthread_cond_init(&bc->bc_cond, NULL);
825bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_freeq);
826bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_pendq);
827bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_busyq);
828*32640292SAndy Fiddaman 	bc->bc_bootindex = bootindex;
829bf21cd93STycho Nightingale 	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
830bf21cd93STycho Nightingale 		bc->bc_reqs[i].be_status = BST_FREE;
831bf21cd93STycho Nightingale 		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
832bf21cd93STycho Nightingale 	}
833bf21cd93STycho Nightingale 
8344c87aefeSPatrick Mooney 	for (i = 0; i < BLOCKIF_NUMTHR; i++) {
8354c87aefeSPatrick Mooney 		pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
8364c87aefeSPatrick Mooney 		snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
8374c87aefeSPatrick Mooney 		pthread_set_name_np(bc->bc_btid[i], tname);
8384c87aefeSPatrick Mooney 	}
839bf21cd93STycho Nightingale 
840bf21cd93STycho Nightingale 	return (bc);
8414c87aefeSPatrick Mooney err:
8424c87aefeSPatrick Mooney 	if (fd >= 0)
8434c87aefeSPatrick Mooney 		close(fd);
8444c87aefeSPatrick Mooney 	return (NULL);
845bf21cd93STycho Nightingale }
846bf21cd93STycho Nightingale 
847b0de25cbSAndy Fiddaman static void
blockif_resized(int fd,enum ev_type type __unused,void * arg)84859d65d31SAndy Fiddaman blockif_resized(int fd, enum ev_type type __unused, void *arg)
849b0de25cbSAndy Fiddaman {
850b0de25cbSAndy Fiddaman 	struct blockif_ctxt *bc;
851b0de25cbSAndy Fiddaman 	struct stat sb;
852d7b72f7bSAndy Fiddaman 	off_t mediasize;
853b0de25cbSAndy Fiddaman 
854b0de25cbSAndy Fiddaman 	if (fstat(fd, &sb) != 0)
855b0de25cbSAndy Fiddaman 		return;
856b0de25cbSAndy Fiddaman 
857d7b72f7bSAndy Fiddaman #ifdef __FreeBSD__
858d7b72f7bSAndy Fiddaman 	if (S_ISCHR(sb.st_mode)) {
859d7b72f7bSAndy Fiddaman 		if (ioctl(fd, DIOCGMEDIASIZE, &mediasize) < 0) {
860d7b72f7bSAndy Fiddaman 			EPRINTLN("blockif_resized: get mediasize failed: %s",
861d7b72f7bSAndy Fiddaman 			    strerror(errno));
862d7b72f7bSAndy Fiddaman 			return;
863d7b72f7bSAndy Fiddaman 		}
864d7b72f7bSAndy Fiddaman 	} else
865d7b72f7bSAndy Fiddaman 		mediasize = sb.st_size;
866d7b72f7bSAndy Fiddaman #else
867d7b72f7bSAndy Fiddaman 	mediasize = sb.st_size;
868d7b72f7bSAndy Fiddaman 	if (S_ISCHR(sb.st_mode)) {
869d7b72f7bSAndy Fiddaman 		struct dk_minfo dkm;
870d7b72f7bSAndy Fiddaman 
871d7b72f7bSAndy Fiddaman 		if (ioctl(fd, DKIOCGMEDIAINFO, &dkm) == 0)
872d7b72f7bSAndy Fiddaman 			mediasize = dkm.dki_lbsize * dkm.dki_capacity;
873d7b72f7bSAndy Fiddaman 	}
874d7b72f7bSAndy Fiddaman #endif
875d7b72f7bSAndy Fiddaman 
876b0de25cbSAndy Fiddaman 	bc = arg;
877b0de25cbSAndy Fiddaman 	pthread_mutex_lock(&bc->bc_mtx);
878d7b72f7bSAndy Fiddaman 	if (mediasize != bc->bc_size) {
879d7b72f7bSAndy Fiddaman 		bc->bc_size = mediasize;
880b0de25cbSAndy Fiddaman 		bc->bc_resize_cb(bc, bc->bc_resize_cb_arg, bc->bc_size);
881b0de25cbSAndy Fiddaman 	}
882b0de25cbSAndy Fiddaman 	pthread_mutex_unlock(&bc->bc_mtx);
883b0de25cbSAndy Fiddaman }
884b0de25cbSAndy Fiddaman 
885b0de25cbSAndy Fiddaman int
blockif_register_resize_callback(struct blockif_ctxt * bc,blockif_resize_cb * cb,void * cb_arg)886b0de25cbSAndy Fiddaman blockif_register_resize_callback(struct blockif_ctxt *bc, blockif_resize_cb *cb,
887b0de25cbSAndy Fiddaman     void *cb_arg)
888b0de25cbSAndy Fiddaman {
889b0de25cbSAndy Fiddaman 	struct stat sb;
890b0de25cbSAndy Fiddaman 	int err;
891b0de25cbSAndy Fiddaman 
892b0de25cbSAndy Fiddaman 	if (cb == NULL)
893b0de25cbSAndy Fiddaman 		return (EINVAL);
894b0de25cbSAndy Fiddaman 
89559d65d31SAndy Fiddaman 	err = 0;
89659d65d31SAndy Fiddaman 
897b0de25cbSAndy Fiddaman 	pthread_mutex_lock(&bc->bc_mtx);
898b0de25cbSAndy Fiddaman 	if (bc->bc_resize_cb != NULL) {
899b0de25cbSAndy Fiddaman 		err = EBUSY;
900b0de25cbSAndy Fiddaman 		goto out;
901b0de25cbSAndy Fiddaman 	}
902b0de25cbSAndy Fiddaman 
903b0de25cbSAndy Fiddaman 	assert(bc->bc_closing == 0);
904b0de25cbSAndy Fiddaman 
905b0de25cbSAndy Fiddaman 	if (fstat(bc->bc_fd, &sb) != 0) {
906b0de25cbSAndy Fiddaman 		err = errno;
907b0de25cbSAndy Fiddaman 		goto out;
908b0de25cbSAndy Fiddaman 	}
909b0de25cbSAndy Fiddaman 
910b0de25cbSAndy Fiddaman 	bc->bc_resize_event = mevent_add_flags(bc->bc_fd, EVF_VNODE,
911b0de25cbSAndy Fiddaman 	    EVFF_ATTRIB, blockif_resized, bc);
912b0de25cbSAndy Fiddaman 	if (bc->bc_resize_event == NULL) {
913b0de25cbSAndy Fiddaman 		err = ENXIO;
914b0de25cbSAndy Fiddaman 		goto out;
915b0de25cbSAndy Fiddaman 	}
916b0de25cbSAndy Fiddaman 
917b0de25cbSAndy Fiddaman 	bc->bc_resize_cb = cb;
918b0de25cbSAndy Fiddaman 	bc->bc_resize_cb_arg = cb_arg;
919b0de25cbSAndy Fiddaman out:
920b0de25cbSAndy Fiddaman 	pthread_mutex_unlock(&bc->bc_mtx);
921b0de25cbSAndy Fiddaman 
922b0de25cbSAndy Fiddaman 	return (err);
923b0de25cbSAndy Fiddaman }
924b0de25cbSAndy Fiddaman 
925bf21cd93STycho Nightingale static int
blockif_request(struct blockif_ctxt * bc,struct blockif_req * breq,enum blockop op)926bf21cd93STycho Nightingale blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
927bf21cd93STycho Nightingale 		enum blockop op)
928bf21cd93STycho Nightingale {
929bf21cd93STycho Nightingale 	int err;
930bf21cd93STycho Nightingale 
931bf21cd93STycho Nightingale 	err = 0;
932bf21cd93STycho Nightingale 
933bf21cd93STycho Nightingale 	pthread_mutex_lock(&bc->bc_mtx);
9344c87aefeSPatrick Mooney 	if (!TAILQ_EMPTY(&bc->bc_freeq)) {
935bf21cd93STycho Nightingale 		/*
936bf21cd93STycho Nightingale 		 * Enqueue and inform the block i/o thread
937bf21cd93STycho Nightingale 		 * that there is work available
938bf21cd93STycho Nightingale 		 */
9394c87aefeSPatrick Mooney 		if (blockif_enqueue(bc, breq, op))
940bf21cd93STycho Nightingale 			pthread_cond_signal(&bc->bc_cond);
941bf21cd93STycho Nightingale 	} else {
942bf21cd93STycho Nightingale 		/*
943bf21cd93STycho Nightingale 		 * Callers are not allowed to enqueue more than
944bf21cd93STycho Nightingale 		 * the specified blockif queue limit. Return an
945bf21cd93STycho Nightingale 		 * error to indicate that the queue length has been
946bf21cd93STycho Nightingale 		 * exceeded.
947bf21cd93STycho Nightingale 		 */
948bf21cd93STycho Nightingale 		err = E2BIG;
949bf21cd93STycho Nightingale 	}
950bf21cd93STycho Nightingale 	pthread_mutex_unlock(&bc->bc_mtx);
951bf21cd93STycho Nightingale 
952bf21cd93STycho Nightingale 	return (err);
953bf21cd93STycho Nightingale }
954bf21cd93STycho Nightingale 
955bf21cd93STycho Nightingale int
blockif_read(struct blockif_ctxt * bc,struct blockif_req * breq)956bf21cd93STycho Nightingale blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
957bf21cd93STycho Nightingale {
958bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
959bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_READ));
960bf21cd93STycho Nightingale }
961bf21cd93STycho Nightingale 
962bf21cd93STycho Nightingale int
blockif_write(struct blockif_ctxt * bc,struct blockif_req * breq)963bf21cd93STycho Nightingale blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
964bf21cd93STycho Nightingale {
965bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
966bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_WRITE));
967bf21cd93STycho Nightingale }
968bf21cd93STycho Nightingale 
969bf21cd93STycho Nightingale int
blockif_flush(struct blockif_ctxt * bc,struct blockif_req * breq)970bf21cd93STycho Nightingale blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
971bf21cd93STycho Nightingale {
972bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
973bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_FLUSH));
974bf21cd93STycho Nightingale }
975bf21cd93STycho Nightingale 
976bf21cd93STycho Nightingale int
blockif_delete(struct blockif_ctxt * bc,struct blockif_req * breq)9774c87aefeSPatrick Mooney blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
9784c87aefeSPatrick Mooney {
9794c87aefeSPatrick Mooney 	assert(bc->bc_magic == BLOCKIF_SIG);
9804c87aefeSPatrick Mooney 	return (blockif_request(bc, breq, BOP_DELETE));
9814c87aefeSPatrick Mooney }
9824c87aefeSPatrick Mooney 
9834c87aefeSPatrick Mooney int
blockif_cancel(struct blockif_ctxt * bc,struct blockif_req * breq)984bf21cd93STycho Nightingale blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
985bf21cd93STycho Nightingale {
986bf21cd93STycho Nightingale 	struct blockif_elem *be;
987bf21cd93STycho Nightingale 
988bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
989bf21cd93STycho Nightingale 
990bf21cd93STycho Nightingale 	pthread_mutex_lock(&bc->bc_mtx);
991bf21cd93STycho Nightingale 	/*
992bf21cd93STycho Nightingale 	 * Check pending requests.
993bf21cd93STycho Nightingale 	 */
994bf21cd93STycho Nightingale 	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
995bf21cd93STycho Nightingale 		if (be->be_req == breq)
996bf21cd93STycho Nightingale 			break;
997bf21cd93STycho Nightingale 	}
998bf21cd93STycho Nightingale 	if (be != NULL) {
999bf21cd93STycho Nightingale 		/*
1000bf21cd93STycho Nightingale 		 * Found it.
1001bf21cd93STycho Nightingale 		 */
10024c87aefeSPatrick Mooney 		blockif_complete(bc, be);
1003bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bc->bc_mtx);
1004bf21cd93STycho Nightingale 
1005bf21cd93STycho Nightingale 		return (0);
1006bf21cd93STycho Nightingale 	}
1007bf21cd93STycho Nightingale 
1008bf21cd93STycho Nightingale 	/*
1009bf21cd93STycho Nightingale 	 * Check in-flight requests.
1010bf21cd93STycho Nightingale 	 */
1011bf21cd93STycho Nightingale 	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
1012bf21cd93STycho Nightingale 		if (be->be_req == breq)
1013bf21cd93STycho Nightingale 			break;
1014bf21cd93STycho Nightingale 	}
1015bf21cd93STycho Nightingale 	if (be == NULL) {
1016bf21cd93STycho Nightingale 		/*
1017bf21cd93STycho Nightingale 		 * Didn't find it.
1018bf21cd93STycho Nightingale 		 */
1019bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bc->bc_mtx);
1020bf21cd93STycho Nightingale 		return (EINVAL);
1021bf21cd93STycho Nightingale 	}
1022bf21cd93STycho Nightingale 
1023bf21cd93STycho Nightingale 	/*
1024bf21cd93STycho Nightingale 	 * Interrupt the processing thread to force it return
1025bf21cd93STycho Nightingale 	 * prematurely via it's normal callback path.
1026bf21cd93STycho Nightingale 	 */
1027bf21cd93STycho Nightingale 	while (be->be_status == BST_BUSY) {
1028bf21cd93STycho Nightingale 		struct blockif_sig_elem bse, *old_head;
1029bf21cd93STycho Nightingale 
1030bf21cd93STycho Nightingale 		pthread_mutex_init(&bse.bse_mtx, NULL);
1031bf21cd93STycho Nightingale 		pthread_cond_init(&bse.bse_cond, NULL);
1032bf21cd93STycho Nightingale 
1033bf21cd93STycho Nightingale 		bse.bse_pending = 1;
1034bf21cd93STycho Nightingale 
1035bf21cd93STycho Nightingale 		do {
1036bf21cd93STycho Nightingale 			old_head = blockif_bse_head;
1037bf21cd93STycho Nightingale 			bse.bse_next = old_head;
1038bf21cd93STycho Nightingale 		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
1039bf21cd93STycho Nightingale 					    (uintptr_t)old_head,
1040bf21cd93STycho Nightingale 					    (uintptr_t)&bse));
1041bf21cd93STycho Nightingale 
1042bf21cd93STycho Nightingale 		pthread_kill(be->be_tid, SIGCONT);
1043bf21cd93STycho Nightingale 
1044bf21cd93STycho Nightingale 		pthread_mutex_lock(&bse.bse_mtx);
1045bf21cd93STycho Nightingale 		while (bse.bse_pending)
1046bf21cd93STycho Nightingale 			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
1047bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bse.bse_mtx);
1048bf21cd93STycho Nightingale 	}
1049bf21cd93STycho Nightingale 
1050bf21cd93STycho Nightingale 	pthread_mutex_unlock(&bc->bc_mtx);
1051bf21cd93STycho Nightingale 
1052bf21cd93STycho Nightingale 	/*
1053bf21cd93STycho Nightingale 	 * The processing thread has been interrupted.  Since it's not
1054bf21cd93STycho Nightingale 	 * clear if the callback has been invoked yet, return EBUSY.
1055bf21cd93STycho Nightingale 	 */
1056bf21cd93STycho Nightingale 	return (EBUSY);
1057bf21cd93STycho Nightingale }
1058bf21cd93STycho Nightingale 
1059bf21cd93STycho Nightingale int
blockif_close(struct blockif_ctxt * bc)1060bf21cd93STycho Nightingale blockif_close(struct blockif_ctxt *bc)
1061bf21cd93STycho Nightingale {
1062bf21cd93STycho Nightingale 	void *jval;
10634c87aefeSPatrick Mooney 	int i;
1064bf21cd93STycho Nightingale 
1065bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1066bf21cd93STycho Nightingale 
1067bf21cd93STycho Nightingale 	/*
1068bf21cd93STycho Nightingale 	 * Stop the block i/o thread
1069bf21cd93STycho Nightingale 	 */
10704c87aefeSPatrick Mooney 	pthread_mutex_lock(&bc->bc_mtx);
1071bf21cd93STycho Nightingale 	bc->bc_closing = 1;
1072b0de25cbSAndy Fiddaman 	if (bc->bc_resize_event != NULL)
1073b0de25cbSAndy Fiddaman 		mevent_disable(bc->bc_resize_event);
10744c87aefeSPatrick Mooney 	pthread_mutex_unlock(&bc->bc_mtx);
10754c87aefeSPatrick Mooney 	pthread_cond_broadcast(&bc->bc_cond);
10764c87aefeSPatrick Mooney 	for (i = 0; i < BLOCKIF_NUMTHR; i++)
10774c87aefeSPatrick Mooney 		pthread_join(bc->bc_btid[i], &jval);
1078bf21cd93STycho Nightingale 
1079bf21cd93STycho Nightingale 	/* XXX Cancel queued i/o's ??? */
1080bf21cd93STycho Nightingale 
1081bf21cd93STycho Nightingale 	/*
1082bf21cd93STycho Nightingale 	 * Release resources
1083bf21cd93STycho Nightingale 	 */
1084bf21cd93STycho Nightingale 	bc->bc_magic = 0;
1085bf21cd93STycho Nightingale 	close(bc->bc_fd);
1086bf21cd93STycho Nightingale 	free(bc);
1087bf21cd93STycho Nightingale 
1088bf21cd93STycho Nightingale 	return (0);
1089bf21cd93STycho Nightingale }
1090bf21cd93STycho Nightingale 
1091bf21cd93STycho Nightingale /*
1092bf21cd93STycho Nightingale  * Return virtual C/H/S values for a given block. Use the algorithm
1093bf21cd93STycho Nightingale  * outlined in the VHD specification to calculate values.
1094bf21cd93STycho Nightingale  */
1095bf21cd93STycho Nightingale void
blockif_chs(struct blockif_ctxt * bc,uint16_t * c,uint8_t * h,uint8_t * s)1096bf21cd93STycho Nightingale blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
1097bf21cd93STycho Nightingale {
1098bf21cd93STycho Nightingale 	off_t sectors;		/* total sectors of the block dev */
1099bf21cd93STycho Nightingale 	off_t hcyl;		/* cylinders times heads */
1100bf21cd93STycho Nightingale 	uint16_t secpt;		/* sectors per track */
1101bf21cd93STycho Nightingale 	uint8_t heads;
1102bf21cd93STycho Nightingale 
1103bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1104bf21cd93STycho Nightingale 
1105bf21cd93STycho Nightingale 	sectors = bc->bc_size / bc->bc_sectsz;
1106bf21cd93STycho Nightingale 
1107bf21cd93STycho Nightingale 	/* Clamp the size to the largest possible with CHS */
110859d65d31SAndy Fiddaman 	if (sectors > 65535L * 16 * 255)
110959d65d31SAndy Fiddaman 		sectors = 65535L * 16 * 255;
1110bf21cd93STycho Nightingale 
111159d65d31SAndy Fiddaman 	if (sectors >= 65536L * 16 * 63) {
1112bf21cd93STycho Nightingale 		secpt = 255;
1113bf21cd93STycho Nightingale 		heads = 16;
1114bf21cd93STycho Nightingale 		hcyl = sectors / secpt;
1115bf21cd93STycho Nightingale 	} else {
1116bf21cd93STycho Nightingale 		secpt = 17;
1117bf21cd93STycho Nightingale 		hcyl = sectors / secpt;
1118bf21cd93STycho Nightingale 		heads = (hcyl + 1023) / 1024;
1119bf21cd93STycho Nightingale 
1120bf21cd93STycho Nightingale 		if (heads < 4)
1121bf21cd93STycho Nightingale 			heads = 4;
1122bf21cd93STycho Nightingale 
1123bf21cd93STycho Nightingale 		if (hcyl >= (heads * 1024) || heads > 16) {
1124bf21cd93STycho Nightingale 			secpt = 31;
1125bf21cd93STycho Nightingale 			heads = 16;
1126bf21cd93STycho Nightingale 			hcyl = sectors / secpt;
1127bf21cd93STycho Nightingale 		}
1128bf21cd93STycho Nightingale 		if (hcyl >= (heads * 1024)) {
1129bf21cd93STycho Nightingale 			secpt = 63;
1130bf21cd93STycho Nightingale 			heads = 16;
1131bf21cd93STycho Nightingale 			hcyl = sectors / secpt;
1132bf21cd93STycho Nightingale 		}
1133bf21cd93STycho Nightingale 	}
1134bf21cd93STycho Nightingale 
1135bf21cd93STycho Nightingale 	*c = hcyl / heads;
1136bf21cd93STycho Nightingale 	*h = heads;
1137bf21cd93STycho Nightingale 	*s = secpt;
1138bf21cd93STycho Nightingale }
1139bf21cd93STycho Nightingale 
1140bf21cd93STycho Nightingale /*
1141bf21cd93STycho Nightingale  * Accessors
1142bf21cd93STycho Nightingale  */
1143bf21cd93STycho Nightingale off_t
blockif_size(struct blockif_ctxt * bc)1144bf21cd93STycho Nightingale blockif_size(struct blockif_ctxt *bc)
1145bf21cd93STycho Nightingale {
1146bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1147bf21cd93STycho Nightingale 	return (bc->bc_size);
1148bf21cd93STycho Nightingale }
1149bf21cd93STycho Nightingale 
1150bf21cd93STycho Nightingale int
blockif_sectsz(struct blockif_ctxt * bc)1151bf21cd93STycho Nightingale blockif_sectsz(struct blockif_ctxt *bc)
1152bf21cd93STycho Nightingale {
1153bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1154bf21cd93STycho Nightingale 	return (bc->bc_sectsz);
1155bf21cd93STycho Nightingale }
1156bf21cd93STycho Nightingale 
11574c87aefeSPatrick Mooney void
blockif_psectsz(struct blockif_ctxt * bc,int * size,int * off)11584c87aefeSPatrick Mooney blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
11594c87aefeSPatrick Mooney {
11604c87aefeSPatrick Mooney 	assert(bc->bc_magic == BLOCKIF_SIG);
11614c87aefeSPatrick Mooney 	*size = bc->bc_psectsz;
11624c87aefeSPatrick Mooney 	*off = bc->bc_psectoff;
11634c87aefeSPatrick Mooney }
11644c87aefeSPatrick Mooney 
1165bf21cd93STycho Nightingale int
blockif_queuesz(struct blockif_ctxt * bc)1166bf21cd93STycho Nightingale blockif_queuesz(struct blockif_ctxt *bc)
1167bf21cd93STycho Nightingale {
1168bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1169bf21cd93STycho Nightingale 	return (BLOCKIF_MAXREQ - 1);
1170bf21cd93STycho Nightingale }
1171bf21cd93STycho Nightingale 
1172bf21cd93STycho Nightingale int
blockif_is_ro(struct blockif_ctxt * bc)1173bf21cd93STycho Nightingale blockif_is_ro(struct blockif_ctxt *bc)
1174bf21cd93STycho Nightingale {
1175bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1176bf21cd93STycho Nightingale 	return (bc->bc_rdonly);
1177bf21cd93STycho Nightingale }
11784c87aefeSPatrick Mooney 
11794c87aefeSPatrick Mooney int
blockif_candelete(struct blockif_ctxt * bc)11804c87aefeSPatrick Mooney blockif_candelete(struct blockif_ctxt *bc)
11814c87aefeSPatrick Mooney {
11824c87aefeSPatrick Mooney 	assert(bc->bc_magic == BLOCKIF_SIG);
11834c87aefeSPatrick Mooney 	return (bc->bc_candelete);
11844c87aefeSPatrick Mooney }
11854c87aefeSPatrick Mooney 
11864c87aefeSPatrick Mooney #ifndef __FreeBSD__
11874c87aefeSPatrick Mooney int
blockif_set_wce(struct blockif_ctxt * bc,int wc_enable)11884c87aefeSPatrick Mooney blockif_set_wce(struct blockif_ctxt *bc, int wc_enable)
11894c87aefeSPatrick Mooney {
11904c87aefeSPatrick Mooney 	int res = 0, flags;
11914c87aefeSPatrick Mooney 	int clean_val = (wc_enable != 0) ? 1 : 0;
11924c87aefeSPatrick Mooney 
11934c87aefeSPatrick Mooney 	(void) pthread_mutex_lock(&bc->bc_mtx);
11944c87aefeSPatrick Mooney 	switch (bc->bc_wce) {
11954c87aefeSPatrick Mooney 	case WCE_IOCTL:
11964c87aefeSPatrick Mooney 		res = ioctl(bc->bc_fd, DKIOCSETWCE, &clean_val);
11974c87aefeSPatrick Mooney 		break;
11984c87aefeSPatrick Mooney 	case WCE_FCNTL:
11994c87aefeSPatrick Mooney 		if ((flags = fcntl(bc->bc_fd, F_GETFL)) >= 0) {
12004c87aefeSPatrick Mooney 			if (wc_enable == 0) {
12014c87aefeSPatrick Mooney 				flags |= O_DSYNC;
12024c87aefeSPatrick Mooney 			} else {
12034c87aefeSPatrick Mooney 				flags &= ~O_DSYNC;
12044c87aefeSPatrick Mooney 			}
12054c87aefeSPatrick Mooney 			if (fcntl(bc->bc_fd, F_SETFL, flags) == -1) {
12064c87aefeSPatrick Mooney 				res = -1;
12074c87aefeSPatrick Mooney 			}
12084c87aefeSPatrick Mooney 		} else {
12094c87aefeSPatrick Mooney 			res = -1;
12104c87aefeSPatrick Mooney 		}
12114c87aefeSPatrick Mooney 		break;
12124c87aefeSPatrick Mooney 	default:
12134c87aefeSPatrick Mooney 		break;
12144c87aefeSPatrick Mooney 	}
12154c87aefeSPatrick Mooney 
12164c87aefeSPatrick Mooney 	/*
12174c87aefeSPatrick Mooney 	 * After a successful disable of the write cache, ensure that any
12184c87aefeSPatrick Mooney 	 * lingering data in the cache is synced out.
12194c87aefeSPatrick Mooney 	 */
12204c87aefeSPatrick Mooney 	if (res == 0 && wc_enable == 0) {
12214c87aefeSPatrick Mooney 		res = fsync(bc->bc_fd);
12224c87aefeSPatrick Mooney 	}
12234c87aefeSPatrick Mooney 	(void) pthread_mutex_unlock(&bc->bc_mtx);
12244c87aefeSPatrick Mooney 
12254c87aefeSPatrick Mooney 	return (res);
12264c87aefeSPatrick Mooney }
12274c87aefeSPatrick Mooney #endif /* __FreeBSD__ */
1228