xref: /illumos-gate/usr/src/cmd/bhyve/block_if.c (revision 6dc983494b0ffef2565cc4d91371ee345425ffab)
1bf21cd93STycho Nightingale /*-
24c87aefeSPatrick Mooney  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
34c87aefeSPatrick Mooney  *
4bf21cd93STycho Nightingale  * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
5bf21cd93STycho Nightingale  * All rights reserved.
6154972afSPatrick Mooney  * Copyright 2020 Joyent, Inc.
7bf21cd93STycho Nightingale  *
8bf21cd93STycho Nightingale  * Redistribution and use in source and binary forms, with or without
9bf21cd93STycho Nightingale  * modification, are permitted provided that the following conditions
10bf21cd93STycho Nightingale  * are met:
11bf21cd93STycho Nightingale  * 1. Redistributions of source code must retain the above copyright
12bf21cd93STycho Nightingale  *    notice, this list of conditions and the following disclaimer.
13bf21cd93STycho Nightingale  * 2. Redistributions in binary form must reproduce the above copyright
14bf21cd93STycho Nightingale  *    notice, this list of conditions and the following disclaimer in the
15bf21cd93STycho Nightingale  *    documentation and/or other materials provided with the distribution.
16bf21cd93STycho Nightingale  *
17bf21cd93STycho Nightingale  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18bf21cd93STycho Nightingale  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19bf21cd93STycho Nightingale  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20bf21cd93STycho Nightingale  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21bf21cd93STycho Nightingale  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22bf21cd93STycho Nightingale  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23bf21cd93STycho Nightingale  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24bf21cd93STycho Nightingale  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25bf21cd93STycho Nightingale  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26bf21cd93STycho Nightingale  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27bf21cd93STycho Nightingale  * SUCH DAMAGE.
28bf21cd93STycho Nightingale  *
294c87aefeSPatrick Mooney  * $FreeBSD$
304c87aefeSPatrick Mooney  */
314c87aefeSPatrick Mooney 
324c87aefeSPatrick Mooney /*
33282a8ecbSJason King  * Copyright 2020 Joyent, Inc.
34bf21cd93STycho Nightingale  */
35bf21cd93STycho Nightingale 
36bf21cd93STycho Nightingale #include <sys/cdefs.h>
374c87aefeSPatrick Mooney __FBSDID("$FreeBSD$");
38bf21cd93STycho Nightingale 
39bf21cd93STycho Nightingale #include <sys/param.h>
404c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
414c87aefeSPatrick Mooney #include <sys/capsicum.h>
424c87aefeSPatrick Mooney #endif
43bf21cd93STycho Nightingale #include <sys/queue.h>
44bf21cd93STycho Nightingale #include <sys/errno.h>
45bf21cd93STycho Nightingale #include <sys/stat.h>
46bf21cd93STycho Nightingale #include <sys/ioctl.h>
47bf21cd93STycho Nightingale #include <sys/disk.h>
484c87aefeSPatrick Mooney #include <sys/limits.h>
494c87aefeSPatrick Mooney #include <sys/uio.h>
504c87aefeSPatrick Mooney #ifndef __FreeBSD__
514c87aefeSPatrick Mooney #include <sys/dkio.h>
524c87aefeSPatrick Mooney #endif
53bf21cd93STycho Nightingale 
54bf21cd93STycho Nightingale #include <assert.h>
554c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
564c87aefeSPatrick Mooney #include <capsicum_helpers.h>
574c87aefeSPatrick Mooney #endif
584c87aefeSPatrick Mooney #include <err.h>
59bf21cd93STycho Nightingale #include <fcntl.h>
60bf21cd93STycho Nightingale #include <stdio.h>
61bf21cd93STycho Nightingale #include <stdlib.h>
62bf21cd93STycho Nightingale #include <string.h>
63bf21cd93STycho Nightingale #include <pthread.h>
64bf21cd93STycho Nightingale #include <pthread_np.h>
65bf21cd93STycho Nightingale #include <signal.h>
664c87aefeSPatrick Mooney #include <sysexits.h>
67bf21cd93STycho Nightingale #include <unistd.h>
68bf21cd93STycho Nightingale 
69bf21cd93STycho Nightingale #include <machine/atomic.h>
70bf21cd93STycho Nightingale 
71bf21cd93STycho Nightingale #include "bhyverun.h"
722b948146SAndy Fiddaman #include "config.h"
73154972afSPatrick Mooney #include "debug.h"
74bf21cd93STycho Nightingale #include "mevent.h"
752b948146SAndy Fiddaman #include "pci_emul.h"
76bf21cd93STycho Nightingale #include "block_if.h"
77bf21cd93STycho Nightingale 
/*
 * Signature constant for block contexts.
 * NOTE(review): presumably stored in blockif_ctxt.bc_magic; the assignment
 * is outside this part of the file -- confirm.
 */
#define BLOCKIF_SIG	0xb109b109

/* Dimension of the per-context thread id array (blockif_ctxt.bc_btid). */
#ifndef __FreeBSD__
/* Enlarge to keep pace with the virtio-block ring size */
#define BLOCKIF_NUMTHR	16
#else
#define BLOCKIF_NUMTHR	8
#endif
/* Dimension of the per-context request element array (bc_reqs). */
#define BLOCKIF_MAXREQ	(BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
87bf21cd93STycho Nightingale 
/* Operation carried by a queued request element (blockif_elem.be_op). */
enum blockop {
	BOP_READ,		/* read into the request's iovecs */
	BOP_WRITE,		/* write from the request's iovecs */
#ifndef __FreeBSD__
	BOP_WRITE_SYNC,		/* non-FreeBSD builds only */
#endif
	BOP_FLUSH,		/* flush the backing descriptor */
	BOP_DELETE		/* deallocate a byte range */
};
97bf21cd93STycho Nightingale 
/* Life-cycle state of a request element (blockif_elem.be_status). */
enum blockstat {
	BST_FREE,	/* on the free queue, not associated with a request */
	BST_BLOCK,	/* on the pending queue, held back behind another request */
	BST_PEND,	/* on the pending queue, eligible to be dequeued */
	BST_BUSY,	/* on the busy queue, owned by a worker thread */
	BST_DONE	/* processing finished, not yet returned to the free queue */
};
105bf21cd93STycho Nightingale 
106bf21cd93STycho Nightingale struct blockif_elem {
107bf21cd93STycho Nightingale 	TAILQ_ENTRY(blockif_elem) be_link;
108bf21cd93STycho Nightingale 	struct blockif_req  *be_req;
109bf21cd93STycho Nightingale 	enum blockop	     be_op;
110bf21cd93STycho Nightingale 	enum blockstat	     be_status;
111bf21cd93STycho Nightingale 	pthread_t            be_tid;
1124c87aefeSPatrick Mooney 	off_t		     be_block;
113bf21cd93STycho Nightingale };
114bf21cd93STycho Nightingale 
#ifndef __FreeBSD__
/*
 * Write-cache-enable control mechanism recorded in blockif_ctxt.bc_wce.
 * NOTE(review): the IOCTL/FCNTL names suggest the interface used to toggle
 * the cache; the code that sets them is outside this part of the file.
 */
enum blockif_wce {
	WCE_NONE = 0,	/* no controllable write cache */
	WCE_IOCTL,
	WCE_FCNTL
};
#endif
1224c87aefeSPatrick Mooney 
123bf21cd93STycho Nightingale struct blockif_ctxt {
124bf21cd93STycho Nightingale 	int			bc_magic;
125bf21cd93STycho Nightingale 	int			bc_fd;
1264c87aefeSPatrick Mooney 	int			bc_ischr;
1274c87aefeSPatrick Mooney 	int			bc_isgeom;
1284c87aefeSPatrick Mooney 	int			bc_candelete;
1294c87aefeSPatrick Mooney #ifndef __FreeBSD__
1304c87aefeSPatrick Mooney 	enum blockif_wce	bc_wce;
1314c87aefeSPatrick Mooney #endif
132bf21cd93STycho Nightingale 	int			bc_rdonly;
133bf21cd93STycho Nightingale 	off_t			bc_size;
134bf21cd93STycho Nightingale 	int			bc_sectsz;
1354c87aefeSPatrick Mooney 	int			bc_psectsz;
1364c87aefeSPatrick Mooney 	int			bc_psectoff;
1374c87aefeSPatrick Mooney 	int			bc_closing;
1384c87aefeSPatrick Mooney 	pthread_t		bc_btid[BLOCKIF_NUMTHR];
139bf21cd93STycho Nightingale 	pthread_mutex_t		bc_mtx;
140bf21cd93STycho Nightingale 	pthread_cond_t		bc_cond;
141b0de25cbSAndy Fiddaman 	blockif_resize_cb	*bc_resize_cb;
142b0de25cbSAndy Fiddaman 	void			*bc_resize_cb_arg;
143b0de25cbSAndy Fiddaman 	struct mevent		*bc_resize_event;
144bf21cd93STycho Nightingale 
145bf21cd93STycho Nightingale 	/* Request elements and free/pending/busy queues */
146bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_freeq;
147bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_pendq;
148bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_busyq;
149bf21cd93STycho Nightingale 	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
150bf21cd93STycho Nightingale };
151bf21cd93STycho Nightingale 
/* Ensures blockif_init() runs only once (see pthread_once in blockif_open). */
static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;
153bf21cd93STycho Nightingale 
/*
 * Entry on the singly linked list consumed by blockif_sigcont_handler().
 * The handler clears bse_pending and signals bse_cond under bse_mtx.
 */
struct blockif_sig_elem {
	pthread_mutex_t			bse_mtx;	/* protects bse_pending */
	pthread_cond_t			bse_cond;	/* signalled by handler */
	int				bse_pending;	/* zeroed by handler */
	struct blockif_sig_elem		*bse_next;	/* next list entry */
};
160bf21cd93STycho Nightingale 
/* Head of the blockif_sig_elem list; popped with atomic_cmpset_ptr(). */
static struct blockif_sig_elem *blockif_bse_head;
162bf21cd93STycho Nightingale 
163bf21cd93STycho Nightingale static int
164bf21cd93STycho Nightingale blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
165bf21cd93STycho Nightingale 		enum blockop op)
166bf21cd93STycho Nightingale {
1674c87aefeSPatrick Mooney 	struct blockif_elem *be, *tbe;
1684c87aefeSPatrick Mooney 	off_t off;
1694c87aefeSPatrick Mooney 	int i;
170bf21cd93STycho Nightingale 
171bf21cd93STycho Nightingale 	be = TAILQ_FIRST(&bc->bc_freeq);
172bf21cd93STycho Nightingale 	assert(be != NULL);
173bf21cd93STycho Nightingale 	assert(be->be_status == BST_FREE);
174bf21cd93STycho Nightingale 	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
175bf21cd93STycho Nightingale 	be->be_req = breq;
176bf21cd93STycho Nightingale 	be->be_op = op;
1774c87aefeSPatrick Mooney 	switch (op) {
1784c87aefeSPatrick Mooney 	case BOP_READ:
1794c87aefeSPatrick Mooney 	case BOP_WRITE:
1804c87aefeSPatrick Mooney #ifndef __FreeBSD__
1814c87aefeSPatrick Mooney 	case BOP_WRITE_SYNC:
1824c87aefeSPatrick Mooney #endif
1834c87aefeSPatrick Mooney 	case BOP_DELETE:
1844c87aefeSPatrick Mooney 		off = breq->br_offset;
1854c87aefeSPatrick Mooney 		for (i = 0; i < breq->br_iovcnt; i++)
1864c87aefeSPatrick Mooney 			off += breq->br_iov[i].iov_len;
1874c87aefeSPatrick Mooney 		break;
1884c87aefeSPatrick Mooney 	default:
1894c87aefeSPatrick Mooney 		off = OFF_MAX;
1904c87aefeSPatrick Mooney 	}
1914c87aefeSPatrick Mooney 	be->be_block = off;
1924c87aefeSPatrick Mooney 	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
1934c87aefeSPatrick Mooney 		if (tbe->be_block == breq->br_offset)
1944c87aefeSPatrick Mooney 			break;
1954c87aefeSPatrick Mooney 	}
1964c87aefeSPatrick Mooney 	if (tbe == NULL) {
1974c87aefeSPatrick Mooney 		TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
1984c87aefeSPatrick Mooney 			if (tbe->be_block == breq->br_offset)
1994c87aefeSPatrick Mooney 				break;
2004c87aefeSPatrick Mooney 		}
2014c87aefeSPatrick Mooney 	}
2024c87aefeSPatrick Mooney 	if (tbe == NULL)
2034c87aefeSPatrick Mooney 		be->be_status = BST_PEND;
2044c87aefeSPatrick Mooney 	else
2054c87aefeSPatrick Mooney 		be->be_status = BST_BLOCK;
206bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
2074c87aefeSPatrick Mooney 	return (be->be_status == BST_PEND);
208bf21cd93STycho Nightingale }
209bf21cd93STycho Nightingale 
210bf21cd93STycho Nightingale static int
2114c87aefeSPatrick Mooney blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
212bf21cd93STycho Nightingale {
213bf21cd93STycho Nightingale 	struct blockif_elem *be;
214bf21cd93STycho Nightingale 
2154c87aefeSPatrick Mooney 	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
2164c87aefeSPatrick Mooney 		if (be->be_status == BST_PEND)
2174c87aefeSPatrick Mooney 			break;
2184c87aefeSPatrick Mooney 		assert(be->be_status == BST_BLOCK);
2194c87aefeSPatrick Mooney 	}
2204c87aefeSPatrick Mooney 	if (be == NULL)
2214c87aefeSPatrick Mooney 		return (0);
222bf21cd93STycho Nightingale 	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
223bf21cd93STycho Nightingale 	be->be_status = BST_BUSY;
2244c87aefeSPatrick Mooney 	be->be_tid = t;
225bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
226bf21cd93STycho Nightingale 	*bep = be;
2274c87aefeSPatrick Mooney 	return (1);
228bf21cd93STycho Nightingale }
229bf21cd93STycho Nightingale 
230bf21cd93STycho Nightingale static void
231bf21cd93STycho Nightingale blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
232bf21cd93STycho Nightingale {
2334c87aefeSPatrick Mooney 	struct blockif_elem *tbe;
234bf21cd93STycho Nightingale 
2354c87aefeSPatrick Mooney 	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
236bf21cd93STycho Nightingale 		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
2374c87aefeSPatrick Mooney 	else
2384c87aefeSPatrick Mooney 		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
2394c87aefeSPatrick Mooney 	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
2404c87aefeSPatrick Mooney 		if (tbe->be_req->br_offset == be->be_block)
2414c87aefeSPatrick Mooney 			tbe->be_status = BST_PEND;
2424c87aefeSPatrick Mooney 	}
243bf21cd93STycho Nightingale 	be->be_tid = 0;
244bf21cd93STycho Nightingale 	be->be_status = BST_FREE;
245bf21cd93STycho Nightingale 	be->be_req = NULL;
246bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
247bf21cd93STycho Nightingale }
248bf21cd93STycho Nightingale 
249bf21cd93STycho Nightingale static void
2504c87aefeSPatrick Mooney blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
251bf21cd93STycho Nightingale {
252bf21cd93STycho Nightingale 	struct blockif_req *br;
2534c87aefeSPatrick Mooney #ifdef	__FreeBSD__
2544c87aefeSPatrick Mooney 	off_t arg[2];
2554c87aefeSPatrick Mooney #endif
2564c87aefeSPatrick Mooney 	ssize_t clen, len, off, boff, voff;
2574c87aefeSPatrick Mooney 	int i, err;
258bf21cd93STycho Nightingale 
259bf21cd93STycho Nightingale 	br = be->be_req;
2604c87aefeSPatrick Mooney 	if (br->br_iovcnt <= 1)
2614c87aefeSPatrick Mooney 		buf = NULL;
262bf21cd93STycho Nightingale 	err = 0;
263bf21cd93STycho Nightingale 	switch (be->be_op) {
264bf21cd93STycho Nightingale 	case BOP_READ:
2654c87aefeSPatrick Mooney 		if (buf == NULL) {
2664c87aefeSPatrick Mooney 			if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
2674c87aefeSPatrick Mooney 				   br->br_offset)) < 0)
268bf21cd93STycho Nightingale 				err = errno;
2694c87aefeSPatrick Mooney 			else
2704c87aefeSPatrick Mooney 				br->br_resid -= len;
2714c87aefeSPatrick Mooney 			break;
2724c87aefeSPatrick Mooney 		}
2734c87aefeSPatrick Mooney 		i = 0;
2744c87aefeSPatrick Mooney 		off = voff = 0;
2754c87aefeSPatrick Mooney 		while (br->br_resid > 0) {
2764c87aefeSPatrick Mooney 			len = MIN(br->br_resid, MAXPHYS);
2774c87aefeSPatrick Mooney 			if (pread(bc->bc_fd, buf, len, br->br_offset +
2784c87aefeSPatrick Mooney 			    off) < 0) {
2794c87aefeSPatrick Mooney 				err = errno;
2804c87aefeSPatrick Mooney 				break;
2814c87aefeSPatrick Mooney 			}
2824c87aefeSPatrick Mooney 			boff = 0;
2834c87aefeSPatrick Mooney 			do {
2844c87aefeSPatrick Mooney 				clen = MIN(len - boff, br->br_iov[i].iov_len -
2854c87aefeSPatrick Mooney 				    voff);
2864c87aefeSPatrick Mooney 				memcpy(br->br_iov[i].iov_base + voff,
2874c87aefeSPatrick Mooney 				    buf + boff, clen);
2884c87aefeSPatrick Mooney 				if (clen < br->br_iov[i].iov_len - voff)
2894c87aefeSPatrick Mooney 					voff += clen;
2904c87aefeSPatrick Mooney 				else {
2914c87aefeSPatrick Mooney 					i++;
2924c87aefeSPatrick Mooney 					voff = 0;
2934c87aefeSPatrick Mooney 				}
2944c87aefeSPatrick Mooney 				boff += clen;
2954c87aefeSPatrick Mooney 			} while (boff < len);
2964c87aefeSPatrick Mooney 			off += len;
2974c87aefeSPatrick Mooney 			br->br_resid -= len;
2984c87aefeSPatrick Mooney 		}
299bf21cd93STycho Nightingale 		break;
300bf21cd93STycho Nightingale 	case BOP_WRITE:
3014c87aefeSPatrick Mooney 		if (bc->bc_rdonly) {
302bf21cd93STycho Nightingale 			err = EROFS;
3034c87aefeSPatrick Mooney 			break;
3044c87aefeSPatrick Mooney 		}
3054c87aefeSPatrick Mooney 		if (buf == NULL) {
3064c87aefeSPatrick Mooney 			if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
3074c87aefeSPatrick Mooney 				    br->br_offset)) < 0)
3084c87aefeSPatrick Mooney 				err = errno;
3094c87aefeSPatrick Mooney 			else
3104c87aefeSPatrick Mooney 				br->br_resid -= len;
3114c87aefeSPatrick Mooney 			break;
3124c87aefeSPatrick Mooney 		}
3134c87aefeSPatrick Mooney 		i = 0;
3144c87aefeSPatrick Mooney 		off = voff = 0;
3154c87aefeSPatrick Mooney 		while (br->br_resid > 0) {
3164c87aefeSPatrick Mooney 			len = MIN(br->br_resid, MAXPHYS);
3174c87aefeSPatrick Mooney 			boff = 0;
3184c87aefeSPatrick Mooney 			do {
3194c87aefeSPatrick Mooney 				clen = MIN(len - boff, br->br_iov[i].iov_len -
3204c87aefeSPatrick Mooney 				    voff);
3214c87aefeSPatrick Mooney 				memcpy(buf + boff,
3224c87aefeSPatrick Mooney 				    br->br_iov[i].iov_base + voff, clen);
3234c87aefeSPatrick Mooney 				if (clen < br->br_iov[i].iov_len - voff)
3244c87aefeSPatrick Mooney 					voff += clen;
3254c87aefeSPatrick Mooney 				else {
3264c87aefeSPatrick Mooney 					i++;
3274c87aefeSPatrick Mooney 					voff = 0;
3284c87aefeSPatrick Mooney 				}
3294c87aefeSPatrick Mooney 				boff += clen;
3304c87aefeSPatrick Mooney 			} while (boff < len);
3314c87aefeSPatrick Mooney 			if (pwrite(bc->bc_fd, buf, len, br->br_offset +
3324c87aefeSPatrick Mooney 			    off) < 0) {
333bf21cd93STycho Nightingale 				err = errno;
334bf21cd93STycho Nightingale 				break;
3354c87aefeSPatrick Mooney 			}
3364c87aefeSPatrick Mooney 			off += len;
3374c87aefeSPatrick Mooney 			br->br_resid -= len;
3384c87aefeSPatrick Mooney 		}
3394c87aefeSPatrick Mooney 		break;
340bf21cd93STycho Nightingale 	case BOP_FLUSH:
3414c87aefeSPatrick Mooney #ifdef	__FreeBSD__
3424c87aefeSPatrick Mooney 		if (bc->bc_ischr) {
3434c87aefeSPatrick Mooney 			if (ioctl(bc->bc_fd, DIOCGFLUSH))
3444c87aefeSPatrick Mooney 				err = errno;
3454c87aefeSPatrick Mooney 		} else if (fsync(bc->bc_fd))
3464c87aefeSPatrick Mooney 			err = errno;
3474c87aefeSPatrick Mooney #else
3484c87aefeSPatrick Mooney 		/*
3494c87aefeSPatrick Mooney 		 * This fsync() should be adequate to flush the cache of a file
3504c87aefeSPatrick Mooney 		 * or device.  In VFS, the VOP_SYNC operation is converted to
3514c87aefeSPatrick Mooney 		 * the appropriate ioctl in both sdev (for real devices) and
3524c87aefeSPatrick Mooney 		 * zfs (for zvols).
3534c87aefeSPatrick Mooney 		 */
3544c87aefeSPatrick Mooney 		if (fsync(bc->bc_fd))
3554c87aefeSPatrick Mooney 			err = errno;
3564c87aefeSPatrick Mooney #endif
3574c87aefeSPatrick Mooney 		break;
3584c87aefeSPatrick Mooney 	case BOP_DELETE:
3594c87aefeSPatrick Mooney 		if (!bc->bc_candelete)
3604c87aefeSPatrick Mooney 			err = EOPNOTSUPP;
3614c87aefeSPatrick Mooney 		else if (bc->bc_rdonly)
3624c87aefeSPatrick Mooney 			err = EROFS;
3634c87aefeSPatrick Mooney #ifdef	__FreeBSD__
3644c87aefeSPatrick Mooney 		else if (bc->bc_ischr) {
3654c87aefeSPatrick Mooney 			arg[0] = br->br_offset;
3664c87aefeSPatrick Mooney 			arg[1] = br->br_resid;
3674c87aefeSPatrick Mooney 			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
3684c87aefeSPatrick Mooney 				err = errno;
3694c87aefeSPatrick Mooney 			else
3704c87aefeSPatrick Mooney 				br->br_resid = 0;
371b0de25cbSAndy Fiddaman 		} else {
372b0de25cbSAndy Fiddaman 			range.r_offset = br->br_offset;
373b0de25cbSAndy Fiddaman 			range.r_len = br->br_resid;
374b0de25cbSAndy Fiddaman 
375b0de25cbSAndy Fiddaman 			while (range.r_len > 0) {
376b0de25cbSAndy Fiddaman 				if (fspacectl(bc->bc_fd, SPACECTL_DEALLOC,
377b0de25cbSAndy Fiddaman 				    &range, 0, &range) != 0) {
378b0de25cbSAndy Fiddaman 					err = errno;
379b0de25cbSAndy Fiddaman 					break;
3804c87aefeSPatrick Mooney 				}
381b0de25cbSAndy Fiddaman 			}
382b0de25cbSAndy Fiddaman 			if (err == 0)
383b0de25cbSAndy Fiddaman 				br->br_resid = 0;
384b0de25cbSAndy Fiddaman 		}
385282a8ecbSJason King #else
386282a8ecbSJason King 		else if (bc->bc_ischr) {
387282a8ecbSJason King 			dkioc_free_list_t dfl = {
388282a8ecbSJason King 				.dfl_num_exts = 1,
389282a8ecbSJason King 				.dfl_offset = 0,
390282a8ecbSJason King 				.dfl_flags = 0,
391282a8ecbSJason King 				.dfl_exts = {
392282a8ecbSJason King 					{
393282a8ecbSJason King 						.dfle_start = br->br_offset,
394282a8ecbSJason King 						.dfle_length = br->br_resid
395282a8ecbSJason King 					}
396282a8ecbSJason King 				}
397282a8ecbSJason King 			};
398282a8ecbSJason King 
399282a8ecbSJason King 			if (ioctl(bc->bc_fd, DKIOCFREE, &dfl))
400282a8ecbSJason King 				err = errno;
401282a8ecbSJason King 			else
402282a8ecbSJason King 				br->br_resid = 0;
403282a8ecbSJason King 		} else {
404282a8ecbSJason King 			struct flock fl = {
405282a8ecbSJason King 				.l_whence = 0,
406282a8ecbSJason King 				.l_type = F_WRLCK,
407282a8ecbSJason King 				.l_start = br->br_offset,
408282a8ecbSJason King 				.l_len = br->br_resid
409282a8ecbSJason King 			};
410282a8ecbSJason King 
411282a8ecbSJason King 			if (fcntl(bc->bc_fd, F_FREESP, &fl))
412282a8ecbSJason King 				err = errno;
413282a8ecbSJason King 			else
414282a8ecbSJason King 				br->br_resid = 0;
415282a8ecbSJason King 		}
416282a8ecbSJason King #endif
417bf21cd93STycho Nightingale 		break;
418bf21cd93STycho Nightingale 	default:
419bf21cd93STycho Nightingale 		err = EINVAL;
420bf21cd93STycho Nightingale 		break;
421bf21cd93STycho Nightingale 	}
422bf21cd93STycho Nightingale 
423bf21cd93STycho Nightingale 	be->be_status = BST_DONE;
424bf21cd93STycho Nightingale 
425bf21cd93STycho Nightingale 	(*br->br_callback)(br, err);
426bf21cd93STycho Nightingale }
427bf21cd93STycho Nightingale 
428bf21cd93STycho Nightingale static void *
429bf21cd93STycho Nightingale blockif_thr(void *arg)
430bf21cd93STycho Nightingale {
431bf21cd93STycho Nightingale 	struct blockif_ctxt *bc;
432bf21cd93STycho Nightingale 	struct blockif_elem *be;
4334c87aefeSPatrick Mooney 	pthread_t t;
4344c87aefeSPatrick Mooney 	uint8_t *buf;
435bf21cd93STycho Nightingale 
436bf21cd93STycho Nightingale 	bc = arg;
4374c87aefeSPatrick Mooney 	if (bc->bc_isgeom)
4384c87aefeSPatrick Mooney 		buf = malloc(MAXPHYS);
4394c87aefeSPatrick Mooney 	else
4404c87aefeSPatrick Mooney 		buf = NULL;
4414c87aefeSPatrick Mooney 	t = pthread_self();
442bf21cd93STycho Nightingale 
443bf21cd93STycho Nightingale 	pthread_mutex_lock(&bc->bc_mtx);
4444c87aefeSPatrick Mooney 	for (;;) {
4454c87aefeSPatrick Mooney 		while (blockif_dequeue(bc, t, &be)) {
446bf21cd93STycho Nightingale 			pthread_mutex_unlock(&bc->bc_mtx);
4474c87aefeSPatrick Mooney 			blockif_proc(bc, be, buf);
448bf21cd93STycho Nightingale 			pthread_mutex_lock(&bc->bc_mtx);
449bf21cd93STycho Nightingale 			blockif_complete(bc, be);
450bf21cd93STycho Nightingale 		}
4514c87aefeSPatrick Mooney 		/* Check ctxt status here to see if exit requested */
4524c87aefeSPatrick Mooney 		if (bc->bc_closing)
4534c87aefeSPatrick Mooney 			break;
454bf21cd93STycho Nightingale 		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
4554c87aefeSPatrick Mooney 	}
456bf21cd93STycho Nightingale 	pthread_mutex_unlock(&bc->bc_mtx);
457bf21cd93STycho Nightingale 
4584c87aefeSPatrick Mooney 	if (buf)
4594c87aefeSPatrick Mooney 		free(buf);
460bf21cd93STycho Nightingale 	pthread_exit(NULL);
461bf21cd93STycho Nightingale 	return (NULL);
462bf21cd93STycho Nightingale }
463bf21cd93STycho Nightingale 
464bf21cd93STycho Nightingale #ifdef	__FreeBSD__
465bf21cd93STycho Nightingale static void
466bf21cd93STycho Nightingale blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
467bf21cd93STycho Nightingale #else
468bf21cd93STycho Nightingale static void
469bf21cd93STycho Nightingale blockif_sigcont_handler(int signal)
470bf21cd93STycho Nightingale #endif
471bf21cd93STycho Nightingale {
472bf21cd93STycho Nightingale 	struct blockif_sig_elem *bse;
473bf21cd93STycho Nightingale 
474bf21cd93STycho Nightingale 	for (;;) {
475bf21cd93STycho Nightingale 		/*
476bf21cd93STycho Nightingale 		 * Process the entire list even if not intended for
477bf21cd93STycho Nightingale 		 * this thread.
478bf21cd93STycho Nightingale 		 */
479bf21cd93STycho Nightingale 		do {
480bf21cd93STycho Nightingale 			bse = blockif_bse_head;
481bf21cd93STycho Nightingale 			if (bse == NULL)
482bf21cd93STycho Nightingale 				return;
483bf21cd93STycho Nightingale 		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
484bf21cd93STycho Nightingale 					    (uintptr_t)bse,
485bf21cd93STycho Nightingale 					    (uintptr_t)bse->bse_next));
486bf21cd93STycho Nightingale 
487bf21cd93STycho Nightingale 		pthread_mutex_lock(&bse->bse_mtx);
488bf21cd93STycho Nightingale 		bse->bse_pending = 0;
489bf21cd93STycho Nightingale 		pthread_cond_signal(&bse->bse_cond);
490bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bse->bse_mtx);
491bf21cd93STycho Nightingale 	}
492bf21cd93STycho Nightingale }
493bf21cd93STycho Nightingale 
494bf21cd93STycho Nightingale static void
495bf21cd93STycho Nightingale blockif_init(void)
496bf21cd93STycho Nightingale {
497bf21cd93STycho Nightingale #ifdef	__FreeBSD__
498bf21cd93STycho Nightingale 	mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
499bf21cd93STycho Nightingale 	(void) signal(SIGCONT, SIG_IGN);
500bf21cd93STycho Nightingale #else
501bf21cd93STycho Nightingale 	(void) sigset(SIGCONT, blockif_sigcont_handler);
502bf21cd93STycho Nightingale #endif
503bf21cd93STycho Nightingale }
504bf21cd93STycho Nightingale 
5052b948146SAndy Fiddaman int
5062b948146SAndy Fiddaman blockif_legacy_config(nvlist_t *nvl, const char *opts)
5072b948146SAndy Fiddaman {
5082b948146SAndy Fiddaman 	char *cp, *path;
5092b948146SAndy Fiddaman 
5102b948146SAndy Fiddaman 	if (opts == NULL)
5112b948146SAndy Fiddaman 		return (0);
5122b948146SAndy Fiddaman 
5132b948146SAndy Fiddaman 	cp = strchr(opts, ',');
5142b948146SAndy Fiddaman 	if (cp == NULL) {
5152b948146SAndy Fiddaman 		set_config_value_node(nvl, "path", opts);
5162b948146SAndy Fiddaman 		return (0);
5172b948146SAndy Fiddaman 	}
5182b948146SAndy Fiddaman 	path = strndup(opts, cp - opts);
5192b948146SAndy Fiddaman 	set_config_value_node(nvl, "path", path);
5202b948146SAndy Fiddaman 	free(path);
5212b948146SAndy Fiddaman 	return (pci_parse_legacy_config(nvl, cp + 1));
5222b948146SAndy Fiddaman }
5232b948146SAndy Fiddaman 
524bf21cd93STycho Nightingale struct blockif_ctxt *
5252b948146SAndy Fiddaman blockif_open(nvlist_t *nvl, const char *ident)
526bf21cd93STycho Nightingale {
527bf21cd93STycho Nightingale 	char tname[MAXCOMLEN + 1];
5284c87aefeSPatrick Mooney #ifdef	__FreeBSD__
5294c87aefeSPatrick Mooney 	char name[MAXPATHLEN];
5304c87aefeSPatrick Mooney #endif
5312b948146SAndy Fiddaman 	const char *path, *pssval, *ssval;
5322b948146SAndy Fiddaman 	char *cp;
533bf21cd93STycho Nightingale 	struct blockif_ctxt *bc;
534bf21cd93STycho Nightingale 	struct stat sbuf;
5354c87aefeSPatrick Mooney #ifdef	__FreeBSD__
5364c87aefeSPatrick Mooney 	struct diocgattr_arg arg;
5374c87aefeSPatrick Mooney #else
5384c87aefeSPatrick Mooney 	enum blockif_wce wce = WCE_NONE;
5394c87aefeSPatrick Mooney #endif
5404c87aefeSPatrick Mooney 	off_t size, psectsz, psectoff;
541bf21cd93STycho Nightingale 	int extra, fd, i, sectsz;
5422b948146SAndy Fiddaman 	int ro, candelete, geom, ssopt, pssopt;
543282a8ecbSJason King 	int nodelete;
544282a8ecbSJason King 
5454c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
5464c87aefeSPatrick Mooney 	cap_rights_t rights;
5474c87aefeSPatrick Mooney 	cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE };
5484c87aefeSPatrick Mooney #endif
549bf21cd93STycho Nightingale 
550bf21cd93STycho Nightingale 	pthread_once(&blockif_once, blockif_init);
551bf21cd93STycho Nightingale 
5524c87aefeSPatrick Mooney 	fd = -1;
5532b948146SAndy Fiddaman 	extra = 0;
5544c87aefeSPatrick Mooney 	ssopt = 0;
5552b948146SAndy Fiddaman #ifndef __FreeBSD__
5562b948146SAndy Fiddaman 	pssopt = 0;
5572b948146SAndy Fiddaman #endif
558bf21cd93STycho Nightingale 	ro = 0;
559282a8ecbSJason King 	nodelete = 0;
560bf21cd93STycho Nightingale 
5612b948146SAndy Fiddaman 	if (get_config_bool_node_default(nvl, "nocache", false))
5622b948146SAndy Fiddaman 		extra |= O_DIRECT;
5632b948146SAndy Fiddaman 	if (get_config_bool_node_default(nvl, "nodelete", false))
564282a8ecbSJason King 		nodelete = 1;
5652b948146SAndy Fiddaman 	if (get_config_bool_node_default(nvl, "sync", false) ||
5662b948146SAndy Fiddaman 	    get_config_bool_node_default(nvl, "direct", false))
5672b948146SAndy Fiddaman 		extra |= O_SYNC;
5682b948146SAndy Fiddaman 	if (get_config_bool_node_default(nvl, "ro", false))
569bf21cd93STycho Nightingale 		ro = 1;
5702b948146SAndy Fiddaman 	ssval = get_config_value_node(nvl, "sectorsize");
5712b948146SAndy Fiddaman 	if (ssval != NULL) {
5722b948146SAndy Fiddaman 		ssopt = strtol(ssval, &cp, 10);
5732b948146SAndy Fiddaman 		if (cp == ssval) {
5742b948146SAndy Fiddaman 			EPRINTLN("Invalid sector size \"%s\"", ssval);
5752b948146SAndy Fiddaman 			goto err;
5762b948146SAndy Fiddaman 		}
5772b948146SAndy Fiddaman 		if (*cp == '\0') {
5784c87aefeSPatrick Mooney 			pssopt = ssopt;
5792b948146SAndy Fiddaman 		} else if (*cp == '/') {
5802b948146SAndy Fiddaman 			pssval = cp + 1;
5812b948146SAndy Fiddaman 			pssopt = strtol(pssval, &cp, 10);
5822b948146SAndy Fiddaman 			if (cp == pssval || *cp != '\0') {
5832b948146SAndy Fiddaman 				EPRINTLN("Invalid sector size \"%s\"", ssval);
5842b948146SAndy Fiddaman 				goto err;
5852b948146SAndy Fiddaman 			}
5862b948146SAndy Fiddaman 		} else {
5872b948146SAndy Fiddaman 			EPRINTLN("Invalid sector size \"%s\"", ssval);
5884c87aefeSPatrick Mooney 			goto err;
5894c87aefeSPatrick Mooney 		}
590bf21cd93STycho Nightingale 	}
591bf21cd93STycho Nightingale 
5922b948146SAndy Fiddaman 	path = get_config_value_node(nvl, "path");
5932b948146SAndy Fiddaman 	if (path == NULL) {
5942b948146SAndy Fiddaman 		EPRINTLN("Missing \"path\" for block device.");
5952b948146SAndy Fiddaman 		goto err;
5962b948146SAndy Fiddaman 	}
597bf21cd93STycho Nightingale 
5982b948146SAndy Fiddaman 	fd = open(path, (ro ? O_RDONLY : O_RDWR) | extra);
599bf21cd93STycho Nightingale 	if (fd < 0 && !ro) {
600bf21cd93STycho Nightingale 		/* Attempt a r/w fail with a r/o open */
6012b948146SAndy Fiddaman 		fd = open(path, O_RDONLY | extra);
602bf21cd93STycho Nightingale 		ro = 1;
603bf21cd93STycho Nightingale 	}
604bf21cd93STycho Nightingale 
605bf21cd93STycho Nightingale 	if (fd < 0) {
6062b948146SAndy Fiddaman 		warn("Could not open backing file: %s", path);
6074c87aefeSPatrick Mooney 		goto err;
608bf21cd93STycho Nightingale 	}
609bf21cd93STycho Nightingale 
610bf21cd93STycho Nightingale         if (fstat(fd, &sbuf) < 0) {
6112b948146SAndy Fiddaman 		warn("Could not stat backing file %s", path);
6124c87aefeSPatrick Mooney 		goto err;
613bf21cd93STycho Nightingale         }
614bf21cd93STycho Nightingale 
6154c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
6164c87aefeSPatrick Mooney 	cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK,
617*6dc98349SAndy Fiddaman 	    CAP_WRITE, CAP_FSTAT, CAP_EVENT, CAP_FPATHCONF);
6184c87aefeSPatrick Mooney 	if (ro)
6194c87aefeSPatrick Mooney 		cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE);
6204c87aefeSPatrick Mooney 
6214c87aefeSPatrick Mooney 	if (caph_rights_limit(fd, &rights) == -1)
6224c87aefeSPatrick Mooney 		errx(EX_OSERR, "Unable to apply rights for sandbox");
6234c87aefeSPatrick Mooney #endif
6244c87aefeSPatrick Mooney 
625bf21cd93STycho Nightingale         /*
626bf21cd93STycho Nightingale 	 * Deal with raw devices
627bf21cd93STycho Nightingale 	 */
628bf21cd93STycho Nightingale         size = sbuf.st_size;
629bf21cd93STycho Nightingale 	sectsz = DEV_BSIZE;
6304c87aefeSPatrick Mooney 	psectsz = psectoff = 0;
6314c87aefeSPatrick Mooney 	candelete = geom = 0;
632bf21cd93STycho Nightingale #ifdef	__FreeBSD__
633bf21cd93STycho Nightingale 	if (S_ISCHR(sbuf.st_mode)) {
634bf21cd93STycho Nightingale 		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
635bf21cd93STycho Nightingale 		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
636bf21cd93STycho Nightingale 			perror("Could not fetch dev blk/sector size");
6374c87aefeSPatrick Mooney 			goto err;
638bf21cd93STycho Nightingale 		}
639bf21cd93STycho Nightingale 		assert(size != 0);
640bf21cd93STycho Nightingale 		assert(sectsz != 0);
6414c87aefeSPatrick Mooney 		if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
6424c87aefeSPatrick Mooney 			ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
6434c87aefeSPatrick Mooney 		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
6444c87aefeSPatrick Mooney 		arg.len = sizeof(arg.value.i);
645282a8ecbSJason King 		if (nodelete == 0 && ioctl(fd, DIOCGATTR, &arg) == 0)
6464c87aefeSPatrick Mooney 			candelete = arg.value.i;
6474c87aefeSPatrick Mooney 		if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
6484c87aefeSPatrick Mooney 			geom = 1;
6494c87aefeSPatrick Mooney 	} else {
6504c87aefeSPatrick Mooney 		psectsz = sbuf.st_blksize;
651b0de25cbSAndy Fiddaman 		/* Avoid fallback implementation */
652b0de25cbSAndy Fiddaman 		candelete = fpathconf(fd, _PC_DEALLOC_PRESENT) == 1;
6534c87aefeSPatrick Mooney 	}
6544c87aefeSPatrick Mooney #else
6554c87aefeSPatrick Mooney 	psectsz = sbuf.st_blksize;
6564c87aefeSPatrick Mooney 	if (S_ISCHR(sbuf.st_mode)) {
6574c87aefeSPatrick Mooney 		struct dk_minfo_ext dkmext;
6584c87aefeSPatrick Mooney 		int wce_val;
6594c87aefeSPatrick Mooney 
6604c87aefeSPatrick Mooney 		/* Look for a more accurate physical blocksize */
6614c87aefeSPatrick Mooney 		if (ioctl(fd, DKIOCGMEDIAINFOEXT, &dkmext) == 0) {
6624c87aefeSPatrick Mooney 			psectsz = dkmext.dki_pbsize;
6634c87aefeSPatrick Mooney 		}
6644c87aefeSPatrick Mooney 		/* See if a configurable write cache is present and working */
6654c87aefeSPatrick Mooney 		if (ioctl(fd, DKIOCGETWCE, &wce_val) == 0) {
6664c87aefeSPatrick Mooney 			/*
6674c87aefeSPatrick Mooney 			 * If WCE is already active, disable it until the
6684c87aefeSPatrick Mooney 			 * specific device driver calls for its return.  If it
6694c87aefeSPatrick Mooney 			 * is not active, toggle it on and off to verify that
6704c87aefeSPatrick Mooney 			 * such actions are possible.
6714c87aefeSPatrick Mooney 			 */
6724c87aefeSPatrick Mooney 			if (wce_val != 0) {
6734c87aefeSPatrick Mooney 				wce_val = 0;
6744c87aefeSPatrick Mooney 				/*
6754c87aefeSPatrick Mooney 				 * Inability to disable the cache is a threat
6764c87aefeSPatrick Mooney 				 * to data durability.
6774c87aefeSPatrick Mooney 				 */
6784c87aefeSPatrick Mooney 				assert(ioctl(fd, DKIOCSETWCE, &wce_val) == 0);
6794c87aefeSPatrick Mooney 				wce = WCE_IOCTL;
6804c87aefeSPatrick Mooney 			} else {
6814c87aefeSPatrick Mooney 				int r1, r2;
6824c87aefeSPatrick Mooney 
6834c87aefeSPatrick Mooney 				wce_val = 1;
6844c87aefeSPatrick Mooney 				r1 = ioctl(fd, DKIOCSETWCE, &wce_val);
6854c87aefeSPatrick Mooney 				wce_val = 0;
6864c87aefeSPatrick Mooney 				r2 = ioctl(fd, DKIOCSETWCE, &wce_val);
6874c87aefeSPatrick Mooney 
6884c87aefeSPatrick Mooney 				if (r1 == 0 && r2 == 0) {
6894c87aefeSPatrick Mooney 					wce = WCE_IOCTL;
6904c87aefeSPatrick Mooney 				} else {
6914c87aefeSPatrick Mooney 					/*
6924c87aefeSPatrick Mooney 					 * If the cache toggle was not
6934c87aefeSPatrick Mooney 					 * successful, ensure that the cache
6944c87aefeSPatrick Mooney 					 * was not left enabled.
6954c87aefeSPatrick Mooney 					 */
6964c87aefeSPatrick Mooney 					assert(r1 != 0);
6974c87aefeSPatrick Mooney 				}
6984c87aefeSPatrick Mooney 			}
6994c87aefeSPatrick Mooney 		}
700282a8ecbSJason King 
701282a8ecbSJason King 		if (nodelete == 0 && ioctl(fd, DKIOC_CANFREE, &candelete))
702282a8ecbSJason King 			candelete = 0;
703282a8ecbSJason King 
7044c87aefeSPatrick Mooney 	} else {
7054c87aefeSPatrick Mooney 		int flags;
7064c87aefeSPatrick Mooney 
7074c87aefeSPatrick Mooney 		if ((flags = fcntl(fd, F_GETFL)) >= 0) {
7084c87aefeSPatrick Mooney 			flags |= O_DSYNC;
7094c87aefeSPatrick Mooney 			if (fcntl(fd, F_SETFL, flags) != -1) {
7104c87aefeSPatrick Mooney 				wce = WCE_FCNTL;
7114c87aefeSPatrick Mooney 			}
7124c87aefeSPatrick Mooney 		}
713282a8ecbSJason King 
714282a8ecbSJason King 		/*
715282a8ecbSJason King 		 * We don't have a way to discover if a file supports the
716282a8ecbSJason King 		 * FREESP fcntl cmd (other than trying it).  However,
717282a8ecbSJason King 		 * zfs, ufs, tmpfs, and udfs all support the FREESP fcntl cmd.
718282a8ecbSJason King 		 * NFSv3 and NFSv4 also forward the FREESP request
719282a8ecbSJason King 		 * to the server, so we always enable it for file based
720282a8ecbSJason King 		 * volumes. Anyone trying to run volumes on an unsupported
721282a8ecbSJason King 		 * configuration is on their own, and should be prepared
722282a8ecbSJason King 		 * for the requests to fail.
723282a8ecbSJason King 		 */
724282a8ecbSJason King 		if (nodelete == 0)
725282a8ecbSJason King 			candelete = 1;
726bf21cd93STycho Nightingale 	}
727bf21cd93STycho Nightingale #endif
728bf21cd93STycho Nightingale 
7294c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM
7304c87aefeSPatrick Mooney 	if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1)
7314c87aefeSPatrick Mooney 		errx(EX_OSERR, "Unable to apply rights for sandbox");
7324c87aefeSPatrick Mooney #endif
7334c87aefeSPatrick Mooney 
7344c87aefeSPatrick Mooney 	if (ssopt != 0) {
7354c87aefeSPatrick Mooney 		if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
7364c87aefeSPatrick Mooney 		    ssopt > pssopt) {
737154972afSPatrick Mooney 			EPRINTLN("Invalid sector size %d/%d",
7384c87aefeSPatrick Mooney 			    ssopt, pssopt);
7394c87aefeSPatrick Mooney 			goto err;
7404c87aefeSPatrick Mooney 		}
7414c87aefeSPatrick Mooney 
7424c87aefeSPatrick Mooney 		/*
7434c87aefeSPatrick Mooney 		 * Some backend drivers (e.g. cd0, ada0) require that the I/O
7444c87aefeSPatrick Mooney 		 * size be a multiple of the device's sector size.
7454c87aefeSPatrick Mooney 		 *
7464c87aefeSPatrick Mooney 		 * Validate that the emulated sector size complies with this
7474c87aefeSPatrick Mooney 		 * requirement.
7484c87aefeSPatrick Mooney 		 */
7494c87aefeSPatrick Mooney 		if (S_ISCHR(sbuf.st_mode)) {
7504c87aefeSPatrick Mooney 			if (ssopt < sectsz || (ssopt % sectsz) != 0) {
751154972afSPatrick Mooney 				EPRINTLN("Sector size %d incompatible "
752154972afSPatrick Mooney 				    "with underlying device sector size %d",
7534c87aefeSPatrick Mooney 				    ssopt, sectsz);
7544c87aefeSPatrick Mooney 				goto err;
7554c87aefeSPatrick Mooney 			}
7564c87aefeSPatrick Mooney 		}
7574c87aefeSPatrick Mooney 
7584c87aefeSPatrick Mooney 		sectsz = ssopt;
7594c87aefeSPatrick Mooney 		psectsz = pssopt;
7604c87aefeSPatrick Mooney 		psectoff = 0;
7614c87aefeSPatrick Mooney 	}
7624c87aefeSPatrick Mooney 
763bf21cd93STycho Nightingale 	bc = calloc(1, sizeof(struct blockif_ctxt));
764bf21cd93STycho Nightingale 	if (bc == NULL) {
7654c87aefeSPatrick Mooney 		perror("calloc");
7664c87aefeSPatrick Mooney 		goto err;
767bf21cd93STycho Nightingale 	}
768bf21cd93STycho Nightingale 
769bf21cd93STycho Nightingale 	bc->bc_magic = BLOCKIF_SIG;
770bf21cd93STycho Nightingale 	bc->bc_fd = fd;
7714c87aefeSPatrick Mooney 	bc->bc_ischr = S_ISCHR(sbuf.st_mode);
7724c87aefeSPatrick Mooney 	bc->bc_isgeom = geom;
7734c87aefeSPatrick Mooney 	bc->bc_candelete = candelete;
7744c87aefeSPatrick Mooney #ifndef __FreeBSD__
7754c87aefeSPatrick Mooney 	bc->bc_wce = wce;
7764c87aefeSPatrick Mooney #endif
777bf21cd93STycho Nightingale 	bc->bc_rdonly = ro;
778bf21cd93STycho Nightingale 	bc->bc_size = size;
779bf21cd93STycho Nightingale 	bc->bc_sectsz = sectsz;
7804c87aefeSPatrick Mooney 	bc->bc_psectsz = psectsz;
7814c87aefeSPatrick Mooney 	bc->bc_psectoff = psectoff;
782bf21cd93STycho Nightingale 	pthread_mutex_init(&bc->bc_mtx, NULL);
783bf21cd93STycho Nightingale 	pthread_cond_init(&bc->bc_cond, NULL);
784bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_freeq);
785bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_pendq);
786bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_busyq);
787bf21cd93STycho Nightingale 	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
788bf21cd93STycho Nightingale 		bc->bc_reqs[i].be_status = BST_FREE;
789bf21cd93STycho Nightingale 		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
790bf21cd93STycho Nightingale 	}
791bf21cd93STycho Nightingale 
7924c87aefeSPatrick Mooney 	for (i = 0; i < BLOCKIF_NUMTHR; i++) {
7934c87aefeSPatrick Mooney 		pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
7944c87aefeSPatrick Mooney 		snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
7954c87aefeSPatrick Mooney 		pthread_set_name_np(bc->bc_btid[i], tname);
7964c87aefeSPatrick Mooney 	}
797bf21cd93STycho Nightingale 
798bf21cd93STycho Nightingale 	return (bc);
7994c87aefeSPatrick Mooney err:
8004c87aefeSPatrick Mooney 	if (fd >= 0)
8014c87aefeSPatrick Mooney 		close(fd);
8024c87aefeSPatrick Mooney 	return (NULL);
803bf21cd93STycho Nightingale }
804bf21cd93STycho Nightingale 
805b0de25cbSAndy Fiddaman static void
806b0de25cbSAndy Fiddaman blockif_resized(int fd, enum ev_type type, void *arg)
807b0de25cbSAndy Fiddaman {
808b0de25cbSAndy Fiddaman 	struct blockif_ctxt *bc;
809b0de25cbSAndy Fiddaman 	struct stat sb;
810b0de25cbSAndy Fiddaman 
811b0de25cbSAndy Fiddaman 	if (fstat(fd, &sb) != 0)
812b0de25cbSAndy Fiddaman 		return;
813b0de25cbSAndy Fiddaman 
814b0de25cbSAndy Fiddaman 	bc = arg;
815b0de25cbSAndy Fiddaman 	pthread_mutex_lock(&bc->bc_mtx);
816b0de25cbSAndy Fiddaman 	if (sb.st_size != bc->bc_size) {
817b0de25cbSAndy Fiddaman 		bc->bc_size = sb.st_size;
818b0de25cbSAndy Fiddaman 		bc->bc_resize_cb(bc, bc->bc_resize_cb_arg, bc->bc_size);
819b0de25cbSAndy Fiddaman 	}
820b0de25cbSAndy Fiddaman 	pthread_mutex_unlock(&bc->bc_mtx);
821b0de25cbSAndy Fiddaman }
822b0de25cbSAndy Fiddaman 
823b0de25cbSAndy Fiddaman int
824b0de25cbSAndy Fiddaman blockif_register_resize_callback(struct blockif_ctxt *bc, blockif_resize_cb *cb,
825b0de25cbSAndy Fiddaman     void *cb_arg)
826b0de25cbSAndy Fiddaman {
827b0de25cbSAndy Fiddaman 	struct stat sb;
828b0de25cbSAndy Fiddaman 	int err;
829b0de25cbSAndy Fiddaman #ifndef __FreeBSD__
830b0de25cbSAndy Fiddaman 	err = 0;
831b0de25cbSAndy Fiddaman #endif
832b0de25cbSAndy Fiddaman 
833b0de25cbSAndy Fiddaman 	if (cb == NULL)
834b0de25cbSAndy Fiddaman 		return (EINVAL);
835b0de25cbSAndy Fiddaman 
836b0de25cbSAndy Fiddaman 	pthread_mutex_lock(&bc->bc_mtx);
837b0de25cbSAndy Fiddaman 	if (bc->bc_resize_cb != NULL) {
838b0de25cbSAndy Fiddaman 		err = EBUSY;
839b0de25cbSAndy Fiddaman 		goto out;
840b0de25cbSAndy Fiddaman 	}
841b0de25cbSAndy Fiddaman 
842b0de25cbSAndy Fiddaman 	assert(bc->bc_closing == 0);
843b0de25cbSAndy Fiddaman 
844b0de25cbSAndy Fiddaman 	if (fstat(bc->bc_fd, &sb) != 0) {
845b0de25cbSAndy Fiddaman 		err = errno;
846b0de25cbSAndy Fiddaman 		goto out;
847b0de25cbSAndy Fiddaman 	}
848b0de25cbSAndy Fiddaman 
849b0de25cbSAndy Fiddaman 	bc->bc_resize_event = mevent_add_flags(bc->bc_fd, EVF_VNODE,
850b0de25cbSAndy Fiddaman 	    EVFF_ATTRIB, blockif_resized, bc);
851b0de25cbSAndy Fiddaman 	if (bc->bc_resize_event == NULL) {
852b0de25cbSAndy Fiddaman 		err = ENXIO;
853b0de25cbSAndy Fiddaman 		goto out;
854b0de25cbSAndy Fiddaman 	}
855b0de25cbSAndy Fiddaman 
856b0de25cbSAndy Fiddaman 	bc->bc_resize_cb = cb;
857b0de25cbSAndy Fiddaman 	bc->bc_resize_cb_arg = cb_arg;
858b0de25cbSAndy Fiddaman out:
859b0de25cbSAndy Fiddaman 	pthread_mutex_unlock(&bc->bc_mtx);
860b0de25cbSAndy Fiddaman 
861b0de25cbSAndy Fiddaman 	return (err);
862b0de25cbSAndy Fiddaman }
863b0de25cbSAndy Fiddaman 
864bf21cd93STycho Nightingale static int
865bf21cd93STycho Nightingale blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
866bf21cd93STycho Nightingale 		enum blockop op)
867bf21cd93STycho Nightingale {
868bf21cd93STycho Nightingale 	int err;
869bf21cd93STycho Nightingale 
870bf21cd93STycho Nightingale 	err = 0;
871bf21cd93STycho Nightingale 
872bf21cd93STycho Nightingale 	pthread_mutex_lock(&bc->bc_mtx);
8734c87aefeSPatrick Mooney 	if (!TAILQ_EMPTY(&bc->bc_freeq)) {
874bf21cd93STycho Nightingale 		/*
875bf21cd93STycho Nightingale 		 * Enqueue and inform the block i/o thread
876bf21cd93STycho Nightingale 		 * that there is work available
877bf21cd93STycho Nightingale 		 */
8784c87aefeSPatrick Mooney 		if (blockif_enqueue(bc, breq, op))
879bf21cd93STycho Nightingale 			pthread_cond_signal(&bc->bc_cond);
880bf21cd93STycho Nightingale 	} else {
881bf21cd93STycho Nightingale 		/*
882bf21cd93STycho Nightingale 		 * Callers are not allowed to enqueue more than
883bf21cd93STycho Nightingale 		 * the specified blockif queue limit. Return an
884bf21cd93STycho Nightingale 		 * error to indicate that the queue length has been
885bf21cd93STycho Nightingale 		 * exceeded.
886bf21cd93STycho Nightingale 		 */
887bf21cd93STycho Nightingale 		err = E2BIG;
888bf21cd93STycho Nightingale 	}
889bf21cd93STycho Nightingale 	pthread_mutex_unlock(&bc->bc_mtx);
890bf21cd93STycho Nightingale 
891bf21cd93STycho Nightingale 	return (err);
892bf21cd93STycho Nightingale }
893bf21cd93STycho Nightingale 
894bf21cd93STycho Nightingale int
895bf21cd93STycho Nightingale blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
896bf21cd93STycho Nightingale {
897bf21cd93STycho Nightingale 
898bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
899bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_READ));
900bf21cd93STycho Nightingale }
901bf21cd93STycho Nightingale 
902bf21cd93STycho Nightingale int
903bf21cd93STycho Nightingale blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
904bf21cd93STycho Nightingale {
905bf21cd93STycho Nightingale 
906bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
907bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_WRITE));
908bf21cd93STycho Nightingale }
909bf21cd93STycho Nightingale 
910bf21cd93STycho Nightingale int
911bf21cd93STycho Nightingale blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
912bf21cd93STycho Nightingale {
913bf21cd93STycho Nightingale 
914bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
915bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_FLUSH));
916bf21cd93STycho Nightingale }
917bf21cd93STycho Nightingale 
918bf21cd93STycho Nightingale int
9194c87aefeSPatrick Mooney blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
9204c87aefeSPatrick Mooney {
9214c87aefeSPatrick Mooney 
9224c87aefeSPatrick Mooney 	assert(bc->bc_magic == BLOCKIF_SIG);
9234c87aefeSPatrick Mooney 	return (blockif_request(bc, breq, BOP_DELETE));
9244c87aefeSPatrick Mooney }
9254c87aefeSPatrick Mooney 
9264c87aefeSPatrick Mooney int
927bf21cd93STycho Nightingale blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
928bf21cd93STycho Nightingale {
929bf21cd93STycho Nightingale 	struct blockif_elem *be;
930bf21cd93STycho Nightingale 
931bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
932bf21cd93STycho Nightingale 
933bf21cd93STycho Nightingale 	pthread_mutex_lock(&bc->bc_mtx);
934bf21cd93STycho Nightingale 	/*
935bf21cd93STycho Nightingale 	 * Check pending requests.
936bf21cd93STycho Nightingale 	 */
937bf21cd93STycho Nightingale 	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
938bf21cd93STycho Nightingale 		if (be->be_req == breq)
939bf21cd93STycho Nightingale 			break;
940bf21cd93STycho Nightingale 	}
941bf21cd93STycho Nightingale 	if (be != NULL) {
942bf21cd93STycho Nightingale 		/*
943bf21cd93STycho Nightingale 		 * Found it.
944bf21cd93STycho Nightingale 		 */
9454c87aefeSPatrick Mooney 		blockif_complete(bc, be);
946bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bc->bc_mtx);
947bf21cd93STycho Nightingale 
948bf21cd93STycho Nightingale 		return (0);
949bf21cd93STycho Nightingale 	}
950bf21cd93STycho Nightingale 
951bf21cd93STycho Nightingale 	/*
952bf21cd93STycho Nightingale 	 * Check in-flight requests.
953bf21cd93STycho Nightingale 	 */
954bf21cd93STycho Nightingale 	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
955bf21cd93STycho Nightingale 		if (be->be_req == breq)
956bf21cd93STycho Nightingale 			break;
957bf21cd93STycho Nightingale 	}
958bf21cd93STycho Nightingale 	if (be == NULL) {
959bf21cd93STycho Nightingale 		/*
960bf21cd93STycho Nightingale 		 * Didn't find it.
961bf21cd93STycho Nightingale 		 */
962bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bc->bc_mtx);
963bf21cd93STycho Nightingale 		return (EINVAL);
964bf21cd93STycho Nightingale 	}
965bf21cd93STycho Nightingale 
966bf21cd93STycho Nightingale 	/*
967bf21cd93STycho Nightingale 	 * Interrupt the processing thread to force it return
968bf21cd93STycho Nightingale 	 * prematurely via it's normal callback path.
969bf21cd93STycho Nightingale 	 */
970bf21cd93STycho Nightingale 	while (be->be_status == BST_BUSY) {
971bf21cd93STycho Nightingale 		struct blockif_sig_elem bse, *old_head;
972bf21cd93STycho Nightingale 
973bf21cd93STycho Nightingale 		pthread_mutex_init(&bse.bse_mtx, NULL);
974bf21cd93STycho Nightingale 		pthread_cond_init(&bse.bse_cond, NULL);
975bf21cd93STycho Nightingale 
976bf21cd93STycho Nightingale 		bse.bse_pending = 1;
977bf21cd93STycho Nightingale 
978bf21cd93STycho Nightingale 		do {
979bf21cd93STycho Nightingale 			old_head = blockif_bse_head;
980bf21cd93STycho Nightingale 			bse.bse_next = old_head;
981bf21cd93STycho Nightingale 		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
982bf21cd93STycho Nightingale 					    (uintptr_t)old_head,
983bf21cd93STycho Nightingale 					    (uintptr_t)&bse));
984bf21cd93STycho Nightingale 
985bf21cd93STycho Nightingale 		pthread_kill(be->be_tid, SIGCONT);
986bf21cd93STycho Nightingale 
987bf21cd93STycho Nightingale 		pthread_mutex_lock(&bse.bse_mtx);
988bf21cd93STycho Nightingale 		while (bse.bse_pending)
989bf21cd93STycho Nightingale 			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
990bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bse.bse_mtx);
991bf21cd93STycho Nightingale 	}
992bf21cd93STycho Nightingale 
993bf21cd93STycho Nightingale 	pthread_mutex_unlock(&bc->bc_mtx);
994bf21cd93STycho Nightingale 
995bf21cd93STycho Nightingale 	/*
996bf21cd93STycho Nightingale 	 * The processing thread has been interrupted.  Since it's not
997bf21cd93STycho Nightingale 	 * clear if the callback has been invoked yet, return EBUSY.
998bf21cd93STycho Nightingale 	 */
999bf21cd93STycho Nightingale 	return (EBUSY);
1000bf21cd93STycho Nightingale }
1001bf21cd93STycho Nightingale 
1002bf21cd93STycho Nightingale int
1003bf21cd93STycho Nightingale blockif_close(struct blockif_ctxt *bc)
1004bf21cd93STycho Nightingale {
1005bf21cd93STycho Nightingale 	void *jval;
10064c87aefeSPatrick Mooney 	int i;
1007bf21cd93STycho Nightingale 
1008bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1009bf21cd93STycho Nightingale 
1010bf21cd93STycho Nightingale 	/*
1011bf21cd93STycho Nightingale 	 * Stop the block i/o thread
1012bf21cd93STycho Nightingale 	 */
10134c87aefeSPatrick Mooney 	pthread_mutex_lock(&bc->bc_mtx);
1014bf21cd93STycho Nightingale 	bc->bc_closing = 1;
1015b0de25cbSAndy Fiddaman 	if (bc->bc_resize_event != NULL)
1016b0de25cbSAndy Fiddaman 		mevent_disable(bc->bc_resize_event);
10174c87aefeSPatrick Mooney 	pthread_mutex_unlock(&bc->bc_mtx);
10184c87aefeSPatrick Mooney 	pthread_cond_broadcast(&bc->bc_cond);
10194c87aefeSPatrick Mooney 	for (i = 0; i < BLOCKIF_NUMTHR; i++)
10204c87aefeSPatrick Mooney 		pthread_join(bc->bc_btid[i], &jval);
1021bf21cd93STycho Nightingale 
1022bf21cd93STycho Nightingale 	/* XXX Cancel queued i/o's ??? */
1023bf21cd93STycho Nightingale 
1024bf21cd93STycho Nightingale 	/*
1025bf21cd93STycho Nightingale 	 * Release resources
1026bf21cd93STycho Nightingale 	 */
1027bf21cd93STycho Nightingale 	bc->bc_magic = 0;
1028bf21cd93STycho Nightingale 	close(bc->bc_fd);
1029bf21cd93STycho Nightingale 	free(bc);
1030bf21cd93STycho Nightingale 
1031bf21cd93STycho Nightingale 	return (0);
1032bf21cd93STycho Nightingale }
1033bf21cd93STycho Nightingale 
1034bf21cd93STycho Nightingale /*
1035bf21cd93STycho Nightingale  * Return virtual C/H/S values for a given block. Use the algorithm
1036bf21cd93STycho Nightingale  * outlined in the VHD specification to calculate values.
1037bf21cd93STycho Nightingale  */
1038bf21cd93STycho Nightingale void
1039bf21cd93STycho Nightingale blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
1040bf21cd93STycho Nightingale {
1041bf21cd93STycho Nightingale 	off_t sectors;		/* total sectors of the block dev */
1042bf21cd93STycho Nightingale 	off_t hcyl;		/* cylinders times heads */
1043bf21cd93STycho Nightingale 	uint16_t secpt;		/* sectors per track */
1044bf21cd93STycho Nightingale 	uint8_t heads;
1045bf21cd93STycho Nightingale 
1046bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1047bf21cd93STycho Nightingale 
1048bf21cd93STycho Nightingale 	sectors = bc->bc_size / bc->bc_sectsz;
1049bf21cd93STycho Nightingale 
1050bf21cd93STycho Nightingale 	/* Clamp the size to the largest possible with CHS */
1051bf21cd93STycho Nightingale 	if (sectors > 65535UL*16*255)
1052bf21cd93STycho Nightingale 		sectors = 65535UL*16*255;
1053bf21cd93STycho Nightingale 
1054bf21cd93STycho Nightingale 	if (sectors >= 65536UL*16*63) {
1055bf21cd93STycho Nightingale 		secpt = 255;
1056bf21cd93STycho Nightingale 		heads = 16;
1057bf21cd93STycho Nightingale 		hcyl = sectors / secpt;
1058bf21cd93STycho Nightingale 	} else {
1059bf21cd93STycho Nightingale 		secpt = 17;
1060bf21cd93STycho Nightingale 		hcyl = sectors / secpt;
1061bf21cd93STycho Nightingale 		heads = (hcyl + 1023) / 1024;
1062bf21cd93STycho Nightingale 
1063bf21cd93STycho Nightingale 		if (heads < 4)
1064bf21cd93STycho Nightingale 			heads = 4;
1065bf21cd93STycho Nightingale 
1066bf21cd93STycho Nightingale 		if (hcyl >= (heads * 1024) || heads > 16) {
1067bf21cd93STycho Nightingale 			secpt = 31;
1068bf21cd93STycho Nightingale 			heads = 16;
1069bf21cd93STycho Nightingale 			hcyl = sectors / secpt;
1070bf21cd93STycho Nightingale 		}
1071bf21cd93STycho Nightingale 		if (hcyl >= (heads * 1024)) {
1072bf21cd93STycho Nightingale 			secpt = 63;
1073bf21cd93STycho Nightingale 			heads = 16;
1074bf21cd93STycho Nightingale 			hcyl = sectors / secpt;
1075bf21cd93STycho Nightingale 		}
1076bf21cd93STycho Nightingale 	}
1077bf21cd93STycho Nightingale 
1078bf21cd93STycho Nightingale 	*c = hcyl / heads;
1079bf21cd93STycho Nightingale 	*h = heads;
1080bf21cd93STycho Nightingale 	*s = secpt;
1081bf21cd93STycho Nightingale }
1082bf21cd93STycho Nightingale 
1083bf21cd93STycho Nightingale /*
1084bf21cd93STycho Nightingale  * Accessors
1085bf21cd93STycho Nightingale  */
1086bf21cd93STycho Nightingale off_t
1087bf21cd93STycho Nightingale blockif_size(struct blockif_ctxt *bc)
1088bf21cd93STycho Nightingale {
1089bf21cd93STycho Nightingale 
1090bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1091bf21cd93STycho Nightingale 	return (bc->bc_size);
1092bf21cd93STycho Nightingale }
1093bf21cd93STycho Nightingale 
1094bf21cd93STycho Nightingale int
1095bf21cd93STycho Nightingale blockif_sectsz(struct blockif_ctxt *bc)
1096bf21cd93STycho Nightingale {
1097bf21cd93STycho Nightingale 
1098bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1099bf21cd93STycho Nightingale 	return (bc->bc_sectsz);
1100bf21cd93STycho Nightingale }
1101bf21cd93STycho Nightingale 
11024c87aefeSPatrick Mooney void
11034c87aefeSPatrick Mooney blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
11044c87aefeSPatrick Mooney {
11054c87aefeSPatrick Mooney 
11064c87aefeSPatrick Mooney 	assert(bc->bc_magic == BLOCKIF_SIG);
11074c87aefeSPatrick Mooney 	*size = bc->bc_psectsz;
11084c87aefeSPatrick Mooney 	*off = bc->bc_psectoff;
11094c87aefeSPatrick Mooney }
11104c87aefeSPatrick Mooney 
1111bf21cd93STycho Nightingale int
1112bf21cd93STycho Nightingale blockif_queuesz(struct blockif_ctxt *bc)
1113bf21cd93STycho Nightingale {
1114bf21cd93STycho Nightingale 
1115bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1116bf21cd93STycho Nightingale 	return (BLOCKIF_MAXREQ - 1);
1117bf21cd93STycho Nightingale }
1118bf21cd93STycho Nightingale 
1119bf21cd93STycho Nightingale int
1120bf21cd93STycho Nightingale blockif_is_ro(struct blockif_ctxt *bc)
1121bf21cd93STycho Nightingale {
1122bf21cd93STycho Nightingale 
1123bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
1124bf21cd93STycho Nightingale 	return (bc->bc_rdonly);
1125bf21cd93STycho Nightingale }
11264c87aefeSPatrick Mooney 
11274c87aefeSPatrick Mooney int
11284c87aefeSPatrick Mooney blockif_candelete(struct blockif_ctxt *bc)
11294c87aefeSPatrick Mooney {
11304c87aefeSPatrick Mooney 
11314c87aefeSPatrick Mooney 	assert(bc->bc_magic == BLOCKIF_SIG);
11324c87aefeSPatrick Mooney 	return (bc->bc_candelete);
11334c87aefeSPatrick Mooney }
11344c87aefeSPatrick Mooney 
11354c87aefeSPatrick Mooney #ifndef __FreeBSD__
11364c87aefeSPatrick Mooney int
11374c87aefeSPatrick Mooney blockif_set_wce(struct blockif_ctxt *bc, int wc_enable)
11384c87aefeSPatrick Mooney {
11394c87aefeSPatrick Mooney 	int res = 0, flags;
11404c87aefeSPatrick Mooney 	int clean_val = (wc_enable != 0) ? 1 : 0;
11414c87aefeSPatrick Mooney 
11424c87aefeSPatrick Mooney 	(void) pthread_mutex_lock(&bc->bc_mtx);
11434c87aefeSPatrick Mooney 	switch (bc->bc_wce) {
11444c87aefeSPatrick Mooney 	case WCE_IOCTL:
11454c87aefeSPatrick Mooney 		res = ioctl(bc->bc_fd, DKIOCSETWCE, &clean_val);
11464c87aefeSPatrick Mooney 		break;
11474c87aefeSPatrick Mooney 	case WCE_FCNTL:
11484c87aefeSPatrick Mooney 		if ((flags = fcntl(bc->bc_fd, F_GETFL)) >= 0) {
11494c87aefeSPatrick Mooney 			if (wc_enable == 0) {
11504c87aefeSPatrick Mooney 				flags |= O_DSYNC;
11514c87aefeSPatrick Mooney 			} else {
11524c87aefeSPatrick Mooney 				flags &= ~O_DSYNC;
11534c87aefeSPatrick Mooney 			}
11544c87aefeSPatrick Mooney 			if (fcntl(bc->bc_fd, F_SETFL, flags) == -1) {
11554c87aefeSPatrick Mooney 				res = -1;
11564c87aefeSPatrick Mooney 			}
11574c87aefeSPatrick Mooney 		} else {
11584c87aefeSPatrick Mooney 			res = -1;
11594c87aefeSPatrick Mooney 		}
11604c87aefeSPatrick Mooney 		break;
11614c87aefeSPatrick Mooney 	default:
11624c87aefeSPatrick Mooney 		break;
11634c87aefeSPatrick Mooney 	}
11644c87aefeSPatrick Mooney 
11654c87aefeSPatrick Mooney 	/*
11664c87aefeSPatrick Mooney 	 * After a successful disable of the write cache, ensure that any
11674c87aefeSPatrick Mooney 	 * lingering data in the cache is synced out.
11684c87aefeSPatrick Mooney 	 */
11694c87aefeSPatrick Mooney 	if (res == 0 && wc_enable == 0) {
11704c87aefeSPatrick Mooney 		res = fsync(bc->bc_fd);
11714c87aefeSPatrick Mooney 	}
11724c87aefeSPatrick Mooney 	(void) pthread_mutex_unlock(&bc->bc_mtx);
11734c87aefeSPatrick Mooney 
11744c87aefeSPatrick Mooney 	return (res);
11754c87aefeSPatrick Mooney }
11764c87aefeSPatrick Mooney #endif /* __FreeBSD__ */
1177