1bf21cd93STycho Nightingale /*- 2*4c87aefeSPatrick Mooney * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3*4c87aefeSPatrick Mooney * 4bf21cd93STycho Nightingale * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 5bf21cd93STycho Nightingale * All rights reserved. 6bf21cd93STycho Nightingale * 7bf21cd93STycho Nightingale * Redistribution and use in source and binary forms, with or without 8bf21cd93STycho Nightingale * modification, are permitted provided that the following conditions 9bf21cd93STycho Nightingale * are met: 10bf21cd93STycho Nightingale * 1. Redistributions of source code must retain the above copyright 11bf21cd93STycho Nightingale * notice, this list of conditions and the following disclaimer. 12bf21cd93STycho Nightingale * 2. Redistributions in binary form must reproduce the above copyright 13bf21cd93STycho Nightingale * notice, this list of conditions and the following disclaimer in the 14bf21cd93STycho Nightingale * documentation and/or other materials provided with the distribution. 15bf21cd93STycho Nightingale * 16bf21cd93STycho Nightingale * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 17bf21cd93STycho Nightingale * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18bf21cd93STycho Nightingale * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19bf21cd93STycho Nightingale * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20bf21cd93STycho Nightingale * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21bf21cd93STycho Nightingale * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22bf21cd93STycho Nightingale * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23bf21cd93STycho Nightingale * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24bf21cd93STycho Nightingale * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25bf21cd93STycho Nightingale * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26bf21cd93STycho Nightingale * SUCH DAMAGE. 27bf21cd93STycho Nightingale * 28*4c87aefeSPatrick Mooney * $FreeBSD$ 29*4c87aefeSPatrick Mooney */ 30*4c87aefeSPatrick Mooney 31*4c87aefeSPatrick Mooney /* 32*4c87aefeSPatrick Mooney * Copyright 2018 Joyent, Inc. 33bf21cd93STycho Nightingale */ 34bf21cd93STycho Nightingale 35bf21cd93STycho Nightingale #include <sys/cdefs.h> 36*4c87aefeSPatrick Mooney __FBSDID("$FreeBSD$"); 37bf21cd93STycho Nightingale 38bf21cd93STycho Nightingale #include <sys/param.h> 39*4c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 40*4c87aefeSPatrick Mooney #include <sys/capsicum.h> 41*4c87aefeSPatrick Mooney #endif 42bf21cd93STycho Nightingale #include <sys/queue.h> 43bf21cd93STycho Nightingale #include <sys/errno.h> 44bf21cd93STycho Nightingale #include <sys/stat.h> 45bf21cd93STycho Nightingale #include <sys/ioctl.h> 46bf21cd93STycho Nightingale #include <sys/disk.h> 47*4c87aefeSPatrick Mooney #include <sys/limits.h> 48*4c87aefeSPatrick Mooney #include <sys/uio.h> 49*4c87aefeSPatrick Mooney #ifndef __FreeBSD__ 50*4c87aefeSPatrick Mooney #include <sys/dkio.h> 51*4c87aefeSPatrick Mooney #endif 52bf21cd93STycho Nightingale 53bf21cd93STycho Nightingale #include <assert.h> 54*4c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 55*4c87aefeSPatrick Mooney #include <capsicum_helpers.h> 56*4c87aefeSPatrick Mooney #endif 57*4c87aefeSPatrick Mooney #include <err.h> 58bf21cd93STycho Nightingale #include <fcntl.h> 59bf21cd93STycho Nightingale #include <stdio.h> 60bf21cd93STycho Nightingale #include <stdlib.h> 61bf21cd93STycho Nightingale #include <string.h> 62bf21cd93STycho Nightingale #include <pthread.h> 63bf21cd93STycho Nightingale #include <pthread_np.h> 64bf21cd93STycho Nightingale #include <signal.h> 65*4c87aefeSPatrick Mooney #include <sysexits.h> 66bf21cd93STycho Nightingale #include <unistd.h> 67bf21cd93STycho Nightingale 68bf21cd93STycho Nightingale #include <machine/atomic.h> 69bf21cd93STycho Nightingale 70bf21cd93STycho Nightingale #include "bhyverun.h" 71bf21cd93STycho Nightingale #ifdef __FreeBSD__ 72bf21cd93STycho Nightingale #include "mevent.h" 73bf21cd93STycho Nightingale #endif 74bf21cd93STycho Nightingale #include "block_if.h" 75bf21cd93STycho Nightingale 76bf21cd93STycho Nightingale #define BLOCKIF_SIG 0xb109b109 77bf21cd93STycho Nightingale 78*4c87aefeSPatrick Mooney #ifdef __FreeBSD__ 79*4c87aefeSPatrick Mooney #define BLOCKIF_NUMTHR 8 80*4c87aefeSPatrick Mooney #else 81*4c87aefeSPatrick Mooney /* Enlarge to keep pace with the virtio-block ring size */ 82*4c87aefeSPatrick Mooney #define BLOCKIF_NUMTHR 16 83*4c87aefeSPatrick Mooney #endif 84*4c87aefeSPatrick Mooney #define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) 85bf21cd93STycho Nightingale 86bf21cd93STycho Nightingale enum blockop { 87bf21cd93STycho Nightingale BOP_READ, 88bf21cd93STycho Nightingale BOP_WRITE, 89*4c87aefeSPatrick Mooney #ifndef __FreeBSD__ 90*4c87aefeSPatrick Mooney BOP_WRITE_SYNC, 91*4c87aefeSPatrick Mooney #endif 92*4c87aefeSPatrick Mooney BOP_FLUSH, 93*4c87aefeSPatrick Mooney BOP_DELETE 94bf21cd93STycho Nightingale }; 95bf21cd93STycho Nightingale 96bf21cd93STycho Nightingale enum blockstat { 97bf21cd93STycho Nightingale BST_FREE, 98*4c87aefeSPatrick Mooney BST_BLOCK, 99bf21cd93STycho Nightingale BST_PEND, 100bf21cd93STycho Nightingale BST_BUSY, 101bf21cd93STycho Nightingale BST_DONE 102bf21cd93STycho Nightingale }; 103bf21cd93STycho Nightingale 104bf21cd93STycho Nightingale struct blockif_elem { 105bf21cd93STycho Nightingale TAILQ_ENTRY(blockif_elem) be_link; 106bf21cd93STycho Nightingale struct blockif_req *be_req; 107bf21cd93STycho Nightingale enum blockop be_op; 108bf21cd93STycho Nightingale enum blockstat be_status; 109bf21cd93STycho Nightingale pthread_t be_tid; 110*4c87aefeSPatrick Mooney off_t be_block; 111bf21cd93STycho Nightingale }; 112bf21cd93STycho Nightingale 113*4c87aefeSPatrick Mooney #ifndef __FreeBSD__ 114*4c87aefeSPatrick Mooney enum blockif_wce { 115*4c87aefeSPatrick Mooney WCE_NONE = 0, 116*4c87aefeSPatrick Mooney WCE_IOCTL, 117*4c87aefeSPatrick Mooney WCE_FCNTL 118*4c87aefeSPatrick Mooney }; 119*4c87aefeSPatrick Mooney #endif 120*4c87aefeSPatrick Mooney 121bf21cd93STycho Nightingale struct blockif_ctxt { 122bf21cd93STycho Nightingale int bc_magic; 123bf21cd93STycho Nightingale int bc_fd; 124*4c87aefeSPatrick Mooney int bc_ischr; 125*4c87aefeSPatrick Mooney int bc_isgeom; 126*4c87aefeSPatrick Mooney int bc_candelete; 127*4c87aefeSPatrick Mooney #ifndef __FreeBSD__ 128*4c87aefeSPatrick Mooney enum blockif_wce bc_wce; 129*4c87aefeSPatrick Mooney #endif 130bf21cd93STycho Nightingale int bc_rdonly; 131bf21cd93STycho Nightingale off_t bc_size; 132bf21cd93STycho Nightingale int bc_sectsz; 133*4c87aefeSPatrick Mooney int bc_psectsz; 134*4c87aefeSPatrick Mooney int bc_psectoff; 135*4c87aefeSPatrick Mooney int bc_closing; 136*4c87aefeSPatrick Mooney pthread_t bc_btid[BLOCKIF_NUMTHR]; 137bf21cd93STycho Nightingale pthread_mutex_t bc_mtx; 138bf21cd93STycho Nightingale pthread_cond_t bc_cond; 139bf21cd93STycho Nightingale 140bf21cd93STycho Nightingale /* Request elements and free/pending/busy queues */ 141bf21cd93STycho Nightingale TAILQ_HEAD(, blockif_elem) bc_freeq; 142bf21cd93STycho Nightingale TAILQ_HEAD(, blockif_elem) bc_pendq; 143bf21cd93STycho Nightingale TAILQ_HEAD(, blockif_elem) bc_busyq; 144bf21cd93STycho Nightingale struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 145bf21cd93STycho Nightingale }; 146bf21cd93STycho Nightingale 147bf21cd93STycho Nightingale static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; 148bf21cd93STycho Nightingale 149bf21cd93STycho Nightingale struct blockif_sig_elem { 150bf21cd93STycho Nightingale pthread_mutex_t bse_mtx; 151bf21cd93STycho Nightingale pthread_cond_t bse_cond; 152bf21cd93STycho Nightingale int bse_pending; 153bf21cd93STycho Nightingale struct blockif_sig_elem *bse_next; 154bf21cd93STycho Nightingale }; 155bf21cd93STycho Nightingale 156bf21cd93STycho Nightingale static struct blockif_sig_elem *blockif_bse_head; 157bf21cd93STycho Nightingale 158bf21cd93STycho Nightingale static int 159bf21cd93STycho Nightingale blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 160bf21cd93STycho Nightingale enum blockop op) 161bf21cd93STycho Nightingale { 162*4c87aefeSPatrick Mooney struct blockif_elem *be, *tbe; 163*4c87aefeSPatrick Mooney off_t off; 164*4c87aefeSPatrick Mooney int i; 165bf21cd93STycho Nightingale 166bf21cd93STycho Nightingale be = TAILQ_FIRST(&bc->bc_freeq); 167bf21cd93STycho Nightingale assert(be != NULL); 168bf21cd93STycho Nightingale assert(be->be_status == BST_FREE); 169bf21cd93STycho Nightingale TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 170bf21cd93STycho Nightingale be->be_req = breq; 171bf21cd93STycho Nightingale be->be_op = op; 172*4c87aefeSPatrick Mooney switch (op) { 173*4c87aefeSPatrick Mooney case BOP_READ: 174*4c87aefeSPatrick Mooney case BOP_WRITE: 175*4c87aefeSPatrick Mooney #ifndef __FreeBSD__ 176*4c87aefeSPatrick Mooney case BOP_WRITE_SYNC: 177*4c87aefeSPatrick Mooney #endif 178*4c87aefeSPatrick Mooney case BOP_DELETE: 179*4c87aefeSPatrick Mooney off = breq->br_offset; 180*4c87aefeSPatrick Mooney for (i = 0; i < breq->br_iovcnt; i++) 181*4c87aefeSPatrick Mooney off += breq->br_iov[i].iov_len; 182*4c87aefeSPatrick Mooney break; 183*4c87aefeSPatrick Mooney default: 184*4c87aefeSPatrick Mooney off = OFF_MAX; 185*4c87aefeSPatrick Mooney } 186*4c87aefeSPatrick Mooney be->be_block = off; 187*4c87aefeSPatrick Mooney TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 188*4c87aefeSPatrick Mooney if (tbe->be_block == breq->br_offset) 189*4c87aefeSPatrick Mooney break; 190*4c87aefeSPatrick Mooney } 191*4c87aefeSPatrick Mooney if (tbe == NULL) { 192*4c87aefeSPatrick Mooney TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { 193*4c87aefeSPatrick Mooney if (tbe->be_block == breq->br_offset) 194*4c87aefeSPatrick Mooney break; 195*4c87aefeSPatrick Mooney } 196*4c87aefeSPatrick Mooney } 197*4c87aefeSPatrick Mooney if (tbe == NULL) 198*4c87aefeSPatrick Mooney be->be_status = BST_PEND; 199*4c87aefeSPatrick Mooney else 200*4c87aefeSPatrick Mooney be->be_status = BST_BLOCK; 201bf21cd93STycho Nightingale TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); 202*4c87aefeSPatrick Mooney return (be->be_status == BST_PEND); 203bf21cd93STycho Nightingale } 204bf21cd93STycho Nightingale 205bf21cd93STycho Nightingale static int 206*4c87aefeSPatrick Mooney blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) 207bf21cd93STycho Nightingale { 208bf21cd93STycho Nightingale struct blockif_elem *be; 209bf21cd93STycho Nightingale 210*4c87aefeSPatrick Mooney TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 211*4c87aefeSPatrick Mooney if (be->be_status == BST_PEND) 212*4c87aefeSPatrick Mooney break; 213*4c87aefeSPatrick Mooney assert(be->be_status == BST_BLOCK); 214*4c87aefeSPatrick Mooney } 215*4c87aefeSPatrick Mooney if (be == NULL) 216*4c87aefeSPatrick Mooney return (0); 217bf21cd93STycho Nightingale TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 218bf21cd93STycho Nightingale be->be_status = BST_BUSY; 219*4c87aefeSPatrick Mooney be->be_tid = t; 220bf21cd93STycho Nightingale TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); 221bf21cd93STycho Nightingale *bep = be; 222*4c87aefeSPatrick Mooney return (1); 223bf21cd93STycho Nightingale } 224bf21cd93STycho Nightingale 225bf21cd93STycho Nightingale static void 226bf21cd93STycho Nightingale blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) 227bf21cd93STycho Nightingale { 228*4c87aefeSPatrick Mooney struct blockif_elem *tbe; 229bf21cd93STycho Nightingale 230*4c87aefeSPatrick Mooney if (be->be_status == BST_DONE || be->be_status == BST_BUSY) 231bf21cd93STycho Nightingale TAILQ_REMOVE(&bc->bc_busyq, be, be_link); 232*4c87aefeSPatrick Mooney else 233*4c87aefeSPatrick Mooney TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 234*4c87aefeSPatrick Mooney TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 235*4c87aefeSPatrick Mooney if (tbe->be_req->br_offset == be->be_block) 236*4c87aefeSPatrick Mooney tbe->be_status = BST_PEND; 237*4c87aefeSPatrick Mooney } 238bf21cd93STycho Nightingale be->be_tid = 0; 239bf21cd93STycho Nightingale be->be_status = BST_FREE; 240bf21cd93STycho Nightingale be->be_req = NULL; 241bf21cd93STycho Nightingale TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 242bf21cd93STycho Nightingale } 243bf21cd93STycho Nightingale 244bf21cd93STycho Nightingale static void 245*4c87aefeSPatrick Mooney blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) 246bf21cd93STycho Nightingale { 247bf21cd93STycho Nightingale struct blockif_req *br; 248*4c87aefeSPatrick Mooney #ifdef __FreeBSD__ 249*4c87aefeSPatrick Mooney off_t arg[2]; 250*4c87aefeSPatrick Mooney #endif 251*4c87aefeSPatrick Mooney ssize_t clen, len, off, boff, voff; 252*4c87aefeSPatrick Mooney int i, err; 253bf21cd93STycho Nightingale 254bf21cd93STycho Nightingale br = be->be_req; 255*4c87aefeSPatrick Mooney if (br->br_iovcnt <= 1) 256*4c87aefeSPatrick Mooney buf = NULL; 257bf21cd93STycho Nightingale err = 0; 258bf21cd93STycho Nightingale switch (be->be_op) { 259bf21cd93STycho Nightingale case BOP_READ: 260*4c87aefeSPatrick Mooney if (buf == NULL) { 261*4c87aefeSPatrick Mooney if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 262*4c87aefeSPatrick Mooney br->br_offset)) < 0) 263bf21cd93STycho Nightingale err = errno; 264*4c87aefeSPatrick Mooney else 265*4c87aefeSPatrick Mooney br->br_resid -= len; 266*4c87aefeSPatrick Mooney break; 267*4c87aefeSPatrick Mooney } 268*4c87aefeSPatrick Mooney i = 0; 269*4c87aefeSPatrick Mooney off = voff = 0; 270*4c87aefeSPatrick Mooney while (br->br_resid > 0) { 271*4c87aefeSPatrick Mooney len = MIN(br->br_resid, MAXPHYS); 272*4c87aefeSPatrick Mooney if (pread(bc->bc_fd, buf, len, br->br_offset + 273*4c87aefeSPatrick Mooney off) < 0) { 274*4c87aefeSPatrick Mooney err = errno; 275*4c87aefeSPatrick Mooney break; 276*4c87aefeSPatrick Mooney } 277*4c87aefeSPatrick Mooney boff = 0; 278*4c87aefeSPatrick Mooney do { 279*4c87aefeSPatrick Mooney clen = MIN(len - boff, br->br_iov[i].iov_len - 280*4c87aefeSPatrick Mooney voff); 281*4c87aefeSPatrick Mooney memcpy(br->br_iov[i].iov_base + voff, 282*4c87aefeSPatrick Mooney buf + boff, clen); 283*4c87aefeSPatrick Mooney if (clen < br->br_iov[i].iov_len - voff) 284*4c87aefeSPatrick Mooney voff += clen; 285*4c87aefeSPatrick Mooney else { 286*4c87aefeSPatrick Mooney i++; 287*4c87aefeSPatrick Mooney voff = 0; 288*4c87aefeSPatrick Mooney } 289*4c87aefeSPatrick Mooney boff += clen; 290*4c87aefeSPatrick Mooney } while (boff < len); 291*4c87aefeSPatrick Mooney off += len; 292*4c87aefeSPatrick Mooney br->br_resid -= len; 293*4c87aefeSPatrick Mooney } 294bf21cd93STycho Nightingale break; 295bf21cd93STycho Nightingale case BOP_WRITE: 296*4c87aefeSPatrick Mooney if (bc->bc_rdonly) { 297bf21cd93STycho Nightingale err = EROFS; 298*4c87aefeSPatrick Mooney break; 299*4c87aefeSPatrick Mooney } 300*4c87aefeSPatrick Mooney if (buf == NULL) { 301*4c87aefeSPatrick Mooney if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 302*4c87aefeSPatrick Mooney br->br_offset)) < 0) 303*4c87aefeSPatrick Mooney err = errno; 304*4c87aefeSPatrick Mooney else 305*4c87aefeSPatrick Mooney br->br_resid -= len; 306*4c87aefeSPatrick Mooney break; 307*4c87aefeSPatrick Mooney } 308*4c87aefeSPatrick Mooney i = 0; 309*4c87aefeSPatrick Mooney off = voff = 0; 310*4c87aefeSPatrick Mooney while (br->br_resid > 0) { 311*4c87aefeSPatrick Mooney len = MIN(br->br_resid, MAXPHYS); 312*4c87aefeSPatrick Mooney boff = 0; 313*4c87aefeSPatrick Mooney do { 314*4c87aefeSPatrick Mooney clen = MIN(len - boff, br->br_iov[i].iov_len - 315*4c87aefeSPatrick Mooney voff); 316*4c87aefeSPatrick Mooney memcpy(buf + boff, 317*4c87aefeSPatrick Mooney br->br_iov[i].iov_base + voff, clen); 318*4c87aefeSPatrick Mooney if (clen < br->br_iov[i].iov_len - voff) 319*4c87aefeSPatrick Mooney voff += clen; 320*4c87aefeSPatrick Mooney else { 321*4c87aefeSPatrick Mooney i++; 322*4c87aefeSPatrick Mooney voff = 0; 323*4c87aefeSPatrick Mooney } 324*4c87aefeSPatrick Mooney boff += clen; 325*4c87aefeSPatrick Mooney } while (boff < len); 326*4c87aefeSPatrick Mooney if (pwrite(bc->bc_fd, buf, len, br->br_offset + 327*4c87aefeSPatrick Mooney off) < 0) { 328bf21cd93STycho Nightingale err = errno; 329bf21cd93STycho Nightingale break; 330*4c87aefeSPatrick Mooney } 331*4c87aefeSPatrick Mooney off += len; 332*4c87aefeSPatrick Mooney br->br_resid -= len; 333*4c87aefeSPatrick Mooney } 334*4c87aefeSPatrick Mooney break; 335bf21cd93STycho Nightingale case BOP_FLUSH: 336*4c87aefeSPatrick Mooney #ifdef __FreeBSD__ 337*4c87aefeSPatrick Mooney if (bc->bc_ischr) { 338*4c87aefeSPatrick Mooney if (ioctl(bc->bc_fd, DIOCGFLUSH)) 339*4c87aefeSPatrick Mooney err = errno; 340*4c87aefeSPatrick Mooney } else if (fsync(bc->bc_fd)) 341*4c87aefeSPatrick Mooney err = errno; 342*4c87aefeSPatrick Mooney #else 343*4c87aefeSPatrick Mooney /* 344*4c87aefeSPatrick Mooney * This fsync() should be adequate to flush the cache of a file 345*4c87aefeSPatrick Mooney * or device. In VFS, the VOP_SYNC operation is converted to 346*4c87aefeSPatrick Mooney * the appropriate ioctl in both sdev (for real devices) and 347*4c87aefeSPatrick Mooney * zfs (for zvols). 348*4c87aefeSPatrick Mooney */ 349*4c87aefeSPatrick Mooney if (fsync(bc->bc_fd)) 350*4c87aefeSPatrick Mooney err = errno; 351*4c87aefeSPatrick Mooney #endif 352*4c87aefeSPatrick Mooney break; 353*4c87aefeSPatrick Mooney case BOP_DELETE: 354*4c87aefeSPatrick Mooney if (!bc->bc_candelete) 355*4c87aefeSPatrick Mooney err = EOPNOTSUPP; 356*4c87aefeSPatrick Mooney else if (bc->bc_rdonly) 357*4c87aefeSPatrick Mooney err = EROFS; 358*4c87aefeSPatrick Mooney #ifdef __FreeBSD__ 359*4c87aefeSPatrick Mooney else if (bc->bc_ischr) { 360*4c87aefeSPatrick Mooney arg[0] = br->br_offset; 361*4c87aefeSPatrick Mooney arg[1] = br->br_resid; 362*4c87aefeSPatrick Mooney if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) 363*4c87aefeSPatrick Mooney err = errno; 364*4c87aefeSPatrick Mooney else 365*4c87aefeSPatrick Mooney br->br_resid = 0; 366*4c87aefeSPatrick Mooney } 367*4c87aefeSPatrick Mooney #endif 368*4c87aefeSPatrick Mooney else 369*4c87aefeSPatrick Mooney err = EOPNOTSUPP; 370bf21cd93STycho Nightingale break; 371bf21cd93STycho Nightingale default: 372bf21cd93STycho Nightingale err = EINVAL; 373bf21cd93STycho Nightingale break; 374bf21cd93STycho Nightingale } 375bf21cd93STycho Nightingale 376bf21cd93STycho Nightingale be->be_status = BST_DONE; 377bf21cd93STycho Nightingale 378bf21cd93STycho Nightingale (*br->br_callback)(br, err); 379bf21cd93STycho Nightingale } 380bf21cd93STycho Nightingale 381bf21cd93STycho Nightingale static void * 382bf21cd93STycho Nightingale blockif_thr(void *arg) 383bf21cd93STycho Nightingale { 384bf21cd93STycho Nightingale struct blockif_ctxt *bc; 385bf21cd93STycho Nightingale struct blockif_elem *be; 386*4c87aefeSPatrick Mooney pthread_t t; 387*4c87aefeSPatrick Mooney uint8_t *buf; 388bf21cd93STycho Nightingale 389bf21cd93STycho Nightingale bc = arg; 390*4c87aefeSPatrick Mooney if (bc->bc_isgeom) 391*4c87aefeSPatrick Mooney buf = malloc(MAXPHYS); 392*4c87aefeSPatrick Mooney else 393*4c87aefeSPatrick Mooney buf = NULL; 394*4c87aefeSPatrick Mooney t = pthread_self(); 395bf21cd93STycho Nightingale 396bf21cd93STycho Nightingale pthread_mutex_lock(&bc->bc_mtx); 397*4c87aefeSPatrick Mooney for (;;) { 398*4c87aefeSPatrick Mooney while (blockif_dequeue(bc, t, &be)) { 399bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 400*4c87aefeSPatrick Mooney blockif_proc(bc, be, buf); 401bf21cd93STycho Nightingale pthread_mutex_lock(&bc->bc_mtx); 402bf21cd93STycho Nightingale blockif_complete(bc, be); 403bf21cd93STycho Nightingale } 404*4c87aefeSPatrick Mooney /* Check ctxt status here to see if exit requested */ 405*4c87aefeSPatrick Mooney if (bc->bc_closing) 406*4c87aefeSPatrick Mooney break; 407bf21cd93STycho Nightingale pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 408*4c87aefeSPatrick Mooney } 409bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 410bf21cd93STycho Nightingale 411*4c87aefeSPatrick Mooney if (buf) 412*4c87aefeSPatrick Mooney free(buf); 413bf21cd93STycho Nightingale pthread_exit(NULL); 414bf21cd93STycho Nightingale return (NULL); 415bf21cd93STycho Nightingale } 416bf21cd93STycho Nightingale 417bf21cd93STycho Nightingale #ifdef __FreeBSD__ 418bf21cd93STycho Nightingale static void 419bf21cd93STycho Nightingale blockif_sigcont_handler(int signal, enum ev_type type, void *arg) 420bf21cd93STycho Nightingale #else 421bf21cd93STycho Nightingale static void 422bf21cd93STycho Nightingale blockif_sigcont_handler(int signal) 423bf21cd93STycho Nightingale #endif 424bf21cd93STycho Nightingale { 425bf21cd93STycho Nightingale struct blockif_sig_elem *bse; 426bf21cd93STycho Nightingale 427bf21cd93STycho Nightingale for (;;) { 428bf21cd93STycho Nightingale /* 429bf21cd93STycho Nightingale * Process the entire list even if not intended for 430bf21cd93STycho Nightingale * this thread. 431bf21cd93STycho Nightingale */ 432bf21cd93STycho Nightingale do { 433bf21cd93STycho Nightingale bse = blockif_bse_head; 434bf21cd93STycho Nightingale if (bse == NULL) 435bf21cd93STycho Nightingale return; 436bf21cd93STycho Nightingale } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 437bf21cd93STycho Nightingale (uintptr_t)bse, 438bf21cd93STycho Nightingale (uintptr_t)bse->bse_next)); 439bf21cd93STycho Nightingale 440bf21cd93STycho Nightingale pthread_mutex_lock(&bse->bse_mtx); 441bf21cd93STycho Nightingale bse->bse_pending = 0; 442bf21cd93STycho Nightingale pthread_cond_signal(&bse->bse_cond); 443bf21cd93STycho Nightingale pthread_mutex_unlock(&bse->bse_mtx); 444bf21cd93STycho Nightingale } 445bf21cd93STycho Nightingale } 446bf21cd93STycho Nightingale 447bf21cd93STycho Nightingale static void 448bf21cd93STycho Nightingale blockif_init(void) 449bf21cd93STycho Nightingale { 450bf21cd93STycho Nightingale #ifdef __FreeBSD__ 451bf21cd93STycho Nightingale mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); 452bf21cd93STycho Nightingale (void) signal(SIGCONT, SIG_IGN); 453bf21cd93STycho Nightingale #else 454bf21cd93STycho Nightingale (void) sigset(SIGCONT, blockif_sigcont_handler); 455bf21cd93STycho Nightingale #endif 456bf21cd93STycho Nightingale } 457bf21cd93STycho Nightingale 458bf21cd93STycho Nightingale struct blockif_ctxt * 459bf21cd93STycho Nightingale blockif_open(const char *optstr, const char *ident) 460bf21cd93STycho Nightingale { 461bf21cd93STycho Nightingale char tname[MAXCOMLEN + 1]; 462*4c87aefeSPatrick Mooney #ifdef __FreeBSD__ 463*4c87aefeSPatrick Mooney char name[MAXPATHLEN]; 464*4c87aefeSPatrick Mooney char *nopt, *xopts, *cp; 465*4c87aefeSPatrick Mooney #else 466*4c87aefeSPatrick Mooney char *nopt, *xopts, *cp = NULL; 467*4c87aefeSPatrick Mooney #endif 468bf21cd93STycho Nightingale struct blockif_ctxt *bc; 469bf21cd93STycho Nightingale struct stat sbuf; 470*4c87aefeSPatrick Mooney #ifdef __FreeBSD__ 471*4c87aefeSPatrick Mooney struct diocgattr_arg arg; 472*4c87aefeSPatrick Mooney #else 473*4c87aefeSPatrick Mooney enum blockif_wce wce = WCE_NONE; 474*4c87aefeSPatrick Mooney #endif 475*4c87aefeSPatrick Mooney off_t size, psectsz, psectoff; 476bf21cd93STycho Nightingale int extra, fd, i, sectsz; 477*4c87aefeSPatrick Mooney int nocache, sync, ro, candelete, geom, ssopt, pssopt; 478*4c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 479*4c87aefeSPatrick Mooney cap_rights_t rights; 480*4c87aefeSPatrick Mooney cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; 481*4c87aefeSPatrick Mooney #endif 482bf21cd93STycho Nightingale 483bf21cd93STycho Nightingale pthread_once(&blockif_once, blockif_init); 484bf21cd93STycho Nightingale 485*4c87aefeSPatrick Mooney fd = -1; 486*4c87aefeSPatrick Mooney ssopt = 0; 487bf21cd93STycho Nightingale nocache = 0; 488bf21cd93STycho Nightingale sync = 0; 489bf21cd93STycho Nightingale ro = 0; 490bf21cd93STycho Nightingale 491bf21cd93STycho Nightingale /* 492bf21cd93STycho Nightingale * The first element in the optstring is always a pathname. 493bf21cd93STycho Nightingale * Optional elements follow 494bf21cd93STycho Nightingale */ 495*4c87aefeSPatrick Mooney nopt = xopts = strdup(optstr); 496*4c87aefeSPatrick Mooney while (xopts != NULL) { 497*4c87aefeSPatrick Mooney cp = strsep(&xopts, ","); 498*4c87aefeSPatrick Mooney if (cp == nopt) /* file or device pathname */ 499*4c87aefeSPatrick Mooney continue; 500*4c87aefeSPatrick Mooney else if (!strcmp(cp, "nocache")) 501bf21cd93STycho Nightingale nocache = 1; 502*4c87aefeSPatrick Mooney else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) 503bf21cd93STycho Nightingale sync = 1; 504*4c87aefeSPatrick Mooney else if (!strcmp(cp, "ro")) 505bf21cd93STycho Nightingale ro = 1; 506*4c87aefeSPatrick Mooney else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) 507*4c87aefeSPatrick Mooney ; 508*4c87aefeSPatrick Mooney else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) 509*4c87aefeSPatrick Mooney pssopt = ssopt; 510*4c87aefeSPatrick Mooney else { 511*4c87aefeSPatrick Mooney fprintf(stderr, "Invalid device option \"%s\"\n", cp); 512*4c87aefeSPatrick Mooney goto err; 513*4c87aefeSPatrick Mooney } 514bf21cd93STycho Nightingale } 515bf21cd93STycho Nightingale 516bf21cd93STycho Nightingale extra = 0; 517bf21cd93STycho Nightingale if (nocache) 518bf21cd93STycho Nightingale extra |= O_DIRECT; 519bf21cd93STycho Nightingale if (sync) 520bf21cd93STycho Nightingale extra |= O_SYNC; 521bf21cd93STycho Nightingale 522bf21cd93STycho Nightingale fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 523bf21cd93STycho Nightingale if (fd < 0 && !ro) { 524bf21cd93STycho Nightingale /* Attempt a r/w fail with a r/o open */ 525bf21cd93STycho Nightingale fd = open(nopt, O_RDONLY | extra); 526bf21cd93STycho Nightingale ro = 1; 527bf21cd93STycho Nightingale } 528bf21cd93STycho Nightingale 529bf21cd93STycho Nightingale if (fd < 0) { 530*4c87aefeSPatrick Mooney warn("Could not open backing file: %s", nopt); 531*4c87aefeSPatrick Mooney goto err; 532bf21cd93STycho Nightingale } 533bf21cd93STycho Nightingale 534bf21cd93STycho Nightingale if (fstat(fd, &sbuf) < 0) { 535*4c87aefeSPatrick Mooney warn("Could not stat backing file %s", nopt); 536*4c87aefeSPatrick Mooney goto err; 537bf21cd93STycho Nightingale } 538bf21cd93STycho Nightingale 539*4c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 540*4c87aefeSPatrick Mooney cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, 541*4c87aefeSPatrick Mooney CAP_WRITE); 542*4c87aefeSPatrick Mooney if (ro) 543*4c87aefeSPatrick Mooney cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); 544*4c87aefeSPatrick Mooney 545*4c87aefeSPatrick Mooney if (caph_rights_limit(fd, &rights) == -1) 546*4c87aefeSPatrick Mooney errx(EX_OSERR, "Unable to apply rights for sandbox"); 547*4c87aefeSPatrick Mooney #endif 548*4c87aefeSPatrick Mooney 549bf21cd93STycho Nightingale /* 550bf21cd93STycho Nightingale * Deal with raw devices 551bf21cd93STycho Nightingale */ 552bf21cd93STycho Nightingale size = sbuf.st_size; 553bf21cd93STycho Nightingale sectsz = DEV_BSIZE; 554*4c87aefeSPatrick Mooney psectsz = psectoff = 0; 555*4c87aefeSPatrick Mooney candelete = geom = 0; 556bf21cd93STycho Nightingale #ifdef __FreeBSD__ 557bf21cd93STycho Nightingale if (S_ISCHR(sbuf.st_mode)) { 558bf21cd93STycho Nightingale if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 559bf21cd93STycho Nightingale ioctl(fd, DIOCGSECTORSIZE, §sz)) { 560bf21cd93STycho Nightingale perror("Could not fetch dev blk/sector size"); 561*4c87aefeSPatrick Mooney goto err; 562bf21cd93STycho Nightingale } 563bf21cd93STycho Nightingale assert(size != 0); 564bf21cd93STycho Nightingale assert(sectsz != 0); 565*4c87aefeSPatrick Mooney if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) 566*4c87aefeSPatrick Mooney ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); 567*4c87aefeSPatrick Mooney strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 568*4c87aefeSPatrick Mooney arg.len = sizeof(arg.value.i); 569*4c87aefeSPatrick Mooney if (ioctl(fd, DIOCGATTR, &arg) == 0) 570*4c87aefeSPatrick Mooney candelete = arg.value.i; 571*4c87aefeSPatrick Mooney if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) 572*4c87aefeSPatrick Mooney geom = 1; 573*4c87aefeSPatrick Mooney } else { 574*4c87aefeSPatrick Mooney psectsz = sbuf.st_blksize; 575*4c87aefeSPatrick Mooney } 576*4c87aefeSPatrick Mooney #else 577*4c87aefeSPatrick Mooney psectsz = sbuf.st_blksize; 578*4c87aefeSPatrick Mooney if (S_ISCHR(sbuf.st_mode)) { 579*4c87aefeSPatrick Mooney struct dk_minfo_ext dkmext; 580*4c87aefeSPatrick Mooney int wce_val; 581*4c87aefeSPatrick Mooney 582*4c87aefeSPatrick Mooney /* Look for a more accurate physical blocksize */ 583*4c87aefeSPatrick Mooney if (ioctl(fd, DKIOCGMEDIAINFOEXT, &dkmext) == 0) { 584*4c87aefeSPatrick Mooney psectsz = dkmext.dki_pbsize; 585*4c87aefeSPatrick Mooney } 586*4c87aefeSPatrick Mooney /* See if a configurable write cache is present and working */ 587*4c87aefeSPatrick Mooney if (ioctl(fd, DKIOCGETWCE, &wce_val) == 0) { 588*4c87aefeSPatrick Mooney /* 589*4c87aefeSPatrick Mooney * If WCE is already active, disable it until the 590*4c87aefeSPatrick Mooney * specific device driver calls for its return. If it 591*4c87aefeSPatrick Mooney * is not active, toggle it on and off to verify that 592*4c87aefeSPatrick Mooney * such actions are possible. 593*4c87aefeSPatrick Mooney */ 594*4c87aefeSPatrick Mooney if (wce_val != 0) { 595*4c87aefeSPatrick Mooney wce_val = 0; 596*4c87aefeSPatrick Mooney /* 597*4c87aefeSPatrick Mooney * Inability to disable the cache is a threat 598*4c87aefeSPatrick Mooney * to data durability. 599*4c87aefeSPatrick Mooney */ 600*4c87aefeSPatrick Mooney assert(ioctl(fd, DKIOCSETWCE, &wce_val) == 0); 601*4c87aefeSPatrick Mooney wce = WCE_IOCTL; 602*4c87aefeSPatrick Mooney } else { 603*4c87aefeSPatrick Mooney int r1, r2; 604*4c87aefeSPatrick Mooney 605*4c87aefeSPatrick Mooney wce_val = 1; 606*4c87aefeSPatrick Mooney r1 = ioctl(fd, DKIOCSETWCE, &wce_val); 607*4c87aefeSPatrick Mooney wce_val = 0; 608*4c87aefeSPatrick Mooney r2 = ioctl(fd, DKIOCSETWCE, &wce_val); 609*4c87aefeSPatrick Mooney 610*4c87aefeSPatrick Mooney if (r1 == 0 && r2 == 0) { 611*4c87aefeSPatrick Mooney wce = WCE_IOCTL; 612*4c87aefeSPatrick Mooney } else { 613*4c87aefeSPatrick Mooney /* 614*4c87aefeSPatrick Mooney * If the cache cache toggle was not 615*4c87aefeSPatrick Mooney * successful, ensure that the cache 616*4c87aefeSPatrick Mooney * was not left enabled. 617*4c87aefeSPatrick Mooney */ 618*4c87aefeSPatrick Mooney assert(r1 != 0); 619*4c87aefeSPatrick Mooney } 620*4c87aefeSPatrick Mooney } 621*4c87aefeSPatrick Mooney } 622*4c87aefeSPatrick Mooney } else { 623*4c87aefeSPatrick Mooney int flags; 624*4c87aefeSPatrick Mooney 625*4c87aefeSPatrick Mooney if ((flags = fcntl(fd, F_GETFL)) >= 0) { 626*4c87aefeSPatrick Mooney flags |= O_DSYNC; 627*4c87aefeSPatrick Mooney if (fcntl(fd, F_SETFL, flags) != -1) { 628*4c87aefeSPatrick Mooney wce = WCE_FCNTL; 629*4c87aefeSPatrick Mooney } 630*4c87aefeSPatrick Mooney } 631bf21cd93STycho Nightingale } 632bf21cd93STycho Nightingale #endif 633bf21cd93STycho Nightingale 634*4c87aefeSPatrick Mooney #ifndef WITHOUT_CAPSICUM 635*4c87aefeSPatrick Mooney if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) 636*4c87aefeSPatrick Mooney errx(EX_OSERR, "Unable to apply rights for sandbox"); 637*4c87aefeSPatrick Mooney #endif 638*4c87aefeSPatrick Mooney 639*4c87aefeSPatrick Mooney if (ssopt != 0) { 640*4c87aefeSPatrick Mooney if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || 641*4c87aefeSPatrick Mooney ssopt > pssopt) { 642*4c87aefeSPatrick Mooney fprintf(stderr, "Invalid sector size %d/%d\n", 643*4c87aefeSPatrick Mooney ssopt, pssopt); 644*4c87aefeSPatrick Mooney goto err; 645*4c87aefeSPatrick Mooney } 646*4c87aefeSPatrick Mooney 647*4c87aefeSPatrick Mooney /* 648*4c87aefeSPatrick Mooney * Some backend drivers (e.g. cd0, ada0) require that the I/O 649*4c87aefeSPatrick Mooney * size be a multiple of the device's sector size. 650*4c87aefeSPatrick Mooney * 651*4c87aefeSPatrick Mooney * Validate that the emulated sector size complies with this 652*4c87aefeSPatrick Mooney * requirement. 653*4c87aefeSPatrick Mooney */ 654*4c87aefeSPatrick Mooney if (S_ISCHR(sbuf.st_mode)) { 655*4c87aefeSPatrick Mooney if (ssopt < sectsz || (ssopt % sectsz) != 0) { 656*4c87aefeSPatrick Mooney fprintf(stderr, "Sector size %d incompatible " 657*4c87aefeSPatrick Mooney "with underlying device sector size %d\n", 658*4c87aefeSPatrick Mooney ssopt, sectsz); 659*4c87aefeSPatrick Mooney goto err; 660*4c87aefeSPatrick Mooney } 661*4c87aefeSPatrick Mooney } 662*4c87aefeSPatrick Mooney 663*4c87aefeSPatrick Mooney sectsz = ssopt; 664*4c87aefeSPatrick Mooney psectsz = pssopt; 665*4c87aefeSPatrick Mooney psectoff = 0; 666*4c87aefeSPatrick Mooney } 667*4c87aefeSPatrick Mooney 668bf21cd93STycho Nightingale bc = calloc(1, sizeof(struct blockif_ctxt)); 669bf21cd93STycho Nightingale if (bc == NULL) { 670*4c87aefeSPatrick Mooney perror("calloc"); 671*4c87aefeSPatrick Mooney goto err; 672bf21cd93STycho Nightingale } 673bf21cd93STycho Nightingale 674bf21cd93STycho Nightingale bc->bc_magic = BLOCKIF_SIG; 675bf21cd93STycho Nightingale bc->bc_fd = fd; 676*4c87aefeSPatrick Mooney bc->bc_ischr = S_ISCHR(sbuf.st_mode); 677*4c87aefeSPatrick Mooney bc->bc_isgeom = geom; 678*4c87aefeSPatrick Mooney bc->bc_candelete = candelete; 679*4c87aefeSPatrick Mooney #ifndef __FreeBSD__ 680*4c87aefeSPatrick Mooney bc->bc_wce = wce; 681*4c87aefeSPatrick Mooney #endif 682bf21cd93STycho Nightingale bc->bc_rdonly = ro; 683bf21cd93STycho Nightingale bc->bc_size = size; 684bf21cd93STycho Nightingale bc->bc_sectsz = sectsz; 685*4c87aefeSPatrick Mooney bc->bc_psectsz = psectsz; 686*4c87aefeSPatrick Mooney bc->bc_psectoff = psectoff; 687bf21cd93STycho Nightingale pthread_mutex_init(&bc->bc_mtx, NULL); 688bf21cd93STycho Nightingale pthread_cond_init(&bc->bc_cond, NULL); 689bf21cd93STycho Nightingale TAILQ_INIT(&bc->bc_freeq); 690bf21cd93STycho Nightingale TAILQ_INIT(&bc->bc_pendq); 691bf21cd93STycho Nightingale TAILQ_INIT(&bc->bc_busyq); 692bf21cd93STycho Nightingale for (i = 0; i < BLOCKIF_MAXREQ; i++) { 693bf21cd93STycho Nightingale bc->bc_reqs[i].be_status = BST_FREE; 694bf21cd93STycho Nightingale TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 695bf21cd93STycho Nightingale } 696bf21cd93STycho Nightingale 697*4c87aefeSPatrick Mooney for (i = 0; i < BLOCKIF_NUMTHR; i++) { 698*4c87aefeSPatrick Mooney pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); 699*4c87aefeSPatrick Mooney snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); 700*4c87aefeSPatrick Mooney pthread_set_name_np(bc->bc_btid[i], tname); 701*4c87aefeSPatrick Mooney } 702bf21cd93STycho Nightingale 703bf21cd93STycho Nightingale return (bc); 704*4c87aefeSPatrick Mooney err: 705*4c87aefeSPatrick Mooney if (fd >= 0) 706*4c87aefeSPatrick Mooney close(fd); 707*4c87aefeSPatrick Mooney free(nopt); 708*4c87aefeSPatrick Mooney return (NULL); 709bf21cd93STycho Nightingale } 710bf21cd93STycho Nightingale 711bf21cd93STycho Nightingale static int 712bf21cd93STycho Nightingale blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 713bf21cd93STycho Nightingale enum blockop op) 714bf21cd93STycho Nightingale { 715bf21cd93STycho Nightingale int err; 716bf21cd93STycho Nightingale 717bf21cd93STycho Nightingale err = 0; 718bf21cd93STycho Nightingale 719bf21cd93STycho Nightingale pthread_mutex_lock(&bc->bc_mtx); 720*4c87aefeSPatrick Mooney if (!TAILQ_EMPTY(&bc->bc_freeq)) { 721bf21cd93STycho Nightingale /* 722bf21cd93STycho Nightingale * Enqueue and inform the block i/o thread 723bf21cd93STycho Nightingale * that there is work available 724bf21cd93STycho Nightingale */ 725*4c87aefeSPatrick Mooney if (blockif_enqueue(bc, breq, op)) 726bf21cd93STycho Nightingale pthread_cond_signal(&bc->bc_cond); 727bf21cd93STycho Nightingale } else { 728bf21cd93STycho Nightingale /* 729bf21cd93STycho Nightingale * Callers are not allowed to enqueue more than 730bf21cd93STycho Nightingale * the specified blockif queue limit. Return an 731bf21cd93STycho Nightingale * error to indicate that the queue length has been 732bf21cd93STycho Nightingale * exceeded. 733bf21cd93STycho Nightingale */ 734bf21cd93STycho Nightingale err = E2BIG; 735bf21cd93STycho Nightingale } 736bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 737bf21cd93STycho Nightingale 738bf21cd93STycho Nightingale return (err); 739bf21cd93STycho Nightingale } 740bf21cd93STycho Nightingale 741bf21cd93STycho Nightingale int 742bf21cd93STycho Nightingale blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 743bf21cd93STycho Nightingale { 744bf21cd93STycho Nightingale 745bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 746bf21cd93STycho Nightingale return (blockif_request(bc, breq, BOP_READ)); 747bf21cd93STycho Nightingale } 748bf21cd93STycho Nightingale 749bf21cd93STycho Nightingale int 750bf21cd93STycho Nightingale blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 751bf21cd93STycho Nightingale { 752bf21cd93STycho Nightingale 753bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 754bf21cd93STycho Nightingale return (blockif_request(bc, breq, BOP_WRITE)); 755bf21cd93STycho Nightingale } 756bf21cd93STycho Nightingale 757bf21cd93STycho Nightingale int 758bf21cd93STycho Nightingale blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 759bf21cd93STycho Nightingale { 760bf21cd93STycho Nightingale 761bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 762bf21cd93STycho Nightingale return (blockif_request(bc, breq, BOP_FLUSH)); 763bf21cd93STycho Nightingale } 764bf21cd93STycho Nightingale 765bf21cd93STycho Nightingale int 766*4c87aefeSPatrick Mooney blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) 767*4c87aefeSPatrick Mooney { 768*4c87aefeSPatrick Mooney 769*4c87aefeSPatrick Mooney assert(bc->bc_magic == BLOCKIF_SIG); 770*4c87aefeSPatrick Mooney return (blockif_request(bc, breq, BOP_DELETE)); 771*4c87aefeSPatrick Mooney } 772*4c87aefeSPatrick Mooney 773*4c87aefeSPatrick Mooney int 774bf21cd93STycho Nightingale blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 775bf21cd93STycho Nightingale { 776bf21cd93STycho Nightingale struct blockif_elem *be; 777bf21cd93STycho Nightingale 778bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 779bf21cd93STycho Nightingale 780bf21cd93STycho Nightingale pthread_mutex_lock(&bc->bc_mtx); 781bf21cd93STycho Nightingale /* 782bf21cd93STycho Nightingale * Check pending requests. 783bf21cd93STycho Nightingale */ 784bf21cd93STycho Nightingale TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 785bf21cd93STycho Nightingale if (be->be_req == breq) 786bf21cd93STycho Nightingale break; 787bf21cd93STycho Nightingale } 788bf21cd93STycho Nightingale if (be != NULL) { 789bf21cd93STycho Nightingale /* 790bf21cd93STycho Nightingale * Found it. 791bf21cd93STycho Nightingale */ 792*4c87aefeSPatrick Mooney blockif_complete(bc, be); 793bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 794bf21cd93STycho Nightingale 795bf21cd93STycho Nightingale return (0); 796bf21cd93STycho Nightingale } 797bf21cd93STycho Nightingale 798bf21cd93STycho Nightingale /* 799bf21cd93STycho Nightingale * Check in-flight requests. 800bf21cd93STycho Nightingale */ 801bf21cd93STycho Nightingale TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { 802bf21cd93STycho Nightingale if (be->be_req == breq) 803bf21cd93STycho Nightingale break; 804bf21cd93STycho Nightingale } 805bf21cd93STycho Nightingale if (be == NULL) { 806bf21cd93STycho Nightingale /* 807bf21cd93STycho Nightingale * Didn't find it. 808bf21cd93STycho Nightingale */ 809bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 810bf21cd93STycho Nightingale return (EINVAL); 811bf21cd93STycho Nightingale } 812bf21cd93STycho Nightingale 813bf21cd93STycho Nightingale /* 814bf21cd93STycho Nightingale * Interrupt the processing thread to force it return 815bf21cd93STycho Nightingale * prematurely via it's normal callback path. 816bf21cd93STycho Nightingale */ 817bf21cd93STycho Nightingale while (be->be_status == BST_BUSY) { 818bf21cd93STycho Nightingale struct blockif_sig_elem bse, *old_head; 819bf21cd93STycho Nightingale 820bf21cd93STycho Nightingale pthread_mutex_init(&bse.bse_mtx, NULL); 821bf21cd93STycho Nightingale pthread_cond_init(&bse.bse_cond, NULL); 822bf21cd93STycho Nightingale 823bf21cd93STycho Nightingale bse.bse_pending = 1; 824bf21cd93STycho Nightingale 825bf21cd93STycho Nightingale do { 826bf21cd93STycho Nightingale old_head = blockif_bse_head; 827bf21cd93STycho Nightingale bse.bse_next = old_head; 828bf21cd93STycho Nightingale } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 829bf21cd93STycho Nightingale (uintptr_t)old_head, 830bf21cd93STycho Nightingale (uintptr_t)&bse)); 831bf21cd93STycho Nightingale 832bf21cd93STycho Nightingale pthread_kill(be->be_tid, SIGCONT); 833bf21cd93STycho Nightingale 834bf21cd93STycho Nightingale pthread_mutex_lock(&bse.bse_mtx); 835bf21cd93STycho Nightingale while (bse.bse_pending) 836bf21cd93STycho Nightingale pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); 837bf21cd93STycho Nightingale pthread_mutex_unlock(&bse.bse_mtx); 838bf21cd93STycho Nightingale } 839bf21cd93STycho Nightingale 840bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 841bf21cd93STycho Nightingale 842bf21cd93STycho Nightingale /* 843bf21cd93STycho Nightingale * The processing thread has been interrupted. Since it's not 844bf21cd93STycho Nightingale * clear if the callback has been invoked yet, return EBUSY. 845bf21cd93STycho Nightingale */ 846bf21cd93STycho Nightingale return (EBUSY); 847bf21cd93STycho Nightingale } 848bf21cd93STycho Nightingale 849bf21cd93STycho Nightingale int 850bf21cd93STycho Nightingale blockif_close(struct blockif_ctxt *bc) 851bf21cd93STycho Nightingale { 852bf21cd93STycho Nightingale void *jval; 853*4c87aefeSPatrick Mooney int i; 854bf21cd93STycho Nightingale 855bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 856bf21cd93STycho Nightingale 857bf21cd93STycho Nightingale /* 858bf21cd93STycho Nightingale * Stop the block i/o thread 859bf21cd93STycho Nightingale */ 860*4c87aefeSPatrick Mooney pthread_mutex_lock(&bc->bc_mtx); 861bf21cd93STycho Nightingale bc->bc_closing = 1; 862*4c87aefeSPatrick Mooney pthread_mutex_unlock(&bc->bc_mtx); 863*4c87aefeSPatrick Mooney pthread_cond_broadcast(&bc->bc_cond); 864*4c87aefeSPatrick Mooney for (i = 0; i < BLOCKIF_NUMTHR; i++) 865*4c87aefeSPatrick Mooney pthread_join(bc->bc_btid[i], &jval); 866bf21cd93STycho Nightingale 867bf21cd93STycho Nightingale /* XXX Cancel queued i/o's ??? */ 868bf21cd93STycho Nightingale 869bf21cd93STycho Nightingale /* 870bf21cd93STycho Nightingale * Release resources 871bf21cd93STycho Nightingale */ 872bf21cd93STycho Nightingale bc->bc_magic = 0; 873bf21cd93STycho Nightingale close(bc->bc_fd); 874bf21cd93STycho Nightingale free(bc); 875bf21cd93STycho Nightingale 876bf21cd93STycho Nightingale return (0); 877bf21cd93STycho Nightingale } 878bf21cd93STycho Nightingale 879bf21cd93STycho Nightingale /* 880bf21cd93STycho Nightingale * Return virtual C/H/S values for a given block. Use the algorithm 881bf21cd93STycho Nightingale * outlined in the VHD specification to calculate values. 882bf21cd93STycho Nightingale */ 883bf21cd93STycho Nightingale void 884bf21cd93STycho Nightingale blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 885bf21cd93STycho Nightingale { 886bf21cd93STycho Nightingale off_t sectors; /* total sectors of the block dev */ 887bf21cd93STycho Nightingale off_t hcyl; /* cylinders times heads */ 888bf21cd93STycho Nightingale uint16_t secpt; /* sectors per track */ 889bf21cd93STycho Nightingale uint8_t heads; 890bf21cd93STycho Nightingale 891bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 892bf21cd93STycho Nightingale 893bf21cd93STycho Nightingale sectors = bc->bc_size / bc->bc_sectsz; 894bf21cd93STycho Nightingale 895bf21cd93STycho Nightingale /* Clamp the size to the largest possible with CHS */ 896bf21cd93STycho Nightingale if (sectors > 65535UL*16*255) 897bf21cd93STycho Nightingale sectors = 65535UL*16*255; 898bf21cd93STycho Nightingale 899bf21cd93STycho Nightingale if (sectors >= 65536UL*16*63) { 900bf21cd93STycho Nightingale secpt = 255; 901bf21cd93STycho Nightingale heads = 16; 902bf21cd93STycho Nightingale hcyl = sectors / secpt; 903bf21cd93STycho Nightingale } else { 904bf21cd93STycho Nightingale secpt = 17; 905bf21cd93STycho Nightingale hcyl = sectors / secpt; 906bf21cd93STycho Nightingale heads = (hcyl + 1023) / 1024; 907bf21cd93STycho Nightingale 908bf21cd93STycho Nightingale if (heads < 4) 909bf21cd93STycho Nightingale heads = 4; 910bf21cd93STycho Nightingale 911bf21cd93STycho Nightingale if (hcyl >= (heads * 1024) || heads > 16) { 912bf21cd93STycho Nightingale secpt = 31; 913bf21cd93STycho Nightingale heads = 16; 914bf21cd93STycho Nightingale hcyl = sectors / secpt; 915bf21cd93STycho Nightingale } 916bf21cd93STycho Nightingale if (hcyl >= (heads * 1024)) { 917bf21cd93STycho Nightingale secpt = 63; 918bf21cd93STycho Nightingale heads = 16; 919bf21cd93STycho Nightingale hcyl = sectors / secpt; 920bf21cd93STycho Nightingale } 921bf21cd93STycho Nightingale } 922bf21cd93STycho Nightingale 923bf21cd93STycho Nightingale *c = hcyl / heads; 924bf21cd93STycho Nightingale *h = heads; 925bf21cd93STycho Nightingale *s = secpt; 926bf21cd93STycho Nightingale } 927bf21cd93STycho Nightingale 928bf21cd93STycho Nightingale /* 929bf21cd93STycho Nightingale * Accessors 930bf21cd93STycho Nightingale */ 931bf21cd93STycho Nightingale off_t 932bf21cd93STycho Nightingale blockif_size(struct blockif_ctxt *bc) 933bf21cd93STycho Nightingale { 934bf21cd93STycho Nightingale 935bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 936bf21cd93STycho Nightingale return (bc->bc_size); 937bf21cd93STycho Nightingale } 938bf21cd93STycho Nightingale 939bf21cd93STycho Nightingale int 940bf21cd93STycho Nightingale blockif_sectsz(struct blockif_ctxt *bc) 941bf21cd93STycho Nightingale { 942bf21cd93STycho Nightingale 943bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 944bf21cd93STycho Nightingale return (bc->bc_sectsz); 945bf21cd93STycho Nightingale } 946bf21cd93STycho Nightingale 947*4c87aefeSPatrick Mooney void 948*4c87aefeSPatrick Mooney blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) 949*4c87aefeSPatrick Mooney { 950*4c87aefeSPatrick Mooney 951*4c87aefeSPatrick Mooney assert(bc->bc_magic == BLOCKIF_SIG); 952*4c87aefeSPatrick Mooney *size = bc->bc_psectsz; 953*4c87aefeSPatrick Mooney *off = bc->bc_psectoff; 954*4c87aefeSPatrick Mooney } 955*4c87aefeSPatrick Mooney 956bf21cd93STycho Nightingale int 957bf21cd93STycho Nightingale blockif_queuesz(struct blockif_ctxt *bc) 958bf21cd93STycho Nightingale { 959bf21cd93STycho Nightingale 960bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 961bf21cd93STycho Nightingale return (BLOCKIF_MAXREQ - 1); 962bf21cd93STycho Nightingale } 963bf21cd93STycho Nightingale 964bf21cd93STycho Nightingale int 965bf21cd93STycho Nightingale blockif_is_ro(struct blockif_ctxt *bc) 966bf21cd93STycho Nightingale { 967bf21cd93STycho Nightingale 968bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 969bf21cd93STycho Nightingale return (bc->bc_rdonly); 970bf21cd93STycho Nightingale } 971*4c87aefeSPatrick Mooney 972*4c87aefeSPatrick Mooney int 973*4c87aefeSPatrick Mooney blockif_candelete(struct blockif_ctxt *bc) 974*4c87aefeSPatrick Mooney { 975*4c87aefeSPatrick Mooney 976*4c87aefeSPatrick Mooney assert(bc->bc_magic == BLOCKIF_SIG); 977*4c87aefeSPatrick Mooney return (bc->bc_candelete); 978*4c87aefeSPatrick Mooney } 979*4c87aefeSPatrick Mooney 980*4c87aefeSPatrick Mooney #ifndef __FreeBSD__ 981*4c87aefeSPatrick Mooney int 982*4c87aefeSPatrick Mooney blockif_set_wce(struct blockif_ctxt *bc, int wc_enable) 983*4c87aefeSPatrick Mooney { 984*4c87aefeSPatrick Mooney int res = 0, flags; 985*4c87aefeSPatrick Mooney int clean_val = (wc_enable != 0) ? 1 : 0; 986*4c87aefeSPatrick Mooney 987*4c87aefeSPatrick Mooney (void) pthread_mutex_lock(&bc->bc_mtx); 988*4c87aefeSPatrick Mooney switch (bc->bc_wce) { 989*4c87aefeSPatrick Mooney case WCE_IOCTL: 990*4c87aefeSPatrick Mooney res = ioctl(bc->bc_fd, DKIOCSETWCE, &clean_val); 991*4c87aefeSPatrick Mooney break; 992*4c87aefeSPatrick Mooney case WCE_FCNTL: 993*4c87aefeSPatrick Mooney if ((flags = fcntl(bc->bc_fd, F_GETFL)) >= 0) { 994*4c87aefeSPatrick Mooney if (wc_enable == 0) { 995*4c87aefeSPatrick Mooney flags |= O_DSYNC; 996*4c87aefeSPatrick Mooney } else { 997*4c87aefeSPatrick Mooney flags &= ~O_DSYNC; 998*4c87aefeSPatrick Mooney } 999*4c87aefeSPatrick Mooney if (fcntl(bc->bc_fd, F_SETFL, flags) == -1) { 1000*4c87aefeSPatrick Mooney res = -1; 1001*4c87aefeSPatrick Mooney } 1002*4c87aefeSPatrick Mooney } else { 1003*4c87aefeSPatrick Mooney res = -1; 1004*4c87aefeSPatrick Mooney } 1005*4c87aefeSPatrick Mooney break; 1006*4c87aefeSPatrick Mooney default: 1007*4c87aefeSPatrick Mooney break; 1008*4c87aefeSPatrick Mooney } 1009*4c87aefeSPatrick Mooney 1010*4c87aefeSPatrick Mooney /* 1011*4c87aefeSPatrick Mooney * After a successful disable of the write cache, ensure that any 1012*4c87aefeSPatrick Mooney * lingering data in the cache is synced out. 1013*4c87aefeSPatrick Mooney */ 1014*4c87aefeSPatrick Mooney if (res == 0 && wc_enable == 0) { 1015*4c87aefeSPatrick Mooney res = fsync(bc->bc_fd); 1016*4c87aefeSPatrick Mooney } 1017*4c87aefeSPatrick Mooney (void) pthread_mutex_unlock(&bc->bc_mtx); 1018*4c87aefeSPatrick Mooney 1019*4c87aefeSPatrick Mooney return (res); 1020*4c87aefeSPatrick Mooney } 1021*4c87aefeSPatrick Mooney #endif /* __FreeBSD__ */ 1022