1 /*- 2 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/queue.h> 34 #include <sys/errno.h> 35 #include <sys/stat.h> 36 #include <sys/ioctl.h> 37 #include <sys/disk.h> 38 39 #include <assert.h> 40 #include <fcntl.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <pthread.h> 45 #include <pthread_np.h> 46 #include <unistd.h> 47 48 #include "bhyverun.h" 49 #include "block_if.h" 50 51 #define BLOCKIF_SIG 0xb109b109 52 53 #define BLOCKIF_MAXREQ 16 54 55 enum blockop { 56 BOP_READ, 57 BOP_WRITE, 58 BOP_FLUSH, 59 BOP_CANCEL 60 }; 61 62 enum blockstat { 63 BST_FREE, 64 BST_INUSE 65 }; 66 67 struct blockif_elem { 68 TAILQ_ENTRY(blockif_elem) be_link; 69 struct blockif_req *be_req; 70 enum blockop be_op; 71 enum blockstat be_status; 72 }; 73 74 struct blockif_ctxt { 75 int bc_magic; 76 int bc_fd; 77 int bc_rdonly; 78 off_t bc_size; 79 int bc_sectsz; 80 pthread_t bc_btid; 81 pthread_mutex_t bc_mtx; 82 pthread_cond_t bc_cond; 83 int bc_closing; 84 85 /* Request elements and free/inuse queues */ 86 TAILQ_HEAD(, blockif_elem) bc_freeq; 87 TAILQ_HEAD(, blockif_elem) bc_inuseq; 88 u_int bc_req_count; 89 struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 90 }; 91 92 static int 93 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 94 enum blockop op) 95 { 96 struct blockif_elem *be; 97 98 assert(bc->bc_req_count < BLOCKIF_MAXREQ); 99 100 be = TAILQ_FIRST(&bc->bc_freeq); 101 assert(be != NULL); 102 assert(be->be_status == BST_FREE); 103 104 TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 105 be->be_status = BST_INUSE; 106 be->be_req = breq; 107 be->be_op = op; 108 TAILQ_INSERT_TAIL(&bc->bc_inuseq, be, be_link); 109 110 bc->bc_req_count++; 111 112 return (0); 113 } 114 115 static int 116 blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem *el) 117 { 118 struct blockif_elem *be; 119 120 if (bc->bc_req_count == 0) 121 return (ENOENT); 122 123 be = TAILQ_FIRST(&bc->bc_inuseq); 124 assert(be != NULL); 125 assert(be->be_status == BST_INUSE); 126 *el = *be; 127 128 TAILQ_REMOVE(&bc->bc_inuseq, be, be_link); 129 be->be_status = BST_FREE; 130 be->be_req = NULL; 131 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 132 133 bc->bc_req_count--; 134 135 return (0); 136 } 137 138 static void 139 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) 140 { 141 struct blockif_req *br; 142 int err; 143 144 br = be->be_req; 145 err = 0; 146 147 switch (be->be_op) { 148 case BOP_READ: 149 if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 150 br->br_offset) < 0) 151 err = errno; 152 break; 153 case BOP_WRITE: 154 if (bc->bc_rdonly) 155 err = EROFS; 156 else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 157 br->br_offset) < 0) 158 err = errno; 159 break; 160 case BOP_FLUSH: 161 break; 162 case BOP_CANCEL: 163 err = EINTR; 164 break; 165 default: 166 err = EINVAL; 167 break; 168 } 169 170 (*br->br_callback)(br, err); 171 } 172 173 static void * 174 blockif_thr(void *arg) 175 { 176 struct blockif_ctxt *bc; 177 struct blockif_elem req; 178 179 bc = arg; 180 181 for (;;) { 182 pthread_mutex_lock(&bc->bc_mtx); 183 while (!blockif_dequeue(bc, &req)) { 184 pthread_mutex_unlock(&bc->bc_mtx); 185 blockif_proc(bc, &req); 186 pthread_mutex_lock(&bc->bc_mtx); 187 } 188 pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 189 pthread_mutex_unlock(&bc->bc_mtx); 190 191 /* 192 * Check ctxt status here to see if exit requested 193 */ 194 if (bc->bc_closing) 195 pthread_exit(NULL); 196 } 197 198 /* Not reached */ 199 return (NULL); 200 } 201 202 struct blockif_ctxt * 203 blockif_open(const char *optstr, const char *ident) 204 { 205 char tname[MAXCOMLEN + 1]; 206 char *nopt, *xopts; 207 struct blockif_ctxt *bc; 208 struct stat sbuf; 209 off_t size; 210 int extra, fd, i, sectsz; 211 int nocache, sync, ro; 212 213 nocache = 0; 214 sync = 0; 215 ro = 0; 216 217 /* 218 * The first element in the optstring is always a pathname. 219 * Optional elements follow 220 */ 221 nopt = strdup(optstr); 222 for (xopts = strtok(nopt, ","); 223 xopts != NULL; 224 xopts = strtok(NULL, ",")) { 225 if (!strcmp(xopts, "nocache")) 226 nocache = 1; 227 else if (!strcmp(xopts, "sync")) 228 sync = 1; 229 else if (!strcmp(xopts, "ro")) 230 ro = 1; 231 } 232 233 extra = 0; 234 if (nocache) 235 extra |= O_DIRECT; 236 if (sync) 237 extra |= O_SYNC; 238 239 fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 240 if (fd < 0 && !ro) { 241 /* Attempt a r/w fail with a r/o open */ 242 fd = open(nopt, O_RDONLY | extra); 243 ro = 1; 244 } 245 246 if (fd < 0) { 247 perror("Could not open backing file"); 248 return (NULL); 249 } 250 251 if (fstat(fd, &sbuf) < 0) { 252 perror("Could not stat backing file"); 253 close(fd); 254 return (NULL); 255 } 256 257 /* 258 * Deal with raw devices 259 */ 260 size = sbuf.st_size; 261 sectsz = DEV_BSIZE; 262 if (S_ISCHR(sbuf.st_mode)) { 263 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 264 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 265 perror("Could not fetch dev blk/sector size"); 266 close(fd); 267 return (NULL); 268 } 269 assert(size != 0); 270 assert(sectsz != 0); 271 } 272 273 bc = malloc(sizeof(struct blockif_ctxt)); 274 if (bc == NULL) { 275 close(fd); 276 return (NULL); 277 } 278 279 memset(bc, 0, sizeof(*bc)); 280 bc->bc_magic = BLOCKIF_SIG; 281 bc->bc_fd = fd; 282 bc->bc_size = size; 283 bc->bc_sectsz = sectsz; 284 pthread_mutex_init(&bc->bc_mtx, NULL); 285 pthread_cond_init(&bc->bc_cond, NULL); 286 TAILQ_INIT(&bc->bc_freeq); 287 TAILQ_INIT(&bc->bc_inuseq); 288 bc->bc_req_count = 0; 289 for (i = 0; i < BLOCKIF_MAXREQ; i++) { 290 bc->bc_reqs[i].be_status = BST_FREE; 291 TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 292 } 293 294 pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); 295 296 snprintf(tname, sizeof(tname), "blk-%s", ident); 297 pthread_set_name_np(bc->bc_btid, tname); 298 299 return (bc); 300 } 301 302 static int 303 blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 304 enum blockop op) 305 { 306 int err; 307 308 err = 0; 309 310 pthread_mutex_lock(&bc->bc_mtx); 311 if (bc->bc_req_count < BLOCKIF_MAXREQ) { 312 /* 313 * Enqueue and inform the block i/o thread 314 * that there is work available 315 */ 316 blockif_enqueue(bc, breq, op); 317 pthread_cond_signal(&bc->bc_cond); 318 } else { 319 /* 320 * Callers are not allowed to enqueue more than 321 * the specified blockif queue limit. Return an 322 * error to indicate that the queue length has been 323 * exceeded. 324 */ 325 err = E2BIG; 326 } 327 pthread_mutex_unlock(&bc->bc_mtx); 328 329 return (err); 330 } 331 332 int 333 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 334 { 335 336 assert(bc->bc_magic == BLOCKIF_SIG); 337 return (blockif_request(bc, breq, BOP_READ)); 338 } 339 340 int 341 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 342 { 343 344 assert(bc->bc_magic == BLOCKIF_SIG); 345 return (blockif_request(bc, breq, BOP_WRITE)); 346 } 347 348 int 349 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 350 { 351 352 assert(bc->bc_magic == BLOCKIF_SIG); 353 return (blockif_request(bc, breq, BOP_FLUSH)); 354 } 355 356 int 357 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 358 { 359 360 assert(bc->bc_magic == BLOCKIF_SIG); 361 return (blockif_request(bc, breq, BOP_CANCEL)); 362 } 363 364 int 365 blockif_close(struct blockif_ctxt *bc) 366 { 367 void *jval; 368 int err; 369 370 err = 0; 371 372 assert(bc->bc_magic == BLOCKIF_SIG); 373 374 /* 375 * Stop the block i/o thread 376 */ 377 bc->bc_closing = 1; 378 pthread_cond_signal(&bc->bc_cond); 379 pthread_join(bc->bc_btid, &jval); 380 381 /* XXX Cancel queued i/o's ??? */ 382 383 /* 384 * Release resources 385 */ 386 bc->bc_magic = 0; 387 close(bc->bc_fd); 388 free(bc); 389 390 return (0); 391 } 392 393 /* 394 * Accessors 395 */ 396 off_t 397 blockif_size(struct blockif_ctxt *bc) 398 { 399 400 assert(bc->bc_magic == BLOCKIF_SIG); 401 return (bc->bc_size); 402 } 403 404 int 405 blockif_sectsz(struct blockif_ctxt *bc) 406 { 407 408 assert(bc->bc_magic == BLOCKIF_SIG); 409 return (bc->bc_sectsz); 410 } 411 412 int 413 blockif_queuesz(struct blockif_ctxt *bc) 414 { 415 416 assert(bc->bc_magic == BLOCKIF_SIG); 417 return (BLOCKIF_MAXREQ); 418 } 419 420 int 421 blockif_is_ro(struct blockif_ctxt *bc) 422 { 423 424 assert(bc->bc_magic == BLOCKIF_SIG); 425 return (bc->bc_rdonly); 426 } 427