1 /*- 2 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/queue.h> 34 #include <sys/errno.h> 35 #include <sys/stat.h> 36 #include <sys/ioctl.h> 37 #include <sys/disk.h> 38 39 #include <assert.h> 40 #include <fcntl.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <pthread.h> 45 #include <pthread_np.h> 46 #include <unistd.h> 47 48 #include "bhyverun.h" 49 #include "block_if.h" 50 51 #define BLOCKIF_SIG 0xb109b109 52 53 #define BLOCKIF_MAXREQ 32 54 55 enum blockop { 56 BOP_READ, 57 BOP_WRITE, 58 BOP_FLUSH 59 }; 60 61 enum blockstat { 62 BST_FREE, 63 BST_INUSE 64 }; 65 66 struct blockif_elem { 67 TAILQ_ENTRY(blockif_elem) be_link; 68 struct blockif_req *be_req; 69 enum blockop be_op; 70 enum blockstat be_status; 71 }; 72 73 struct blockif_ctxt { 74 int bc_magic; 75 int bc_fd; 76 int bc_rdonly; 77 off_t bc_size; 78 int bc_sectsz; 79 pthread_t bc_btid; 80 pthread_mutex_t bc_mtx; 81 pthread_cond_t bc_cond; 82 int bc_closing; 83 84 /* Request elements and free/inuse queues */ 85 TAILQ_HEAD(, blockif_elem) bc_freeq; 86 TAILQ_HEAD(, blockif_elem) bc_inuseq; 87 u_int bc_req_count; 88 struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 89 }; 90 91 static int 92 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 93 enum blockop op) 94 { 95 struct blockif_elem *be; 96 97 assert(bc->bc_req_count < BLOCKIF_MAXREQ); 98 99 be = TAILQ_FIRST(&bc->bc_freeq); 100 assert(be != NULL); 101 assert(be->be_status == BST_FREE); 102 103 TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 104 be->be_status = BST_INUSE; 105 be->be_req = breq; 106 be->be_op = op; 107 TAILQ_INSERT_TAIL(&bc->bc_inuseq, be, be_link); 108 109 bc->bc_req_count++; 110 111 return (0); 112 } 113 114 static int 115 blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem *el) 116 { 117 struct blockif_elem *be; 118 119 if (bc->bc_req_count == 0) 120 return (ENOENT); 121 122 be = TAILQ_FIRST(&bc->bc_inuseq); 123 assert(be != NULL); 124 assert(be->be_status == BST_INUSE); 125 *el = *be; 126 127 TAILQ_REMOVE(&bc->bc_inuseq, be, be_link); 128 be->be_status = BST_FREE; 129 be->be_req = NULL; 130 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 131 132 bc->bc_req_count--; 133 134 return (0); 135 } 136 137 static void 138 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) 139 { 140 struct blockif_req *br; 141 int err; 142 143 br = be->be_req; 144 err = 0; 145 146 switch (be->be_op) { 147 case BOP_READ: 148 if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 149 br->br_offset) < 0) 150 err = errno; 151 break; 152 case BOP_WRITE: 153 if (bc->bc_rdonly) 154 err = EROFS; 155 else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 156 br->br_offset) < 0) 157 err = errno; 158 break; 159 case BOP_FLUSH: 160 break; 161 default: 162 err = EINVAL; 163 break; 164 } 165 166 (*br->br_callback)(br, err); 167 } 168 169 static void * 170 blockif_thr(void *arg) 171 { 172 struct blockif_ctxt *bc; 173 struct blockif_elem req; 174 175 bc = arg; 176 177 for (;;) { 178 pthread_mutex_lock(&bc->bc_mtx); 179 while (!blockif_dequeue(bc, &req)) { 180 pthread_mutex_unlock(&bc->bc_mtx); 181 blockif_proc(bc, &req); 182 pthread_mutex_lock(&bc->bc_mtx); 183 } 184 pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 185 pthread_mutex_unlock(&bc->bc_mtx); 186 187 /* 188 * Check ctxt status here to see if exit requested 189 */ 190 if (bc->bc_closing) 191 pthread_exit(NULL); 192 } 193 194 /* Not reached */ 195 return (NULL); 196 } 197 198 struct blockif_ctxt * 199 blockif_open(const char *optstr, const char *ident) 200 { 201 char tname[MAXCOMLEN + 1]; 202 char *nopt, *xopts; 203 struct blockif_ctxt *bc; 204 struct stat sbuf; 205 off_t size; 206 int extra, fd, i, sectsz; 207 int nocache, sync, ro; 208 209 nocache = 0; 210 sync = 0; 211 ro = 0; 212 213 /* 214 * The first element in the optstring is always a pathname. 215 * Optional elements follow 216 */ 217 nopt = strdup(optstr); 218 for (xopts = strtok(nopt, ","); 219 xopts != NULL; 220 xopts = strtok(NULL, ",")) { 221 if (!strcmp(xopts, "nocache")) 222 nocache = 1; 223 else if (!strcmp(xopts, "sync")) 224 sync = 1; 225 else if (!strcmp(xopts, "ro")) 226 ro = 1; 227 } 228 229 extra = 0; 230 if (nocache) 231 extra |= O_DIRECT; 232 if (sync) 233 extra |= O_SYNC; 234 235 fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 236 if (fd < 0 && !ro) { 237 /* Attempt a r/w fail with a r/o open */ 238 fd = open(nopt, O_RDONLY | extra); 239 ro = 1; 240 } 241 242 if (fd < 0) { 243 perror("Could not open backing file"); 244 return (NULL); 245 } 246 247 if (fstat(fd, &sbuf) < 0) { 248 perror("Could not stat backing file"); 249 close(fd); 250 return (NULL); 251 } 252 253 /* 254 * Deal with raw devices 255 */ 256 size = sbuf.st_size; 257 sectsz = DEV_BSIZE; 258 if (S_ISCHR(sbuf.st_mode)) { 259 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 260 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 261 perror("Could not fetch dev blk/sector size"); 262 close(fd); 263 return (NULL); 264 } 265 assert(size != 0); 266 assert(sectsz != 0); 267 } 268 269 bc = calloc(1, sizeof(struct blockif_ctxt)); 270 if (bc == NULL) { 271 close(fd); 272 return (NULL); 273 } 274 275 bc->bc_magic = BLOCKIF_SIG; 276 bc->bc_fd = fd; 277 bc->bc_rdonly = ro; 278 bc->bc_size = size; 279 bc->bc_sectsz = sectsz; 280 pthread_mutex_init(&bc->bc_mtx, NULL); 281 pthread_cond_init(&bc->bc_cond, NULL); 282 TAILQ_INIT(&bc->bc_freeq); 283 TAILQ_INIT(&bc->bc_inuseq); 284 bc->bc_req_count = 0; 285 for (i = 0; i < BLOCKIF_MAXREQ; i++) { 286 bc->bc_reqs[i].be_status = BST_FREE; 287 TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 288 } 289 290 pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); 291 292 snprintf(tname, sizeof(tname), "blk-%s", ident); 293 pthread_set_name_np(bc->bc_btid, tname); 294 295 return (bc); 296 } 297 298 static int 299 blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 300 enum blockop op) 301 { 302 int err; 303 304 err = 0; 305 306 pthread_mutex_lock(&bc->bc_mtx); 307 if (bc->bc_req_count < BLOCKIF_MAXREQ) { 308 /* 309 * Enqueue and inform the block i/o thread 310 * that there is work available 311 */ 312 blockif_enqueue(bc, breq, op); 313 pthread_cond_signal(&bc->bc_cond); 314 } else { 315 /* 316 * Callers are not allowed to enqueue more than 317 * the specified blockif queue limit. Return an 318 * error to indicate that the queue length has been 319 * exceeded. 320 */ 321 err = E2BIG; 322 } 323 pthread_mutex_unlock(&bc->bc_mtx); 324 325 return (err); 326 } 327 328 int 329 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 330 { 331 332 assert(bc->bc_magic == BLOCKIF_SIG); 333 return (blockif_request(bc, breq, BOP_READ)); 334 } 335 336 int 337 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 338 { 339 340 assert(bc->bc_magic == BLOCKIF_SIG); 341 return (blockif_request(bc, breq, BOP_WRITE)); 342 } 343 344 int 345 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 346 { 347 348 assert(bc->bc_magic == BLOCKIF_SIG); 349 return (blockif_request(bc, breq, BOP_FLUSH)); 350 } 351 352 int 353 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 354 { 355 struct blockif_elem *be; 356 357 assert(bc->bc_magic == BLOCKIF_SIG); 358 359 pthread_mutex_lock(&bc->bc_mtx); 360 TAILQ_FOREACH(be, &bc->bc_inuseq, be_link) { 361 if (be->be_req == breq) 362 break; 363 } 364 if (be == NULL) { 365 pthread_mutex_unlock(&bc->bc_mtx); 366 return (EINVAL); 367 } 368 369 TAILQ_REMOVE(&bc->bc_inuseq, be, be_link); 370 be->be_status = BST_FREE; 371 be->be_req = NULL; 372 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 373 bc->bc_req_count--; 374 pthread_mutex_unlock(&bc->bc_mtx); 375 376 return (0); 377 } 378 379 int 380 blockif_close(struct blockif_ctxt *bc) 381 { 382 void *jval; 383 int err; 384 385 err = 0; 386 387 assert(bc->bc_magic == BLOCKIF_SIG); 388 389 /* 390 * Stop the block i/o thread 391 */ 392 bc->bc_closing = 1; 393 pthread_cond_signal(&bc->bc_cond); 394 pthread_join(bc->bc_btid, &jval); 395 396 /* XXX Cancel queued i/o's ??? */ 397 398 /* 399 * Release resources 400 */ 401 bc->bc_magic = 0; 402 close(bc->bc_fd); 403 free(bc); 404 405 return (0); 406 } 407 408 /* 409 * Return virtual C/H/S values for a given block. Use the algorithm 410 * outlined in the VHD specification to calculate values. 411 */ 412 void 413 blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 414 { 415 off_t sectors; /* total sectors of the block dev */ 416 off_t hcyl; /* cylinders times heads */ 417 uint16_t secpt; /* sectors per track */ 418 uint8_t heads; 419 420 assert(bc->bc_magic == BLOCKIF_SIG); 421 422 sectors = bc->bc_size / bc->bc_sectsz; 423 424 /* Clamp the size to the largest possible with CHS */ 425 if (sectors > 65535UL*16*255) 426 sectors = 65535UL*16*255; 427 428 if (sectors >= 65536UL*16*63) { 429 secpt = 255; 430 heads = 16; 431 hcyl = sectors / secpt; 432 } else { 433 secpt = 17; 434 hcyl = sectors / secpt; 435 heads = (hcyl + 1023) / 1024; 436 437 if (heads < 4) 438 heads = 4; 439 440 if (hcyl >= (heads * 1024) || heads > 16) { 441 secpt = 31; 442 heads = 16; 443 hcyl = sectors / secpt; 444 } 445 if (hcyl >= (heads * 1024)) { 446 secpt = 63; 447 heads = 16; 448 hcyl = sectors / secpt; 449 } 450 } 451 452 *c = hcyl / heads; 453 *h = heads; 454 *s = secpt; 455 } 456 457 /* 458 * Accessors 459 */ 460 off_t 461 blockif_size(struct blockif_ctxt *bc) 462 { 463 464 assert(bc->bc_magic == BLOCKIF_SIG); 465 return (bc->bc_size); 466 } 467 468 int 469 blockif_sectsz(struct blockif_ctxt *bc) 470 { 471 472 assert(bc->bc_magic == BLOCKIF_SIG); 473 return (bc->bc_sectsz); 474 } 475 476 int 477 blockif_queuesz(struct blockif_ctxt *bc) 478 { 479 480 assert(bc->bc_magic == BLOCKIF_SIG); 481 return (BLOCKIF_MAXREQ); 482 } 483 484 int 485 blockif_is_ro(struct blockif_ctxt *bc) 486 { 487 488 assert(bc->bc_magic == BLOCKIF_SIG); 489 return (bc->bc_rdonly); 490 } 491