1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003 Silicon Graphics International Corp. 5 * Copyright (c) 2009-2011 Spectra Logic Corporation 6 * Copyright (c) 2012 The FreeBSD Foundation 7 * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Portions of this software were developed by Edward Tomasz Napierala 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions, and the following disclaimer, 18 * without modification. 19 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 20 * substantially similar to the "NO WARRANTY" disclaimer below 21 * ("Disclaimer") and any redistribution must be conditioned upon 22 * including a substantially similar Disclaimer requirement for further 23 * binary redistribution. 24 * 25 * NO WARRANTY 26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 34 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGES. 37 * 38 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 39 */ 40 /* 41 * CAM Target Layer driver backend for block devices. 42 * 43 * Author: Ken Merry <ken@FreeBSD.org> 44 */ 45 #include <sys/cdefs.h> 46 __FBSDID("$FreeBSD$"); 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/kernel.h> 51 #include <sys/types.h> 52 #include <sys/kthread.h> 53 #include <sys/bio.h> 54 #include <sys/fcntl.h> 55 #include <sys/limits.h> 56 #include <sys/lock.h> 57 #include <sys/mutex.h> 58 #include <sys/condvar.h> 59 #include <sys/malloc.h> 60 #include <sys/conf.h> 61 #include <sys/ioccom.h> 62 #include <sys/queue.h> 63 #include <sys/sbuf.h> 64 #include <sys/endian.h> 65 #include <sys/uio.h> 66 #include <sys/buf.h> 67 #include <sys/taskqueue.h> 68 #include <sys/vnode.h> 69 #include <sys/namei.h> 70 #include <sys/mount.h> 71 #include <sys/disk.h> 72 #include <sys/fcntl.h> 73 #include <sys/filedesc.h> 74 #include <sys/filio.h> 75 #include <sys/proc.h> 76 #include <sys/pcpu.h> 77 #include <sys/module.h> 78 #include <sys/sdt.h> 79 #include <sys/devicestat.h> 80 #include <sys/sysctl.h> 81 #include <sys/nv.h> 82 #include <sys/dnv.h> 83 #include <sys/sx.h> 84 85 #include <geom/geom.h> 86 87 #include <cam/cam.h> 88 #include <cam/scsi/scsi_all.h> 89 #include <cam/scsi/scsi_da.h> 90 #include <cam/ctl/ctl_io.h> 91 #include <cam/ctl/ctl.h> 92 #include <cam/ctl/ctl_backend.h> 93 #include <cam/ctl/ctl_ioctl.h> 94 #include <cam/ctl/ctl_ha.h> 95 #include <cam/ctl/ctl_scsi_all.h> 96 #include <cam/ctl/ctl_private.h> 97 #include <cam/ctl/ctl_error.h> 98 99 /* 100 * The idea here is that we'll allocate enough S/G space to hold a 1MB 101 * I/O. If we get an I/O larger than that, we'll split it. 102 */ 103 #define CTLBLK_HALF_IO_SIZE (512 * 1024) 104 #define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) 105 #define CTLBLK_MIN_SEG (128 * 1024) 106 #define CTLBLK_MAX_SEG MIN(CTLBLK_HALF_IO_SIZE, maxphys) 107 #define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MIN_SEG, 1) 108 #define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) 109 #define CTLBLK_NUM_SEGS (CTLBLK_MAX_IO_SIZE / CTLBLK_MAX_SEG) 110 111 #ifdef CTLBLK_DEBUG 112 #define DPRINTF(fmt, args...) \ 113 printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 114 #else 115 #define DPRINTF(fmt, args...) do {} while(0) 116 #endif 117 118 #define PRIV(io) \ 119 ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) 120 #define ARGS(io) \ 121 ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) 122 123 SDT_PROVIDER_DEFINE(cbb); 124 125 typedef enum { 126 CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, 127 CTL_BE_BLOCK_LUN_WAITING = 0x04, 128 } ctl_be_block_lun_flags; 129 130 typedef enum { 131 CTL_BE_BLOCK_NONE, 132 CTL_BE_BLOCK_DEV, 133 CTL_BE_BLOCK_FILE 134 } ctl_be_block_type; 135 136 struct ctl_be_block_filedata { 137 struct ucred *cred; 138 }; 139 140 union ctl_be_block_bedata { 141 struct ctl_be_block_filedata file; 142 }; 143 144 struct ctl_be_block_io; 145 struct ctl_be_block_lun; 146 147 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, 148 struct ctl_be_block_io *beio); 149 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun, 150 const char *attrname); 151 152 /* 153 * Backend LUN structure. There is a 1:1 mapping between a block device 154 * and a backend block LUN, and between a backend block LUN and a CTL LUN. 155 */ 156 struct ctl_be_block_lun { 157 struct ctl_be_lun cbe_lun; /* Must be first element. */ 158 struct ctl_lun_create_params params; 159 char *dev_path; 160 ctl_be_block_type dev_type; 161 struct vnode *vn; 162 union ctl_be_block_bedata backend; 163 cbb_dispatch_t dispatch; 164 cbb_dispatch_t lun_flush; 165 cbb_dispatch_t unmap; 166 cbb_dispatch_t get_lba_status; 167 cbb_getattr_t getattr; 168 uint64_t size_blocks; 169 uint64_t size_bytes; 170 struct ctl_be_block_softc *softc; 171 struct devstat *disk_stats; 172 ctl_be_block_lun_flags flags; 173 SLIST_ENTRY(ctl_be_block_lun) links; 174 struct taskqueue *io_taskqueue; 175 struct task io_task; 176 int num_threads; 177 STAILQ_HEAD(, ctl_io_hdr) input_queue; 178 STAILQ_HEAD(, ctl_io_hdr) config_read_queue; 179 STAILQ_HEAD(, ctl_io_hdr) config_write_queue; 180 STAILQ_HEAD(, ctl_io_hdr) datamove_queue; 181 struct mtx_padalign io_lock; 182 struct mtx_padalign queue_lock; 183 }; 184 185 /* 186 * Overall softc structure for the block backend module. 187 */ 188 struct ctl_be_block_softc { 189 struct sx modify_lock; 190 struct mtx lock; 191 int num_luns; 192 SLIST_HEAD(, ctl_be_block_lun) lun_list; 193 uma_zone_t beio_zone; 194 uma_zone_t bufmin_zone; 195 uma_zone_t bufmax_zone; 196 }; 197 198 static struct ctl_be_block_softc backend_block_softc; 199 200 /* 201 * Per-I/O information. 202 */ 203 struct ctl_be_block_io { 204 union ctl_io *io; 205 struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; 206 struct iovec xiovecs[CTLBLK_MAX_SEGS]; 207 int refcnt; 208 int bio_cmd; 209 int two_sglists; 210 int num_segs; 211 int num_bios_sent; 212 int num_bios_done; 213 int send_complete; 214 int first_error; 215 uint64_t first_error_offset; 216 struct bintime ds_t0; 217 devstat_tag_type ds_tag_type; 218 devstat_trans_flags ds_trans_type; 219 uint64_t io_len; 220 uint64_t io_offset; 221 int io_arg; 222 struct ctl_be_block_softc *softc; 223 struct ctl_be_block_lun *lun; 224 void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ 225 }; 226 227 extern struct ctl_softc *control_softc; 228 229 static int cbb_num_threads = 32; 230 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 231 "CAM Target Layer Block Backend"); 232 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN, 233 &cbb_num_threads, 0, "Number of threads per backing file"); 234 235 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); 236 static void ctl_free_beio(struct ctl_be_block_io *beio); 237 static void ctl_complete_beio(struct ctl_be_block_io *beio); 238 static int ctl_be_block_move_done(union ctl_io *io, bool samethr); 239 static void ctl_be_block_biodone(struct bio *bio); 240 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 241 struct ctl_be_block_io *beio); 242 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 243 struct ctl_be_block_io *beio); 244 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 245 struct ctl_be_block_io *beio); 246 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, 247 const char *attrname); 248 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 249 struct ctl_be_block_io *beio); 250 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 251 struct ctl_be_block_io *beio); 252 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 253 struct ctl_be_block_io *beio); 254 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, 255 const char *attrname); 256 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 257 union ctl_io *io); 258 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 259 union ctl_io *io); 260 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 261 union ctl_io *io); 262 static void ctl_be_block_worker(void *context, int pending); 263 static int ctl_be_block_submit(union ctl_io *io); 264 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 265 int flag, struct thread *td); 266 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, 267 struct ctl_lun_req *req); 268 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, 269 struct ctl_lun_req *req); 270 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); 271 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun, 272 struct ctl_lun_req *req); 273 static int ctl_be_block_create(struct ctl_be_block_softc *softc, 274 struct ctl_lun_req *req); 275 static int ctl_be_block_rm(struct ctl_be_block_softc *softc, 276 struct ctl_lun_req *req); 277 static int ctl_be_block_modify(struct ctl_be_block_softc *softc, 278 struct ctl_lun_req *req); 279 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun); 280 static int ctl_be_block_config_write(union ctl_io *io); 281 static int ctl_be_block_config_read(union ctl_io *io); 282 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb); 283 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname); 284 static int ctl_be_block_init(void); 285 static int ctl_be_block_shutdown(void); 286 287 static struct ctl_backend_driver ctl_be_block_driver = 288 { 289 .name = "block", 290 .flags = CTL_BE_FLAG_HAS_CONFIG, 291 .init = ctl_be_block_init, 292 .shutdown = ctl_be_block_shutdown, 293 .data_submit = ctl_be_block_submit, 294 .config_read = ctl_be_block_config_read, 295 .config_write = ctl_be_block_config_write, 296 .ioctl = ctl_be_block_ioctl, 297 .lun_info = ctl_be_block_lun_info, 298 .lun_attr = ctl_be_block_lun_attr 299 }; 300 301 MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend"); 302 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); 303 304 static void 305 ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg, 306 size_t len) 307 { 308 309 if (len <= CTLBLK_MIN_SEG) { 310 sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK); 311 } else { 312 KASSERT(len <= CTLBLK_MAX_SEG, 313 ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG)); 314 sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK); 315 } 316 sg->len = len; 317 } 318 319 static void 320 ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg) 321 { 322 323 if (sg->len <= CTLBLK_MIN_SEG) { 324 uma_zfree(softc->bufmin_zone, sg->addr); 325 } else { 326 KASSERT(sg->len <= CTLBLK_MAX_SEG, 327 ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG)); 328 uma_zfree(softc->bufmax_zone, sg->addr); 329 } 330 } 331 332 static struct ctl_be_block_io * 333 ctl_alloc_beio(struct ctl_be_block_softc *softc) 334 { 335 struct ctl_be_block_io *beio; 336 337 beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO); 338 beio->softc = softc; 339 beio->refcnt = 1; 340 return (beio); 341 } 342 343 static void 344 ctl_real_free_beio(struct ctl_be_block_io *beio) 345 { 346 struct ctl_be_block_softc *softc = beio->softc; 347 int i; 348 349 for (i = 0; i < beio->num_segs; i++) { 350 ctl_free_seg(softc, &beio->sg_segs[i]); 351 352 /* For compare we had two equal S/G lists. */ 353 if (beio->two_sglists) { 354 ctl_free_seg(softc, 355 &beio->sg_segs[i + CTLBLK_HALF_SEGS]); 356 } 357 } 358 359 uma_zfree(softc->beio_zone, beio); 360 } 361 362 static void 363 ctl_refcnt_beio(void *arg, int diff) 364 { 365 struct ctl_be_block_io *beio = arg; 366 367 if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0) 368 ctl_real_free_beio(beio); 369 } 370 371 static void 372 ctl_free_beio(struct ctl_be_block_io *beio) 373 { 374 375 ctl_refcnt_beio(beio, -1); 376 } 377 378 static void 379 ctl_complete_beio(struct ctl_be_block_io *beio) 380 { 381 union ctl_io *io = beio->io; 382 383 if (beio->beio_cont != NULL) { 384 beio->beio_cont(beio); 385 } else { 386 ctl_free_beio(beio); 387 ctl_data_submit_done(io); 388 } 389 } 390 391 static size_t 392 cmp(uint8_t *a, uint8_t *b, size_t size) 393 { 394 size_t i; 395 396 for (i = 0; i < size; i++) { 397 if (a[i] != b[i]) 398 break; 399 } 400 return (i); 401 } 402 403 static void 404 ctl_be_block_compare(union ctl_io *io) 405 { 406 struct ctl_be_block_io *beio; 407 uint64_t off, res; 408 int i; 409 uint8_t info[8]; 410 411 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 412 off = 0; 413 for (i = 0; i < beio->num_segs; i++) { 414 res = cmp(beio->sg_segs[i].addr, 415 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, 416 beio->sg_segs[i].len); 417 off += res; 418 if (res < beio->sg_segs[i].len) 419 break; 420 } 421 if (i < beio->num_segs) { 422 scsi_u64to8b(off, info); 423 ctl_set_sense(&io->scsiio, /*current_error*/ 1, 424 /*sense_key*/ SSD_KEY_MISCOMPARE, 425 /*asc*/ 0x1D, /*ascq*/ 0x00, 426 /*type*/ SSD_ELEM_INFO, 427 /*size*/ sizeof(info), /*data*/ &info, 428 /*type*/ SSD_ELEM_NONE); 429 } else 430 ctl_set_success(&io->scsiio); 431 } 432 433 static int 434 ctl_be_block_move_done(union ctl_io *io, bool samethr) 435 { 436 struct ctl_be_block_io *beio; 437 struct ctl_be_block_lun *be_lun; 438 struct ctl_lba_len_flags *lbalen; 439 440 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 441 442 DPRINTF("entered\n"); 443 io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; 444 445 /* 446 * We set status at this point for read and compare commands. 447 */ 448 if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && 449 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) { 450 lbalen = ARGS(io); 451 if (lbalen->flags & CTL_LLF_READ) { 452 ctl_set_success(&io->scsiio); 453 } else if (lbalen->flags & CTL_LLF_COMPARE) { 454 /* We have two data blocks ready for comparison. */ 455 ctl_be_block_compare(io); 456 } 457 } 458 459 /* 460 * If this is a read, or a write with errors, it is done. 461 */ 462 if ((beio->bio_cmd == BIO_READ) 463 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 464 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 465 ctl_complete_beio(beio); 466 return (0); 467 } 468 469 /* 470 * At this point, we have a write and the DMA completed successfully. 471 * If we were called synchronously in the original thread then just 472 * dispatch, otherwise we now have to queue it to the task queue to 473 * execute the backend I/O. That is because we do blocking 474 * memory allocations, and in the file backing case, blocking I/O. 475 * This move done routine is generally called in the SIM's 476 * interrupt context, and therefore we cannot block. 477 */ 478 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 479 if (samethr) { 480 be_lun->dispatch(be_lun, beio); 481 } else { 482 mtx_lock(&be_lun->queue_lock); 483 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 484 mtx_unlock(&be_lun->queue_lock); 485 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 486 } 487 return (0); 488 } 489 490 static void 491 ctl_be_block_biodone(struct bio *bio) 492 { 493 struct ctl_be_block_io *beio = bio->bio_caller1; 494 struct ctl_be_block_lun *be_lun = beio->lun; 495 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 496 union ctl_io *io; 497 int error; 498 499 io = beio->io; 500 501 DPRINTF("entered\n"); 502 503 error = bio->bio_error; 504 mtx_lock(&be_lun->io_lock); 505 if (error != 0 && 506 (beio->first_error == 0 || 507 bio->bio_offset < beio->first_error_offset)) { 508 beio->first_error = error; 509 beio->first_error_offset = bio->bio_offset; 510 } 511 512 beio->num_bios_done++; 513 514 /* 515 * XXX KDM will this cause WITNESS to complain? Holding a lock 516 * during the free might cause it to complain. 517 */ 518 g_destroy_bio(bio); 519 520 /* 521 * If the send complete bit isn't set, or we aren't the last I/O to 522 * complete, then we're done. 523 */ 524 if ((beio->send_complete == 0) 525 || (beio->num_bios_done < beio->num_bios_sent)) { 526 mtx_unlock(&be_lun->io_lock); 527 return; 528 } 529 530 /* 531 * At this point, we've verified that we are the last I/O to 532 * complete, so it's safe to drop the lock. 533 */ 534 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 535 beio->ds_tag_type, beio->ds_trans_type, 536 /*now*/ NULL, /*then*/&beio->ds_t0); 537 mtx_unlock(&be_lun->io_lock); 538 539 /* 540 * If there are any errors from the backing device, we fail the 541 * entire I/O with a medium error. 542 */ 543 error = beio->first_error; 544 if (error != 0) { 545 if (error == EOPNOTSUPP) { 546 ctl_set_invalid_opcode(&io->scsiio); 547 } else if (error == ENOSPC || error == EDQUOT) { 548 ctl_set_space_alloc_fail(&io->scsiio); 549 } else if (error == EROFS || error == EACCES) { 550 ctl_set_hw_write_protected(&io->scsiio); 551 } else if (beio->bio_cmd == BIO_FLUSH) { 552 /* XXX KDM is there is a better error here? */ 553 ctl_set_internal_failure(&io->scsiio, 554 /*sks_valid*/ 1, 555 /*retry_count*/ 0xbad2); 556 } else { 557 ctl_set_medium_error(&io->scsiio, 558 beio->bio_cmd == BIO_READ); 559 } 560 ctl_complete_beio(beio); 561 return; 562 } 563 564 /* 565 * If this is a write, a flush, a delete or verify, we're all done. 566 * If this is a read, we can now send the data to the user. 567 */ 568 if ((beio->bio_cmd == BIO_WRITE) 569 || (beio->bio_cmd == BIO_FLUSH) 570 || (beio->bio_cmd == BIO_DELETE) 571 || (ARGS(io)->flags & CTL_LLF_VERIFY)) { 572 ctl_set_success(&io->scsiio); 573 ctl_complete_beio(beio); 574 } else { 575 if ((ARGS(io)->flags & CTL_LLF_READ) && 576 beio->beio_cont == NULL) { 577 ctl_set_success(&io->scsiio); 578 if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT) 579 ctl_serseq_done(io); 580 } 581 ctl_datamove(io); 582 } 583 } 584 585 static void 586 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 587 struct ctl_be_block_io *beio) 588 { 589 union ctl_io *io = beio->io; 590 struct mount *mountpoint; 591 int error, lock_flags; 592 593 DPRINTF("entered\n"); 594 595 binuptime(&beio->ds_t0); 596 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 597 598 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 599 600 if (MNT_SHARED_WRITES(mountpoint) || 601 ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 602 lock_flags = LK_SHARED; 603 else 604 lock_flags = LK_EXCLUSIVE; 605 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 606 error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT, 607 curthread); 608 VOP_UNLOCK(be_lun->vn); 609 610 vn_finished_write(mountpoint); 611 612 mtx_lock(&be_lun->io_lock); 613 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 614 beio->ds_tag_type, beio->ds_trans_type, 615 /*now*/ NULL, /*then*/&beio->ds_t0); 616 mtx_unlock(&be_lun->io_lock); 617 618 if (error == 0) 619 ctl_set_success(&io->scsiio); 620 else { 621 /* XXX KDM is there is a better error here? */ 622 ctl_set_internal_failure(&io->scsiio, 623 /*sks_valid*/ 1, 624 /*retry_count*/ 0xbad1); 625 } 626 627 ctl_complete_beio(beio); 628 } 629 630 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t"); 631 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t"); 632 SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t"); 633 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t"); 634 635 static void 636 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 637 struct ctl_be_block_io *beio) 638 { 639 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 640 struct ctl_be_block_filedata *file_data; 641 union ctl_io *io; 642 struct uio xuio; 643 struct iovec *xiovec; 644 size_t s; 645 int error, flags, i; 646 647 DPRINTF("entered\n"); 648 649 file_data = &be_lun->backend.file; 650 io = beio->io; 651 flags = 0; 652 if (ARGS(io)->flags & CTL_LLF_DPO) 653 flags |= IO_DIRECT; 654 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 655 flags |= IO_SYNC; 656 657 bzero(&xuio, sizeof(xuio)); 658 if (beio->bio_cmd == BIO_READ) { 659 SDT_PROBE0(cbb, , read, file_start); 660 xuio.uio_rw = UIO_READ; 661 } else { 662 SDT_PROBE0(cbb, , write, file_start); 663 xuio.uio_rw = UIO_WRITE; 664 } 665 xuio.uio_offset = beio->io_offset; 666 xuio.uio_resid = beio->io_len; 667 xuio.uio_segflg = UIO_SYSSPACE; 668 xuio.uio_iov = beio->xiovecs; 669 xuio.uio_iovcnt = beio->num_segs; 670 xuio.uio_td = curthread; 671 672 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 673 xiovec->iov_base = beio->sg_segs[i].addr; 674 xiovec->iov_len = beio->sg_segs[i].len; 675 } 676 677 binuptime(&beio->ds_t0); 678 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 679 680 if (beio->bio_cmd == BIO_READ) { 681 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 682 683 if (beio->beio_cont == NULL && 684 cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT) 685 ctl_serseq_done(io); 686 /* 687 * UFS pays attention to IO_DIRECT for reads. If the 688 * DIRECTIO option is configured into the kernel, it calls 689 * ffs_rawread(). But that only works for single-segment 690 * uios with user space addresses. In our case, with a 691 * kernel uio, it still reads into the buffer cache, but it 692 * will just try to release the buffer from the cache later 693 * on in ffs_read(). 694 * 695 * ZFS does not pay attention to IO_DIRECT for reads. 696 * 697 * UFS does not pay attention to IO_SYNC for reads. 698 * 699 * ZFS pays attention to IO_SYNC (which translates into the 700 * Solaris define FRSYNC for zfs_read()) for reads. It 701 * attempts to sync the file before reading. 702 */ 703 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); 704 705 VOP_UNLOCK(be_lun->vn); 706 SDT_PROBE0(cbb, , read, file_done); 707 if (error == 0 && xuio.uio_resid > 0) { 708 /* 709 * If we red less then requested (EOF), then 710 * we should clean the rest of the buffer. 711 */ 712 s = beio->io_len - xuio.uio_resid; 713 for (i = 0; i < beio->num_segs; i++) { 714 if (s >= beio->sg_segs[i].len) { 715 s -= beio->sg_segs[i].len; 716 continue; 717 } 718 bzero((uint8_t *)beio->sg_segs[i].addr + s, 719 beio->sg_segs[i].len - s); 720 s = 0; 721 } 722 } 723 } else { 724 struct mount *mountpoint; 725 int lock_flags; 726 727 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 728 729 if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) 730 && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 731 lock_flags = LK_SHARED; 732 else 733 lock_flags = LK_EXCLUSIVE; 734 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 735 736 /* 737 * UFS pays attention to IO_DIRECT for writes. The write 738 * is done asynchronously. (Normally the write would just 739 * get put into cache. 740 * 741 * UFS pays attention to IO_SYNC for writes. It will 742 * attempt to write the buffer out synchronously if that 743 * flag is set. 744 * 745 * ZFS does not pay attention to IO_DIRECT for writes. 746 * 747 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 748 * for writes. It will flush the transaction from the 749 * cache before returning. 750 */ 751 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); 752 VOP_UNLOCK(be_lun->vn); 753 754 vn_finished_write(mountpoint); 755 SDT_PROBE0(cbb, , write, file_done); 756 } 757 758 mtx_lock(&be_lun->io_lock); 759 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 760 beio->ds_tag_type, beio->ds_trans_type, 761 /*now*/ NULL, /*then*/&beio->ds_t0); 762 mtx_unlock(&be_lun->io_lock); 763 764 /* 765 * If we got an error, set the sense data to "MEDIUM ERROR" and 766 * return the I/O to the user. 767 */ 768 if (error != 0) { 769 if (error == ENOSPC || error == EDQUOT) { 770 ctl_set_space_alloc_fail(&io->scsiio); 771 } else if (error == EROFS || error == EACCES) { 772 ctl_set_hw_write_protected(&io->scsiio); 773 } else { 774 ctl_set_medium_error(&io->scsiio, 775 beio->bio_cmd == BIO_READ); 776 } 777 ctl_complete_beio(beio); 778 return; 779 } 780 781 /* 782 * If this is a write or a verify, we're all done. 783 * If this is a read, we can now send the data to the user. 784 */ 785 if ((beio->bio_cmd == BIO_WRITE) || 786 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 787 ctl_set_success(&io->scsiio); 788 ctl_complete_beio(beio); 789 } else { 790 if ((ARGS(io)->flags & CTL_LLF_READ) && 791 beio->beio_cont == NULL) { 792 ctl_set_success(&io->scsiio); 793 if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT) 794 ctl_serseq_done(io); 795 } 796 ctl_datamove(io); 797 } 798 } 799 800 static void 801 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 802 struct ctl_be_block_io *beio) 803 { 804 union ctl_io *io = beio->io; 805 struct ctl_lba_len_flags *lbalen = ARGS(io); 806 struct scsi_get_lba_status_data *data; 807 off_t roff, off; 808 int error, status; 809 810 DPRINTF("entered\n"); 811 812 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 813 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 814 error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 815 0, curthread->td_ucred, curthread); 816 if (error == 0 && off > roff) 817 status = 0; /* mapped up to off */ 818 else { 819 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 820 0, curthread->td_ucred, curthread); 821 if (error == 0 && off > roff) 822 status = 1; /* deallocated up to off */ 823 else { 824 status = 0; /* unknown up to the end */ 825 off = be_lun->size_bytes; 826 } 827 } 828 VOP_UNLOCK(be_lun->vn); 829 830 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 831 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 832 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 833 lbalen->lba), data->descr[0].length); 834 data->descr[0].status = status; 835 836 ctl_complete_beio(beio); 837 } 838 839 static uint64_t 840 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) 841 { 842 struct vattr vattr; 843 struct statfs statfs; 844 uint64_t val; 845 int error; 846 847 val = UINT64_MAX; 848 if (be_lun->vn == NULL) 849 return (val); 850 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 851 if (strcmp(attrname, "blocksused") == 0) { 852 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 853 if (error == 0) 854 val = vattr.va_bytes / be_lun->cbe_lun.blocksize; 855 } 856 if (strcmp(attrname, "blocksavail") == 0 && 857 !VN_IS_DOOMED(be_lun->vn)) { 858 error = VFS_STATFS(be_lun->vn->v_mount, &statfs); 859 if (error == 0) 860 val = statfs.f_bavail * statfs.f_bsize / 861 be_lun->cbe_lun.blocksize; 862 } 863 VOP_UNLOCK(be_lun->vn); 864 return (val); 865 } 866 867 static void 868 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, 869 struct ctl_be_block_io *beio) 870 { 871 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 872 union ctl_io *io; 873 struct cdevsw *csw; 874 struct cdev *dev; 875 struct uio xuio; 876 struct iovec *xiovec; 877 int error, flags, i, ref; 878 879 DPRINTF("entered\n"); 880 881 io = beio->io; 882 flags = 0; 883 if (ARGS(io)->flags & CTL_LLF_DPO) 884 flags |= IO_DIRECT; 885 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 886 flags |= IO_SYNC; 887 888 bzero(&xuio, sizeof(xuio)); 889 if (beio->bio_cmd == BIO_READ) { 890 SDT_PROBE0(cbb, , read, file_start); 891 xuio.uio_rw = UIO_READ; 892 } else { 893 SDT_PROBE0(cbb, , write, file_start); 894 xuio.uio_rw = UIO_WRITE; 895 } 896 xuio.uio_offset = beio->io_offset; 897 xuio.uio_resid = beio->io_len; 898 xuio.uio_segflg = UIO_SYSSPACE; 899 xuio.uio_iov = beio->xiovecs; 900 xuio.uio_iovcnt = beio->num_segs; 901 xuio.uio_td = curthread; 902 903 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 904 xiovec->iov_base = beio->sg_segs[i].addr; 905 xiovec->iov_len = beio->sg_segs[i].len; 906 } 907 908 binuptime(&beio->ds_t0); 909 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 910 911 csw = devvn_refthread(be_lun->vn, &dev, &ref); 912 if (csw) { 913 if (beio->bio_cmd == BIO_READ) { 914 if (beio->beio_cont == NULL && 915 cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT) 916 ctl_serseq_done(io); 917 error = csw->d_read(dev, &xuio, flags); 918 } else 919 error = csw->d_write(dev, &xuio, flags); 920 dev_relthread(dev, ref); 921 } else 922 error = ENXIO; 923 924 if (beio->bio_cmd == BIO_READ) 925 SDT_PROBE0(cbb, , read, file_done); 926 else 927 SDT_PROBE0(cbb, , write, file_done); 928 929 mtx_lock(&be_lun->io_lock); 930 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 931 beio->ds_tag_type, beio->ds_trans_type, 932 /*now*/ NULL, /*then*/&beio->ds_t0); 933 mtx_unlock(&be_lun->io_lock); 934 935 /* 936 * If we got an error, set the sense data to "MEDIUM ERROR" and 937 * return the I/O to the user. 938 */ 939 if (error != 0) { 940 if (error == ENOSPC || error == EDQUOT) { 941 ctl_set_space_alloc_fail(&io->scsiio); 942 } else if (error == EROFS || error == EACCES) { 943 ctl_set_hw_write_protected(&io->scsiio); 944 } else { 945 ctl_set_medium_error(&io->scsiio, 946 beio->bio_cmd == BIO_READ); 947 } 948 ctl_complete_beio(beio); 949 return; 950 } 951 952 /* 953 * If this is a write or a verify, we're all done. 954 * If this is a read, we can now send the data to the user. 955 */ 956 if ((beio->bio_cmd == BIO_WRITE) || 957 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 958 ctl_set_success(&io->scsiio); 959 ctl_complete_beio(beio); 960 } else { 961 if ((ARGS(io)->flags & CTL_LLF_READ) && 962 beio->beio_cont == NULL) { 963 ctl_set_success(&io->scsiio); 964 if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT) 965 ctl_serseq_done(io); 966 } 967 ctl_datamove(io); 968 } 969 } 970 971 static void 972 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, 973 struct ctl_be_block_io *beio) 974 { 975 union ctl_io *io = beio->io; 976 struct cdevsw *csw; 977 struct cdev *dev; 978 struct ctl_lba_len_flags *lbalen = ARGS(io); 979 struct scsi_get_lba_status_data *data; 980 off_t roff, off; 981 int error, ref, status; 982 983 DPRINTF("entered\n"); 984 985 csw = devvn_refthread(be_lun->vn, &dev, &ref); 986 if (csw == NULL) { 987 status = 0; /* unknown up to the end */ 988 off = be_lun->size_bytes; 989 goto done; 990 } 991 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 992 error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD, 993 curthread); 994 if (error == 0 && off > roff) 995 status = 0; /* mapped up to off */ 996 else { 997 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD, 998 curthread); 999 if (error == 0 && off > roff) 1000 status = 1; /* deallocated up to off */ 1001 else { 1002 status = 0; /* unknown up to the end */ 1003 off = be_lun->size_bytes; 1004 } 1005 } 1006 dev_relthread(dev, ref); 1007 1008 done: 1009 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 1010 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 1011 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 1012 lbalen->lba), data->descr[0].length); 1013 data->descr[0].status = status; 1014 1015 ctl_complete_beio(beio); 1016 } 1017 1018 static void 1019 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 1020 struct ctl_be_block_io *beio) 1021 { 1022 struct bio *bio; 1023 struct cdevsw *csw; 1024 struct cdev *dev; 1025 int ref; 1026 1027 DPRINTF("entered\n"); 1028 1029 /* This can't fail, it's a blocking allocation. */ 1030 bio = g_alloc_bio(); 1031 1032 bio->bio_cmd = BIO_FLUSH; 1033 bio->bio_offset = 0; 1034 bio->bio_data = 0; 1035 bio->bio_done = ctl_be_block_biodone; 1036 bio->bio_caller1 = beio; 1037 bio->bio_pblkno = 0; 1038 1039 /* 1040 * We don't need to acquire the LUN lock here, because we are only 1041 * sending one bio, and so there is no other context to synchronize 1042 * with. 1043 */ 1044 beio->num_bios_sent = 1; 1045 beio->send_complete = 1; 1046 1047 binuptime(&beio->ds_t0); 1048 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1049 1050 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1051 if (csw) { 1052 bio->bio_dev = dev; 1053 csw->d_strategy(bio); 1054 dev_relthread(dev, ref); 1055 } else { 1056 bio->bio_error = ENXIO; 1057 ctl_be_block_biodone(bio); 1058 } 1059 } 1060 1061 static void 1062 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 1063 struct ctl_be_block_io *beio, 1064 uint64_t off, uint64_t len, int last) 1065 { 1066 struct bio *bio; 1067 uint64_t maxlen; 1068 struct cdevsw *csw; 1069 struct cdev *dev; 1070 int ref; 1071 1072 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1073 maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); 1074 while (len > 0) { 1075 bio = g_alloc_bio(); 1076 bio->bio_cmd = BIO_DELETE; 1077 bio->bio_dev = dev; 1078 bio->bio_offset = off; 1079 bio->bio_length = MIN(len, maxlen); 1080 bio->bio_data = 0; 1081 bio->bio_done = ctl_be_block_biodone; 1082 bio->bio_caller1 = beio; 1083 bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; 1084 1085 off += bio->bio_length; 1086 len -= bio->bio_length; 1087 1088 mtx_lock(&be_lun->io_lock); 1089 beio->num_bios_sent++; 1090 if (last && len == 0) 1091 beio->send_complete = 1; 1092 mtx_unlock(&be_lun->io_lock); 1093 1094 if (csw) { 1095 csw->d_strategy(bio); 1096 } else { 1097 bio->bio_error = ENXIO; 1098 ctl_be_block_biodone(bio); 1099 } 1100 } 1101 if (csw) 1102 dev_relthread(dev, ref); 1103 } 1104 1105 static void 1106 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1107 struct ctl_be_block_io *beio) 1108 { 1109 union ctl_io *io; 1110 struct ctl_ptr_len_flags *ptrlen; 1111 struct scsi_unmap_desc *buf, *end; 1112 uint64_t len; 1113 1114 io = beio->io; 1115 1116 DPRINTF("entered\n"); 1117 1118 binuptime(&beio->ds_t0); 1119 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1120 1121 if (beio->io_offset == -1) { 1122 beio->io_len = 0; 1123 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1124 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 1125 end = buf + ptrlen->len / sizeof(*buf); 1126 for (; buf < end; buf++) { 1127 len = (uint64_t)scsi_4btoul(buf->length) * 1128 be_lun->cbe_lun.blocksize; 1129 beio->io_len += len; 1130 ctl_be_block_unmap_dev_range(be_lun, beio, 1131 scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, 1132 len, (end - buf < 2) ? TRUE : FALSE); 1133 } 1134 } else 1135 ctl_be_block_unmap_dev_range(be_lun, beio, 1136 beio->io_offset, beio->io_len, TRUE); 1137 } 1138 1139 static void 1140 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 1141 struct ctl_be_block_io *beio) 1142 { 1143 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 1144 struct bio *bio; 1145 struct cdevsw *csw; 1146 struct cdev *dev; 1147 off_t cur_offset; 1148 int i, max_iosize, ref; 1149 1150 DPRINTF("entered\n"); 1151 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1152 1153 /* 1154 * We have to limit our I/O size to the maximum supported by the 1155 * backend device. 1156 */ 1157 if (csw) { 1158 max_iosize = dev->si_iosize_max; 1159 if (max_iosize < PAGE_SIZE) 1160 max_iosize = DFLTPHYS; 1161 } else 1162 max_iosize = DFLTPHYS; 1163 1164 cur_offset = beio->io_offset; 1165 for (i = 0; i < beio->num_segs; i++) { 1166 size_t cur_size; 1167 uint8_t *cur_ptr; 1168 1169 cur_size = beio->sg_segs[i].len; 1170 cur_ptr = beio->sg_segs[i].addr; 1171 1172 while (cur_size > 0) { 1173 /* This can't fail, it's a blocking allocation. */ 1174 bio = g_alloc_bio(); 1175 1176 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 1177 1178 bio->bio_cmd = beio->bio_cmd; 1179 bio->bio_dev = dev; 1180 bio->bio_caller1 = beio; 1181 bio->bio_length = min(cur_size, max_iosize); 1182 bio->bio_offset = cur_offset; 1183 bio->bio_data = cur_ptr; 1184 bio->bio_done = ctl_be_block_biodone; 1185 bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; 1186 1187 cur_offset += bio->bio_length; 1188 cur_ptr += bio->bio_length; 1189 cur_size -= bio->bio_length; 1190 1191 TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 1192 beio->num_bios_sent++; 1193 } 1194 } 1195 beio->send_complete = 1; 1196 binuptime(&beio->ds_t0); 1197 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1198 1199 /* 1200 * Fire off all allocated requests! 1201 */ 1202 while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1203 TAILQ_REMOVE(&queue, bio, bio_queue); 1204 if (csw) 1205 csw->d_strategy(bio); 1206 else { 1207 bio->bio_error = ENXIO; 1208 ctl_be_block_biodone(bio); 1209 } 1210 } 1211 if (csw) 1212 dev_relthread(dev, ref); 1213 } 1214 1215 static uint64_t 1216 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) 1217 { 1218 struct diocgattr_arg arg; 1219 struct cdevsw *csw; 1220 struct cdev *dev; 1221 int error, ref; 1222 1223 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1224 if (csw == NULL) 1225 return (UINT64_MAX); 1226 strlcpy(arg.name, attrname, sizeof(arg.name)); 1227 arg.len = sizeof(arg.value.off); 1228 if (csw->d_ioctl) { 1229 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 1230 curthread); 1231 } else 1232 error = ENODEV; 1233 dev_relthread(dev, ref); 1234 if (error != 0) 1235 return (UINT64_MAX); 1236 return (arg.value.off); 1237 } 1238 1239 static void 1240 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, 1241 union ctl_io *io) 1242 { 1243 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1244 struct ctl_be_block_io *beio; 1245 struct ctl_lba_len_flags *lbalen; 1246 1247 DPRINTF("entered\n"); 1248 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1249 lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1250 1251 beio->io_len = lbalen->len * cbe_lun->blocksize; 1252 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1253 beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; 1254 beio->bio_cmd = BIO_FLUSH; 1255 beio->ds_trans_type = DEVSTAT_NO_DATA; 1256 DPRINTF("SYNC\n"); 1257 be_lun->lun_flush(be_lun, beio); 1258 } 1259 1260 static void 1261 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1262 { 1263 union ctl_io *io; 1264 1265 io = beio->io; 1266 ctl_free_beio(beio); 1267 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1268 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1269 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1270 ctl_config_write_done(io); 1271 return; 1272 } 1273 1274 ctl_be_block_config_write(io); 1275 } 1276 1277 static void 1278 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1279 union ctl_io *io) 1280 { 1281 struct ctl_be_block_softc *softc = be_lun->softc; 1282 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1283 struct ctl_be_block_io *beio; 1284 struct ctl_lba_len_flags *lbalen; 1285 uint64_t len_left, lba; 1286 uint32_t pb, pbo, adj; 1287 int i, seglen; 1288 uint8_t *buf, *end; 1289 1290 DPRINTF("entered\n"); 1291 1292 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1293 lbalen = ARGS(io); 1294 1295 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || 1296 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { 1297 ctl_free_beio(beio); 1298 ctl_set_invalid_field(&io->scsiio, 1299 /*sks_valid*/ 1, 1300 /*command*/ 1, 1301 /*field*/ 1, 1302 /*bit_valid*/ 0, 1303 /*bit*/ 0); 1304 ctl_config_write_done(io); 1305 return; 1306 } 1307 1308 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { 1309 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1310 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; 1311 beio->bio_cmd = BIO_DELETE; 1312 beio->ds_trans_type = DEVSTAT_FREE; 1313 1314 be_lun->unmap(be_lun, beio); 1315 return; 1316 } 1317 1318 beio->bio_cmd = BIO_WRITE; 1319 beio->ds_trans_type = DEVSTAT_WRITE; 1320 1321 DPRINTF("WRITE SAME at LBA %jx len %u\n", 1322 (uintmax_t)lbalen->lba, lbalen->len); 1323 1324 pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; 1325 if (be_lun->cbe_lun.pblockoff > 0) 1326 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; 1327 else 1328 pbo = 0; 1329 len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; 1330 for (i = 0, lba = 0; i < CTLBLK_NUM_SEGS && len_left > 0; i++) { 1331 /* 1332 * Setup the S/G entry for this chunk. 1333 */ 1334 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1335 if (pb > cbe_lun->blocksize) { 1336 adj = ((lbalen->lba + lba) * cbe_lun->blocksize + 1337 seglen - pbo) % pb; 1338 if (seglen > adj) 1339 seglen -= adj; 1340 else 1341 seglen -= seglen % cbe_lun->blocksize; 1342 } else 1343 seglen -= seglen % cbe_lun->blocksize; 1344 ctl_alloc_seg(softc, &beio->sg_segs[i], seglen); 1345 1346 DPRINTF("segment %d addr %p len %zd\n", i, 1347 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1348 1349 beio->num_segs++; 1350 len_left -= seglen; 1351 1352 buf = beio->sg_segs[i].addr; 1353 end = buf + seglen; 1354 for (; buf < end; buf += cbe_lun->blocksize) { 1355 if (lbalen->flags & SWS_NDOB) { 1356 memset(buf, 0, cbe_lun->blocksize); 1357 } else { 1358 memcpy(buf, io->scsiio.kern_data_ptr, 1359 cbe_lun->blocksize); 1360 } 1361 if (lbalen->flags & SWS_LBDATA) 1362 scsi_ulto4b(lbalen->lba + lba, buf); 1363 lba++; 1364 } 1365 } 1366 1367 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1368 beio->io_len = lba * cbe_lun->blocksize; 1369 1370 /* We can not do all in one run. Correct and schedule rerun. */ 1371 if (len_left > 0) { 1372 lbalen->lba += lba; 1373 lbalen->len -= lba; 1374 beio->beio_cont = ctl_be_block_cw_done_ws; 1375 } 1376 1377 be_lun->dispatch(be_lun, beio); 1378 } 1379 1380 static void 1381 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1382 union ctl_io *io) 1383 { 1384 struct ctl_be_block_io *beio; 1385 struct ctl_ptr_len_flags *ptrlen; 1386 1387 DPRINTF("entered\n"); 1388 1389 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1390 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1391 1392 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { 1393 ctl_free_beio(beio); 1394 ctl_set_invalid_field(&io->scsiio, 1395 /*sks_valid*/ 0, 1396 /*command*/ 1, 1397 /*field*/ 0, 1398 /*bit_valid*/ 0, 1399 /*bit*/ 0); 1400 ctl_config_write_done(io); 1401 return; 1402 } 1403 1404 beio->io_len = 0; 1405 beio->io_offset = -1; 1406 beio->bio_cmd = BIO_DELETE; 1407 beio->ds_trans_type = DEVSTAT_FREE; 1408 DPRINTF("UNMAP\n"); 1409 be_lun->unmap(be_lun, beio); 1410 } 1411 1412 static void 1413 ctl_be_block_cr_done(struct ctl_be_block_io *beio) 1414 { 1415 union ctl_io *io; 1416 1417 io = beio->io; 1418 ctl_free_beio(beio); 1419 ctl_config_read_done(io); 1420 } 1421 1422 static void 1423 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 1424 union ctl_io *io) 1425 { 1426 struct ctl_be_block_io *beio; 1427 struct ctl_be_block_softc *softc; 1428 1429 DPRINTF("entered\n"); 1430 1431 softc = be_lun->softc; 1432 beio = ctl_alloc_beio(softc); 1433 beio->io = io; 1434 beio->lun = be_lun; 1435 beio->beio_cont = ctl_be_block_cr_done; 1436 PRIV(io)->ptr = (void *)beio; 1437 1438 switch (io->scsiio.cdb[0]) { 1439 case SERVICE_ACTION_IN: /* GET LBA STATUS */ 1440 beio->bio_cmd = -1; 1441 beio->ds_trans_type = DEVSTAT_NO_DATA; 1442 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1443 beio->io_len = 0; 1444 if (be_lun->get_lba_status) 1445 be_lun->get_lba_status(be_lun, beio); 1446 else 1447 ctl_be_block_cr_done(beio); 1448 break; 1449 default: 1450 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1451 break; 1452 } 1453 } 1454 1455 static void 1456 ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1457 { 1458 union ctl_io *io; 1459 1460 io = beio->io; 1461 ctl_free_beio(beio); 1462 ctl_config_write_done(io); 1463 } 1464 1465 static void 1466 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1467 union ctl_io *io) 1468 { 1469 struct ctl_be_block_io *beio; 1470 struct ctl_be_block_softc *softc; 1471 1472 DPRINTF("entered\n"); 1473 1474 softc = be_lun->softc; 1475 beio = ctl_alloc_beio(softc); 1476 beio->io = io; 1477 beio->lun = be_lun; 1478 beio->beio_cont = ctl_be_block_cw_done; 1479 switch (io->scsiio.tag_type) { 1480 case CTL_TAG_ORDERED: 1481 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1482 break; 1483 case CTL_TAG_HEAD_OF_QUEUE: 1484 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1485 break; 1486 case CTL_TAG_UNTAGGED: 1487 case CTL_TAG_SIMPLE: 1488 case CTL_TAG_ACA: 1489 default: 1490 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1491 break; 1492 } 1493 PRIV(io)->ptr = (void *)beio; 1494 1495 switch (io->scsiio.cdb[0]) { 1496 case SYNCHRONIZE_CACHE: 1497 case SYNCHRONIZE_CACHE_16: 1498 ctl_be_block_cw_dispatch_sync(be_lun, io); 1499 break; 1500 case WRITE_SAME_10: 1501 case WRITE_SAME_16: 1502 ctl_be_block_cw_dispatch_ws(be_lun, io); 1503 break; 1504 case UNMAP: 1505 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1506 break; 1507 default: 1508 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1509 break; 1510 } 1511 } 1512 1513 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t"); 1514 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t"); 1515 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t"); 1516 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t"); 1517 1518 static void 1519 ctl_be_block_next(struct ctl_be_block_io *beio) 1520 { 1521 struct ctl_be_block_lun *be_lun; 1522 union ctl_io *io; 1523 1524 io = beio->io; 1525 be_lun = beio->lun; 1526 ctl_free_beio(beio); 1527 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1528 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1529 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1530 ctl_data_submit_done(io); 1531 return; 1532 } 1533 1534 io->io_hdr.status &= ~CTL_STATUS_MASK; 1535 io->io_hdr.status |= CTL_STATUS_NONE; 1536 1537 mtx_lock(&be_lun->queue_lock); 1538 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1539 mtx_unlock(&be_lun->queue_lock); 1540 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1541 } 1542 1543 static void 1544 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1545 union ctl_io *io) 1546 { 1547 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1548 struct ctl_be_block_io *beio; 1549 struct ctl_be_block_softc *softc; 1550 struct ctl_lba_len_flags *lbalen; 1551 struct ctl_ptr_len_flags *bptrlen; 1552 uint64_t len_left, lbas; 1553 int i; 1554 1555 softc = be_lun->softc; 1556 1557 DPRINTF("entered\n"); 1558 1559 lbalen = ARGS(io); 1560 if (lbalen->flags & CTL_LLF_WRITE) { 1561 SDT_PROBE0(cbb, , write, start); 1562 } else { 1563 SDT_PROBE0(cbb, , read, start); 1564 } 1565 1566 beio = ctl_alloc_beio(softc); 1567 beio->io = io; 1568 beio->lun = be_lun; 1569 bptrlen = PRIV(io); 1570 bptrlen->ptr = (void *)beio; 1571 1572 switch (io->scsiio.tag_type) { 1573 case CTL_TAG_ORDERED: 1574 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1575 break; 1576 case CTL_TAG_HEAD_OF_QUEUE: 1577 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1578 break; 1579 case CTL_TAG_UNTAGGED: 1580 case CTL_TAG_SIMPLE: 1581 case CTL_TAG_ACA: 1582 default: 1583 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1584 break; 1585 } 1586 1587 if (lbalen->flags & CTL_LLF_WRITE) { 1588 beio->bio_cmd = BIO_WRITE; 1589 beio->ds_trans_type = DEVSTAT_WRITE; 1590 } else { 1591 beio->bio_cmd = BIO_READ; 1592 beio->ds_trans_type = DEVSTAT_READ; 1593 } 1594 1595 DPRINTF("%s at LBA %jx len %u @%ju\n", 1596 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", 1597 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 1598 if (lbalen->flags & CTL_LLF_COMPARE) { 1599 beio->two_sglists = 1; 1600 lbas = CTLBLK_HALF_IO_SIZE; 1601 } else { 1602 lbas = CTLBLK_MAX_IO_SIZE; 1603 } 1604 lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); 1605 beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; 1606 beio->io_len = lbas * cbe_lun->blocksize; 1607 bptrlen->len += lbas; 1608 1609 for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 1610 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 1611 i, CTLBLK_MAX_SEGS)); 1612 1613 /* 1614 * Setup the S/G entry for this chunk. 1615 */ 1616 ctl_alloc_seg(softc, &beio->sg_segs[i], 1617 MIN(CTLBLK_MAX_SEG, len_left)); 1618 1619 DPRINTF("segment %d addr %p len %zd\n", i, 1620 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1621 1622 /* Set up second segment for compare operation. */ 1623 if (beio->two_sglists) { 1624 ctl_alloc_seg(softc, 1625 &beio->sg_segs[i + CTLBLK_HALF_SEGS], 1626 beio->sg_segs[i].len); 1627 } 1628 1629 beio->num_segs++; 1630 len_left -= beio->sg_segs[i].len; 1631 } 1632 if (bptrlen->len < lbalen->len) 1633 beio->beio_cont = ctl_be_block_next; 1634 io->scsiio.be_move_done = ctl_be_block_move_done; 1635 /* For compare we have separate S/G lists for read and datamove. */ 1636 if (beio->two_sglists) 1637 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; 1638 else 1639 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1640 io->scsiio.kern_data_len = beio->io_len; 1641 io->scsiio.kern_sg_entries = beio->num_segs; 1642 io->scsiio.kern_data_ref = ctl_refcnt_beio; 1643 io->scsiio.kern_data_arg = beio; 1644 io->io_hdr.flags |= CTL_FLAG_ALLOCATED; 1645 1646 /* 1647 * For the read case, we need to read the data into our buffers and 1648 * then we can send it back to the user. For the write case, we 1649 * need to get the data from the user first. 1650 */ 1651 if (beio->bio_cmd == BIO_READ) { 1652 SDT_PROBE0(cbb, , read, alloc_done); 1653 be_lun->dispatch(be_lun, beio); 1654 } else { 1655 SDT_PROBE0(cbb, , write, alloc_done); 1656 ctl_datamove(io); 1657 } 1658 } 1659 1660 static void 1661 ctl_be_block_worker(void *context, int pending) 1662 { 1663 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context; 1664 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1665 union ctl_io *io; 1666 struct ctl_be_block_io *beio; 1667 1668 DPRINTF("entered\n"); 1669 /* 1670 * Fetch and process I/Os from all queues. If we detect LUN 1671 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race, 1672 * so make response maximally opaque to not confuse initiator. 1673 */ 1674 for (;;) { 1675 mtx_lock(&be_lun->queue_lock); 1676 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1677 if (io != NULL) { 1678 DPRINTF("datamove queue\n"); 1679 STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links); 1680 mtx_unlock(&be_lun->queue_lock); 1681 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1682 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1683 ctl_set_busy(&io->scsiio); 1684 ctl_complete_beio(beio); 1685 continue; 1686 } 1687 be_lun->dispatch(be_lun, beio); 1688 continue; 1689 } 1690 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1691 if (io != NULL) { 1692 DPRINTF("config write queue\n"); 1693 STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links); 1694 mtx_unlock(&be_lun->queue_lock); 1695 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1696 ctl_set_busy(&io->scsiio); 1697 ctl_config_write_done(io); 1698 continue; 1699 } 1700 ctl_be_block_cw_dispatch(be_lun, io); 1701 continue; 1702 } 1703 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); 1704 if (io != NULL) { 1705 DPRINTF("config read queue\n"); 1706 STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links); 1707 mtx_unlock(&be_lun->queue_lock); 1708 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1709 ctl_set_busy(&io->scsiio); 1710 ctl_config_read_done(io); 1711 continue; 1712 } 1713 ctl_be_block_cr_dispatch(be_lun, io); 1714 continue; 1715 } 1716 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 1717 if (io != NULL) { 1718 DPRINTF("input queue\n"); 1719 STAILQ_REMOVE_HEAD(&be_lun->input_queue, links); 1720 mtx_unlock(&be_lun->queue_lock); 1721 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1722 ctl_set_busy(&io->scsiio); 1723 ctl_data_submit_done(io); 1724 continue; 1725 } 1726 ctl_be_block_dispatch(be_lun, io); 1727 continue; 1728 } 1729 1730 /* 1731 * If we get here, there is no work left in the queues, so 1732 * just break out and let the task queue go to sleep. 1733 */ 1734 mtx_unlock(&be_lun->queue_lock); 1735 break; 1736 } 1737 } 1738 1739 /* 1740 * Entry point from CTL to the backend for I/O. We queue everything to a 1741 * work thread, so this just puts the I/O on a queue and wakes up the 1742 * thread. 1743 */ 1744 static int 1745 ctl_be_block_submit(union ctl_io *io) 1746 { 1747 struct ctl_be_block_lun *be_lun; 1748 1749 DPRINTF("entered\n"); 1750 1751 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 1752 1753 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, 1754 ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); 1755 1756 PRIV(io)->len = 0; 1757 1758 mtx_lock(&be_lun->queue_lock); 1759 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1760 mtx_unlock(&be_lun->queue_lock); 1761 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1762 1763 return (CTL_RETVAL_COMPLETE); 1764 } 1765 1766 static int 1767 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 1768 int flag, struct thread *td) 1769 { 1770 struct ctl_be_block_softc *softc = &backend_block_softc; 1771 int error; 1772 1773 error = 0; 1774 switch (cmd) { 1775 case CTL_LUN_REQ: { 1776 struct ctl_lun_req *lun_req; 1777 1778 lun_req = (struct ctl_lun_req *)addr; 1779 1780 switch (lun_req->reqtype) { 1781 case CTL_LUNREQ_CREATE: 1782 error = ctl_be_block_create(softc, lun_req); 1783 break; 1784 case CTL_LUNREQ_RM: 1785 error = ctl_be_block_rm(softc, lun_req); 1786 break; 1787 case CTL_LUNREQ_MODIFY: 1788 error = ctl_be_block_modify(softc, lun_req); 1789 break; 1790 default: 1791 lun_req->status = CTL_LUN_ERROR; 1792 snprintf(lun_req->error_str, sizeof(lun_req->error_str), 1793 "invalid LUN request type %d", 1794 lun_req->reqtype); 1795 break; 1796 } 1797 break; 1798 } 1799 default: 1800 error = ENOTTY; 1801 break; 1802 } 1803 1804 return (error); 1805 } 1806 1807 static int 1808 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1809 { 1810 struct ctl_be_lun *cbe_lun; 1811 struct ctl_be_block_filedata *file_data; 1812 struct ctl_lun_create_params *params; 1813 const char *value; 1814 struct vattr vattr; 1815 off_t ps, pss, po, pos, us, uss, uo, uos; 1816 int error; 1817 1818 cbe_lun = &be_lun->cbe_lun; 1819 file_data = &be_lun->backend.file; 1820 params = &be_lun->params; 1821 1822 be_lun->dev_type = CTL_BE_BLOCK_FILE; 1823 be_lun->dispatch = ctl_be_block_dispatch_file; 1824 be_lun->lun_flush = ctl_be_block_flush_file; 1825 be_lun->get_lba_status = ctl_be_block_gls_file; 1826 be_lun->getattr = ctl_be_block_getattr_file; 1827 be_lun->unmap = NULL; 1828 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 1829 1830 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 1831 if (error != 0) { 1832 snprintf(req->error_str, sizeof(req->error_str), 1833 "error calling VOP_GETATTR() for file %s", 1834 be_lun->dev_path); 1835 return (error); 1836 } 1837 1838 file_data->cred = crhold(curthread->td_ucred); 1839 if (params->lun_size_bytes != 0) 1840 be_lun->size_bytes = params->lun_size_bytes; 1841 else 1842 be_lun->size_bytes = vattr.va_size; 1843 1844 /* 1845 * For files we can use any logical block size. Prefer 512 bytes 1846 * for compatibility reasons. If file's vattr.va_blocksize 1847 * (preferred I/O block size) is bigger and multiple to chosen 1848 * logical block size -- report it as physical block size. 1849 */ 1850 if (params->blocksize_bytes != 0) 1851 cbe_lun->blocksize = params->blocksize_bytes; 1852 else if (cbe_lun->lun_type == T_CDROM) 1853 cbe_lun->blocksize = 2048; 1854 else 1855 cbe_lun->blocksize = 512; 1856 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 1857 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 1858 0 : (be_lun->size_blocks - 1); 1859 1860 us = ps = vattr.va_blocksize; 1861 uo = po = 0; 1862 1863 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 1864 if (value != NULL) 1865 ctl_expand_number(value, &ps); 1866 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 1867 if (value != NULL) 1868 ctl_expand_number(value, &po); 1869 pss = ps / cbe_lun->blocksize; 1870 pos = po / cbe_lun->blocksize; 1871 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 1872 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 1873 cbe_lun->pblockexp = fls(pss) - 1; 1874 cbe_lun->pblockoff = (pss - pos) % pss; 1875 } 1876 1877 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 1878 if (value != NULL) 1879 ctl_expand_number(value, &us); 1880 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 1881 if (value != NULL) 1882 ctl_expand_number(value, &uo); 1883 uss = us / cbe_lun->blocksize; 1884 uos = uo / cbe_lun->blocksize; 1885 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 1886 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 1887 cbe_lun->ublockexp = fls(uss) - 1; 1888 cbe_lun->ublockoff = (uss - uos) % uss; 1889 } 1890 1891 /* 1892 * Sanity check. The media size has to be at least one 1893 * sector long. 1894 */ 1895 if (be_lun->size_bytes < cbe_lun->blocksize) { 1896 error = EINVAL; 1897 snprintf(req->error_str, sizeof(req->error_str), 1898 "file %s size %ju < block size %u", be_lun->dev_path, 1899 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); 1900 } 1901 1902 cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; 1903 return (error); 1904 } 1905 1906 static int 1907 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1908 { 1909 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1910 struct ctl_lun_create_params *params; 1911 struct cdevsw *csw; 1912 struct cdev *dev; 1913 const char *value; 1914 int error, atomic, maxio, ref, unmap, tmp; 1915 off_t ps, pss, po, pos, us, uss, uo, uos, otmp; 1916 1917 params = &be_lun->params; 1918 1919 be_lun->dev_type = CTL_BE_BLOCK_DEV; 1920 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1921 if (csw == NULL) 1922 return (ENXIO); 1923 if (strcmp(csw->d_name, "zvol") == 0) { 1924 be_lun->dispatch = ctl_be_block_dispatch_zvol; 1925 be_lun->get_lba_status = ctl_be_block_gls_zvol; 1926 atomic = maxio = CTLBLK_MAX_IO_SIZE; 1927 } else { 1928 be_lun->dispatch = ctl_be_block_dispatch_dev; 1929 be_lun->get_lba_status = NULL; 1930 atomic = 0; 1931 maxio = dev->si_iosize_max; 1932 if (maxio <= 0) 1933 maxio = DFLTPHYS; 1934 if (maxio > CTLBLK_MAX_SEG) 1935 maxio = CTLBLK_MAX_SEG; 1936 } 1937 be_lun->lun_flush = ctl_be_block_flush_dev; 1938 be_lun->getattr = ctl_be_block_getattr_dev; 1939 be_lun->unmap = ctl_be_block_unmap_dev; 1940 1941 if (!csw->d_ioctl) { 1942 dev_relthread(dev, ref); 1943 snprintf(req->error_str, sizeof(req->error_str), 1944 "no d_ioctl for device %s!", be_lun->dev_path); 1945 return (ENODEV); 1946 } 1947 1948 error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, 1949 curthread); 1950 if (error) { 1951 dev_relthread(dev, ref); 1952 snprintf(req->error_str, sizeof(req->error_str), 1953 "error %d returned for DIOCGSECTORSIZE ioctl " 1954 "on %s!", error, be_lun->dev_path); 1955 return (error); 1956 } 1957 1958 /* 1959 * If the user has asked for a blocksize that is greater than the 1960 * backing device's blocksize, we can do it only if the blocksize 1961 * the user is asking for is an even multiple of the underlying 1962 * device's blocksize. 1963 */ 1964 if ((params->blocksize_bytes != 0) && 1965 (params->blocksize_bytes >= tmp)) { 1966 if (params->blocksize_bytes % tmp == 0) { 1967 cbe_lun->blocksize = params->blocksize_bytes; 1968 } else { 1969 dev_relthread(dev, ref); 1970 snprintf(req->error_str, sizeof(req->error_str), 1971 "requested blocksize %u is not an even " 1972 "multiple of backing device blocksize %u", 1973 params->blocksize_bytes, tmp); 1974 return (EINVAL); 1975 } 1976 } else if (params->blocksize_bytes != 0) { 1977 dev_relthread(dev, ref); 1978 snprintf(req->error_str, sizeof(req->error_str), 1979 "requested blocksize %u < backing device " 1980 "blocksize %u", params->blocksize_bytes, tmp); 1981 return (EINVAL); 1982 } else if (cbe_lun->lun_type == T_CDROM) 1983 cbe_lun->blocksize = MAX(tmp, 2048); 1984 else 1985 cbe_lun->blocksize = tmp; 1986 1987 error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD, 1988 curthread); 1989 if (error) { 1990 dev_relthread(dev, ref); 1991 snprintf(req->error_str, sizeof(req->error_str), 1992 "error %d returned for DIOCGMEDIASIZE " 1993 " ioctl on %s!", error, 1994 be_lun->dev_path); 1995 return (error); 1996 } 1997 1998 if (params->lun_size_bytes != 0) { 1999 if (params->lun_size_bytes > otmp) { 2000 dev_relthread(dev, ref); 2001 snprintf(req->error_str, sizeof(req->error_str), 2002 "requested LUN size %ju > backing device " 2003 "size %ju", 2004 (uintmax_t)params->lun_size_bytes, 2005 (uintmax_t)otmp); 2006 return (EINVAL); 2007 } 2008 2009 be_lun->size_bytes = params->lun_size_bytes; 2010 } else 2011 be_lun->size_bytes = otmp; 2012 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2013 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2014 0 : (be_lun->size_blocks - 1); 2015 2016 error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, 2017 curthread); 2018 if (error) 2019 ps = po = 0; 2020 else { 2021 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, 2022 FREAD, curthread); 2023 if (error) 2024 po = 0; 2025 } 2026 us = ps; 2027 uo = po; 2028 2029 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 2030 if (value != NULL) 2031 ctl_expand_number(value, &ps); 2032 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 2033 if (value != NULL) 2034 ctl_expand_number(value, &po); 2035 pss = ps / cbe_lun->blocksize; 2036 pos = po / cbe_lun->blocksize; 2037 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 2038 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 2039 cbe_lun->pblockexp = fls(pss) - 1; 2040 cbe_lun->pblockoff = (pss - pos) % pss; 2041 } 2042 2043 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 2044 if (value != NULL) 2045 ctl_expand_number(value, &us); 2046 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 2047 if (value != NULL) 2048 ctl_expand_number(value, &uo); 2049 uss = us / cbe_lun->blocksize; 2050 uos = uo / cbe_lun->blocksize; 2051 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 2052 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 2053 cbe_lun->ublockexp = fls(uss) - 1; 2054 cbe_lun->ublockoff = (uss - uos) % uss; 2055 } 2056 2057 cbe_lun->atomicblock = atomic / cbe_lun->blocksize; 2058 cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; 2059 2060 if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { 2061 unmap = 1; 2062 } else { 2063 struct diocgattr_arg arg; 2064 2065 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 2066 arg.len = sizeof(arg.value.i); 2067 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 2068 curthread); 2069 unmap = (error == 0) ? arg.value.i : 0; 2070 } 2071 value = dnvlist_get_string(cbe_lun->options, "unmap", NULL); 2072 if (value != NULL) 2073 unmap = (strcmp(value, "on") == 0); 2074 if (unmap) 2075 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; 2076 else 2077 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 2078 2079 dev_relthread(dev, ref); 2080 return (0); 2081 } 2082 2083 static int 2084 ctl_be_block_close(struct ctl_be_block_lun *be_lun) 2085 { 2086 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2087 int flags; 2088 2089 if (be_lun->vn) { 2090 flags = FREAD; 2091 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) 2092 flags |= FWRITE; 2093 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 2094 be_lun->vn = NULL; 2095 2096 switch (be_lun->dev_type) { 2097 case CTL_BE_BLOCK_DEV: 2098 break; 2099 case CTL_BE_BLOCK_FILE: 2100 if (be_lun->backend.file.cred != NULL) { 2101 crfree(be_lun->backend.file.cred); 2102 be_lun->backend.file.cred = NULL; 2103 } 2104 break; 2105 case CTL_BE_BLOCK_NONE: 2106 break; 2107 default: 2108 panic("Unexpected backend type %d", be_lun->dev_type); 2109 break; 2110 } 2111 be_lun->dev_type = CTL_BE_BLOCK_NONE; 2112 } 2113 return (0); 2114 } 2115 2116 static int 2117 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2118 { 2119 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2120 struct nameidata nd; 2121 const char *value; 2122 int error, flags; 2123 2124 error = 0; 2125 if (rootvnode == NULL) { 2126 snprintf(req->error_str, sizeof(req->error_str), 2127 "Root filesystem is not mounted"); 2128 return (1); 2129 } 2130 pwd_ensure_dirs(); 2131 2132 value = dnvlist_get_string(cbe_lun->options, "file", NULL); 2133 if (value == NULL) { 2134 snprintf(req->error_str, sizeof(req->error_str), 2135 "no file argument specified"); 2136 return (1); 2137 } 2138 free(be_lun->dev_path, M_CTLBLK); 2139 be_lun->dev_path = strdup(value, M_CTLBLK); 2140 2141 flags = FREAD; 2142 value = dnvlist_get_string(cbe_lun->options, "readonly", NULL); 2143 if (value != NULL) { 2144 if (strcmp(value, "on") != 0) 2145 flags |= FWRITE; 2146 } else if (cbe_lun->lun_type == T_DIRECT) 2147 flags |= FWRITE; 2148 2149 again: 2150 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); 2151 error = vn_open(&nd, &flags, 0, NULL); 2152 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { 2153 flags &= ~FWRITE; 2154 goto again; 2155 } 2156 if (error) { 2157 /* 2158 * This is the only reasonable guess we can make as far as 2159 * path if the user doesn't give us a fully qualified path. 2160 * If they want to specify a file, they need to specify the 2161 * full path. 2162 */ 2163 if (be_lun->dev_path[0] != '/') { 2164 char *dev_name; 2165 2166 asprintf(&dev_name, M_CTLBLK, "/dev/%s", 2167 be_lun->dev_path); 2168 free(be_lun->dev_path, M_CTLBLK); 2169 be_lun->dev_path = dev_name; 2170 goto again; 2171 } 2172 snprintf(req->error_str, sizeof(req->error_str), 2173 "error opening %s: %d", be_lun->dev_path, error); 2174 return (error); 2175 } 2176 if (flags & FWRITE) 2177 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; 2178 else 2179 cbe_lun->flags |= CTL_LUN_FLAG_READONLY; 2180 2181 NDFREE(&nd, NDF_ONLY_PNBUF); 2182 be_lun->vn = nd.ni_vp; 2183 2184 /* We only support disks and files. */ 2185 if (vn_isdisk_error(be_lun->vn, &error)) { 2186 error = ctl_be_block_open_dev(be_lun, req); 2187 } else if (be_lun->vn->v_type == VREG) { 2188 error = ctl_be_block_open_file(be_lun, req); 2189 } else { 2190 error = EINVAL; 2191 snprintf(req->error_str, sizeof(req->error_str), 2192 "%s is not a disk or plain file", be_lun->dev_path); 2193 } 2194 VOP_UNLOCK(be_lun->vn); 2195 2196 if (error != 0) 2197 ctl_be_block_close(be_lun); 2198 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2199 if (be_lun->dispatch != ctl_be_block_dispatch_dev) 2200 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2201 value = dnvlist_get_string(cbe_lun->options, "serseq", NULL); 2202 if (value != NULL && strcmp(value, "on") == 0) 2203 cbe_lun->serseq = CTL_LUN_SERSEQ_ON; 2204 else if (value != NULL && strcmp(value, "read") == 0) 2205 cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2206 else if (value != NULL && strcmp(value, "soft") == 0) 2207 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2208 else if (value != NULL && strcmp(value, "off") == 0) 2209 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2210 return (0); 2211 } 2212 2213 static int 2214 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2215 { 2216 struct ctl_be_lun *cbe_lun; 2217 struct ctl_be_block_lun *be_lun; 2218 struct ctl_lun_create_params *params; 2219 char num_thread_str[16]; 2220 char tmpstr[32]; 2221 const char *value; 2222 int retval, num_threads; 2223 int tmp_num_threads; 2224 2225 params = &req->reqdata.create; 2226 retval = 0; 2227 req->status = CTL_LUN_OK; 2228 2229 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2230 cbe_lun = &be_lun->cbe_lun; 2231 be_lun->params = req->reqdata.create; 2232 be_lun->softc = softc; 2233 STAILQ_INIT(&be_lun->input_queue); 2234 STAILQ_INIT(&be_lun->config_read_queue); 2235 STAILQ_INIT(&be_lun->config_write_queue); 2236 STAILQ_INIT(&be_lun->datamove_queue); 2237 mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF); 2238 mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF); 2239 cbe_lun->options = nvlist_clone(req->args_nvl); 2240 2241 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 2242 cbe_lun->lun_type = params->device_type; 2243 else 2244 cbe_lun->lun_type = T_DIRECT; 2245 be_lun->flags = 0; 2246 cbe_lun->flags = 0; 2247 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2248 if (value != NULL) { 2249 if (strcmp(value, "primary") == 0) 2250 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2251 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2252 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2253 2254 if (cbe_lun->lun_type == T_DIRECT || 2255 cbe_lun->lun_type == T_CDROM) { 2256 be_lun->size_bytes = params->lun_size_bytes; 2257 if (params->blocksize_bytes != 0) 2258 cbe_lun->blocksize = params->blocksize_bytes; 2259 else if (cbe_lun->lun_type == T_CDROM) 2260 cbe_lun->blocksize = 2048; 2261 else 2262 cbe_lun->blocksize = 512; 2263 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2264 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2265 0 : (be_lun->size_blocks - 1); 2266 2267 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2268 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2269 retval = ctl_be_block_open(be_lun, req); 2270 if (retval != 0) { 2271 retval = 0; 2272 req->status = CTL_LUN_WARNING; 2273 } 2274 } 2275 num_threads = cbb_num_threads; 2276 } else { 2277 num_threads = 1; 2278 } 2279 2280 value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL); 2281 if (value != NULL) { 2282 tmp_num_threads = strtol(value, NULL, 0); 2283 2284 /* 2285 * We don't let the user specify less than one 2286 * thread, but hope he's clueful enough not to 2287 * specify 1000 threads. 2288 */ 2289 if (tmp_num_threads < 1) { 2290 snprintf(req->error_str, sizeof(req->error_str), 2291 "invalid number of threads %s", 2292 num_thread_str); 2293 goto bailout_error; 2294 } 2295 num_threads = tmp_num_threads; 2296 } 2297 2298 if (be_lun->vn == NULL) 2299 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2300 /* Tell the user the blocksize we ended up using */ 2301 params->lun_size_bytes = be_lun->size_bytes; 2302 params->blocksize_bytes = cbe_lun->blocksize; 2303 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2304 cbe_lun->req_lun_id = params->req_lun_id; 2305 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2306 } else 2307 cbe_lun->req_lun_id = 0; 2308 2309 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2310 cbe_lun->be = &ctl_be_block_driver; 2311 2312 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2313 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d", 2314 softc->num_luns); 2315 strncpy((char *)cbe_lun->serial_num, tmpstr, 2316 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2317 2318 /* Tell the user what we used for a serial number */ 2319 strncpy((char *)params->serial_num, tmpstr, 2320 MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2321 } else { 2322 strncpy((char *)cbe_lun->serial_num, params->serial_num, 2323 MIN(sizeof(cbe_lun->serial_num), 2324 sizeof(params->serial_num))); 2325 } 2326 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2327 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns); 2328 strncpy((char *)cbe_lun->device_id, tmpstr, 2329 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2330 2331 /* Tell the user what we used for a device ID */ 2332 strncpy((char *)params->device_id, tmpstr, 2333 MIN(sizeof(params->device_id), sizeof(tmpstr))); 2334 } else { 2335 strncpy((char *)cbe_lun->device_id, params->device_id, 2336 MIN(sizeof(cbe_lun->device_id), 2337 sizeof(params->device_id))); 2338 } 2339 2340 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2341 2342 be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK, 2343 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2344 2345 if (be_lun->io_taskqueue == NULL) { 2346 snprintf(req->error_str, sizeof(req->error_str), 2347 "unable to create taskqueue"); 2348 goto bailout_error; 2349 } 2350 2351 /* 2352 * Note that we start the same number of threads by default for 2353 * both the file case and the block device case. For the file 2354 * case, we need multiple threads to allow concurrency, because the 2355 * vnode interface is designed to be a blocking interface. For the 2356 * block device case, ZFS zvols at least will block the caller's 2357 * context in many instances, and so we need multiple threads to 2358 * overcome that problem. Other block devices don't need as many 2359 * threads, but they shouldn't cause too many problems. 2360 * 2361 * If the user wants to just have a single thread for a block 2362 * device, he can specify that when the LUN is created, or change 2363 * the tunable/sysctl to alter the default number of threads. 2364 */ 2365 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue, 2366 /*num threads*/num_threads, 2367 /*priority*/PUSER, 2368 /*proc*/control_softc->ctl_proc, 2369 /*thread name*/"block"); 2370 2371 if (retval != 0) 2372 goto bailout_error; 2373 2374 be_lun->num_threads = num_threads; 2375 2376 retval = ctl_add_lun(&be_lun->cbe_lun); 2377 if (retval != 0) { 2378 snprintf(req->error_str, sizeof(req->error_str), 2379 "ctl_add_lun() returned error %d, see dmesg for " 2380 "details", retval); 2381 retval = 0; 2382 goto bailout_error; 2383 } 2384 2385 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, 2386 cbe_lun->blocksize, 2387 DEVSTAT_ALL_SUPPORTED, 2388 cbe_lun->lun_type 2389 | DEVSTAT_TYPE_IF_OTHER, 2390 DEVSTAT_PRIORITY_OTHER); 2391 2392 mtx_lock(&softc->lock); 2393 softc->num_luns++; 2394 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); 2395 mtx_unlock(&softc->lock); 2396 2397 params->req_lun_id = cbe_lun->lun_id; 2398 2399 return (retval); 2400 2401 bailout_error: 2402 req->status = CTL_LUN_ERROR; 2403 2404 if (be_lun->io_taskqueue != NULL) 2405 taskqueue_free(be_lun->io_taskqueue); 2406 ctl_be_block_close(be_lun); 2407 if (be_lun->dev_path != NULL) 2408 free(be_lun->dev_path, M_CTLBLK); 2409 nvlist_destroy(cbe_lun->options); 2410 mtx_destroy(&be_lun->queue_lock); 2411 mtx_destroy(&be_lun->io_lock); 2412 free(be_lun, M_CTLBLK); 2413 2414 return (retval); 2415 } 2416 2417 static int 2418 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2419 { 2420 struct ctl_lun_rm_params *params; 2421 struct ctl_be_block_lun *be_lun; 2422 struct ctl_be_lun *cbe_lun; 2423 int retval; 2424 2425 params = &req->reqdata.rm; 2426 2427 sx_xlock(&softc->modify_lock); 2428 mtx_lock(&softc->lock); 2429 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2430 if (be_lun->cbe_lun.lun_id == params->lun_id) { 2431 SLIST_REMOVE(&softc->lun_list, be_lun, 2432 ctl_be_block_lun, links); 2433 softc->num_luns--; 2434 break; 2435 } 2436 } 2437 mtx_unlock(&softc->lock); 2438 sx_xunlock(&softc->modify_lock); 2439 if (be_lun == NULL) { 2440 snprintf(req->error_str, sizeof(req->error_str), 2441 "LUN %u is not managed by the block backend", 2442 params->lun_id); 2443 goto bailout_error; 2444 } 2445 cbe_lun = &be_lun->cbe_lun; 2446 2447 if (be_lun->vn != NULL) { 2448 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2449 ctl_lun_no_media(cbe_lun); 2450 taskqueue_drain_all(be_lun->io_taskqueue); 2451 ctl_be_block_close(be_lun); 2452 } 2453 2454 mtx_lock(&softc->lock); 2455 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2456 mtx_unlock(&softc->lock); 2457 2458 retval = ctl_remove_lun(cbe_lun); 2459 if (retval != 0) { 2460 snprintf(req->error_str, sizeof(req->error_str), 2461 "error %d returned from ctl_remove_lun() for " 2462 "LUN %d", retval, params->lun_id); 2463 mtx_lock(&softc->lock); 2464 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2465 mtx_unlock(&softc->lock); 2466 goto bailout_error; 2467 } 2468 2469 mtx_lock(&softc->lock); 2470 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2471 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0); 2472 if (retval == EINTR) 2473 break; 2474 } 2475 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2476 if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2477 mtx_unlock(&softc->lock); 2478 free(be_lun, M_CTLBLK); 2479 } else { 2480 mtx_unlock(&softc->lock); 2481 return (EINTR); 2482 } 2483 2484 req->status = CTL_LUN_OK; 2485 return (0); 2486 2487 bailout_error: 2488 req->status = CTL_LUN_ERROR; 2489 return (0); 2490 } 2491 2492 static int 2493 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2494 { 2495 struct ctl_lun_modify_params *params; 2496 struct ctl_be_block_lun *be_lun; 2497 struct ctl_be_lun *cbe_lun; 2498 const char *value; 2499 uint64_t oldsize; 2500 int error, wasprim; 2501 2502 params = &req->reqdata.modify; 2503 2504 sx_xlock(&softc->modify_lock); 2505 mtx_lock(&softc->lock); 2506 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2507 if (be_lun->cbe_lun.lun_id == params->lun_id) 2508 break; 2509 } 2510 mtx_unlock(&softc->lock); 2511 if (be_lun == NULL) { 2512 snprintf(req->error_str, sizeof(req->error_str), 2513 "LUN %u is not managed by the block backend", 2514 params->lun_id); 2515 goto bailout_error; 2516 } 2517 cbe_lun = &be_lun->cbe_lun; 2518 2519 if (params->lun_size_bytes != 0) 2520 be_lun->params.lun_size_bytes = params->lun_size_bytes; 2521 2522 if (req->args_nvl != NULL) { 2523 nvlist_destroy(cbe_lun->options); 2524 cbe_lun->options = nvlist_clone(req->args_nvl); 2525 } 2526 2527 wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); 2528 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2529 if (value != NULL) { 2530 if (strcmp(value, "primary") == 0) 2531 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2532 else 2533 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2534 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2535 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2536 else 2537 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2538 if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { 2539 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) 2540 ctl_lun_primary(cbe_lun); 2541 else 2542 ctl_lun_secondary(cbe_lun); 2543 } 2544 2545 oldsize = be_lun->size_blocks; 2546 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2547 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2548 if (be_lun->vn == NULL) 2549 error = ctl_be_block_open(be_lun, req); 2550 else if (vn_isdisk_error(be_lun->vn, &error)) 2551 error = ctl_be_block_open_dev(be_lun, req); 2552 else if (be_lun->vn->v_type == VREG) { 2553 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2554 error = ctl_be_block_open_file(be_lun, req); 2555 VOP_UNLOCK(be_lun->vn); 2556 } else 2557 error = EINVAL; 2558 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) && 2559 be_lun->vn != NULL) { 2560 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2561 ctl_lun_has_media(cbe_lun); 2562 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 && 2563 be_lun->vn == NULL) { 2564 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2565 ctl_lun_no_media(cbe_lun); 2566 } 2567 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2568 } else { 2569 if (be_lun->vn != NULL) { 2570 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2571 ctl_lun_no_media(cbe_lun); 2572 taskqueue_drain_all(be_lun->io_taskqueue); 2573 error = ctl_be_block_close(be_lun); 2574 } else 2575 error = 0; 2576 } 2577 if (be_lun->size_blocks != oldsize) 2578 ctl_lun_capacity_changed(cbe_lun); 2579 2580 /* Tell the user the exact size we ended up using */ 2581 params->lun_size_bytes = be_lun->size_bytes; 2582 2583 sx_xunlock(&softc->modify_lock); 2584 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; 2585 return (0); 2586 2587 bailout_error: 2588 sx_xunlock(&softc->modify_lock); 2589 req->status = CTL_LUN_ERROR; 2590 return (0); 2591 } 2592 2593 static void 2594 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun) 2595 { 2596 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun; 2597 struct ctl_be_block_softc *softc = be_lun->softc; 2598 2599 taskqueue_drain_all(be_lun->io_taskqueue); 2600 taskqueue_free(be_lun->io_taskqueue); 2601 if (be_lun->disk_stats != NULL) 2602 devstat_remove_entry(be_lun->disk_stats); 2603 nvlist_destroy(be_lun->cbe_lun.options); 2604 free(be_lun->dev_path, M_CTLBLK); 2605 mtx_destroy(&be_lun->queue_lock); 2606 mtx_destroy(&be_lun->io_lock); 2607 2608 mtx_lock(&softc->lock); 2609 be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2610 if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2611 wakeup(be_lun); 2612 else 2613 free(be_lun, M_CTLBLK); 2614 mtx_unlock(&softc->lock); 2615 } 2616 2617 static int 2618 ctl_be_block_config_write(union ctl_io *io) 2619 { 2620 struct ctl_be_block_lun *be_lun; 2621 struct ctl_be_lun *cbe_lun; 2622 int retval; 2623 2624 DPRINTF("entered\n"); 2625 2626 cbe_lun = CTL_BACKEND_LUN(io); 2627 be_lun = (struct ctl_be_block_lun *)cbe_lun; 2628 2629 retval = 0; 2630 switch (io->scsiio.cdb[0]) { 2631 case SYNCHRONIZE_CACHE: 2632 case SYNCHRONIZE_CACHE_16: 2633 case WRITE_SAME_10: 2634 case WRITE_SAME_16: 2635 case UNMAP: 2636 /* 2637 * The upper level CTL code will filter out any CDBs with 2638 * the immediate bit set and return the proper error. 2639 * 2640 * We don't really need to worry about what LBA range the 2641 * user asked to be synced out. When they issue a sync 2642 * cache command, we'll sync out the whole thing. 2643 */ 2644 mtx_lock(&be_lun->queue_lock); 2645 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2646 links); 2647 mtx_unlock(&be_lun->queue_lock); 2648 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2649 break; 2650 case START_STOP_UNIT: { 2651 struct scsi_start_stop_unit *cdb; 2652 struct ctl_lun_req req; 2653 2654 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2655 if ((cdb->how & SSS_PC_MASK) != 0) { 2656 ctl_set_success(&io->scsiio); 2657 ctl_config_write_done(io); 2658 break; 2659 } 2660 if (cdb->how & SSS_START) { 2661 if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) { 2662 retval = ctl_be_block_open(be_lun, &req); 2663 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2664 if (retval == 0) { 2665 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2666 ctl_lun_has_media(cbe_lun); 2667 } else { 2668 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2669 ctl_lun_no_media(cbe_lun); 2670 } 2671 } 2672 ctl_start_lun(cbe_lun); 2673 } else { 2674 ctl_stop_lun(cbe_lun); 2675 if (cdb->how & SSS_LOEJ) { 2676 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2677 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED; 2678 ctl_lun_ejected(cbe_lun); 2679 if (be_lun->vn != NULL) 2680 ctl_be_block_close(be_lun); 2681 } 2682 } 2683 2684 ctl_set_success(&io->scsiio); 2685 ctl_config_write_done(io); 2686 break; 2687 } 2688 case PREVENT_ALLOW: 2689 ctl_set_success(&io->scsiio); 2690 ctl_config_write_done(io); 2691 break; 2692 default: 2693 ctl_set_invalid_opcode(&io->scsiio); 2694 ctl_config_write_done(io); 2695 retval = CTL_RETVAL_COMPLETE; 2696 break; 2697 } 2698 2699 return (retval); 2700 } 2701 2702 static int 2703 ctl_be_block_config_read(union ctl_io *io) 2704 { 2705 struct ctl_be_block_lun *be_lun; 2706 int retval = 0; 2707 2708 DPRINTF("entered\n"); 2709 2710 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 2711 2712 switch (io->scsiio.cdb[0]) { 2713 case SERVICE_ACTION_IN: 2714 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 2715 mtx_lock(&be_lun->queue_lock); 2716 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 2717 &io->io_hdr, links); 2718 mtx_unlock(&be_lun->queue_lock); 2719 taskqueue_enqueue(be_lun->io_taskqueue, 2720 &be_lun->io_task); 2721 retval = CTL_RETVAL_QUEUED; 2722 break; 2723 } 2724 ctl_set_invalid_field(&io->scsiio, 2725 /*sks_valid*/ 1, 2726 /*command*/ 1, 2727 /*field*/ 1, 2728 /*bit_valid*/ 1, 2729 /*bit*/ 4); 2730 ctl_config_read_done(io); 2731 retval = CTL_RETVAL_COMPLETE; 2732 break; 2733 default: 2734 ctl_set_invalid_opcode(&io->scsiio); 2735 ctl_config_read_done(io); 2736 retval = CTL_RETVAL_COMPLETE; 2737 break; 2738 } 2739 2740 return (retval); 2741 } 2742 2743 static int 2744 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb) 2745 { 2746 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 2747 int retval; 2748 2749 retval = sbuf_printf(sb, "\t<num_threads>"); 2750 if (retval != 0) 2751 goto bailout; 2752 retval = sbuf_printf(sb, "%d", lun->num_threads); 2753 if (retval != 0) 2754 goto bailout; 2755 retval = sbuf_printf(sb, "</num_threads>\n"); 2756 2757 bailout: 2758 return (retval); 2759 } 2760 2761 static uint64_t 2762 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname) 2763 { 2764 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 2765 2766 if (lun->getattr == NULL) 2767 return (UINT64_MAX); 2768 return (lun->getattr(lun, attrname)); 2769 } 2770 2771 static int 2772 ctl_be_block_init(void) 2773 { 2774 struct ctl_be_block_softc *softc = &backend_block_softc; 2775 2776 sx_init(&softc->modify_lock, "ctlblock modify"); 2777 mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); 2778 softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2779 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2780 softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG, 2781 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2782 if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) 2783 softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG, 2784 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2785 SLIST_INIT(&softc->lun_list); 2786 return (0); 2787 } 2788 2789 static int 2790 ctl_be_block_shutdown(void) 2791 { 2792 struct ctl_be_block_softc *softc = &backend_block_softc; 2793 struct ctl_be_block_lun *lun; 2794 2795 mtx_lock(&softc->lock); 2796 while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) { 2797 SLIST_REMOVE_HEAD(&softc->lun_list, links); 2798 softc->num_luns--; 2799 /* 2800 * Drop our lock here. Since ctl_remove_lun() can call 2801 * back into us, this could potentially lead to a recursive 2802 * lock of the same mutex, which would cause a hang. 2803 */ 2804 mtx_unlock(&softc->lock); 2805 ctl_remove_lun(&lun->cbe_lun); 2806 mtx_lock(&softc->lock); 2807 } 2808 mtx_unlock(&softc->lock); 2809 uma_zdestroy(softc->bufmin_zone); 2810 if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) 2811 uma_zdestroy(softc->bufmax_zone); 2812 uma_zdestroy(softc->beio_zone); 2813 mtx_destroy(&softc->lock); 2814 sx_destroy(&softc->modify_lock); 2815 return (0); 2816 } 2817