1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003 Silicon Graphics International Corp. 5 * Copyright (c) 2009-2011 Spectra Logic Corporation 6 * Copyright (c) 2012,2021 The FreeBSD Foundation 7 * Copyright (c) 2014-2021 Alexander Motin <mav@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Portions of this software were developed by Edward Tomasz Napierala 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org> 14 * under sponsorship from the FreeBSD Foundation. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions, and the following disclaimer, 21 * without modification. 22 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 23 * substantially similar to the "NO WARRANTY" disclaimer below 24 * ("Disclaimer") and any redistribution must be conditioned upon 25 * including a substantially similar Disclaimer requirement for further 26 * binary redistribution. 27 * 28 * NO WARRANTY 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 33 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 37 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 38 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 * POSSIBILITY OF SUCH DAMAGES. 40 * 41 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 42 */ 43 /* 44 * CAM Target Layer driver backend for block devices. 45 * 46 * Author: Ken Merry <ken@FreeBSD.org> 47 */ 48 #include <sys/cdefs.h> 49 __FBSDID("$FreeBSD$"); 50 51 #include <sys/param.h> 52 #include <sys/systm.h> 53 #include <sys/kernel.h> 54 #include <sys/types.h> 55 #include <sys/kthread.h> 56 #include <sys/bio.h> 57 #include <sys/fcntl.h> 58 #include <sys/limits.h> 59 #include <sys/lock.h> 60 #include <sys/mutex.h> 61 #include <sys/condvar.h> 62 #include <sys/malloc.h> 63 #include <sys/conf.h> 64 #include <sys/ioccom.h> 65 #include <sys/queue.h> 66 #include <sys/sbuf.h> 67 #include <sys/endian.h> 68 #include <sys/uio.h> 69 #include <sys/buf.h> 70 #include <sys/taskqueue.h> 71 #include <sys/vnode.h> 72 #include <sys/namei.h> 73 #include <sys/mount.h> 74 #include <sys/disk.h> 75 #include <sys/fcntl.h> 76 #include <sys/filedesc.h> 77 #include <sys/filio.h> 78 #include <sys/proc.h> 79 #include <sys/pcpu.h> 80 #include <sys/module.h> 81 #include <sys/sdt.h> 82 #include <sys/devicestat.h> 83 #include <sys/sysctl.h> 84 #include <sys/nv.h> 85 #include <sys/dnv.h> 86 #include <sys/sx.h> 87 #include <sys/unistd.h> 88 89 #include <geom/geom.h> 90 91 #include <cam/cam.h> 92 #include <cam/scsi/scsi_all.h> 93 #include <cam/scsi/scsi_da.h> 94 #include <cam/ctl/ctl_io.h> 95 #include <cam/ctl/ctl.h> 96 #include <cam/ctl/ctl_backend.h> 97 #include <cam/ctl/ctl_ioctl.h> 98 
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is to allocate enough S/G space to handle at least 1MB I/Os.
 * On systems with small maxphys it can be 8 128KB segments.  On large systems
 * it can be up to 8 1MB segments.  I/Os larger than that we'll split.
 */
#define	CTLBLK_MAX_SEGS		8
#define	CTLBLK_HALF_SEGS	(CTLBLK_MAX_SEGS / 2)
#define	CTLBLK_MIN_SEG		(128 * 1024)
#define	CTLBLK_MAX_SEG		MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys))
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS)

/* Debug tracing; compiles to nothing unless CTLBLK_DEBUG is defined. */
#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

/*
 * Accessors for the per-I/O private areas used by this backend:
 * PRIV() views the CTL_PRIV_BACKEND slot as a ctl_ptr_len_flags (its ptr
 * member carries the struct ctl_be_block_io), ARGS() views the
 * CTL_PRIV_LBA_LEN slot as a ctl_lba_len_flags.
 */
#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

SDT_PROVIDER_DEFINE(cbb);

/* Per-LUN state flags. */
typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;

/* Kind of object backing a LUN. */
typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

/* File-backed LUN state: the credential passed to the vnode operations. */
struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

/* Per-backing-type method signatures stored in struct ctl_be_block_lun. */
typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
    const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;		/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;			/* backing vnode */
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;		/* read/write handler */
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_dispatch_t get_lba_status;
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	SLIST_ENTRY(ctl_be_block_lun) links;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;	/* bio counters and devstat updates */
	struct mtx_padalign queue_lock;	/* the queues above */
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct sx			 modify_lock;
	struct mtx			 lock;
	int				 num_luns;
	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
	uma_zone_t			 beio_zone;	/* struct ctl_be_block_io */
	uma_zone_t			 bufmin_zone;	/* segments <= CTLBLK_MIN_SEG */
	uma_zone_t			 bufmax_zone;	/* larger segments */
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io *io;
	/*
	 * Data segments.  When two_sglists is set the second list occupies
	 * the entries starting at CTLBLK_HALF_SEGS.
	 */
	struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS];
	struct iovec xiovecs[CTLBLK_MAX_SEGS];
	int refcnt;			/* starts at 1; freed when it hits 0 */
	int bio_cmd;
	int two_sglists;
	int num_segs;
	int num_bios_sent;		/* updated under the LUN's io_lock */
	int num_bios_done;		/* updated under the LUN's io_lock */
	int send_complete;		/* all bios for this I/O were issued */
	int first_error;		/* error of the lowest-offset failed bio */
	uint64_t first_error_offset;
	struct bintime ds_t0;		/* devstat transaction start time */
	devstat_tag_type ds_tag_type;
	devstat_trans_flags ds_trans_type;
	uint64_t io_len;
	uint64_t io_offset;		/* -1: UNMAP descriptor list */
	int io_arg;
	struct ctl_be_block_softc *softc;
	struct ctl_be_block_lun *lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

extern struct ctl_softc *control_softc;

static int cbb_num_threads = 32;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
    &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
    const char *attrname);
static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
255 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 256 struct ctl_be_block_io *beio); 257 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 258 struct ctl_be_block_io *beio); 259 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, 260 const char *attrname); 261 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 262 union ctl_io *io); 263 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 264 union ctl_io *io); 265 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 266 union ctl_io *io); 267 static void ctl_be_block_worker(void *context, int pending); 268 static int ctl_be_block_submit(union ctl_io *io); 269 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 270 int flag, struct thread *td); 271 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, 272 struct ctl_lun_req *req); 273 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, 274 struct ctl_lun_req *req); 275 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); 276 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun, 277 struct ctl_lun_req *req); 278 static int ctl_be_block_create(struct ctl_be_block_softc *softc, 279 struct ctl_lun_req *req); 280 static int ctl_be_block_rm(struct ctl_be_block_softc *softc, 281 struct ctl_lun_req *req); 282 static int ctl_be_block_modify(struct ctl_be_block_softc *softc, 283 struct ctl_lun_req *req); 284 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun); 285 static int ctl_be_block_config_write(union ctl_io *io); 286 static int ctl_be_block_config_read(union ctl_io *io); 287 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb); 288 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname); 289 static int ctl_be_block_init(void); 290 static int ctl_be_block_shutdown(void); 291 292 static struct ctl_backend_driver 
ctl_be_block_driver = 293 { 294 .name = "block", 295 .flags = CTL_BE_FLAG_HAS_CONFIG, 296 .init = ctl_be_block_init, 297 .shutdown = ctl_be_block_shutdown, 298 .data_submit = ctl_be_block_submit, 299 .config_read = ctl_be_block_config_read, 300 .config_write = ctl_be_block_config_write, 301 .ioctl = ctl_be_block_ioctl, 302 .lun_info = ctl_be_block_lun_info, 303 .lun_attr = ctl_be_block_lun_attr 304 }; 305 306 MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend"); 307 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); 308 309 static void 310 ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg, 311 size_t len) 312 { 313 314 if (len <= CTLBLK_MIN_SEG) { 315 sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK); 316 } else { 317 KASSERT(len <= CTLBLK_MAX_SEG, 318 ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG)); 319 sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK); 320 } 321 sg->len = len; 322 } 323 324 static void 325 ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg) 326 { 327 328 if (sg->len <= CTLBLK_MIN_SEG) { 329 uma_zfree(softc->bufmin_zone, sg->addr); 330 } else { 331 KASSERT(sg->len <= CTLBLK_MAX_SEG, 332 ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG)); 333 uma_zfree(softc->bufmax_zone, sg->addr); 334 } 335 } 336 337 static struct ctl_be_block_io * 338 ctl_alloc_beio(struct ctl_be_block_softc *softc) 339 { 340 struct ctl_be_block_io *beio; 341 342 beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO); 343 beio->softc = softc; 344 beio->refcnt = 1; 345 return (beio); 346 } 347 348 static void 349 ctl_real_free_beio(struct ctl_be_block_io *beio) 350 { 351 struct ctl_be_block_softc *softc = beio->softc; 352 int i; 353 354 for (i = 0; i < beio->num_segs; i++) { 355 ctl_free_seg(softc, &beio->sg_segs[i]); 356 357 /* For compare we had two equal S/G lists. 
*/ 358 if (beio->two_sglists) { 359 ctl_free_seg(softc, 360 &beio->sg_segs[i + CTLBLK_HALF_SEGS]); 361 } 362 } 363 364 uma_zfree(softc->beio_zone, beio); 365 } 366 367 static void 368 ctl_refcnt_beio(void *arg, int diff) 369 { 370 struct ctl_be_block_io *beio = arg; 371 372 if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0) 373 ctl_real_free_beio(beio); 374 } 375 376 static void 377 ctl_free_beio(struct ctl_be_block_io *beio) 378 { 379 380 ctl_refcnt_beio(beio, -1); 381 } 382 383 static void 384 ctl_complete_beio(struct ctl_be_block_io *beio) 385 { 386 union ctl_io *io = beio->io; 387 388 if (beio->beio_cont != NULL) { 389 beio->beio_cont(beio); 390 } else { 391 ctl_free_beio(beio); 392 ctl_data_submit_done(io); 393 } 394 } 395 396 static size_t 397 cmp(uint8_t *a, uint8_t *b, size_t size) 398 { 399 size_t i; 400 401 for (i = 0; i < size; i++) { 402 if (a[i] != b[i]) 403 break; 404 } 405 return (i); 406 } 407 408 static void 409 ctl_be_block_compare(union ctl_io *io) 410 { 411 struct ctl_be_block_io *beio; 412 uint64_t off, res; 413 int i; 414 uint8_t info[8]; 415 416 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 417 off = 0; 418 for (i = 0; i < beio->num_segs; i++) { 419 res = cmp(beio->sg_segs[i].addr, 420 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, 421 beio->sg_segs[i].len); 422 off += res; 423 if (res < beio->sg_segs[i].len) 424 break; 425 } 426 if (i < beio->num_segs) { 427 scsi_u64to8b(off, info); 428 ctl_set_sense(&io->scsiio, /*current_error*/ 1, 429 /*sense_key*/ SSD_KEY_MISCOMPARE, 430 /*asc*/ 0x1D, /*ascq*/ 0x00, 431 /*type*/ SSD_ELEM_INFO, 432 /*size*/ sizeof(info), /*data*/ &info, 433 /*type*/ SSD_ELEM_NONE); 434 } else 435 ctl_set_success(&io->scsiio); 436 } 437 438 static int 439 ctl_be_block_move_done(union ctl_io *io, bool samethr) 440 { 441 struct ctl_be_block_io *beio; 442 struct ctl_be_block_lun *be_lun; 443 struct ctl_lba_len_flags *lbalen; 444 445 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 446 447 DPRINTF("entered\n"); 448 
io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; 449 450 /* 451 * We set status at this point for read and compare commands. 452 */ 453 if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && 454 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) { 455 lbalen = ARGS(io); 456 if (lbalen->flags & CTL_LLF_READ) { 457 ctl_set_success(&io->scsiio); 458 } else if (lbalen->flags & CTL_LLF_COMPARE) { 459 /* We have two data blocks ready for comparison. */ 460 ctl_be_block_compare(io); 461 } 462 } 463 464 /* 465 * If this is a read, or a write with errors, it is done. 466 */ 467 if ((beio->bio_cmd == BIO_READ) 468 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 469 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 470 ctl_complete_beio(beio); 471 return (0); 472 } 473 474 /* 475 * At this point, we have a write and the DMA completed successfully. 476 * If we were called synchronously in the original thread then just 477 * dispatch, otherwise we now have to queue it to the task queue to 478 * execute the backend I/O. That is because we do blocking 479 * memory allocations, and in the file backing case, blocking I/O. 480 * This move done routine is generally called in the SIM's 481 * interrupt context, and therefore we cannot block. 
482 */ 483 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 484 if (samethr) { 485 be_lun->dispatch(be_lun, beio); 486 } else { 487 mtx_lock(&be_lun->queue_lock); 488 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 489 mtx_unlock(&be_lun->queue_lock); 490 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 491 } 492 return (0); 493 } 494 495 static void 496 ctl_be_block_biodone(struct bio *bio) 497 { 498 struct ctl_be_block_io *beio = bio->bio_caller1; 499 struct ctl_be_block_lun *be_lun = beio->lun; 500 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 501 union ctl_io *io; 502 int error; 503 504 io = beio->io; 505 506 DPRINTF("entered\n"); 507 508 error = bio->bio_error; 509 mtx_lock(&be_lun->io_lock); 510 if (error != 0 && 511 (beio->first_error == 0 || 512 bio->bio_offset < beio->first_error_offset)) { 513 beio->first_error = error; 514 beio->first_error_offset = bio->bio_offset; 515 } 516 517 beio->num_bios_done++; 518 519 /* 520 * XXX KDM will this cause WITNESS to complain? Holding a lock 521 * during the free might cause it to complain. 522 */ 523 g_destroy_bio(bio); 524 525 /* 526 * If the send complete bit isn't set, or we aren't the last I/O to 527 * complete, then we're done. 528 */ 529 if ((beio->send_complete == 0) 530 || (beio->num_bios_done < beio->num_bios_sent)) { 531 mtx_unlock(&be_lun->io_lock); 532 return; 533 } 534 535 /* 536 * At this point, we've verified that we are the last I/O to 537 * complete, so it's safe to drop the lock. 538 */ 539 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 540 beio->ds_tag_type, beio->ds_trans_type, 541 /*now*/ NULL, /*then*/&beio->ds_t0); 542 mtx_unlock(&be_lun->io_lock); 543 544 /* 545 * If there are any errors from the backing device, we fail the 546 * entire I/O with a medium error. 
547 */ 548 error = beio->first_error; 549 if (error != 0) { 550 if (error == EOPNOTSUPP) { 551 ctl_set_invalid_opcode(&io->scsiio); 552 } else if (error == ENOSPC || error == EDQUOT) { 553 ctl_set_space_alloc_fail(&io->scsiio); 554 } else if (error == EROFS || error == EACCES) { 555 ctl_set_hw_write_protected(&io->scsiio); 556 } else if (beio->bio_cmd == BIO_FLUSH) { 557 /* XXX KDM is there is a better error here? */ 558 ctl_set_internal_failure(&io->scsiio, 559 /*sks_valid*/ 1, 560 /*retry_count*/ 0xbad2); 561 } else { 562 ctl_set_medium_error(&io->scsiio, 563 beio->bio_cmd == BIO_READ); 564 } 565 ctl_complete_beio(beio); 566 return; 567 } 568 569 /* 570 * If this is a write, a flush, a delete or verify, we're all done. 571 * If this is a read, we can now send the data to the user. 572 */ 573 if ((beio->bio_cmd == BIO_WRITE) 574 || (beio->bio_cmd == BIO_FLUSH) 575 || (beio->bio_cmd == BIO_DELETE) 576 || (ARGS(io)->flags & CTL_LLF_VERIFY)) { 577 ctl_set_success(&io->scsiio); 578 ctl_complete_beio(beio); 579 } else { 580 if ((ARGS(io)->flags & CTL_LLF_READ) && 581 beio->beio_cont == NULL) { 582 ctl_set_success(&io->scsiio); 583 if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT) 584 ctl_serseq_done(io); 585 } 586 ctl_datamove(io); 587 } 588 } 589 590 static void 591 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 592 struct ctl_be_block_io *beio) 593 { 594 union ctl_io *io = beio->io; 595 struct mount *mountpoint; 596 int error; 597 598 DPRINTF("entered\n"); 599 600 binuptime(&beio->ds_t0); 601 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 602 603 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 604 605 vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) | 606 LK_RETRY); 607 error = VOP_FSYNC(be_lun->vn, beio->io_arg ? 
MNT_NOWAIT : MNT_WAIT, 608 curthread); 609 VOP_UNLOCK(be_lun->vn); 610 611 vn_finished_write(mountpoint); 612 613 mtx_lock(&be_lun->io_lock); 614 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 615 beio->ds_tag_type, beio->ds_trans_type, 616 /*now*/ NULL, /*then*/&beio->ds_t0); 617 mtx_unlock(&be_lun->io_lock); 618 619 if (error == 0) 620 ctl_set_success(&io->scsiio); 621 else { 622 /* XXX KDM is there is a better error here? */ 623 ctl_set_internal_failure(&io->scsiio, 624 /*sks_valid*/ 1, 625 /*retry_count*/ 0xbad1); 626 } 627 628 ctl_complete_beio(beio); 629 } 630 631 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t"); 632 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t"); 633 SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t"); 634 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t"); 635 636 static void 637 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 638 struct ctl_be_block_io *beio) 639 { 640 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 641 struct ctl_be_block_filedata *file_data; 642 union ctl_io *io; 643 struct uio xuio; 644 struct iovec *xiovec; 645 size_t s; 646 int error, flags, i; 647 648 DPRINTF("entered\n"); 649 650 file_data = &be_lun->backend.file; 651 io = beio->io; 652 flags = 0; 653 if (ARGS(io)->flags & CTL_LLF_DPO) 654 flags |= IO_DIRECT; 655 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 656 flags |= IO_SYNC; 657 658 bzero(&xuio, sizeof(xuio)); 659 if (beio->bio_cmd == BIO_READ) { 660 SDT_PROBE0(cbb, , read, file_start); 661 xuio.uio_rw = UIO_READ; 662 } else { 663 SDT_PROBE0(cbb, , write, file_start); 664 xuio.uio_rw = UIO_WRITE; 665 } 666 xuio.uio_offset = beio->io_offset; 667 xuio.uio_resid = beio->io_len; 668 xuio.uio_segflg = UIO_SYSSPACE; 669 xuio.uio_iov = beio->xiovecs; 670 xuio.uio_iovcnt = beio->num_segs; 671 xuio.uio_td = curthread; 672 673 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 674 xiovec->iov_base = beio->sg_segs[i].addr; 675 
xiovec->iov_len = beio->sg_segs[i].len; 676 } 677 678 binuptime(&beio->ds_t0); 679 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 680 681 if (beio->bio_cmd == BIO_READ) { 682 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 683 684 if (beio->beio_cont == NULL && 685 cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT) 686 ctl_serseq_done(io); 687 /* 688 * UFS pays attention to IO_DIRECT for reads. If the 689 * DIRECTIO option is configured into the kernel, it calls 690 * ffs_rawread(). But that only works for single-segment 691 * uios with user space addresses. In our case, with a 692 * kernel uio, it still reads into the buffer cache, but it 693 * will just try to release the buffer from the cache later 694 * on in ffs_read(). 695 * 696 * ZFS does not pay attention to IO_DIRECT for reads. 697 * 698 * UFS does not pay attention to IO_SYNC for reads. 699 * 700 * ZFS pays attention to IO_SYNC (which translates into the 701 * Solaris define FRSYNC for zfs_read()) for reads. It 702 * attempts to sync the file before reading. 703 */ 704 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); 705 706 VOP_UNLOCK(be_lun->vn); 707 SDT_PROBE0(cbb, , read, file_done); 708 if (error == 0 && xuio.uio_resid > 0) { 709 /* 710 * If we red less then requested (EOF), then 711 * we should clean the rest of the buffer. 712 */ 713 s = beio->io_len - xuio.uio_resid; 714 for (i = 0; i < beio->num_segs; i++) { 715 if (s >= beio->sg_segs[i].len) { 716 s -= beio->sg_segs[i].len; 717 continue; 718 } 719 bzero((uint8_t *)beio->sg_segs[i].addr + s, 720 beio->sg_segs[i].len - s); 721 s = 0; 722 } 723 } 724 } else { 725 struct mount *mountpoint; 726 727 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 728 vn_lock(be_lun->vn, vn_lktype_write(mountpoint, 729 be_lun->vn) | LK_RETRY); 730 731 /* 732 * UFS pays attention to IO_DIRECT for writes. The write 733 * is done asynchronously. (Normally the write would just 734 * get put into cache. 
735 * 736 * UFS pays attention to IO_SYNC for writes. It will 737 * attempt to write the buffer out synchronously if that 738 * flag is set. 739 * 740 * ZFS does not pay attention to IO_DIRECT for writes. 741 * 742 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 743 * for writes. It will flush the transaction from the 744 * cache before returning. 745 */ 746 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); 747 VOP_UNLOCK(be_lun->vn); 748 749 vn_finished_write(mountpoint); 750 SDT_PROBE0(cbb, , write, file_done); 751 } 752 753 mtx_lock(&be_lun->io_lock); 754 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 755 beio->ds_tag_type, beio->ds_trans_type, 756 /*now*/ NULL, /*then*/&beio->ds_t0); 757 mtx_unlock(&be_lun->io_lock); 758 759 /* 760 * If we got an error, set the sense data to "MEDIUM ERROR" and 761 * return the I/O to the user. 762 */ 763 if (error != 0) { 764 if (error == ENOSPC || error == EDQUOT) { 765 ctl_set_space_alloc_fail(&io->scsiio); 766 } else if (error == EROFS || error == EACCES) { 767 ctl_set_hw_write_protected(&io->scsiio); 768 } else { 769 ctl_set_medium_error(&io->scsiio, 770 beio->bio_cmd == BIO_READ); 771 } 772 ctl_complete_beio(beio); 773 return; 774 } 775 776 /* 777 * If this is a write or a verify, we're all done. 778 * If this is a read, we can now send the data to the user. 
779 */ 780 if ((beio->bio_cmd == BIO_WRITE) || 781 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 782 ctl_set_success(&io->scsiio); 783 ctl_complete_beio(beio); 784 } else { 785 if ((ARGS(io)->flags & CTL_LLF_READ) && 786 beio->beio_cont == NULL) { 787 ctl_set_success(&io->scsiio); 788 if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT) 789 ctl_serseq_done(io); 790 } 791 ctl_datamove(io); 792 } 793 } 794 795 static void 796 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 797 struct ctl_be_block_io *beio) 798 { 799 union ctl_io *io = beio->io; 800 struct ctl_lba_len_flags *lbalen = ARGS(io); 801 struct scsi_get_lba_status_data *data; 802 off_t roff, off; 803 int error, status; 804 805 DPRINTF("entered\n"); 806 807 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 808 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 809 error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 810 0, curthread->td_ucred, curthread); 811 if (error == 0 && off > roff) 812 status = 0; /* mapped up to off */ 813 else { 814 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 815 0, curthread->td_ucred, curthread); 816 if (error == 0 && off > roff) 817 status = 1; /* deallocated up to off */ 818 else { 819 status = 0; /* unknown up to the end */ 820 off = be_lun->size_bytes; 821 } 822 } 823 VOP_UNLOCK(be_lun->vn); 824 825 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 826 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 827 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 828 lbalen->lba), data->descr[0].length); 829 data->descr[0].status = status; 830 831 ctl_complete_beio(beio); 832 } 833 834 static uint64_t 835 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) 836 { 837 struct vattr vattr; 838 struct statfs statfs; 839 uint64_t val; 840 int error; 841 842 val = UINT64_MAX; 843 if (be_lun->vn == NULL) 844 return (val); 845 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 846 if (strcmp(attrname, "blocksused") == 0) { 847 error = 
VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 848 if (error == 0) 849 val = vattr.va_bytes / be_lun->cbe_lun.blocksize; 850 } 851 if (strcmp(attrname, "blocksavail") == 0 && 852 !VN_IS_DOOMED(be_lun->vn)) { 853 error = VFS_STATFS(be_lun->vn->v_mount, &statfs); 854 if (error == 0) 855 val = statfs.f_bavail * statfs.f_bsize / 856 be_lun->cbe_lun.blocksize; 857 } 858 VOP_UNLOCK(be_lun->vn); 859 return (val); 860 } 861 862 static void 863 ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun, 864 struct ctl_be_block_io *beio) 865 { 866 struct ctl_be_block_filedata *file_data; 867 union ctl_io *io; 868 struct ctl_ptr_len_flags *ptrlen; 869 struct scsi_unmap_desc *buf, *end; 870 struct mount *mp; 871 off_t off, len; 872 int error; 873 874 io = beio->io; 875 file_data = &be_lun->backend.file; 876 mp = NULL; 877 error = 0; 878 879 binuptime(&beio->ds_t0); 880 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 881 882 (void)vn_start_write(be_lun->vn, &mp, V_WAIT); 883 vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY); 884 if (beio->io_offset == -1) { 885 beio->io_len = 0; 886 ptrlen = (struct ctl_ptr_len_flags *) 887 &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 888 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 889 end = buf + ptrlen->len / sizeof(*buf); 890 for (; buf < end; buf++) { 891 off = (off_t)scsi_8btou64(buf->lba) * 892 be_lun->cbe_lun.blocksize; 893 len = (off_t)scsi_4btoul(buf->length) * 894 be_lun->cbe_lun.blocksize; 895 beio->io_len += len; 896 error = vn_deallocate(be_lun->vn, &off, &len, 897 0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, 898 NOCRED); 899 if (error != 0) 900 break; 901 } 902 } else { 903 /* WRITE_SAME */ 904 off = beio->io_offset; 905 len = beio->io_len; 906 error = vn_deallocate(be_lun->vn, &off, &len, 0, 907 IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED); 908 } 909 VOP_UNLOCK(be_lun->vn); 910 vn_finished_write(mp); 911 912 mtx_lock(&be_lun->io_lock); 913 
devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 914 beio->ds_tag_type, beio->ds_trans_type, 915 /*now*/ NULL, /*then*/&beio->ds_t0); 916 mtx_unlock(&be_lun->io_lock); 917 918 /* 919 * If we got an error, set the sense data to "MEDIUM ERROR" and 920 * return the I/O to the user. 921 */ 922 switch (error) { 923 case 0: 924 ctl_set_success(&io->scsiio); 925 break; 926 case ENOSPC: 927 case EDQUOT: 928 ctl_set_space_alloc_fail(&io->scsiio); 929 break; 930 case EROFS: 931 case EACCES: 932 ctl_set_hw_write_protected(&io->scsiio); 933 break; 934 default: 935 ctl_set_medium_error(&io->scsiio, false); 936 } 937 ctl_complete_beio(beio); 938 } 939 940 static void 941 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, 942 struct ctl_be_block_io *beio) 943 { 944 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 945 union ctl_io *io; 946 struct cdevsw *csw; 947 struct cdev *dev; 948 struct uio xuio; 949 struct iovec *xiovec; 950 int error, flags, i, ref; 951 952 DPRINTF("entered\n"); 953 954 io = beio->io; 955 flags = 0; 956 if (ARGS(io)->flags & CTL_LLF_DPO) 957 flags |= IO_DIRECT; 958 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 959 flags |= IO_SYNC; 960 961 bzero(&xuio, sizeof(xuio)); 962 if (beio->bio_cmd == BIO_READ) { 963 SDT_PROBE0(cbb, , read, file_start); 964 xuio.uio_rw = UIO_READ; 965 } else { 966 SDT_PROBE0(cbb, , write, file_start); 967 xuio.uio_rw = UIO_WRITE; 968 } 969 xuio.uio_offset = beio->io_offset; 970 xuio.uio_resid = beio->io_len; 971 xuio.uio_segflg = UIO_SYSSPACE; 972 xuio.uio_iov = beio->xiovecs; 973 xuio.uio_iovcnt = beio->num_segs; 974 xuio.uio_td = curthread; 975 976 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 977 xiovec->iov_base = beio->sg_segs[i].addr; 978 xiovec->iov_len = beio->sg_segs[i].len; 979 } 980 981 binuptime(&beio->ds_t0); 982 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 983 984 csw = devvn_refthread(be_lun->vn, &dev, &ref); 985 if (csw) { 986 if 
(beio->bio_cmd == BIO_READ) { 987 if (beio->beio_cont == NULL && 988 cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT) 989 ctl_serseq_done(io); 990 error = csw->d_read(dev, &xuio, flags); 991 } else 992 error = csw->d_write(dev, &xuio, flags); 993 dev_relthread(dev, ref); 994 } else 995 error = ENXIO; 996 997 if (beio->bio_cmd == BIO_READ) 998 SDT_PROBE0(cbb, , read, file_done); 999 else 1000 SDT_PROBE0(cbb, , write, file_done); 1001 1002 mtx_lock(&be_lun->io_lock); 1003 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 1004 beio->ds_tag_type, beio->ds_trans_type, 1005 /*now*/ NULL, /*then*/&beio->ds_t0); 1006 mtx_unlock(&be_lun->io_lock); 1007 1008 /* 1009 * If we got an error, set the sense data to "MEDIUM ERROR" and 1010 * return the I/O to the user. 1011 */ 1012 if (error != 0) { 1013 if (error == ENOSPC || error == EDQUOT) { 1014 ctl_set_space_alloc_fail(&io->scsiio); 1015 } else if (error == EROFS || error == EACCES) { 1016 ctl_set_hw_write_protected(&io->scsiio); 1017 } else { 1018 ctl_set_medium_error(&io->scsiio, 1019 beio->bio_cmd == BIO_READ); 1020 } 1021 ctl_complete_beio(beio); 1022 return; 1023 } 1024 1025 /* 1026 * If this is a write or a verify, we're all done. 1027 * If this is a read, we can now send the data to the user. 
1028 */ 1029 if ((beio->bio_cmd == BIO_WRITE) || 1030 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 1031 ctl_set_success(&io->scsiio); 1032 ctl_complete_beio(beio); 1033 } else { 1034 if ((ARGS(io)->flags & CTL_LLF_READ) && 1035 beio->beio_cont == NULL) { 1036 ctl_set_success(&io->scsiio); 1037 if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT) 1038 ctl_serseq_done(io); 1039 } 1040 ctl_datamove(io); 1041 } 1042 } 1043 1044 static void 1045 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, 1046 struct ctl_be_block_io *beio) 1047 { 1048 union ctl_io *io = beio->io; 1049 struct cdevsw *csw; 1050 struct cdev *dev; 1051 struct ctl_lba_len_flags *lbalen = ARGS(io); 1052 struct scsi_get_lba_status_data *data; 1053 off_t roff, off; 1054 int error, ref, status; 1055 1056 DPRINTF("entered\n"); 1057 1058 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1059 if (csw == NULL) { 1060 status = 0; /* unknown up to the end */ 1061 off = be_lun->size_bytes; 1062 goto done; 1063 } 1064 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 1065 error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD, 1066 curthread); 1067 if (error == 0 && off > roff) 1068 status = 0; /* mapped up to off */ 1069 else { 1070 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD, 1071 curthread); 1072 if (error == 0 && off > roff) 1073 status = 1; /* deallocated up to off */ 1074 else { 1075 status = 0; /* unknown up to the end */ 1076 off = be_lun->size_bytes; 1077 } 1078 } 1079 dev_relthread(dev, ref); 1080 1081 done: 1082 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 1083 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 1084 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 1085 lbalen->lba), data->descr[0].length); 1086 data->descr[0].status = status; 1087 1088 ctl_complete_beio(beio); 1089 } 1090 1091 static void 1092 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 1093 struct ctl_be_block_io *beio) 1094 { 1095 struct bio *bio; 1096 struct cdevsw *csw; 
struct cdev *dev;
	int ref;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	/* A flush carries no data and no offset. */
	bio->bio_cmd = BIO_FLUSH;
	bio->bio_offset = 0;
	bio->bio_data = 0;
	bio->bio_done = ctl_be_block_biodone;
	bio->bio_caller1 = beio;
	bio->bio_pblkno = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		/* No device to send to: complete the bio with ENXIO. */
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}

/*
 * Issue BIO_DELETE requests covering "len" bytes starting at byte offset
 * "off".  The range is split into bios of at most "maxlen" bytes, the
 * largest multiple of the LUN block size that does not exceed LONG_MAX.
 * When "last" is non-zero, send_complete is set on the beio together with
 * the final bio of this range.
 */
static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
			     struct ctl_be_block_io *beio,
			     uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd = BIO_DELETE;
		/*
		 * NOTE(review): "dev" is stored even when csw is NULL;
		 * confirm devvn_refthread() leaves it in a defined state on
		 * failure.
		 */
		bio->bio_dev = dev;
		bio->bio_offset = off;
		bio->bio_length = MIN(len, maxlen);
		bio->bio_data = 0;
		bio->bio_done = ctl_be_block_biodone;
		bio->bio_caller1 = beio;
		bio->bio_pblkno = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		/* The counters are updated under io_lock for each bio. */
		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
1172 } 1173 } 1174 if (csw) 1175 dev_relthread(dev, ref); 1176 } 1177 1178 static void 1179 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1180 struct ctl_be_block_io *beio) 1181 { 1182 union ctl_io *io; 1183 struct ctl_ptr_len_flags *ptrlen; 1184 struct scsi_unmap_desc *buf, *end; 1185 uint64_t len; 1186 1187 io = beio->io; 1188 1189 DPRINTF("entered\n"); 1190 1191 binuptime(&beio->ds_t0); 1192 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1193 1194 if (beio->io_offset == -1) { 1195 beio->io_len = 0; 1196 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1197 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 1198 end = buf + ptrlen->len / sizeof(*buf); 1199 for (; buf < end; buf++) { 1200 len = (uint64_t)scsi_4btoul(buf->length) * 1201 be_lun->cbe_lun.blocksize; 1202 beio->io_len += len; 1203 ctl_be_block_unmap_dev_range(be_lun, beio, 1204 scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, 1205 len, (end - buf < 2) ? TRUE : FALSE); 1206 } 1207 } else 1208 ctl_be_block_unmap_dev_range(be_lun, beio, 1209 beio->io_offset, beio->io_len, TRUE); 1210 } 1211 1212 static void 1213 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 1214 struct ctl_be_block_io *beio) 1215 { 1216 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 1217 struct bio *bio; 1218 struct cdevsw *csw; 1219 struct cdev *dev; 1220 off_t cur_offset; 1221 int i, max_iosize, ref; 1222 1223 DPRINTF("entered\n"); 1224 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1225 1226 /* 1227 * We have to limit our I/O size to the maximum supported by the 1228 * backend device. 
1229 */ 1230 if (csw) { 1231 max_iosize = dev->si_iosize_max; 1232 if (max_iosize <= 0) 1233 max_iosize = DFLTPHYS; 1234 } else 1235 max_iosize = maxphys; 1236 1237 cur_offset = beio->io_offset; 1238 for (i = 0; i < beio->num_segs; i++) { 1239 size_t cur_size; 1240 uint8_t *cur_ptr; 1241 1242 cur_size = beio->sg_segs[i].len; 1243 cur_ptr = beio->sg_segs[i].addr; 1244 1245 while (cur_size > 0) { 1246 /* This can't fail, it's a blocking allocation. */ 1247 bio = g_alloc_bio(); 1248 1249 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 1250 1251 bio->bio_cmd = beio->bio_cmd; 1252 bio->bio_dev = dev; 1253 bio->bio_caller1 = beio; 1254 bio->bio_length = min(cur_size, max_iosize); 1255 bio->bio_offset = cur_offset; 1256 bio->bio_data = cur_ptr; 1257 bio->bio_done = ctl_be_block_biodone; 1258 bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; 1259 1260 cur_offset += bio->bio_length; 1261 cur_ptr += bio->bio_length; 1262 cur_size -= bio->bio_length; 1263 1264 TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 1265 beio->num_bios_sent++; 1266 } 1267 } 1268 beio->send_complete = 1; 1269 binuptime(&beio->ds_t0); 1270 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1271 1272 /* 1273 * Fire off all allocated requests! 
1274 */ 1275 while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1276 TAILQ_REMOVE(&queue, bio, bio_queue); 1277 if (csw) 1278 csw->d_strategy(bio); 1279 else { 1280 bio->bio_error = ENXIO; 1281 ctl_be_block_biodone(bio); 1282 } 1283 } 1284 if (csw) 1285 dev_relthread(dev, ref); 1286 } 1287 1288 static uint64_t 1289 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) 1290 { 1291 struct diocgattr_arg arg; 1292 struct cdevsw *csw; 1293 struct cdev *dev; 1294 int error, ref; 1295 1296 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1297 if (csw == NULL) 1298 return (UINT64_MAX); 1299 strlcpy(arg.name, attrname, sizeof(arg.name)); 1300 arg.len = sizeof(arg.value.off); 1301 if (csw->d_ioctl) { 1302 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 1303 curthread); 1304 } else 1305 error = ENODEV; 1306 dev_relthread(dev, ref); 1307 if (error != 0) 1308 return (UINT64_MAX); 1309 return (arg.value.off); 1310 } 1311 1312 static void 1313 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, 1314 union ctl_io *io) 1315 { 1316 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1317 struct ctl_be_block_io *beio; 1318 struct ctl_lba_len_flags *lbalen; 1319 1320 DPRINTF("entered\n"); 1321 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1322 lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1323 1324 beio->io_len = lbalen->len * cbe_lun->blocksize; 1325 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1326 beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; 1327 beio->bio_cmd = BIO_FLUSH; 1328 beio->ds_trans_type = DEVSTAT_NO_DATA; 1329 DPRINTF("SYNC\n"); 1330 be_lun->lun_flush(be_lun, beio); 1331 } 1332 1333 static void 1334 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1335 { 1336 union ctl_io *io; 1337 1338 io = beio->io; 1339 ctl_free_beio(beio); 1340 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1341 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1342 (io->io_hdr.status & CTL_STATUS_MASK) != 
CTL_SUCCESS)) { 1343 ctl_config_write_done(io); 1344 return; 1345 } 1346 1347 ctl_be_block_config_write(io); 1348 } 1349 1350 static void 1351 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1352 union ctl_io *io) 1353 { 1354 struct ctl_be_block_softc *softc = be_lun->softc; 1355 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1356 struct ctl_be_block_io *beio; 1357 struct ctl_lba_len_flags *lbalen; 1358 uint64_t len_left, lba; 1359 uint32_t pb, pbo, adj; 1360 int i, seglen; 1361 uint8_t *buf, *end; 1362 1363 DPRINTF("entered\n"); 1364 1365 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1366 lbalen = ARGS(io); 1367 1368 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || 1369 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { 1370 ctl_free_beio(beio); 1371 ctl_set_invalid_field(&io->scsiio, 1372 /*sks_valid*/ 1, 1373 /*command*/ 1, 1374 /*field*/ 1, 1375 /*bit_valid*/ 0, 1376 /*bit*/ 0); 1377 ctl_config_write_done(io); 1378 return; 1379 } 1380 1381 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { 1382 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1383 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; 1384 beio->bio_cmd = BIO_DELETE; 1385 beio->ds_trans_type = DEVSTAT_FREE; 1386 1387 be_lun->unmap(be_lun, beio); 1388 return; 1389 } 1390 1391 beio->bio_cmd = BIO_WRITE; 1392 beio->ds_trans_type = DEVSTAT_WRITE; 1393 1394 DPRINTF("WRITE SAME at LBA %jx len %u\n", 1395 (uintmax_t)lbalen->lba, lbalen->len); 1396 1397 pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; 1398 if (be_lun->cbe_lun.pblockoff > 0) 1399 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; 1400 else 1401 pbo = 0; 1402 len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; 1403 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 1404 /* 1405 * Setup the S/G entry for this chunk. 
1406 */ 1407 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1408 if (pb > cbe_lun->blocksize) { 1409 adj = ((lbalen->lba + lba) * cbe_lun->blocksize + 1410 seglen - pbo) % pb; 1411 if (seglen > adj) 1412 seglen -= adj; 1413 else 1414 seglen -= seglen % cbe_lun->blocksize; 1415 } else 1416 seglen -= seglen % cbe_lun->blocksize; 1417 ctl_alloc_seg(softc, &beio->sg_segs[i], seglen); 1418 1419 DPRINTF("segment %d addr %p len %zd\n", i, 1420 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1421 1422 beio->num_segs++; 1423 len_left -= seglen; 1424 1425 buf = beio->sg_segs[i].addr; 1426 end = buf + seglen; 1427 for (; buf < end; buf += cbe_lun->blocksize) { 1428 if (lbalen->flags & SWS_NDOB) { 1429 memset(buf, 0, cbe_lun->blocksize); 1430 } else { 1431 memcpy(buf, io->scsiio.kern_data_ptr, 1432 cbe_lun->blocksize); 1433 } 1434 if (lbalen->flags & SWS_LBDATA) 1435 scsi_ulto4b(lbalen->lba + lba, buf); 1436 lba++; 1437 } 1438 } 1439 1440 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1441 beio->io_len = lba * cbe_lun->blocksize; 1442 1443 /* We can not do all in one run. Correct and schedule rerun. 
*/ 1444 if (len_left > 0) { 1445 lbalen->lba += lba; 1446 lbalen->len -= lba; 1447 beio->beio_cont = ctl_be_block_cw_done_ws; 1448 } 1449 1450 be_lun->dispatch(be_lun, beio); 1451 } 1452 1453 static void 1454 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1455 union ctl_io *io) 1456 { 1457 struct ctl_be_block_io *beio; 1458 struct ctl_ptr_len_flags *ptrlen; 1459 1460 DPRINTF("entered\n"); 1461 1462 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1463 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1464 1465 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { 1466 ctl_free_beio(beio); 1467 ctl_set_invalid_field(&io->scsiio, 1468 /*sks_valid*/ 0, 1469 /*command*/ 1, 1470 /*field*/ 0, 1471 /*bit_valid*/ 0, 1472 /*bit*/ 0); 1473 ctl_config_write_done(io); 1474 return; 1475 } 1476 1477 beio->io_len = 0; 1478 beio->io_offset = -1; 1479 beio->bio_cmd = BIO_DELETE; 1480 beio->ds_trans_type = DEVSTAT_FREE; 1481 DPRINTF("UNMAP\n"); 1482 be_lun->unmap(be_lun, beio); 1483 } 1484 1485 static void 1486 ctl_be_block_cr_done(struct ctl_be_block_io *beio) 1487 { 1488 union ctl_io *io; 1489 1490 io = beio->io; 1491 ctl_free_beio(beio); 1492 ctl_config_read_done(io); 1493 } 1494 1495 static void 1496 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 1497 union ctl_io *io) 1498 { 1499 struct ctl_be_block_io *beio; 1500 struct ctl_be_block_softc *softc; 1501 1502 DPRINTF("entered\n"); 1503 1504 softc = be_lun->softc; 1505 beio = ctl_alloc_beio(softc); 1506 beio->io = io; 1507 beio->lun = be_lun; 1508 beio->beio_cont = ctl_be_block_cr_done; 1509 PRIV(io)->ptr = (void *)beio; 1510 1511 switch (io->scsiio.cdb[0]) { 1512 case SERVICE_ACTION_IN: /* GET LBA STATUS */ 1513 beio->bio_cmd = -1; 1514 beio->ds_trans_type = DEVSTAT_NO_DATA; 1515 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1516 beio->io_len = 0; 1517 if (be_lun->get_lba_status) 1518 be_lun->get_lba_status(be_lun, beio); 1519 else 1520 ctl_be_block_cr_done(beio); 
1521 break; 1522 default: 1523 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1524 break; 1525 } 1526 } 1527 1528 static void 1529 ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1530 { 1531 union ctl_io *io; 1532 1533 io = beio->io; 1534 ctl_free_beio(beio); 1535 ctl_config_write_done(io); 1536 } 1537 1538 static void 1539 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1540 union ctl_io *io) 1541 { 1542 struct ctl_be_block_io *beio; 1543 struct ctl_be_block_softc *softc; 1544 1545 DPRINTF("entered\n"); 1546 1547 softc = be_lun->softc; 1548 beio = ctl_alloc_beio(softc); 1549 beio->io = io; 1550 beio->lun = be_lun; 1551 beio->beio_cont = ctl_be_block_cw_done; 1552 switch (io->scsiio.tag_type) { 1553 case CTL_TAG_ORDERED: 1554 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1555 break; 1556 case CTL_TAG_HEAD_OF_QUEUE: 1557 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1558 break; 1559 case CTL_TAG_UNTAGGED: 1560 case CTL_TAG_SIMPLE: 1561 case CTL_TAG_ACA: 1562 default: 1563 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1564 break; 1565 } 1566 PRIV(io)->ptr = (void *)beio; 1567 1568 switch (io->scsiio.cdb[0]) { 1569 case SYNCHRONIZE_CACHE: 1570 case SYNCHRONIZE_CACHE_16: 1571 ctl_be_block_cw_dispatch_sync(be_lun, io); 1572 break; 1573 case WRITE_SAME_10: 1574 case WRITE_SAME_16: 1575 ctl_be_block_cw_dispatch_ws(be_lun, io); 1576 break; 1577 case UNMAP: 1578 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1579 break; 1580 default: 1581 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1582 break; 1583 } 1584 } 1585 1586 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t"); 1587 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t"); 1588 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t"); 1589 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t"); 1590 1591 static void 1592 ctl_be_block_next(struct ctl_be_block_io *beio) 1593 { 1594 struct ctl_be_block_lun *be_lun; 1595 union ctl_io *io; 1596 1597 io = beio->io; 1598 be_lun = beio->lun; 1599 ctl_free_beio(beio); 1600 if 
((io->io_hdr.flags & CTL_FLAG_ABORT) || 1601 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1602 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1603 ctl_data_submit_done(io); 1604 return; 1605 } 1606 1607 io->io_hdr.status &= ~CTL_STATUS_MASK; 1608 io->io_hdr.status |= CTL_STATUS_NONE; 1609 1610 mtx_lock(&be_lun->queue_lock); 1611 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1612 mtx_unlock(&be_lun->queue_lock); 1613 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1614 } 1615 1616 static void 1617 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1618 union ctl_io *io) 1619 { 1620 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1621 struct ctl_be_block_io *beio; 1622 struct ctl_be_block_softc *softc; 1623 struct ctl_lba_len_flags *lbalen; 1624 struct ctl_ptr_len_flags *bptrlen; 1625 uint64_t len_left, lbas; 1626 int i; 1627 1628 softc = be_lun->softc; 1629 1630 DPRINTF("entered\n"); 1631 1632 lbalen = ARGS(io); 1633 if (lbalen->flags & CTL_LLF_WRITE) { 1634 SDT_PROBE0(cbb, , write, start); 1635 } else { 1636 SDT_PROBE0(cbb, , read, start); 1637 } 1638 1639 beio = ctl_alloc_beio(softc); 1640 beio->io = io; 1641 beio->lun = be_lun; 1642 bptrlen = PRIV(io); 1643 bptrlen->ptr = (void *)beio; 1644 1645 switch (io->scsiio.tag_type) { 1646 case CTL_TAG_ORDERED: 1647 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1648 break; 1649 case CTL_TAG_HEAD_OF_QUEUE: 1650 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1651 break; 1652 case CTL_TAG_UNTAGGED: 1653 case CTL_TAG_SIMPLE: 1654 case CTL_TAG_ACA: 1655 default: 1656 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1657 break; 1658 } 1659 1660 if (lbalen->flags & CTL_LLF_WRITE) { 1661 beio->bio_cmd = BIO_WRITE; 1662 beio->ds_trans_type = DEVSTAT_WRITE; 1663 } else { 1664 beio->bio_cmd = BIO_READ; 1665 beio->ds_trans_type = DEVSTAT_READ; 1666 } 1667 1668 DPRINTF("%s at LBA %jx len %u @%ju\n", 1669 (beio->bio_cmd == BIO_READ) ? 
"READ" : "WRITE", 1670 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 1671 lbas = CTLBLK_MAX_IO_SIZE; 1672 if (lbalen->flags & CTL_LLF_COMPARE) { 1673 beio->two_sglists = 1; 1674 lbas /= 2; 1675 } 1676 lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); 1677 beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; 1678 beio->io_len = lbas * cbe_lun->blocksize; 1679 bptrlen->len += lbas; 1680 1681 for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 1682 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 1683 i, CTLBLK_MAX_SEGS)); 1684 1685 /* 1686 * Setup the S/G entry for this chunk. 1687 */ 1688 ctl_alloc_seg(softc, &beio->sg_segs[i], 1689 MIN(CTLBLK_MAX_SEG, len_left)); 1690 1691 DPRINTF("segment %d addr %p len %zd\n", i, 1692 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1693 1694 /* Set up second segment for compare operation. */ 1695 if (beio->two_sglists) { 1696 ctl_alloc_seg(softc, 1697 &beio->sg_segs[i + CTLBLK_HALF_SEGS], 1698 beio->sg_segs[i].len); 1699 } 1700 1701 beio->num_segs++; 1702 len_left -= beio->sg_segs[i].len; 1703 } 1704 if (bptrlen->len < lbalen->len) 1705 beio->beio_cont = ctl_be_block_next; 1706 io->scsiio.be_move_done = ctl_be_block_move_done; 1707 /* For compare we have separate S/G lists for read and datamove. */ 1708 if (beio->two_sglists) 1709 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; 1710 else 1711 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1712 io->scsiio.kern_data_len = beio->io_len; 1713 io->scsiio.kern_sg_entries = beio->num_segs; 1714 io->scsiio.kern_data_ref = ctl_refcnt_beio; 1715 io->scsiio.kern_data_arg = beio; 1716 io->io_hdr.flags |= CTL_FLAG_ALLOCATED; 1717 1718 /* 1719 * For the read case, we need to read the data into our buffers and 1720 * then we can send it back to the user. For the write case, we 1721 * need to get the data from the user first. 
1722 */ 1723 if (beio->bio_cmd == BIO_READ) { 1724 SDT_PROBE0(cbb, , read, alloc_done); 1725 be_lun->dispatch(be_lun, beio); 1726 } else { 1727 SDT_PROBE0(cbb, , write, alloc_done); 1728 ctl_datamove(io); 1729 } 1730 } 1731 1732 static void 1733 ctl_be_block_worker(void *context, int pending) 1734 { 1735 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context; 1736 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1737 union ctl_io *io; 1738 struct ctl_be_block_io *beio; 1739 1740 DPRINTF("entered\n"); 1741 /* 1742 * Fetch and process I/Os from all queues. If we detect LUN 1743 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race, 1744 * so make response maximally opaque to not confuse initiator. 1745 */ 1746 for (;;) { 1747 mtx_lock(&be_lun->queue_lock); 1748 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1749 if (io != NULL) { 1750 DPRINTF("datamove queue\n"); 1751 STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links); 1752 mtx_unlock(&be_lun->queue_lock); 1753 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1754 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1755 ctl_set_busy(&io->scsiio); 1756 ctl_complete_beio(beio); 1757 continue; 1758 } 1759 be_lun->dispatch(be_lun, beio); 1760 continue; 1761 } 1762 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1763 if (io != NULL) { 1764 DPRINTF("config write queue\n"); 1765 STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links); 1766 mtx_unlock(&be_lun->queue_lock); 1767 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1768 ctl_set_busy(&io->scsiio); 1769 ctl_config_write_done(io); 1770 continue; 1771 } 1772 ctl_be_block_cw_dispatch(be_lun, io); 1773 continue; 1774 } 1775 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); 1776 if (io != NULL) { 1777 DPRINTF("config read queue\n"); 1778 STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links); 1779 mtx_unlock(&be_lun->queue_lock); 1780 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1781 ctl_set_busy(&io->scsiio); 1782 
ctl_config_read_done(io);
				continue;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		/* The input queue is examined last, after the other queues. */
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_data_submit_done(io);
				continue;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}

/*
 * Entry point from CTL to the backend for I/O.  We queue everything to a
 * work thread, so this just puts the I/O on a queue and wakes up the
 * thread.
 *
 * The count of blocks already processed (PRIV(io)->len) is reset to zero
 * before queueing.  Always returns CTL_RETVAL_COMPLETE.
 */
static int
ctl_be_block_submit(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
	    ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));

	PRIV(io)->len = 0;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (CTL_RETVAL_COMPLETE);
}

/*
 * ioctl entry point of the block backend.
 *
 * Only CTL_LUN_REQ is handled; it is routed by request type to the create,
 * remove or modify routine.  An unknown request type is reported through
 * lun_req->status and error_str while the return value stays 0.  Any other
 * ioctl command yields ENOTTY.
 */
static int
ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			int flag, struct thread *td)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	int error;

	error = 0;
	switch (cmd) {
	case CTL_LUN_REQ: {
		struct ctl_lun_req *lun_req;

		lun_req = (struct ctl_lun_req *)addr;

		switch (lun_req->reqtype) {
		case CTL_LUNREQ_CREATE:
			error = ctl_be_block_create(softc, lun_req);
break;
		case CTL_LUNREQ_RM:
			error = ctl_be_block_rm(softc, lun_req);
			break;
		case CTL_LUNREQ_MODIFY:
			error = ctl_be_block_modify(softc, lun_req);
			break;
		default:
			lun_req->status = CTL_LUN_ERROR;
			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
				 "invalid LUN request type %d",
				 lun_req->reqtype);
			break;
		}
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

/*
 * Configure a LUN whose backing store is a regular file.
 *
 * Installs the file-flavoured method table, takes a reference on the calling
 * thread's credentials, and derives the LUN size, logical block size and the
 * physical/UNMAP block exponent and offset from the file attributes, the
 * create parameters and the "pblocksize", "pblockoffset", "ublocksize" and
 * "ublockoffset" options.  CTL_LUN_FLAG_UNMAP is set only when
 * VOP_PATHCONF(_PC_DEALLOC_PRESENT) reports 1.
 *
 * Returns 0 on success or an errno value, with a message in req->error_str.
 */
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	const char		     *value;
	struct vattr		      vattr;
	off_t			      ps, pss, po, pos, us, uss, uo, uos;
	int			      error;
	long			      pconf;

	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = ctl_be_block_unmap_file;
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "error calling VOP_PATHCONF() for file %s",
		    be_lun->dev_path);
		return (error);
	}
	if (pconf == 1)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;

	file_data->cred = crhold(curthread->td_ucred);
	/* An explicit size from the create parameters overrides the file size. */
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If the file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger than and a multiple of the
	 * chosen logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = 2048;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	us = ps = vattr.va_blocksize;
	uo = po = 0;

	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/*
	 * Accept the physical block geometry only if it is a power-of-two
	 * number of whole logical blocks and the offset is a whole number of
	 * logical blocks no larger than the size.
	 */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	/* Same validation as above, for the UNMAP granularity. */
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}

/*
 * Configure a LUN whose backing store is a disk device.
 *
 * Installs the device-flavoured method table (with zvol-specific dispatch
 * and GET LBA STATUS handlers when the driver name is "zvol"), then queries
 * the device for its sector size, media size and stripe geometry and
 * reconciles them with the create parameters and LUN options.  The device
 * thread reference taken at the top is released on every return path.
 *
 * Returns 0 on success or an errno value; most failures also leave a message
 * in req->error_str.
 */
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw		     *csw;
	struct cdev		     *dev;
	const char		     *value;
	int			      error, atomic, maxio, ref, unmap, tmp;
	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	if (strcmp(csw->d_name, "zvol") == 0) {
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		/* Clamp the device's I/O limit to (0, CTLBLK_MAX_SEG]. */
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_SEG)
			maxio = CTLBLK_MAX_SEG;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
			       curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	/* NOTE(review): assumes the driver never reports a sector size of 0. */
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = MAX(tmp, 2048);
	else
		cbe_lun->blocksize = tmp;

	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
			     curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 " ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	/* A requested LUN size may shrink the device but never exceed it. */
	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2097 0 : (be_lun->size_blocks - 1); 2098 2099 error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, 2100 curthread); 2101 if (error) 2102 ps = po = 0; 2103 else { 2104 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, 2105 FREAD, curthread); 2106 if (error) 2107 po = 0; 2108 } 2109 us = ps; 2110 uo = po; 2111 2112 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 2113 if (value != NULL) 2114 ctl_expand_number(value, &ps); 2115 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 2116 if (value != NULL) 2117 ctl_expand_number(value, &po); 2118 pss = ps / cbe_lun->blocksize; 2119 pos = po / cbe_lun->blocksize; 2120 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 2121 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 2122 cbe_lun->pblockexp = fls(pss) - 1; 2123 cbe_lun->pblockoff = (pss - pos) % pss; 2124 } 2125 2126 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 2127 if (value != NULL) 2128 ctl_expand_number(value, &us); 2129 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 2130 if (value != NULL) 2131 ctl_expand_number(value, &uo); 2132 uss = us / cbe_lun->blocksize; 2133 uos = uo / cbe_lun->blocksize; 2134 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 2135 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 2136 cbe_lun->ublockexp = fls(uss) - 1; 2137 cbe_lun->ublockoff = (uss - uos) % uss; 2138 } 2139 2140 cbe_lun->atomicblock = atomic / cbe_lun->blocksize; 2141 cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; 2142 2143 if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { 2144 unmap = 1; 2145 } else { 2146 struct diocgattr_arg arg; 2147 2148 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 2149 arg.len = sizeof(arg.value.i); 2150 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 2151 curthread); 2152 unmap = (error == 0) ? 
arg.value.i : 0; 2153 } 2154 value = dnvlist_get_string(cbe_lun->options, "unmap", NULL); 2155 if (value != NULL) 2156 unmap = (strcmp(value, "on") == 0); 2157 if (unmap) 2158 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; 2159 else 2160 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 2161 2162 dev_relthread(dev, ref); 2163 return (0); 2164 } 2165 2166 static int 2167 ctl_be_block_close(struct ctl_be_block_lun *be_lun) 2168 { 2169 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2170 int flags; 2171 2172 if (be_lun->vn) { 2173 flags = FREAD; 2174 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) 2175 flags |= FWRITE; 2176 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 2177 be_lun->vn = NULL; 2178 2179 switch (be_lun->dev_type) { 2180 case CTL_BE_BLOCK_DEV: 2181 break; 2182 case CTL_BE_BLOCK_FILE: 2183 if (be_lun->backend.file.cred != NULL) { 2184 crfree(be_lun->backend.file.cred); 2185 be_lun->backend.file.cred = NULL; 2186 } 2187 break; 2188 case CTL_BE_BLOCK_NONE: 2189 break; 2190 default: 2191 panic("Unexpected backend type %d", be_lun->dev_type); 2192 break; 2193 } 2194 be_lun->dev_type = CTL_BE_BLOCK_NONE; 2195 } 2196 return (0); 2197 } 2198 2199 static int 2200 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2201 { 2202 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2203 struct nameidata nd; 2204 const char *value; 2205 int error, flags; 2206 2207 error = 0; 2208 if (rootvnode == NULL) { 2209 snprintf(req->error_str, sizeof(req->error_str), 2210 "Root filesystem is not mounted"); 2211 return (1); 2212 } 2213 pwd_ensure_dirs(); 2214 2215 value = dnvlist_get_string(cbe_lun->options, "file", NULL); 2216 if (value == NULL) { 2217 snprintf(req->error_str, sizeof(req->error_str), 2218 "no file argument specified"); 2219 return (1); 2220 } 2221 free(be_lun->dev_path, M_CTLBLK); 2222 be_lun->dev_path = strdup(value, M_CTLBLK); 2223 2224 flags = FREAD; 2225 value = dnvlist_get_string(cbe_lun->options, "readonly", NULL); 2226 if (value != NULL) { 2227 
if (strcmp(value, "on") != 0) 2228 flags |= FWRITE; 2229 } else if (cbe_lun->lun_type == T_DIRECT) 2230 flags |= FWRITE; 2231 2232 again: 2233 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path); 2234 error = vn_open(&nd, &flags, 0, NULL); 2235 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { 2236 flags &= ~FWRITE; 2237 goto again; 2238 } 2239 if (error) { 2240 /* 2241 * This is the only reasonable guess we can make as far as 2242 * path if the user doesn't give us a fully qualified path. 2243 * If they want to specify a file, they need to specify the 2244 * full path. 2245 */ 2246 if (be_lun->dev_path[0] != '/') { 2247 char *dev_name; 2248 2249 asprintf(&dev_name, M_CTLBLK, "/dev/%s", 2250 be_lun->dev_path); 2251 free(be_lun->dev_path, M_CTLBLK); 2252 be_lun->dev_path = dev_name; 2253 goto again; 2254 } 2255 snprintf(req->error_str, sizeof(req->error_str), 2256 "error opening %s: %d", be_lun->dev_path, error); 2257 return (error); 2258 } 2259 if (flags & FWRITE) 2260 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; 2261 else 2262 cbe_lun->flags |= CTL_LUN_FLAG_READONLY; 2263 2264 NDFREE(&nd, NDF_ONLY_PNBUF); 2265 be_lun->vn = nd.ni_vp; 2266 2267 /* We only support disks and files. 
*/ 2268 if (vn_isdisk_error(be_lun->vn, &error)) { 2269 error = ctl_be_block_open_dev(be_lun, req); 2270 } else if (be_lun->vn->v_type == VREG) { 2271 error = ctl_be_block_open_file(be_lun, req); 2272 } else { 2273 error = EINVAL; 2274 snprintf(req->error_str, sizeof(req->error_str), 2275 "%s is not a disk or plain file", be_lun->dev_path); 2276 } 2277 VOP_UNLOCK(be_lun->vn); 2278 2279 if (error != 0) 2280 ctl_be_block_close(be_lun); 2281 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2282 if (be_lun->dispatch != ctl_be_block_dispatch_dev) 2283 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2284 value = dnvlist_get_string(cbe_lun->options, "serseq", NULL); 2285 if (value != NULL && strcmp(value, "on") == 0) 2286 cbe_lun->serseq = CTL_LUN_SERSEQ_ON; 2287 else if (value != NULL && strcmp(value, "read") == 0) 2288 cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2289 else if (value != NULL && strcmp(value, "soft") == 0) 2290 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2291 else if (value != NULL && strcmp(value, "off") == 0) 2292 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2293 return (0); 2294 } 2295 2296 static int 2297 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2298 { 2299 struct ctl_be_lun *cbe_lun; 2300 struct ctl_be_block_lun *be_lun; 2301 struct ctl_lun_create_params *params; 2302 char num_thread_str[16]; 2303 char tmpstr[32]; 2304 const char *value; 2305 int retval, num_threads; 2306 int tmp_num_threads; 2307 2308 params = &req->reqdata.create; 2309 retval = 0; 2310 req->status = CTL_LUN_OK; 2311 2312 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2313 cbe_lun = &be_lun->cbe_lun; 2314 be_lun->params = req->reqdata.create; 2315 be_lun->softc = softc; 2316 STAILQ_INIT(&be_lun->input_queue); 2317 STAILQ_INIT(&be_lun->config_read_queue); 2318 STAILQ_INIT(&be_lun->config_write_queue); 2319 STAILQ_INIT(&be_lun->datamove_queue); 2320 mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF); 2321 mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, 
MTX_DEF); 2322 cbe_lun->options = nvlist_clone(req->args_nvl); 2323 2324 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 2325 cbe_lun->lun_type = params->device_type; 2326 else 2327 cbe_lun->lun_type = T_DIRECT; 2328 be_lun->flags = 0; 2329 cbe_lun->flags = 0; 2330 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2331 if (value != NULL) { 2332 if (strcmp(value, "primary") == 0) 2333 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2334 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2335 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2336 2337 if (cbe_lun->lun_type == T_DIRECT || 2338 cbe_lun->lun_type == T_CDROM) { 2339 be_lun->size_bytes = params->lun_size_bytes; 2340 if (params->blocksize_bytes != 0) 2341 cbe_lun->blocksize = params->blocksize_bytes; 2342 else if (cbe_lun->lun_type == T_CDROM) 2343 cbe_lun->blocksize = 2048; 2344 else 2345 cbe_lun->blocksize = 512; 2346 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2347 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2348 0 : (be_lun->size_blocks - 1); 2349 2350 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2351 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2352 retval = ctl_be_block_open(be_lun, req); 2353 if (retval != 0) { 2354 retval = 0; 2355 req->status = CTL_LUN_WARNING; 2356 } 2357 } 2358 num_threads = cbb_num_threads; 2359 } else { 2360 num_threads = 1; 2361 } 2362 2363 value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL); 2364 if (value != NULL) { 2365 tmp_num_threads = strtol(value, NULL, 0); 2366 2367 /* 2368 * We don't let the user specify less than one 2369 * thread, but hope he's clueful enough not to 2370 * specify 1000 threads. 
2371 */ 2372 if (tmp_num_threads < 1) { 2373 snprintf(req->error_str, sizeof(req->error_str), 2374 "invalid number of threads %s", 2375 num_thread_str); 2376 goto bailout_error; 2377 } 2378 num_threads = tmp_num_threads; 2379 } 2380 2381 if (be_lun->vn == NULL) 2382 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2383 /* Tell the user the blocksize we ended up using */ 2384 params->lun_size_bytes = be_lun->size_bytes; 2385 params->blocksize_bytes = cbe_lun->blocksize; 2386 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2387 cbe_lun->req_lun_id = params->req_lun_id; 2388 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2389 } else 2390 cbe_lun->req_lun_id = 0; 2391 2392 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2393 cbe_lun->be = &ctl_be_block_driver; 2394 2395 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2396 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d", 2397 softc->num_luns); 2398 strncpy((char *)cbe_lun->serial_num, tmpstr, 2399 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2400 2401 /* Tell the user what we used for a serial number */ 2402 strncpy((char *)params->serial_num, tmpstr, 2403 MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2404 } else { 2405 strncpy((char *)cbe_lun->serial_num, params->serial_num, 2406 MIN(sizeof(cbe_lun->serial_num), 2407 sizeof(params->serial_num))); 2408 } 2409 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2410 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns); 2411 strncpy((char *)cbe_lun->device_id, tmpstr, 2412 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2413 2414 /* Tell the user what we used for a device ID */ 2415 strncpy((char *)params->device_id, tmpstr, 2416 MIN(sizeof(params->device_id), sizeof(tmpstr))); 2417 } else { 2418 strncpy((char *)cbe_lun->device_id, params->device_id, 2419 MIN(sizeof(cbe_lun->device_id), 2420 sizeof(params->device_id))); 2421 } 2422 2423 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2424 2425 be_lun->io_taskqueue = 
taskqueue_create("ctlblocktq", M_WAITOK, 2426 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2427 2428 if (be_lun->io_taskqueue == NULL) { 2429 snprintf(req->error_str, sizeof(req->error_str), 2430 "unable to create taskqueue"); 2431 goto bailout_error; 2432 } 2433 2434 /* 2435 * Note that we start the same number of threads by default for 2436 * both the file case and the block device case. For the file 2437 * case, we need multiple threads to allow concurrency, because the 2438 * vnode interface is designed to be a blocking interface. For the 2439 * block device case, ZFS zvols at least will block the caller's 2440 * context in many instances, and so we need multiple threads to 2441 * overcome that problem. Other block devices don't need as many 2442 * threads, but they shouldn't cause too many problems. 2443 * 2444 * If the user wants to just have a single thread for a block 2445 * device, he can specify that when the LUN is created, or change 2446 * the tunable/sysctl to alter the default number of threads. 
2447 */ 2448 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue, 2449 /*num threads*/num_threads, 2450 /*priority*/PUSER, 2451 /*proc*/control_softc->ctl_proc, 2452 /*thread name*/"block"); 2453 2454 if (retval != 0) 2455 goto bailout_error; 2456 2457 be_lun->num_threads = num_threads; 2458 2459 retval = ctl_add_lun(&be_lun->cbe_lun); 2460 if (retval != 0) { 2461 snprintf(req->error_str, sizeof(req->error_str), 2462 "ctl_add_lun() returned error %d, see dmesg for " 2463 "details", retval); 2464 retval = 0; 2465 goto bailout_error; 2466 } 2467 2468 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, 2469 cbe_lun->blocksize, 2470 DEVSTAT_ALL_SUPPORTED, 2471 cbe_lun->lun_type 2472 | DEVSTAT_TYPE_IF_OTHER, 2473 DEVSTAT_PRIORITY_OTHER); 2474 2475 mtx_lock(&softc->lock); 2476 softc->num_luns++; 2477 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); 2478 mtx_unlock(&softc->lock); 2479 2480 params->req_lun_id = cbe_lun->lun_id; 2481 2482 return (retval); 2483 2484 bailout_error: 2485 req->status = CTL_LUN_ERROR; 2486 2487 if (be_lun->io_taskqueue != NULL) 2488 taskqueue_free(be_lun->io_taskqueue); 2489 ctl_be_block_close(be_lun); 2490 if (be_lun->dev_path != NULL) 2491 free(be_lun->dev_path, M_CTLBLK); 2492 nvlist_destroy(cbe_lun->options); 2493 mtx_destroy(&be_lun->queue_lock); 2494 mtx_destroy(&be_lun->io_lock); 2495 free(be_lun, M_CTLBLK); 2496 2497 return (retval); 2498 } 2499 2500 static int 2501 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2502 { 2503 struct ctl_lun_rm_params *params; 2504 struct ctl_be_block_lun *be_lun; 2505 struct ctl_be_lun *cbe_lun; 2506 int retval; 2507 2508 params = &req->reqdata.rm; 2509 2510 sx_xlock(&softc->modify_lock); 2511 mtx_lock(&softc->lock); 2512 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2513 if (be_lun->cbe_lun.lun_id == params->lun_id) { 2514 SLIST_REMOVE(&softc->lun_list, be_lun, 2515 ctl_be_block_lun, links); 2516 softc->num_luns--; 2517 break; 2518 } 2519 } 2520 
mtx_unlock(&softc->lock); 2521 sx_xunlock(&softc->modify_lock); 2522 if (be_lun == NULL) { 2523 snprintf(req->error_str, sizeof(req->error_str), 2524 "LUN %u is not managed by the block backend", 2525 params->lun_id); 2526 goto bailout_error; 2527 } 2528 cbe_lun = &be_lun->cbe_lun; 2529 2530 if (be_lun->vn != NULL) { 2531 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2532 ctl_lun_no_media(cbe_lun); 2533 taskqueue_drain_all(be_lun->io_taskqueue); 2534 ctl_be_block_close(be_lun); 2535 } 2536 2537 mtx_lock(&softc->lock); 2538 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2539 mtx_unlock(&softc->lock); 2540 2541 retval = ctl_remove_lun(cbe_lun); 2542 if (retval != 0) { 2543 snprintf(req->error_str, sizeof(req->error_str), 2544 "error %d returned from ctl_remove_lun() for " 2545 "LUN %d", retval, params->lun_id); 2546 mtx_lock(&softc->lock); 2547 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2548 mtx_unlock(&softc->lock); 2549 goto bailout_error; 2550 } 2551 2552 mtx_lock(&softc->lock); 2553 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2554 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0); 2555 if (retval == EINTR) 2556 break; 2557 } 2558 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2559 if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2560 mtx_unlock(&softc->lock); 2561 free(be_lun, M_CTLBLK); 2562 } else { 2563 mtx_unlock(&softc->lock); 2564 return (EINTR); 2565 } 2566 2567 req->status = CTL_LUN_OK; 2568 return (0); 2569 2570 bailout_error: 2571 req->status = CTL_LUN_ERROR; 2572 return (0); 2573 } 2574 2575 static int 2576 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2577 { 2578 struct ctl_lun_modify_params *params; 2579 struct ctl_be_block_lun *be_lun; 2580 struct ctl_be_lun *cbe_lun; 2581 const char *value; 2582 uint64_t oldsize; 2583 int error, wasprim; 2584 2585 params = &req->reqdata.modify; 2586 2587 sx_xlock(&softc->modify_lock); 2588 mtx_lock(&softc->lock); 2589 SLIST_FOREACH(be_lun, &softc->lun_list, 
links) { 2590 if (be_lun->cbe_lun.lun_id == params->lun_id) 2591 break; 2592 } 2593 mtx_unlock(&softc->lock); 2594 if (be_lun == NULL) { 2595 snprintf(req->error_str, sizeof(req->error_str), 2596 "LUN %u is not managed by the block backend", 2597 params->lun_id); 2598 goto bailout_error; 2599 } 2600 cbe_lun = &be_lun->cbe_lun; 2601 2602 if (params->lun_size_bytes != 0) 2603 be_lun->params.lun_size_bytes = params->lun_size_bytes; 2604 2605 if (req->args_nvl != NULL) { 2606 nvlist_destroy(cbe_lun->options); 2607 cbe_lun->options = nvlist_clone(req->args_nvl); 2608 } 2609 2610 wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); 2611 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2612 if (value != NULL) { 2613 if (strcmp(value, "primary") == 0) 2614 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2615 else 2616 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2617 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2618 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2619 else 2620 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2621 if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { 2622 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) 2623 ctl_lun_primary(cbe_lun); 2624 else 2625 ctl_lun_secondary(cbe_lun); 2626 } 2627 2628 oldsize = be_lun->size_blocks; 2629 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2630 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2631 if (be_lun->vn == NULL) 2632 error = ctl_be_block_open(be_lun, req); 2633 else if (vn_isdisk_error(be_lun->vn, &error)) 2634 error = ctl_be_block_open_dev(be_lun, req); 2635 else if (be_lun->vn->v_type == VREG) { 2636 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2637 error = ctl_be_block_open_file(be_lun, req); 2638 VOP_UNLOCK(be_lun->vn); 2639 } else 2640 error = EINVAL; 2641 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) && 2642 be_lun->vn != NULL) { 2643 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2644 ctl_lun_has_media(cbe_lun); 2645 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 && 2646 be_lun->vn == NULL) { 
2647 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2648 ctl_lun_no_media(cbe_lun); 2649 } 2650 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2651 } else { 2652 if (be_lun->vn != NULL) { 2653 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2654 ctl_lun_no_media(cbe_lun); 2655 taskqueue_drain_all(be_lun->io_taskqueue); 2656 error = ctl_be_block_close(be_lun); 2657 } else 2658 error = 0; 2659 } 2660 if (be_lun->size_blocks != oldsize) 2661 ctl_lun_capacity_changed(cbe_lun); 2662 2663 /* Tell the user the exact size we ended up using */ 2664 params->lun_size_bytes = be_lun->size_bytes; 2665 2666 sx_xunlock(&softc->modify_lock); 2667 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; 2668 return (0); 2669 2670 bailout_error: 2671 sx_xunlock(&softc->modify_lock); 2672 req->status = CTL_LUN_ERROR; 2673 return (0); 2674 } 2675 2676 static void 2677 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun) 2678 { 2679 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun; 2680 struct ctl_be_block_softc *softc = be_lun->softc; 2681 2682 taskqueue_drain_all(be_lun->io_taskqueue); 2683 taskqueue_free(be_lun->io_taskqueue); 2684 if (be_lun->disk_stats != NULL) 2685 devstat_remove_entry(be_lun->disk_stats); 2686 nvlist_destroy(be_lun->cbe_lun.options); 2687 free(be_lun->dev_path, M_CTLBLK); 2688 mtx_destroy(&be_lun->queue_lock); 2689 mtx_destroy(&be_lun->io_lock); 2690 2691 mtx_lock(&softc->lock); 2692 be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2693 if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2694 wakeup(be_lun); 2695 else 2696 free(be_lun, M_CTLBLK); 2697 mtx_unlock(&softc->lock); 2698 } 2699 2700 static int 2701 ctl_be_block_config_write(union ctl_io *io) 2702 { 2703 struct ctl_be_block_lun *be_lun; 2704 struct ctl_be_lun *cbe_lun; 2705 int retval; 2706 2707 DPRINTF("entered\n"); 2708 2709 cbe_lun = CTL_BACKEND_LUN(io); 2710 be_lun = (struct ctl_be_block_lun *)cbe_lun; 2711 2712 retval = 0; 2713 switch (io->scsiio.cdb[0]) { 2714 case SYNCHRONIZE_CACHE: 2715 case 
SYNCHRONIZE_CACHE_16: 2716 case WRITE_SAME_10: 2717 case WRITE_SAME_16: 2718 case UNMAP: 2719 /* 2720 * The upper level CTL code will filter out any CDBs with 2721 * the immediate bit set and return the proper error. 2722 * 2723 * We don't really need to worry about what LBA range the 2724 * user asked to be synced out. When they issue a sync 2725 * cache command, we'll sync out the whole thing. 2726 */ 2727 mtx_lock(&be_lun->queue_lock); 2728 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2729 links); 2730 mtx_unlock(&be_lun->queue_lock); 2731 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2732 break; 2733 case START_STOP_UNIT: { 2734 struct scsi_start_stop_unit *cdb; 2735 struct ctl_lun_req req; 2736 2737 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2738 if ((cdb->how & SSS_PC_MASK) != 0) { 2739 ctl_set_success(&io->scsiio); 2740 ctl_config_write_done(io); 2741 break; 2742 } 2743 if (cdb->how & SSS_START) { 2744 if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) { 2745 retval = ctl_be_block_open(be_lun, &req); 2746 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2747 if (retval == 0) { 2748 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2749 ctl_lun_has_media(cbe_lun); 2750 } else { 2751 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2752 ctl_lun_no_media(cbe_lun); 2753 } 2754 } 2755 ctl_start_lun(cbe_lun); 2756 } else { 2757 ctl_stop_lun(cbe_lun); 2758 if (cdb->how & SSS_LOEJ) { 2759 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2760 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED; 2761 ctl_lun_ejected(cbe_lun); 2762 if (be_lun->vn != NULL) 2763 ctl_be_block_close(be_lun); 2764 } 2765 } 2766 2767 ctl_set_success(&io->scsiio); 2768 ctl_config_write_done(io); 2769 break; 2770 } 2771 case PREVENT_ALLOW: 2772 ctl_set_success(&io->scsiio); 2773 ctl_config_write_done(io); 2774 break; 2775 default: 2776 ctl_set_invalid_opcode(&io->scsiio); 2777 ctl_config_write_done(io); 2778 retval = CTL_RETVAL_COMPLETE; 2779 break; 2780 } 2781 2782 return (retval); 2783 } 2784 2785 
static int 2786 ctl_be_block_config_read(union ctl_io *io) 2787 { 2788 struct ctl_be_block_lun *be_lun; 2789 int retval = 0; 2790 2791 DPRINTF("entered\n"); 2792 2793 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 2794 2795 switch (io->scsiio.cdb[0]) { 2796 case SERVICE_ACTION_IN: 2797 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 2798 mtx_lock(&be_lun->queue_lock); 2799 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 2800 &io->io_hdr, links); 2801 mtx_unlock(&be_lun->queue_lock); 2802 taskqueue_enqueue(be_lun->io_taskqueue, 2803 &be_lun->io_task); 2804 retval = CTL_RETVAL_QUEUED; 2805 break; 2806 } 2807 ctl_set_invalid_field(&io->scsiio, 2808 /*sks_valid*/ 1, 2809 /*command*/ 1, 2810 /*field*/ 1, 2811 /*bit_valid*/ 1, 2812 /*bit*/ 4); 2813 ctl_config_read_done(io); 2814 retval = CTL_RETVAL_COMPLETE; 2815 break; 2816 default: 2817 ctl_set_invalid_opcode(&io->scsiio); 2818 ctl_config_read_done(io); 2819 retval = CTL_RETVAL_COMPLETE; 2820 break; 2821 } 2822 2823 return (retval); 2824 } 2825 2826 static int 2827 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb) 2828 { 2829 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 2830 int retval; 2831 2832 retval = sbuf_printf(sb, "\t<num_threads>"); 2833 if (retval != 0) 2834 goto bailout; 2835 retval = sbuf_printf(sb, "%d", lun->num_threads); 2836 if (retval != 0) 2837 goto bailout; 2838 retval = sbuf_printf(sb, "</num_threads>\n"); 2839 2840 bailout: 2841 return (retval); 2842 } 2843 2844 static uint64_t 2845 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname) 2846 { 2847 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 2848 2849 if (lun->getattr == NULL) 2850 return (UINT64_MAX); 2851 return (lun->getattr(lun, attrname)); 2852 } 2853 2854 static int 2855 ctl_be_block_init(void) 2856 { 2857 struct ctl_be_block_softc *softc = &backend_block_softc; 2858 2859 sx_init(&softc->modify_lock, "ctlblock modify"); 2860 mtx_init(&softc->lock, 
"ctlblock", NULL, MTX_DEF); 2861 softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2862 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2863 softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG, 2864 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2865 if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) 2866 softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG, 2867 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2868 SLIST_INIT(&softc->lun_list); 2869 return (0); 2870 } 2871 2872 static int 2873 ctl_be_block_shutdown(void) 2874 { 2875 struct ctl_be_block_softc *softc = &backend_block_softc; 2876 struct ctl_be_block_lun *lun; 2877 2878 mtx_lock(&softc->lock); 2879 while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) { 2880 SLIST_REMOVE_HEAD(&softc->lun_list, links); 2881 softc->num_luns--; 2882 /* 2883 * Drop our lock here. Since ctl_remove_lun() can call 2884 * back into us, this could potentially lead to a recursive 2885 * lock of the same mutex, which would cause a hang. 2886 */ 2887 mtx_unlock(&softc->lock); 2888 ctl_remove_lun(&lun->cbe_lun); 2889 mtx_lock(&softc->lock); 2890 } 2891 mtx_unlock(&softc->lock); 2892 uma_zdestroy(softc->bufmin_zone); 2893 if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) 2894 uma_zdestroy(softc->bufmax_zone); 2895 uma_zdestroy(softc->beio_zone); 2896 mtx_destroy(&softc->lock); 2897 sx_destroy(&softc->modify_lock); 2898 return (0); 2899 } 2900