/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2003 Silicon Graphics International Corp.
 * Copyright (c) 2009-2011 Spectra Logic Corporation
 * Copyright (c) 2012,2021 The FreeBSD Foundation
 * Copyright (c) 2014-2021 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Edward Tomasz Napierala
 * under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
 */
/*
 * CAM Target Layer driver backend for block devices.
 *
 * Author: Ken Merry <ken@FreeBSD.org>
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/kthread.h>
#include <sys/bio.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/endian.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/disk.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/proc.h>
#include <sys/pcpu.h>
#include <sys/module.h>
#include <sys/sdt.h>
#include <sys/devicestat.h>
#include <sys/sysctl.h>
#include <sys/nv.h>
#include <sys/dnv.h>
#include <sys/sx.h>
#include <sys/unistd.h>

#include <geom/geom.h>

#include <cam/cam.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is to allocate enough S/G space to handle at least 1MB I/Os.
 * On systems with small maxphys it can be 8 128KB segments.  On large systems
 * it can be up to 8 1MB segments.  I/Os larger than that we'll split.
 */
#define	CTLBLK_MAX_SEGS		8
#define	CTLBLK_HALF_SEGS	(CTLBLK_MAX_SEGS / 2)
#define	CTLBLK_MIN_SEG		(128 * 1024)
#define	CTLBLK_MAX_SEG		MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys))
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS)
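
/*
 * Illustrative arithmetic (descriptive, not normative): with maxphys of
 * 128KB, CTLBLK_MAX_SEG = MIN(1MB, MAX(128KB, 128KB)) = 128KB, so the
 * largest unsplit I/O is 8 * 128KB = 1MB.  With maxphys of 1MB or more,
 * CTLBLK_MAX_SEG = 1MB and the largest unsplit I/O is 8 * 1MB = 8MB.
 */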

#ifdef CTLBLK_DEBUG
#define	DPRINTF(fmt, args...) \
	printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define	DPRINTF(fmt, args...)	do {} while (0)
#endif

#define	PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define	ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
#define	DSM_RANGE(io)	((io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].integer)

SDT_PROVIDER_DEFINE(cbb);

typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;

typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
				  const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;		/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_dispatch_t get_lba_status;
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	SLIST_ENTRY(ctl_be_block_lun) links;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;
	struct mtx_padalign queue_lock;
};
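
/*
 * Note on the queues and locks above (descriptive): input_queue,
 * config_read_queue, config_write_queue and datamove_queue feed the
 * worker (ctl_be_block_worker) and are protected by queue_lock, while
 * io_lock protects per-I/O completion state and devstat accounting in
 * the bio completion path.
 */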

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct sx			 modify_lock;
	struct mtx			 lock;
	int				 num_luns;
	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
	uma_zone_t			 beio_zone;
	uma_zone_t			 bufmin_zone;
	uma_zone_t			 bufmax_zone;
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
	int				refcnt;
	int				bio_cmd;
	int				two_sglists;
	int				num_segs;
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;
	int				first_error;
	uint64_t			first_error_offset;
	struct bintime			ds_t0;
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;
	uint64_t			io_offset;
	int				io_arg;
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

static int cbb_num_threads = 32;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
    &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
					  const char *attrname);
static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
					 const char *attrname);
static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			      int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun,
				      const char *attrname);
static int ctl_be_block_init(void);
static int ctl_be_block_shutdown(void);

static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.shutdown = ctl_be_block_shutdown,
	.data_submit = ctl_be_block_submit,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

static void
ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
    size_t len)
{

	if (len <= CTLBLK_MIN_SEG) {
		sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
	} else {
		KASSERT(len <= CTLBLK_MAX_SEG,
		    ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
		sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
	}
	sg->len = len;
}

static void
ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
{

	if (sg->len <= CTLBLK_MIN_SEG) {
		uma_zfree(softc->bufmin_zone, sg->addr);
	} else {
		KASSERT(sg->len <= CTLBLK_MAX_SEG,
		    ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
		uma_zfree(softc->bufmax_zone, sg->addr);
	}
}
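
/*
 * Size-class illustration (descriptive): segments of up to CTLBLK_MIN_SEG
 * (128KB) are served from bufmin_zone, anything larger, up to
 * CTLBLK_MAX_SEG, from bufmax_zone.  For example, a 64KB segment comes
 * from bufmin_zone and, assuming maxphys >= 512KB, a 512KB segment from
 * bufmax_zone.
 */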

static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	beio->refcnt = 1;
	return (beio);
}

static void
ctl_real_free_beio(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_softc *softc = beio->softc;
	int i;

	for (i = 0; i < beio->num_segs; i++) {
		ctl_free_seg(softc, &beio->sg_segs[i]);

		/* For compare we had two equal S/G lists. */
		if (beio->two_sglists) {
			ctl_free_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
		}
	}

	uma_zfree(softc->beio_zone, beio);
}

static void
ctl_refcnt_beio(void *arg, int diff)
{
	struct ctl_be_block_io *beio = arg;

	if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
		ctl_real_free_beio(beio);
}

static void
ctl_free_beio(struct ctl_be_block_io *beio)
{

	ctl_refcnt_beio(beio, -1);
}

static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;

	if (beio->beio_cont != NULL) {
		beio->beio_cont(beio);
	} else {
		ctl_free_beio(beio);
		ctl_data_submit_done(io);
	}
}

static void
ctl_be_block_io_error(union ctl_io *io, int bio_cmd, uint16_t retry_count)
{
	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		if (bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 retry_count);
		} else {
			ctl_set_medium_error(&io->scsiio, bio_cmd == BIO_READ);
		}
		break;
	case CTL_IO_NVME:
		switch (bio_cmd) {
		case BIO_FLUSH:
		case BIO_WRITE:
			ctl_nvme_set_write_fault(&io->nvmeio);
			break;
		case BIO_READ:
			ctl_nvme_set_unrecoverable_read_error(&io->nvmeio);
			break;
		default:
			ctl_nvme_set_internal_error(&io->nvmeio);
			break;
		}
		break;
	default:
		__assert_unreachable();
	}
}

static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}

static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;
	for (i = 0; i < beio->num_segs; i++) {
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		if (res < beio->sg_segs[i].len)
			break;
	}
	if (i < beio->num_segs) {
		ctl_io_set_compare_failure(io, off);
	} else
		ctl_io_set_success(io);
}
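
/*
 * Worked example (descriptive): cmp() returns the index of the first
 * mismatching byte, so if segment 0 (say 128KB) matches fully and
 * segment 1 differs at byte 100, the loop above reports off =
 * 131072 + 100 = 131172, which ctl_io_set_compare_failure() turns into
 * the failing offset reported to the initiator.
 */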

static int
ctl_be_block_move_done(union ctl_io *io, bool samethr)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	DPRINTF("entered\n");
	ctl_add_kern_rel_offset(io, ctl_kern_data_len(io));

	/*
	 * We set status at this point for read and compare commands.
	 */
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
		lbalen = ARGS(io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_io_set_success(io);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed successfully.
	 * If we were called synchronously in the original thread then just
	 * dispatch, otherwise we now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
	if (samethr) {
		be_lun->dispatch(be_lun, beio);
	} else {
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
	}
	return (0);
}

static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio = bio->bio_caller1;
	struct ctl_be_block_lun *be_lun = beio->lun;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	int error;

	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0 &&
	    (beio->first_error == 0 ||
	     bio->bio_offset < beio->first_error_offset)) {
		beio->first_error = error;
		beio->first_error_offset = bio->bio_offset;
	}

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	error = beio->first_error;
	if (error != 0) {
		if (error == EOPNOTSUPP) {
			ctl_io_set_invalid_opcode(io);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_io_set_space_alloc_fail(io);
		} else if (error == EROFS || error == EACCES) {
			ctl_io_set_hw_write_protected(io);
		} else {
			ctl_be_block_io_error(io, beio->bio_cmd,
					      /*retry_count*/ 0xbad2);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_io_set_success(io);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_io_set_success(io);
			if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
	    LK_RETRY);
	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_io_set_success(io);
	else {
		ctl_be_block_io_error(io, BIO_FLUSH,
				      /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, file_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
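
/*
 * The probes above can be observed with dtrace(1); for example, this
 * illustrative one-liner (not part of the driver) counts file-backed
 * dispatches by direction as they start:
 *
 *	dtrace -n 'cbb:::file_start { @[probefunc] = count(); }'
 */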

static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	size_t s;
	int error, flags, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		if (beio->beio_cont == NULL &&
		    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
			ctl_serseq_done(io);
		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn);
		SDT_PROBE0(cbb, , read, file_done);
		if (error == 0 && xuio.uio_resid > 0) {
			/*
			 * If we read less than requested (EOF), then
			 * we should clean the rest of the buffer.
			 */
			s = beio->io_len - xuio.uio_resid;
			for (i = 0; i < beio->num_segs; i++) {
				if (s >= beio->sg_segs[i].len) {
					s -= beio->sg_segs[i].len;
					continue;
				}
				bzero((uint8_t *)beio->sg_segs[i].addr + s,
				    beio->sg_segs[i].len - s);
				s = 0;
			}
		}
	} else {
		struct mount *mountpoint;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
		vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
		    be_lun->vn) | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into the cache.)
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn);

		vn_finished_write(mountpoint);
		SDT_PROBE0(cbb, , write, file_done);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_io_set_space_alloc_fail(io);
		} else if (error == EROFS || error == EACCES) {
			ctl_io_set_hw_write_protected(io);
		} else {
			ctl_be_block_io_error(io, beio->bio_cmd, 0);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_io_set_success(io);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_io_set_success(io);
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, SCSI);

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn);

	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
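
/*
 * Descriptive note: GET LBA STATUS above is answered with a single
 * descriptor.  If FIOSEEKHOLE moves the offset forward, the range up to
 * it is mapped (status 0); otherwise, if FIOSEEKDATA moves it forward,
 * the range is deallocated (status 1); failing both, the remainder of
 * the LUN is reported as status 0, "unknown up to the end".
 */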

static uint64_t
ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct vattr vattr;
	struct statfs statfs;
	uint64_t val;
	int error;

	val = UINT64_MAX;
	if (be_lun->vn == NULL)
		return (val);
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	if (strcmp(attrname, "blocksused") == 0) {
		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
		if (error == 0)
			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
	}
	if (strcmp(attrname, "blocksavail") == 0 &&
	    !VN_IS_DOOMED(be_lun->vn)) {
		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
		if (error == 0)
			val = statfs.f_bavail * statfs.f_bsize /
			    be_lun->cbe_lun.blocksize;
	}
	VOP_UNLOCK(be_lun->vn);
	return (val);
}

static void
ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	struct mount *mp;
	off_t off, len;
	int error;

	io = beio->io;
	file_data = &be_lun->backend.file;
	mp = NULL;
	error = 0;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	(void)vn_start_write(be_lun->vn, &mp, V_WAIT);
	vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)
		    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			off = (off_t)scsi_8btou64(buf->lba) *
			    be_lun->cbe_lun.blocksize;
			len = (off_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			error = vn_deallocate(be_lun->vn, &off, &len,
			    0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
			    NOCRED);
			if (error != 0)
				break;
		}
	} else {
		/* WRITE_SAME */
		off = beio->io_offset;
		len = beio->io_len;
		error = vn_deallocate(be_lun->vn, &off, &len, 0,
		    IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
	}
	VOP_UNLOCK(be_lun->vn);
	vn_finished_write(mp);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	switch (error) {
	case 0:
		ctl_io_set_success(io);
		break;
	case ENOSPC:
	case EDQUOT:
		ctl_io_set_space_alloc_fail(io);
		break;
	case EROFS:
	case EACCES:
		ctl_io_set_hw_write_protected(io);
		break;
	default:
		ctl_be_block_io_error(io, BIO_DELETE, 0);
	}
	ctl_complete_beio(beio);
}

static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ) {
			if (beio->beio_cont == NULL &&
			    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
			error = csw->d_read(dev, &xuio, flags);
		} else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE0(cbb, , read, file_done);
	else
		SDT_PROBE0(cbb, , write, file_done);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_io_set_space_alloc_fail(io);
		} else if (error == EROFS || error == EACCES) {
			ctl_io_set_hw_write_protected(io);
		} else {
			ctl_be_block_io_error(io, beio->bio_cmd, 0);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_io_set_success(io);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_io_set_success(io);
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}
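
/*
 * Descriptive note on the devvn_refthread() pattern used above and
 * below: it resolves the backing vnode to its character device and
 * takes a temporary thread reference that keeps the cdevsw valid until
 * dev_relthread().  A NULL cdevsw means the device has gone away, so
 * the I/O is failed with ENXIO instead of being dispatched.
 */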

static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, SCSI);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd = BIO_FLUSH;
	bio->bio_offset = 0;
	bio->bio_data = 0;
	bio->bio_done = ctl_be_block_biodone;
	bio->bio_caller1 = beio;
	bio->bio_pblkno = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}

static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio, uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd = BIO_DELETE;
		bio->bio_dev = dev;
		bio->bio_offset = off;
		bio->bio_length = MIN(len, maxlen);
		bio->bio_data = 0;
		bio->bio_done = ctl_be_block_biodone;
		bio->bio_caller1 = beio;
		bio->bio_pblkno = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	io = beio->io;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)
		    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
			    len, (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}
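
/*
 * Illustrative arithmetic (descriptive): maxlen above rounds LONG_MAX
 * down to a multiple of the block size so that every BIO_DELETE stays
 * block-aligned; e.g. with 512-byte blocks, maxlen = LONG_MAX - 511.
 * Deletes longer than that are split across multiple bios by the loop.
 */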

static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize <= 0)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = maxphys;

	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	beio->send_complete = 1;
	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct diocgattr_arg arg;
	struct cdevsw *csw;
	struct cdev *dev;
	int error, ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (UINT64_MAX);
	strlcpy(arg.name, attrname, sizeof(arg.name));
	arg.len = sizeof(arg.value.off);
	if (csw->d_ioctl) {
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
	} else
		error = ENODEV;
	dev_relthread(dev, ref);
	if (error != 0)
		return (UINT64_MAX);
	return (arg.value.off);
}

static void
ctl_be_block_namespace_data(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nvme_namespace_data *nsdata;

	nsdata = (struct nvme_namespace_data *)io->nvmeio.kern_data_ptr;
	memset(nsdata, 0, sizeof(*nsdata));
	nsdata->nsze = htole64(be_lun->size_blocks);
	nsdata->ncap = nsdata->nsze;
	nsdata->nuse = nsdata->nsze;
	nsdata->nlbaf = 1 - 1;
	nsdata->dlfeat = NVMEM(NVME_NS_DATA_DLFEAT_DWZ) |
	    NVMEF(NVME_NS_DATA_DLFEAT_READ, NVME_NS_DATA_DLFEAT_READ_00);
	nsdata->flbas = NVMEF(NVME_NS_DATA_FLBAS_FORMAT, 0);
	nsdata->lbaf[0] = NVMEF(NVME_NS_DATA_LBAF_LBADS,
	    ffs(cbe_lun->blocksize) - 1);

	ctl_lun_nsdata_ids(cbe_lun, nsdata);
	ctl_config_read_done(io);
}

static void
ctl_be_block_nvme_ids(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;

	ctl_lun_nvme_ids(cbe_lun, io->nvmeio.kern_data_ptr);
	ctl_config_read_done(io);
}

static void
ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;

	DPRINTF("entered\n");
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = (struct ctl_lba_len_flags *)
	    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	beio->io_len = lbalen->len * cbe_lun->blocksize;
	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
	beio->bio_cmd = BIO_FLUSH;
	beio->ds_trans_type = DEVSTAT_NO_DATA;
	DPRINTF("SYNC\n");
	be_lun->lun_flush(be_lun, beio);
}

static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_config_write_done(io);
		return;
	}

	ctl_be_block_config_write(io);
}

static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_block_softc *softc = be_lun->softc;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, SCSI);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			if (lbalen->flags & SWS_NDOB) {
				memset(buf, 0, cbe_lun->blocksize);
			} else {
				memcpy(buf, io->scsiio.kern_data_ptr,
				    cbe_lun->blocksize);
			}
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We cannot do it all in one run.  Correct the length and schedule a rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}
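
/*
 * Alignment illustration (descriptive): pb is the physical block size
 * (logical block size << pblockexp) and pbo the byte offset of the
 * first physical-block boundary.  For example, with 512-byte logical
 * and 4KB physical blocks, a chunk that would end mid-way through a
 * physical block is trimmed by adj so that subsequent chunks start on
 * a 4KB boundary.
 */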

static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_ptr_len_flags *ptrlen;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, SCSI);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	ptrlen = (struct ctl_ptr_len_flags *)
	    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 0,
				      /*command*/ 1,
				      /*field*/ 0,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	beio->io_len = 0;
	beio->io_offset = -1;
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;
	DPRINTF("UNMAP\n");
	be_lun->unmap(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_flush(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	beio->io_len = be_lun->size_bytes;
	beio->io_offset = 0;
	beio->io_arg = 1;
	beio->bio_cmd = BIO_FLUSH;
	beio->ds_trans_type = DEVSTAT_NO_DATA;
	DPRINTF("FLUSH\n");
	be_lun->lun_flush(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_wu(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;

	CTL_IO_ASSERT(io, NVME);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

	/*
	 * XXX: Not quite right as reads will return zeroes rather
	 * than failing.
	 */
	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;

	be_lun->unmap(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_wz(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_block_softc *softc = be_lun->softc;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, NVME);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

	if ((le32toh(io->nvmeio.cmd.cdw12) & (1U << 25)) != 0 &&
	    be_lun->unmap != NULL) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE ZEROES at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		memset(beio->sg_segs[i].addr, 0, seglen);
		lba += seglen / cbe_lun->blocksize;
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We cannot do it all in one run.  Correct the length and schedule a rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}
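
/*
 * Descriptive note: the CDW12 bit 25 test above corresponds to the
 * Deallocate (DEAC) hint of NVMe Write Zeroes; when it is set and the
 * backend supports unmap, the request is serviced as a BIO_DELETE
 * instead of writing explicit zeroes.
 */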

static void
ctl_be_block_cw_dispatch_dsm(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct nvme_dsm_range *r;
	uint64_t lba;
	uint32_t num_blocks;
	u_int i, ranges;

	CTL_IO_ASSERT(io, NVME);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	if (be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_nvme_set_success(&io->nvmeio);
		ctl_config_write_done(io);
		return;
	}

	ranges = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
	r = (struct nvme_dsm_range *)io->nvmeio.kern_data_ptr;

	/* Find the next range to delete. */
	for (i = DSM_RANGE(io); i < ranges; i++) {
		if ((le32toh(r[i].attributes) & (1U << 2)) != 0)
			break;
	}

	/* If no range to delete, complete the operation. */
	if (i == ranges) {
		ctl_free_beio(beio);
		ctl_nvme_set_success(&io->nvmeio);
		ctl_config_write_done(io);
		return;
	}

	/* If this is not the last range, request a rerun after this range. */
	if (i + 1 < ranges) {
		DSM_RANGE(io) = i + 1;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	lba = le64toh(r[i].starting_lba);
	num_blocks = le32toh(r[i].length);

	beio->io_offset = lba * cbe_lun->blocksize;
	beio->io_len = (uint64_t)num_blocks * cbe_lun->blocksize;
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;

	be_lun->unmap(be_lun, beio);
}
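
/*
 * Descriptive note: DSM_RANGE(io) acts as a cursor into the range list,
 * persisted in the I/O's private area across reruns.  Each pass issues
 * one BIO_DELETE for the next qualifying range and, if more ranges
 * remain, beio_cont reschedules the command through the config-write
 * path.
 */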

static void
ctl_be_block_scsi_cr_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_read_done(io);
}

static void
ctl_be_block_scsi_cr_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_scsi_cr_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
		beio->bio_cmd = -1;
		beio->ds_trans_type = DEVSTAT_NO_DATA;
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		beio->io_len = 0;
		if (be_lun->get_lba_status)
			be_lun->get_lba_status(be_lun, beio);
		else
			ctl_be_block_scsi_cr_done(beio);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

static void
ctl_be_block_nvme_cr_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	uint8_t cns;

	DPRINTF("entered\n");

	MPASS(io->nvmeio.cmd.opc == NVME_OPC_IDENTIFY);

	cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
	switch (cns) {
	case 0:
		ctl_be_block_namespace_data(be_lun, io);
		break;
	case 3:
		ctl_be_block_nvme_ids(be_lun, io);
		break;
	default:
		__assert_unreachable();
	}
}

static void
ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		ctl_be_block_scsi_cr_dispatch(be_lun, io);
		break;
	case CTL_IO_NVME_ADMIN:
		ctl_be_block_nvme_cr_dispatch(be_lun, io);
		break;
	default:
		__assert_unreachable();
	}
}

static void
ctl_be_block_cw_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_write_done(io);
}

static void
ctl_be_block_scsi_cw_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
		ctl_be_block_cw_dispatch_sync(be_lun, io);
		break;
	case WRITE_SAME_10:
	case WRITE_SAME_16:
		ctl_be_block_cw_dispatch_ws(be_lun, io);
		break;
	case UNMAP:
		ctl_be_block_cw_dispatch_unmap(be_lun, io);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

static void
ctl_be_block_nvme_cw_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;

	switch (io->nvmeio.cmd.opc) {
	case NVME_OPC_FLUSH:
		ctl_be_block_cw_dispatch_flush(be_lun, io);
		break;
	case NVME_OPC_WRITE_UNCORRECTABLE:
		ctl_be_block_cw_dispatch_wu(be_lun, io);
		break;
	case NVME_OPC_WRITE_ZEROES:
		ctl_be_block_cw_dispatch_wz(be_lun, io);
		break;
	case NVME_OPC_DATASET_MANAGEMENT:
		ctl_be_block_cw_dispatch_dsm(be_lun, io);
		break;
	default:
		__assert_unreachable();
	}
}

static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cw_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		ctl_be_block_scsi_cw_dispatch(be_lun, io);
		break;
	case CTL_IO_NVME:
		ctl_be_block_nvme_cw_dispatch(be_lun, io);
		break;
	default:
		__assert_unreachable();
	}
}

SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");

static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}
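
/*
 * Descriptive note: reads and writes larger than CTLBLK_MAX_IO_SIZE are
 * processed in chunks.  ctl_be_block_dispatch() below sets beio_cont to
 * ctl_be_block_next() whenever a chunk does not complete the request,
 * and ctl_be_block_next() above requeues the I/O on the input queue to
 * start the next chunk.
 */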
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE0(cbb, , write, start);
	} else {
		SDT_PROBE0(cbb, , read, start);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		switch (io->scsiio.tag_type) {
		case CTL_TAG_ORDERED:
			beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
			break;
		case CTL_TAG_HEAD_OF_QUEUE:
			beio->ds_tag_type = DEVSTAT_TAG_HEAD;
			break;
		case CTL_TAG_UNTAGGED:
		case CTL_TAG_SIMPLE:
		case CTL_TAG_ACA:
		default:
			beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
			break;
		}
		break;
	case CTL_IO_NVME:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	default:
		__assert_unreachable();
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	    (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	    (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	lbas = CTLBLK_MAX_IO_SIZE;
	if (lbalen->flags & CTL_LLF_COMPARE) {
		beio->two_sglists = 1;
		lbas /= 2;
	}
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Set up the S/G entry for this chunk.
		 */
		ctl_alloc_seg(softc, &beio->sg_segs[i],
		    MIN(CTLBLK_MAX_SEG, len_left));

		DPRINTF("segment %d addr %p len %zd\n", i,
		    beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (beio->two_sglists) {
			ctl_alloc_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS],
			    beio->sg_segs[i].len);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	ctl_set_be_move_done(io, ctl_be_block_move_done);
	/* For compare we have separate S/G lists for read and datamove. */
	if (beio->two_sglists)
		ctl_set_kern_data_ptr(io, &beio->sg_segs[CTLBLK_HALF_SEGS]);
	else
		ctl_set_kern_data_ptr(io, beio->sg_segs);
	ctl_set_kern_data_len(io, beio->io_len);
	ctl_set_kern_sg_entries(io, beio->num_segs);
	ctl_set_kern_data_ref(io, ctl_refcnt_beio);
	ctl_set_kern_data_arg(io, beio);
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, alloc_done);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE0(cbb, , write, alloc_done);
		ctl_datamove(io);
	}
}
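/*
 * Worked example (editorial): for a 256 KB READ with a 512-byte block
 * size and a CTLBLK_MAX_SEG of 128 KB, the loop above builds two 128 KB
 * segments (num_segs == 2).  For a COMPARE, two_sglists splits the
 * sg_segs[] array in half: segments [0, CTLBLK_HALF_SEGS) buffer the
 * data read from the backing store, while segments [CTLBLK_HALF_SEGS,
 * CTLBLK_MAX_SEGS) receive the initiator's data via ctl_datamove().
 */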
static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect the
	 * CTL_LUN_FLAG_NO_MEDIA flag on the LUN here, it is the result
	 * of a race, so make the response maximally opaque so as not to
	 * confuse the initiator.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_io_set_busy(io);
				ctl_complete_beio(beio);
				continue;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_io_set_busy(io);
				ctl_config_write_done(io);
				continue;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_io_set_busy(io);
				ctl_config_read_done(io);
				continue;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_io_set_busy(io);
				ctl_data_submit_done(io);
				continue;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}
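/*
 * Editorial note: the worker drains the queues in a fixed priority
 * order -- datamove completions first, then config writes, config
 * reads, and only then new I/O from the input queue.  A plausible
 * reading is that finishing in-flight requests first bounds the amount
 * of S/G memory tied up in partially processed I/Os before new work is
 * admitted.
 */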
/*
 * Entry point from CTL to the backend for I/O.  We queue everything to a
 * work thread, so this just puts the I/O on a queue and wakes up the
 * thread.
 */
static int
ctl_be_block_submit(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	CTL_IO_ASSERT(io, SCSI, NVME);

	PRIV(io)->len = 0;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (CTL_RETVAL_COMPLETE);
}

static int
ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
    int flag, struct thread *td)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	int error;

	error = 0;
	switch (cmd) {
	case CTL_LUN_REQ: {
		struct ctl_lun_req *lun_req;

		lun_req = (struct ctl_lun_req *)addr;

		switch (lun_req->reqtype) {
		case CTL_LUNREQ_CREATE:
			error = ctl_be_block_create(softc, lun_req);
			break;
		case CTL_LUNREQ_RM:
			error = ctl_be_block_rm(softc, lun_req);
			break;
		case CTL_LUNREQ_MODIFY:
			error = ctl_be_block_modify(softc, lun_req);
			break;
		default:
			lun_req->status = CTL_LUN_ERROR;
			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
			    "invalid LUN request type %d",
			    lun_req->reqtype);
			break;
		}
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
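/*
 * Illustrative usage (editorial): the CTL_LUN_REQ requests above are
 * normally issued by ctladm(8) through the CTL control device, e.g.
 *
 *	ctladm create -b block -o file=/dev/zvol/pool/vol0
 *	ctladm remove -b block -l 1
 *
 * which reach this handler as CTL_LUNREQ_CREATE and CTL_LUNREQ_RM
 * respectively (the path and LUN id above are examples only).
 */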
2263 */ 2264 if (params->blocksize_bytes != 0) 2265 cbe_lun->blocksize = params->blocksize_bytes; 2266 else if (cbe_lun->lun_type == T_CDROM) 2267 cbe_lun->blocksize = 2048; 2268 else 2269 cbe_lun->blocksize = 512; 2270 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2271 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2272 0 : (be_lun->size_blocks - 1); 2273 2274 us = ps = vattr.va_blocksize; 2275 uo = po = 0; 2276 2277 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 2278 if (value != NULL) 2279 ctl_expand_number(value, &ps); 2280 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 2281 if (value != NULL) 2282 ctl_expand_number(value, &po); 2283 pss = ps / cbe_lun->blocksize; 2284 pos = po / cbe_lun->blocksize; 2285 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 2286 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 2287 cbe_lun->pblockexp = fls(pss) - 1; 2288 cbe_lun->pblockoff = (pss - pos) % pss; 2289 } 2290 2291 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 2292 if (value != NULL) 2293 ctl_expand_number(value, &us); 2294 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 2295 if (value != NULL) 2296 ctl_expand_number(value, &uo); 2297 uss = us / cbe_lun->blocksize; 2298 uos = uo / cbe_lun->blocksize; 2299 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 2300 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 2301 cbe_lun->ublockexp = fls(uss) - 1; 2302 cbe_lun->ublockoff = (uss - uos) % uss; 2303 } 2304 2305 /* 2306 * Sanity check. The media size has to be at least one 2307 * sector long. 2308 */ 2309 if (be_lun->size_bytes < cbe_lun->blocksize) { 2310 error = EINVAL; 2311 snprintf(req->error_str, sizeof(req->error_str), 2312 "file %s size %ju < block size %u", be_lun->dev_path, 2313 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); 2314 } 2315 2316 cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; 2317 return (error); 2318 } 2319 2320 static int 2321 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2322 { 2323 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2324 struct ctl_lun_create_params *params; 2325 struct cdevsw *csw; 2326 struct cdev *dev; 2327 const char *value; 2328 int error, atomic, maxio, ref, unmap, tmp; 2329 off_t ps, pss, po, pos, us, uss, uo, uos, otmp; 2330 2331 params = &be_lun->params; 2332 2333 be_lun->dev_type = CTL_BE_BLOCK_DEV; 2334 csw = devvn_refthread(be_lun->vn, &dev, &ref); 2335 if (csw == NULL) 2336 return (ENXIO); 2337 if (strcmp(csw->d_name, "zvol") == 0) { 2338 be_lun->dispatch = ctl_be_block_dispatch_zvol; 2339 be_lun->get_lba_status = ctl_be_block_gls_zvol; 2340 atomic = maxio = CTLBLK_MAX_IO_SIZE; 2341 } else { 2342 be_lun->dispatch = ctl_be_block_dispatch_dev; 2343 be_lun->get_lba_status = NULL; 2344 atomic = 0; 2345 maxio = dev->si_iosize_max; 2346 if (maxio <= 0) 2347 maxio = DFLTPHYS; 2348 if (maxio > CTLBLK_MAX_SEG) 2349 maxio = CTLBLK_MAX_SEG; 2350 } 2351 be_lun->lun_flush = ctl_be_block_flush_dev; 2352 be_lun->getattr = ctl_be_block_getattr_dev; 2353 be_lun->unmap = ctl_be_block_unmap_dev; 2354 2355 if (!csw->d_ioctl) { 2356 dev_relthread(dev, ref); 2357 snprintf(req->error_str, sizeof(req->error_str), 2358 "no d_ioctl for device %s!", be_lun->dev_path); 2359 return (ENODEV); 2360 } 2361 2362 error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, 2363 curthread); 2364 if (error) { 2365 dev_relthread(dev, ref); 2366 
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw *csw;
	struct cdev *dev;
	const char *value;
	int error, atomic, maxio, ref, unmap, tmp;
	off_t ps, pss, po, pos, us, uss, uo, uos, otmp;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	if (strcmp(csw->d_name, "zvol") == 0) {
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_SEG)
			maxio = CTLBLK_MAX_SEG;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
		    "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
	    curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
		    "error %d returned for DIOCGSECTORSIZE ioctl "
		    "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
			    "requested blocksize %u is not an even "
			    "multiple of backing device blocksize %u",
			    params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
		    "requested blocksize %u < backing device "
		    "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = MAX(tmp, 2048);
	else
		cbe_lun->blocksize = tmp;

	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
	    curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
		    "error %d returned for DIOCGMEDIASIZE ioctl on %s!",
		    error, be_lun->dev_path);
		return (error);
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
			    "requested LUN size %ju > backing device "
			    "size %ju",
			    (uintmax_t)params->lun_size_bytes,
			    (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);
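	/*
	 * Worked example (editorial) for the blocksize checks above: on
	 * a device reporting 512-byte sectors, a requested blocksize of
	 * 4096 is accepted (an even multiple), 520 is rejected (not a
	 * multiple), and on a 4096-byte-sector device a requested 512 is
	 * rejected outright as smaller than the backing sector size.
	 */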
	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
	    curthread);
	if (error)
		ps = po = 0;
	else {
		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
		    FREAD, curthread);
		if (error)
			po = 0;
	}
	us = ps;
	uo = po;

	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;

	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
		unmap = 1;
	} else {
		struct diocgattr_arg arg;

		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
		unmap = (error == 0) ? arg.value.i : 0;
	}
	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
	if (value != NULL)
		unmap = (strcmp(value, "on") == 0);
	if (unmap)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	dev_relthread(dev, ref);
	return (0);
}

static int
ctl_be_block_close(struct ctl_be_block_lun *be_lun)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	int flags;

	if (be_lun->vn) {
		flags = FREAD;
		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
			flags |= FWRITE;
		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
		be_lun->vn = NULL;

		switch (be_lun->dev_type) {
		case CTL_BE_BLOCK_DEV:
			break;
		case CTL_BE_BLOCK_FILE:
			if (be_lun->backend.file.cred != NULL) {
				crfree(be_lun->backend.file.cred);
				be_lun->backend.file.cred = NULL;
			}
			break;
		case CTL_BE_BLOCK_NONE:
			break;
		default:
			panic("Unexpected backend type %d", be_lun->dev_type);
			break;
		}
		be_lun->dev_type = CTL_BE_BLOCK_NONE;
	}
	return (0);
}
static int
ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	const char *value;
	int error, flags;

	error = 0;
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "Root filesystem is not mounted");
		return (1);
	}
	pwd_ensure_dirs();

	value = dnvlist_get_string(cbe_lun->options, "file", NULL);
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "no file argument specified");
		return (1);
	}
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	flags = FREAD;
	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
	if (value != NULL) {
		if (strcmp(value, "on") != 0)
			flags |= FWRITE;
	} else if (cbe_lun->lun_type == T_DIRECT)
		flags |= FWRITE;

again:
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path);
	error = vn_open(&nd, &flags, 0, NULL);
	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
		flags &= ~FWRITE;
		goto again;
	}
	if (error) {
		/*
		 * This is the only reasonable guess we can make as far
		 * as the path goes if the user doesn't give us a fully
		 * qualified path.  If they want to specify a file, they
		 * need to specify the full path.
		 */
		if (be_lun->dev_path[0] != '/') {
			char *dev_name;

			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
			    be_lun->dev_path);
			free(be_lun->dev_path, M_CTLBLK);
			be_lun->dev_path = dev_name;
			goto again;
		}
		snprintf(req->error_str, sizeof(req->error_str),
		    "error opening %s: %d", be_lun->dev_path, error);
		return (error);
	}
	if (flags & FWRITE)
		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
	else
		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;

	NDFREE_PNBUF(&nd);
	be_lun->vn = nd.ni_vp;

	/* We only support disks and files. */
	if (vn_isdisk_error(be_lun->vn, &error)) {
		error = ctl_be_block_open_dev(be_lun, req);
	} else if (be_lun->vn->v_type == VREG) {
		error = ctl_be_block_open_file(be_lun, req);
	} else {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s is not a disk or plain file", be_lun->dev_path);
	}
	VOP_UNLOCK(be_lun->vn);

	if (error != 0)
		ctl_be_block_close(be_lun);
	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
	if (value != NULL && strcmp(value, "on") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
	else if (value != NULL && strcmp(value, "read") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	else if (value != NULL && strcmp(value, "soft") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
	else if (value != NULL && strcmp(value, "off") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	return (0);
}
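/*
 * Editorial note: the "serseq" LUN option above maps directly onto the
 * serialization levels -- "on" (CTL_LUN_SERSEQ_ON), "read"
 * (CTL_LUN_SERSEQ_READ), "soft" (CTL_LUN_SERSEQ_SOFT) and "off"
 * (CTL_LUN_SERSEQ_OFF) -- e.g. -o serseq=read at LUN creation.  The
 * default is SOFT for every dispatch path except raw device dispatch.
 */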
static int
ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lun_create_params *params;
	char tmpstr[32];
	const char *value;
	int retval, num_threads;
	int tmp_num_threads;

	params = &req->reqdata.create;
	retval = 0;
	req->status = CTL_LUN_OK;

	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
	cbe_lun = &be_lun->cbe_lun;
	be_lun->params = req->reqdata.create;
	be_lun->softc = softc;
	STAILQ_INIT(&be_lun->input_queue);
	STAILQ_INIT(&be_lun->config_read_queue);
	STAILQ_INIT(&be_lun->config_write_queue);
	STAILQ_INIT(&be_lun->datamove_queue);
	mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
	mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
	cbe_lun->options = nvlist_clone(req->args_nvl);

	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
		cbe_lun->lun_type = params->device_type;
	else
		cbe_lun->lun_type = T_DIRECT;
	be_lun->flags = 0;
	cbe_lun->flags = 0;
	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;

	if (cbe_lun->lun_type == T_DIRECT ||
	    cbe_lun->lun_type == T_CDROM) {
		be_lun->size_bytes = params->lun_size_bytes;
		if (params->blocksize_bytes != 0)
			cbe_lun->blocksize = params->blocksize_bytes;
		else if (cbe_lun->lun_type == T_CDROM)
			cbe_lun->blocksize = 2048;
		else
			cbe_lun->blocksize = 512;
		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
		    0 : (be_lun->size_blocks - 1);

		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
			retval = ctl_be_block_open(be_lun, req);
			if (retval != 0) {
				retval = 0;
				req->status = CTL_LUN_WARNING;
			}
		}
		num_threads = cbb_num_threads;
	} else {
		num_threads = 1;
	}

	value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
	if (value != NULL) {
		tmp_num_threads = strtol(value, NULL, 0);

		/*
		 * We don't let the user specify less than one
		 * thread, but hope he's clueful enough not to
		 * specify 1000 threads.
		 */
		if (tmp_num_threads < 1) {
			snprintf(req->error_str, sizeof(req->error_str),
			    "invalid number of threads %s",
			    value);
			goto bailout_error;
		}
		num_threads = tmp_num_threads;
	}

	if (be_lun->vn == NULL)
		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
	/* Tell the user the blocksize we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;
	params->blocksize_bytes = cbe_lun->blocksize;
	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
		cbe_lun->req_lun_id = params->req_lun_id;
		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
	} else
		cbe_lun->req_lun_id = 0;

	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
	cbe_lun->be = &ctl_be_block_driver;

	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
		    softc->num_luns);
		strncpy((char *)cbe_lun->serial_num, tmpstr,
		    MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));

		/* Tell the user what we used for a serial number */
		strncpy((char *)params->serial_num, tmpstr,
		    MIN(sizeof(params->serial_num), sizeof(tmpstr)));
	} else {
		strncpy((char *)cbe_lun->serial_num, params->serial_num,
		    MIN(sizeof(cbe_lun->serial_num),
		    sizeof(params->serial_num)));
	}
	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
		strncpy((char *)cbe_lun->device_id, tmpstr,
		    MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));

		/* Tell the user what we used for a device ID */
		strncpy((char *)params->device_id, tmpstr,
		    MIN(sizeof(params->device_id), sizeof(tmpstr)));
	} else {
		strncpy((char *)cbe_lun->device_id, params->device_id,
		    MIN(sizeof(cbe_lun->device_id),
		    sizeof(params->device_id)));
	}

	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);

	be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);

	if (be_lun->io_taskqueue == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "unable to create taskqueue");
		goto bailout_error;
	}

	/*
	 * Note that we start the same number of threads by default for
	 * both the file case and the block device case.  For the file
	 * case, we need multiple threads to allow concurrency, because the
	 * vnode interface is designed to be a blocking interface.  For the
	 * block device case, ZFS zvols at least will block the caller's
	 * context in many instances, and so we need multiple threads to
	 * overcome that problem.  Other block devices don't need as many
	 * threads, but they shouldn't cause too many problems.
	 *
	 * If the user wants to just have a single thread for a block
	 * device, he can specify that when the LUN is created, or change
	 * the tunable/sysctl to alter the default number of threads.
	 */
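	/*
	 * Editorial note: the default thread count comes from the
	 * cbb_num_threads tunable/sysctl referenced above, and a per-LUN
	 * override can be given at creation time, e.g. -o num_threads=1
	 * for a single worker on a device that never blocks the caller.
	 */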
2774 * 2775 * If the user wants to just have a single thread for a block 2776 * device, he can specify that when the LUN is created, or change 2777 * the tunable/sysctl to alter the default number of threads. 2778 */ 2779 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue, 2780 /*num threads*/num_threads, 2781 /*priority*/PUSER, 2782 /*proc*/control_softc->ctl_proc, 2783 /*thread name*/"block"); 2784 2785 if (retval != 0) 2786 goto bailout_error; 2787 2788 be_lun->num_threads = num_threads; 2789 2790 retval = ctl_add_lun(&be_lun->cbe_lun); 2791 if (retval != 0) { 2792 snprintf(req->error_str, sizeof(req->error_str), 2793 "ctl_add_lun() returned error %d, see dmesg for " 2794 "details", retval); 2795 retval = 0; 2796 goto bailout_error; 2797 } 2798 2799 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, 2800 cbe_lun->blocksize, 2801 DEVSTAT_ALL_SUPPORTED, 2802 cbe_lun->lun_type 2803 | DEVSTAT_TYPE_IF_OTHER, 2804 DEVSTAT_PRIORITY_OTHER); 2805 2806 mtx_lock(&softc->lock); 2807 softc->num_luns++; 2808 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); 2809 mtx_unlock(&softc->lock); 2810 2811 params->req_lun_id = cbe_lun->lun_id; 2812 2813 return (retval); 2814 2815 bailout_error: 2816 req->status = CTL_LUN_ERROR; 2817 2818 if (be_lun->io_taskqueue != NULL) 2819 taskqueue_free(be_lun->io_taskqueue); 2820 ctl_be_block_close(be_lun); 2821 if (be_lun->dev_path != NULL) 2822 free(be_lun->dev_path, M_CTLBLK); 2823 nvlist_destroy(cbe_lun->options); 2824 mtx_destroy(&be_lun->queue_lock); 2825 mtx_destroy(&be_lun->io_lock); 2826 free(be_lun, M_CTLBLK); 2827 2828 return (retval); 2829 } 2830 2831 static int 2832 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2833 { 2834 struct ctl_lun_rm_params *params; 2835 struct ctl_be_block_lun *be_lun; 2836 struct ctl_be_lun *cbe_lun; 2837 int retval; 2838 2839 params = &req->reqdata.rm; 2840 2841 sx_xlock(&softc->modify_lock); 2842 mtx_lock(&softc->lock); 2843 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2844 if (be_lun->cbe_lun.lun_id == params->lun_id) { 2845 SLIST_REMOVE(&softc->lun_list, be_lun, 2846 ctl_be_block_lun, links); 2847 softc->num_luns--; 2848 break; 2849 } 2850 } 2851 mtx_unlock(&softc->lock); 2852 sx_xunlock(&softc->modify_lock); 2853 if (be_lun == NULL) { 2854 snprintf(req->error_str, sizeof(req->error_str), 2855 "LUN %u is not managed by the block backend", 2856 params->lun_id); 2857 goto bailout_error; 2858 } 2859 cbe_lun = &be_lun->cbe_lun; 2860 2861 if (be_lun->vn != NULL) { 2862 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2863 ctl_lun_no_media(cbe_lun); 2864 taskqueue_drain_all(be_lun->io_taskqueue); 2865 ctl_be_block_close(be_lun); 2866 } 2867 2868 mtx_lock(&softc->lock); 2869 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2870 mtx_unlock(&softc->lock); 2871 2872 retval = ctl_remove_lun(cbe_lun); 2873 if (retval != 0) { 2874 snprintf(req->error_str, sizeof(req->error_str), 2875 "error %d returned from ctl_remove_lun() for " 2876 "LUN %d", retval, params->lun_id); 2877 mtx_lock(&softc->lock); 2878 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2879 mtx_unlock(&softc->lock); 2880 goto bailout_error; 2881 } 2882 2883 mtx_lock(&softc->lock); 2884 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2885 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0); 2886 if (retval == EINTR) 2887 break; 2888 } 2889 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2890 if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2891 mtx_unlock(&softc->lock); 2892 free(be_lun, M_CTLBLK); 2893 } else { 
static int
ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_rm_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	params = &req->reqdata.rm;

	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id) {
			SLIST_REMOVE(&softc->lun_list, be_lun,
			    ctl_be_block_lun, links);
			softc->num_luns--;
			break;
		}
	}
	mtx_unlock(&softc->lock);
	sx_xunlock(&softc->modify_lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "LUN %u is not managed by the block backend",
		    params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (be_lun->vn != NULL) {
		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
		ctl_lun_no_media(cbe_lun);
		taskqueue_drain_all(be_lun->io_taskqueue);
		ctl_be_block_close(be_lun);
	}

	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
	mtx_unlock(&softc->lock);

	retval = ctl_remove_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "error %d returned from ctl_remove_lun() for "
		    "LUN %d", retval, params->lun_id);
		mtx_lock(&softc->lock);
		be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	mtx_lock(&softc->lock);
	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
		if (retval == EINTR)
			break;
	}
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
		mtx_unlock(&softc->lock);
		free(be_lun, M_CTLBLK);
	} else {
		mtx_unlock(&softc->lock);
		return (EINTR);
	}

	req->status = CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}
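/*
 * Editorial note on the removal handshake above: ctl_remove_lun() is
 * asynchronous, so the WAITING flag is set and the thread sleeps until
 * ctl_be_block_lun_shutdown() marks the LUN UNCONFIGURED and wakes it;
 * only then is the be_lun freed here.  If the sleep is interrupted,
 * WAITING is cleared and ownership of the final free passes to the
 * shutdown path instead.
 */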
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	const char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "LUN %u is not managed by the block backend",
		    params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;

	if (req->args_nvl != NULL) {
		nvlist_destroy(cbe_lun->options);
		cbe_lun->options = nvlist_clone(req->args_nvl);
	}

	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(be_lun, req);
		else if (vn_isdisk_error(be_lun->vn, &error))
			error = ctl_be_block_open_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG) {
			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
			error = ctl_be_block_open_file(be_lun, req);
			VOP_UNLOCK(be_lun->vn);
		} else
			error = EINVAL;
		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_has_media(cbe_lun);
		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
		    be_lun->vn == NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
		}
		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
	} else {
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	sx_xunlock(&softc->modify_lock);
	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	sx_xunlock(&softc->modify_lock);
	req->status = CTL_LUN_ERROR;
	return (0);
}
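/*
 * Illustrative usage (editorial): a live resize reaches the function
 * above as CTL_LUNREQ_MODIFY, e.g.
 *
 *	ctladm modify -b block -l 1 -s 2T
 *
 * (the LUN id and size are examples only); the re-open picks up the
 * new backing size and ctl_lun_capacity_changed() notifies initiators.
 */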
static void
ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
	struct ctl_be_block_softc *softc = be_lun->softc;

	taskqueue_drain_all(be_lun->io_taskqueue);
	taskqueue_free(be_lun->io_taskqueue);
	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);
	nvlist_destroy(be_lun->cbe_lun.options);
	free(be_lun->dev_path, M_CTLBLK);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);

	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
		wakeup(be_lun);
	else
		free(be_lun, M_CTLBLK);
	mtx_unlock(&softc->lock);
}

static int
ctl_be_block_scsi_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	DPRINTF("entered\n");

	cbe_lun = CTL_BACKEND_LUN(io);
	be_lun = (struct ctl_be_block_lun *)cbe_lun;

	retval = 0;
	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
	case WRITE_SAME_10:
	case WRITE_SAME_16:
	case UNMAP:
		/*
		 * The upper level CTL code will filter out any CDBs with
		 * the immediate bit set and return the proper error.
		 *
		 * We don't really need to worry about what LBA range the
		 * user asked to be synced out.  When they issue a sync
		 * cache command, we'll sync out the whole thing.
		 */
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
		    links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	case START_STOP_UNIT: {
		struct scsi_start_stop_unit *cdb;
		struct ctl_lun_req req;

		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
		if ((cdb->how & SSS_PC_MASK) != 0) {
			ctl_set_success(&io->scsiio);
			ctl_config_write_done(io);
			break;
		}
		if (cdb->how & SSS_START) {
			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
				retval = ctl_be_block_open(be_lun, &req);
				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
				if (retval == 0) {
					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_has_media(cbe_lun);
				} else {
					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_no_media(cbe_lun);
				}
			}
			ctl_start_lun(cbe_lun);
		} else {
			ctl_stop_lun(cbe_lun);
			if (cdb->how & SSS_LOEJ) {
				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
				ctl_lun_ejected(cbe_lun);
				if (be_lun->vn != NULL)
					ctl_be_block_close(be_lun);
			}
		}

		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	}
	case PREVENT_ALLOW:
		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_write_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}

static int
ctl_be_block_nvme_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	switch (io->nvmeio.cmd.opc) {
	case NVME_OPC_DATASET_MANAGEMENT:
		DSM_RANGE(io) = 0;
		/* FALLTHROUGH */
	case NVME_OPC_FLUSH:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_WRITE_ZEROES:
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
		    links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	default:
		ctl_nvme_set_invalid_opcode(&io->nvmeio);
		ctl_config_write_done(io);
		break;
	}
	return (CTL_RETVAL_COMPLETE);
}

static int
ctl_be_block_config_write(union ctl_io *io)
{
	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		return (ctl_be_block_scsi_config_write(io));
	case CTL_IO_NVME:
		return (ctl_be_block_nvme_config_write(io));
	default:
		__assert_unreachable();
	}
}

static int
ctl_be_block_scsi_config_read(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	int retval = 0;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:
		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
			mtx_lock(&be_lun->queue_lock);
			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
			    &io->io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			taskqueue_enqueue(be_lun->io_taskqueue,
			    &be_lun->io_task);
			retval = CTL_RETVAL_QUEUED;
			break;
		}
		ctl_set_invalid_field(&io->scsiio,
		    /*sks_valid*/ 1,
		    /*command*/ 1,
		    /*field*/ 1,
		    /*bit_valid*/ 1,
		    /*bit*/ 4);
		ctl_config_read_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_read_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}

static int
ctl_be_block_nvme_config_read(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	switch (io->nvmeio.cmd.opc) {
	case NVME_OPC_IDENTIFY:
	{
		uint8_t cns;

		cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
		switch (cns) {
		case 0:
		case 3:
			mtx_lock(&be_lun->queue_lock);
			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
			    &io->io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			taskqueue_enqueue(be_lun->io_taskqueue,
			    &be_lun->io_task);
			return (CTL_RETVAL_QUEUED);
		default:
			ctl_nvme_set_invalid_field(&io->nvmeio);
			ctl_config_read_done(io);
			break;
		}
		break;
	}
	default:
		ctl_nvme_set_invalid_opcode(&io->nvmeio);
		ctl_config_read_done(io);
		break;
	}
	return (CTL_RETVAL_COMPLETE);
}

static int
ctl_be_block_config_read(union ctl_io *io)
{
	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		return (ctl_be_block_scsi_config_read(io));
	case CTL_IO_NVME_ADMIN:
		return (ctl_be_block_nvme_config_read(io));
	default:
		__assert_unreachable();
	}
}
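/*
 * Editorial note: the lun_info handler below emits backend-specific
 * XML for the LUN list (e.g. a "<num_threads>14</num_threads>"
 * element for a LUN with fourteen worker threads), which userland
 * tools such as ctladm can display.
 */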
static int
ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
{
	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
	int retval;

	retval = sbuf_cat(sb, "\t<num_threads>");
	if (retval != 0)
		goto bailout;
	retval = sbuf_printf(sb, "%d", lun->num_threads);
	if (retval != 0)
		goto bailout;
	retval = sbuf_cat(sb, "</num_threads>\n");

bailout:
	return (retval);
}

static uint64_t
ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
{
	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;

	if (lun->getattr == NULL)
		return (UINT64_MAX);
	return (lun->getattr(lun, attrname));
}

static int
ctl_be_block_init(void)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;

	sx_init(&softc->modify_lock, "ctlblock modify");
	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
	softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG,
	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/ 0);
	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
		softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG,
		    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/ 0);
	SLIST_INIT(&softc->lun_list);
	return (0);
}

static int
ctl_be_block_shutdown(void)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	struct ctl_be_block_lun *lun;

	mtx_lock(&softc->lock);
	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
		SLIST_REMOVE_HEAD(&softc->lun_list, links);
		softc->num_luns--;
		/*
		 * Drop our lock here.  Since ctl_remove_lun() can call
		 * back into us, this could potentially lead to a recursive
		 * lock of the same mutex, which would cause a hang.
		 */
		mtx_unlock(&softc->lock);
		ctl_remove_lun(&lun->cbe_lun);
		mtx_lock(&softc->lock);
	}
	mtx_unlock(&softc->lock);
	uma_zdestroy(softc->bufmin_zone);
	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
		uma_zdestroy(softc->bufmax_zone);
	uma_zdestroy(softc->beio_zone);
	mtx_destroy(&softc->lock);
	sx_destroy(&softc->modify_lock);
	return (0);
}