1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2003 Silicon Graphics International Corp. 5 * Copyright (c) 2009-2011 Spectra Logic Corporation 6 * Copyright (c) 2012,2021 The FreeBSD Foundation 7 * Copyright (c) 2014-2021 Alexander Motin <mav@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Portions of this software were developed by Edward Tomasz Napierala 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org> 14 * under sponsorship from the FreeBSD Foundation. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions, and the following disclaimer, 21 * without modification. 22 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 23 * substantially similar to the "NO WARRANTY" disclaimer below 24 * ("Disclaimer") and any redistribution must be conditioned upon 25 * including a substantially similar Disclaimer requirement for further 26 * binary redistribution. 27 * 28 * NO WARRANTY 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 37 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 38 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 * POSSIBILITY OF SUCH DAMAGES. 40 * 41 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 42 */ 43 /* 44 * CAM Target Layer driver backend for block devices. 45 * 46 * Author: Ken Merry <ken@FreeBSD.org> 47 */ 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/kernel.h> 51 #include <sys/types.h> 52 #include <sys/kthread.h> 53 #include <sys/bio.h> 54 #include <sys/fcntl.h> 55 #include <sys/limits.h> 56 #include <sys/lock.h> 57 #include <sys/mutex.h> 58 #include <sys/condvar.h> 59 #include <sys/malloc.h> 60 #include <sys/conf.h> 61 #include <sys/ioccom.h> 62 #include <sys/queue.h> 63 #include <sys/sbuf.h> 64 #include <sys/endian.h> 65 #include <sys/uio.h> 66 #include <sys/buf.h> 67 #include <sys/taskqueue.h> 68 #include <sys/vnode.h> 69 #include <sys/namei.h> 70 #include <sys/mount.h> 71 #include <sys/disk.h> 72 #include <sys/fcntl.h> 73 #include <sys/filedesc.h> 74 #include <sys/filio.h> 75 #include <sys/proc.h> 76 #include <sys/pcpu.h> 77 #include <sys/module.h> 78 #include <sys/sdt.h> 79 #include <sys/devicestat.h> 80 #include <sys/sysctl.h> 81 #include <sys/nv.h> 82 #include <sys/dnv.h> 83 #include <sys/sx.h> 84 #include <sys/unistd.h> 85 86 #include <geom/geom.h> 87 88 #include <cam/cam.h> 89 #include <cam/scsi/scsi_all.h> 90 #include <cam/scsi/scsi_da.h> 91 #include <cam/ctl/ctl_io.h> 92 #include <cam/ctl/ctl.h> 93 #include <cam/ctl/ctl_backend.h> 94 #include <cam/ctl/ctl_ioctl.h> 95 #include <cam/ctl/ctl_ha.h> 96 #include <cam/ctl/ctl_scsi_all.h> 97 #include <cam/ctl/ctl_private.h> 98 #include <cam/ctl/ctl_error.h> 99 100 /* 101 * The idea here is to allocate enough S/G space to handle at least 1MB I/Os. 102 * On systems with small maxphys it can be 8 128KB segments. On large systems 103 * it can be up to 8 1MB segments. I/Os larger than that we'll split. 104 */ 105 #define CTLBLK_MAX_SEGS 8 106 #define CTLBLK_HALF_SEGS (CTLBLK_MAX_SEGS / 2) 107 #define CTLBLK_MIN_SEG (128 * 1024) 108 #define CTLBLK_MAX_SEG MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys)) 109 #define CTLBLK_MAX_IO_SIZE (CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS) 110 111 #ifdef CTLBLK_DEBUG 112 #define DPRINTF(fmt, args...) \ 113 printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 114 #else 115 #define DPRINTF(fmt, args...) do {} while(0) 116 #endif 117 118 #define PRIV(io) \ 119 ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) 120 #define ARGS(io) \ 121 ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) 122 #define DSM_RANGE(io) ((io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].integer) 123 124 SDT_PROVIDER_DEFINE(cbb); 125 126 typedef enum { 127 CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, 128 CTL_BE_BLOCK_LUN_WAITING = 0x04, 129 } ctl_be_block_lun_flags; 130 131 typedef enum { 132 CTL_BE_BLOCK_NONE, 133 CTL_BE_BLOCK_DEV, 134 CTL_BE_BLOCK_FILE 135 } ctl_be_block_type; 136 137 struct ctl_be_block_filedata { 138 struct ucred *cred; 139 }; 140 141 union ctl_be_block_bedata { 142 struct ctl_be_block_filedata file; 143 }; 144 145 struct ctl_be_block_io; 146 struct ctl_be_block_lun; 147 148 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, 149 struct ctl_be_block_io *beio); 150 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun, 151 const char *attrname); 152 153 /* 154 * Backend LUN structure. There is a 1:1 mapping between a block device 155 * and a backend block LUN, and between a backend block LUN and a CTL LUN. 156 */ 157 struct ctl_be_block_lun { 158 struct ctl_be_lun cbe_lun; /* Must be first element. */ 159 struct ctl_lun_create_params params; 160 char *dev_path; 161 ctl_be_block_type dev_type; 162 struct vnode *vn; 163 union ctl_be_block_bedata backend; 164 cbb_dispatch_t dispatch; 165 cbb_dispatch_t lun_flush; 166 cbb_dispatch_t unmap; 167 cbb_dispatch_t get_lba_status; 168 cbb_getattr_t getattr; 169 uint64_t size_blocks; 170 uint64_t size_bytes; 171 struct ctl_be_block_softc *softc; 172 struct devstat *disk_stats; 173 ctl_be_block_lun_flags flags; 174 SLIST_ENTRY(ctl_be_block_lun) links; 175 struct taskqueue *io_taskqueue; 176 struct task io_task; 177 int num_threads; 178 STAILQ_HEAD(, ctl_io_hdr) input_queue; 179 STAILQ_HEAD(, ctl_io_hdr) config_read_queue; 180 STAILQ_HEAD(, ctl_io_hdr) config_write_queue; 181 STAILQ_HEAD(, ctl_io_hdr) datamove_queue; 182 struct mtx_padalign io_lock; 183 struct mtx_padalign queue_lock; 184 }; 185 186 /* 187 * Overall softc structure for the block backend module. 188 */ 189 struct ctl_be_block_softc { 190 struct sx modify_lock; 191 struct mtx lock; 192 int num_luns; 193 SLIST_HEAD(, ctl_be_block_lun) lun_list; 194 uma_zone_t beio_zone; 195 uma_zone_t bufmin_zone; 196 uma_zone_t bufmax_zone; 197 }; 198 199 static struct ctl_be_block_softc backend_block_softc; 200 201 /* 202 * Per-I/O information. 203 */ 204 struct ctl_be_block_io { 205 union ctl_io *io; 206 struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; 207 struct iovec xiovecs[CTLBLK_MAX_SEGS]; 208 int refcnt; 209 int bio_cmd; 210 int two_sglists; 211 int num_segs; 212 int num_bios_sent; 213 int num_bios_done; 214 int send_complete; 215 int first_error; 216 uint64_t first_error_offset; 217 struct bintime ds_t0; 218 devstat_tag_type ds_tag_type; 219 devstat_trans_flags ds_trans_type; 220 uint64_t io_len; 221 uint64_t io_offset; 222 int io_arg; 223 struct ctl_be_block_softc *softc; 224 struct ctl_be_block_lun *lun; 225 void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ 226 }; 227 228 extern struct ctl_softc *control_softc; 229 230 static int cbb_num_threads = 32; 231 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 232 "CAM Target Layer Block Backend"); 233 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN, 234 &cbb_num_threads, 0, "Number of threads per backing file"); 235 236 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); 237 static void ctl_free_beio(struct ctl_be_block_io *beio); 238 static void ctl_complete_beio(struct ctl_be_block_io *beio); 239 static int ctl_be_block_move_done(union ctl_io *io, bool samethr); 240 static void ctl_be_block_biodone(struct bio *bio); 241 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 242 struct ctl_be_block_io *beio); 243 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 244 struct ctl_be_block_io *beio); 245 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 246 struct ctl_be_block_io *beio); 247 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, 248 const char *attrname); 249 static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun, 250 struct ctl_be_block_io *beio); 251 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 252 struct ctl_be_block_io *beio); 253 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 254 struct ctl_be_block_io *beio); 255 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 256 struct ctl_be_block_io *beio); 257 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, 258 const char *attrname); 259 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 260 union ctl_io *io); 261 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 262 union ctl_io *io); 263 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 264 union ctl_io *io); 265 static void ctl_be_block_worker(void *context, int pending); 266 static int ctl_be_block_submit(union ctl_io *io); 267 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 268 int flag, struct thread *td); 269 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, 270 struct ctl_lun_req *req); 271 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, 272 struct ctl_lun_req *req); 273 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); 274 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun, 275 struct ctl_lun_req *req); 276 static int ctl_be_block_create(struct ctl_be_block_softc *softc, 277 struct ctl_lun_req *req); 278 static int ctl_be_block_rm(struct ctl_be_block_softc *softc, 279 struct ctl_lun_req *req); 280 static int ctl_be_block_modify(struct ctl_be_block_softc *softc, 281 struct ctl_lun_req *req); 282 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun); 283 static int ctl_be_block_config_write(union ctl_io *io); 284 static int ctl_be_block_config_read(union ctl_io *io); 285 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb); 286 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname); 287 static int ctl_be_block_init(void); 288 static int ctl_be_block_shutdown(void); 289 290 static struct ctl_backend_driver ctl_be_block_driver = 291 { 292 .name = "block", 293 .flags = CTL_BE_FLAG_HAS_CONFIG, 294 .init = ctl_be_block_init, 295 .shutdown = ctl_be_block_shutdown, 296 .data_submit = ctl_be_block_submit, 297 .config_read = ctl_be_block_config_read, 298 .config_write = ctl_be_block_config_write, 299 .ioctl = ctl_be_block_ioctl, 300 .lun_info = ctl_be_block_lun_info, 301 .lun_attr = ctl_be_block_lun_attr 302 }; 303 304 MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend"); 305 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); 306 307 static void 308 ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg, 309 size_t len) 310 { 311 312 if (len <= CTLBLK_MIN_SEG) { 313 sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK); 314 } else { 315 KASSERT(len <= CTLBLK_MAX_SEG, 316 ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG)); 317 sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK); 318 } 319 sg->len = len; 320 } 321 322 static void 323 ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg) 324 { 325 326 if (sg->len <= CTLBLK_MIN_SEG) { 327 uma_zfree(softc->bufmin_zone, sg->addr); 328 } else { 329 KASSERT(sg->len <= CTLBLK_MAX_SEG, 330 ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG)); 331 uma_zfree(softc->bufmax_zone, sg->addr); 332 } 333 } 334 335 static struct ctl_be_block_io * 336 ctl_alloc_beio(struct ctl_be_block_softc *softc) 337 { 338 struct ctl_be_block_io *beio; 339 340 beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO); 341 beio->softc = softc; 342 beio->refcnt = 1; 343 return (beio); 344 } 345 346 static void 347 ctl_real_free_beio(struct ctl_be_block_io *beio) 348 { 349 struct ctl_be_block_softc *softc = beio->softc; 350 int i; 351 352 for (i = 0; i < beio->num_segs; i++) { 353 ctl_free_seg(softc, &beio->sg_segs[i]); 354 355 /* For compare we had two equal S/G lists. */ 356 if (beio->two_sglists) { 357 ctl_free_seg(softc, 358 &beio->sg_segs[i + CTLBLK_HALF_SEGS]); 359 } 360 } 361 362 uma_zfree(softc->beio_zone, beio); 363 } 364 365 static void 366 ctl_refcnt_beio(void *arg, int diff) 367 { 368 struct ctl_be_block_io *beio = arg; 369 370 if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0) 371 ctl_real_free_beio(beio); 372 } 373 374 static void 375 ctl_free_beio(struct ctl_be_block_io *beio) 376 { 377 378 ctl_refcnt_beio(beio, -1); 379 } 380 381 static void 382 ctl_complete_beio(struct ctl_be_block_io *beio) 383 { 384 union ctl_io *io = beio->io; 385 386 if (beio->beio_cont != NULL) { 387 beio->beio_cont(beio); 388 } else { 389 ctl_free_beio(beio); 390 ctl_data_submit_done(io); 391 } 392 } 393 394 static void 395 ctl_be_block_io_error(union ctl_io *io, int bio_cmd, uint16_t retry_count) 396 { 397 switch (io->io_hdr.io_type) { 398 case CTL_IO_SCSI: 399 if (bio_cmd == BIO_FLUSH) { 400 /* XXX KDM is there is a better error here? */ 401 ctl_set_internal_failure(&io->scsiio, 402 /*sks_valid*/ 1, 403 retry_count); 404 } else { 405 ctl_set_medium_error(&io->scsiio, bio_cmd == BIO_READ); 406 } 407 break; 408 case CTL_IO_NVME: 409 switch (bio_cmd) { 410 case BIO_FLUSH: 411 case BIO_WRITE: 412 ctl_nvme_set_write_fault(&io->nvmeio); 413 break; 414 case BIO_READ: 415 ctl_nvme_set_unrecoverable_read_error(&io->nvmeio); 416 break; 417 default: 418 ctl_nvme_set_internal_error(&io->nvmeio); 419 break; 420 } 421 break; 422 default: 423 __assert_unreachable(); 424 } 425 } 426 427 static size_t 428 cmp(uint8_t *a, uint8_t *b, size_t size) 429 { 430 size_t i; 431 432 for (i = 0; i < size; i++) { 433 if (a[i] != b[i]) 434 break; 435 } 436 return (i); 437 } 438 439 static void 440 ctl_be_block_compare(union ctl_io *io) 441 { 442 struct ctl_be_block_io *beio; 443 uint64_t off, res; 444 int i; 445 446 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 447 off = 0; 448 for (i = 0; i < beio->num_segs; i++) { 449 res = cmp(beio->sg_segs[i].addr, 450 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, 451 beio->sg_segs[i].len); 452 off += res; 453 if (res < beio->sg_segs[i].len) 454 break; 455 } 456 if (i < beio->num_segs) { 457 ctl_io_set_compare_failure(io, off); 458 } else 459 ctl_io_set_success(io); 460 } 461 462 static int 463 ctl_be_block_move_done(union ctl_io *io, bool samethr) 464 { 465 struct ctl_be_block_io *beio; 466 struct ctl_be_block_lun *be_lun; 467 struct ctl_lba_len_flags *lbalen; 468 469 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 470 471 DPRINTF("entered\n"); 472 ctl_add_kern_rel_offset(io, ctl_kern_data_len(io)); 473 474 /* 475 * We set status at this point for read and compare commands. 476 */ 477 if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && 478 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) { 479 lbalen = ARGS(io); 480 if (lbalen->flags & CTL_LLF_READ) { 481 ctl_io_set_success(io); 482 } else if (lbalen->flags & CTL_LLF_COMPARE) { 483 /* We have two data blocks ready for comparison. */ 484 ctl_be_block_compare(io); 485 } 486 } 487 488 /* 489 * If this is a read, or a write with errors, it is done. 490 */ 491 if ((beio->bio_cmd == BIO_READ) 492 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 493 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 494 ctl_complete_beio(beio); 495 return (0); 496 } 497 498 /* 499 * At this point, we have a write and the DMA completed successfully. 500 * If we were called synchronously in the original thread then just 501 * dispatch, otherwise we now have to queue it to the task queue to 502 * execute the backend I/O. That is because we do blocking 503 * memory allocations, and in the file backing case, blocking I/O. 504 * This move done routine is generally called in the SIM's 505 * interrupt context, and therefore we cannot block. 506 */ 507 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 508 if (samethr) { 509 be_lun->dispatch(be_lun, beio); 510 } else { 511 mtx_lock(&be_lun->queue_lock); 512 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 513 mtx_unlock(&be_lun->queue_lock); 514 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 515 } 516 return (0); 517 } 518 519 static void 520 ctl_be_block_biodone(struct bio *bio) 521 { 522 struct ctl_be_block_io *beio = bio->bio_caller1; 523 struct ctl_be_block_lun *be_lun = beio->lun; 524 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 525 union ctl_io *io; 526 int error; 527 528 io = beio->io; 529 530 DPRINTF("entered\n"); 531 532 error = bio->bio_error; 533 mtx_lock(&be_lun->io_lock); 534 if (error != 0 && 535 (beio->first_error == 0 || 536 bio->bio_offset < beio->first_error_offset)) { 537 beio->first_error = error; 538 beio->first_error_offset = bio->bio_offset; 539 } 540 541 beio->num_bios_done++; 542 543 /* 544 * XXX KDM will this cause WITNESS to complain? Holding a lock 545 * during the free might cause it to complain. 546 */ 547 g_destroy_bio(bio); 548 549 /* 550 * If the send complete bit isn't set, or we aren't the last I/O to 551 * complete, then we're done. 552 */ 553 if ((beio->send_complete == 0) 554 || (beio->num_bios_done < beio->num_bios_sent)) { 555 mtx_unlock(&be_lun->io_lock); 556 return; 557 } 558 559 /* 560 * At this point, we've verified that we are the last I/O to 561 * complete, so it's safe to drop the lock. 562 */ 563 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 564 beio->ds_tag_type, beio->ds_trans_type, 565 /*now*/ NULL, /*then*/&beio->ds_t0); 566 mtx_unlock(&be_lun->io_lock); 567 568 /* 569 * If there are any errors from the backing device, we fail the 570 * entire I/O with a medium error. 571 */ 572 error = beio->first_error; 573 if (error != 0) { 574 if (error == EOPNOTSUPP) { 575 ctl_io_set_invalid_opcode(io); 576 } else if (error == ENOSPC || error == EDQUOT) { 577 ctl_io_set_space_alloc_fail(io); 578 } else if (error == EROFS || error == EACCES) { 579 ctl_io_set_hw_write_protected(io); 580 } else { 581 ctl_be_block_io_error(io, beio->bio_cmd, 582 /*retry_count*/ 0xbad2); 583 } 584 ctl_complete_beio(beio); 585 return; 586 } 587 588 /* 589 * If this is a write, a flush, a delete or verify, we're all done. 590 * If this is a read, we can now send the data to the user. 591 */ 592 if ((beio->bio_cmd == BIO_WRITE) 593 || (beio->bio_cmd == BIO_FLUSH) 594 || (beio->bio_cmd == BIO_DELETE) 595 || (ARGS(io)->flags & CTL_LLF_VERIFY)) { 596 ctl_io_set_success(io); 597 ctl_complete_beio(beio); 598 } else { 599 if ((ARGS(io)->flags & CTL_LLF_READ) && 600 beio->beio_cont == NULL) { 601 ctl_io_set_success(io); 602 if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT) 603 ctl_serseq_done(io); 604 } 605 ctl_datamove(io); 606 } 607 } 608 609 static void 610 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 611 struct ctl_be_block_io *beio) 612 { 613 union ctl_io *io = beio->io; 614 struct mount *mountpoint; 615 int error; 616 617 DPRINTF("entered\n"); 618 619 binuptime(&beio->ds_t0); 620 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 621 622 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 623 624 vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) | 625 LK_RETRY); 626 error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT, 627 curthread); 628 VOP_UNLOCK(be_lun->vn); 629 630 vn_finished_write(mountpoint); 631 632 mtx_lock(&be_lun->io_lock); 633 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 634 beio->ds_tag_type, beio->ds_trans_type, 635 /*now*/ NULL, /*then*/&beio->ds_t0); 636 mtx_unlock(&be_lun->io_lock); 637 638 if (error == 0) 639 ctl_io_set_success(io); 640 else { 641 ctl_be_block_io_error(io, BIO_FLUSH, 642 /*retry_count*/ 0xbad1); 643 } 644 645 ctl_complete_beio(beio); 646 } 647 648 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t"); 649 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t"); 650 SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t"); 651 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t"); 652 653 static void 654 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 655 struct ctl_be_block_io *beio) 656 { 657 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 658 struct ctl_be_block_filedata *file_data; 659 union ctl_io *io; 660 struct uio xuio; 661 struct iovec *xiovec; 662 size_t s; 663 int error, flags, i; 664 665 DPRINTF("entered\n"); 666 667 file_data = &be_lun->backend.file; 668 io = beio->io; 669 flags = 0; 670 if (ARGS(io)->flags & CTL_LLF_DPO) 671 flags |= IO_DIRECT; 672 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 673 flags |= IO_SYNC; 674 675 bzero(&xuio, sizeof(xuio)); 676 if (beio->bio_cmd == BIO_READ) { 677 SDT_PROBE0(cbb, , read, file_start); 678 xuio.uio_rw = UIO_READ; 679 } else { 680 SDT_PROBE0(cbb, , write, file_start); 681 xuio.uio_rw = UIO_WRITE; 682 } 683 xuio.uio_offset = beio->io_offset; 684 xuio.uio_resid = beio->io_len; 685 xuio.uio_segflg = UIO_SYSSPACE; 686 xuio.uio_iov = beio->xiovecs; 687 xuio.uio_iovcnt = beio->num_segs; 688 xuio.uio_td = curthread; 689 690 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 691 xiovec->iov_base = beio->sg_segs[i].addr; 692 xiovec->iov_len = beio->sg_segs[i].len; 693 } 694 695 binuptime(&beio->ds_t0); 696 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 697 698 if (beio->bio_cmd == BIO_READ) { 699 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 700 701 if (beio->beio_cont == NULL && 702 cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT) 703 ctl_serseq_done(io); 704 /* 705 * UFS pays attention to IO_DIRECT for reads. If the 706 * DIRECTIO option is configured into the kernel, it calls 707 * ffs_rawread(). But that only works for single-segment 708 * uios with user space addresses. In our case, with a 709 * kernel uio, it still reads into the buffer cache, but it 710 * will just try to release the buffer from the cache later 711 * on in ffs_read(). 712 * 713 * ZFS does not pay attention to IO_DIRECT for reads. 714 * 715 * UFS does not pay attention to IO_SYNC for reads. 716 * 717 * ZFS pays attention to IO_SYNC (which translates into the 718 * Solaris define FRSYNC for zfs_read()) for reads. It 719 * attempts to sync the file before reading. 720 */ 721 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); 722 723 VOP_UNLOCK(be_lun->vn); 724 SDT_PROBE0(cbb, , read, file_done); 725 if (error == 0 && xuio.uio_resid > 0) { 726 /* 727 * If we read less then requested (EOF), then 728 * we should clean the rest of the buffer. 729 */ 730 s = beio->io_len - xuio.uio_resid; 731 for (i = 0; i < beio->num_segs; i++) { 732 if (s >= beio->sg_segs[i].len) { 733 s -= beio->sg_segs[i].len; 734 continue; 735 } 736 bzero((uint8_t *)beio->sg_segs[i].addr + s, 737 beio->sg_segs[i].len - s); 738 s = 0; 739 } 740 } 741 } else { 742 struct mount *mountpoint; 743 744 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 745 vn_lock(be_lun->vn, vn_lktype_write(mountpoint, 746 be_lun->vn) | LK_RETRY); 747 748 /* 749 * UFS pays attention to IO_DIRECT for writes. The write 750 * is done asynchronously. (Normally the write would just 751 * get put into cache. 752 * 753 * UFS pays attention to IO_SYNC for writes. It will 754 * attempt to write the buffer out synchronously if that 755 * flag is set. 756 * 757 * ZFS does not pay attention to IO_DIRECT for writes. 758 * 759 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 760 * for writes. It will flush the transaction from the 761 * cache before returning. 762 */ 763 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); 764 VOP_UNLOCK(be_lun->vn); 765 766 vn_finished_write(mountpoint); 767 SDT_PROBE0(cbb, , write, file_done); 768 } 769 770 mtx_lock(&be_lun->io_lock); 771 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 772 beio->ds_tag_type, beio->ds_trans_type, 773 /*now*/ NULL, /*then*/&beio->ds_t0); 774 mtx_unlock(&be_lun->io_lock); 775 776 /* 777 * If we got an error, set the sense data to "MEDIUM ERROR" and 778 * return the I/O to the user. 779 */ 780 if (error != 0) { 781 if (error == ENOSPC || error == EDQUOT) { 782 ctl_io_set_space_alloc_fail(io); 783 } else if (error == EROFS || error == EACCES) { 784 ctl_io_set_hw_write_protected(io); 785 } else { 786 ctl_be_block_io_error(io, beio->bio_cmd, 0); 787 } 788 ctl_complete_beio(beio); 789 return; 790 } 791 792 /* 793 * If this is a write or a verify, we're all done. 794 * If this is a read, we can now send the data to the user. 795 */ 796 if ((beio->bio_cmd == BIO_WRITE) || 797 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 798 ctl_io_set_success(io); 799 ctl_complete_beio(beio); 800 } else { 801 if ((ARGS(io)->flags & CTL_LLF_READ) && 802 beio->beio_cont == NULL) { 803 ctl_io_set_success(io); 804 if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT) 805 ctl_serseq_done(io); 806 } 807 ctl_datamove(io); 808 } 809 } 810 811 static void 812 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 813 struct ctl_be_block_io *beio) 814 { 815 union ctl_io *io = beio->io; 816 struct ctl_lba_len_flags *lbalen = ARGS(io); 817 struct scsi_get_lba_status_data *data; 818 off_t roff, off; 819 int error, status; 820 821 DPRINTF("entered\n"); 822 823 CTL_IO_ASSERT(io, SCSI); 824 825 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 826 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 827 error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 828 0, curthread->td_ucred, curthread); 829 if (error == 0 && off > roff) 830 status = 0; /* mapped up to off */ 831 else { 832 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 833 0, curthread->td_ucred, curthread); 834 if (error == 0 && off > roff) 835 status = 1; /* deallocated up to off */ 836 else { 837 status = 0; /* unknown up to the end */ 838 off = be_lun->size_bytes; 839 } 840 } 841 VOP_UNLOCK(be_lun->vn); 842 843 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 844 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 845 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 846 lbalen->lba), data->descr[0].length); 847 data->descr[0].status = status; 848 849 ctl_complete_beio(beio); 850 } 851 852 static uint64_t 853 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) 854 { 855 struct vattr vattr; 856 struct statfs statfs; 857 uint64_t val; 858 int error; 859 860 val = UINT64_MAX; 861 if (be_lun->vn == NULL) 862 return (val); 863 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 864 if (strcmp(attrname, "blocksused") == 0) { 865 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 866 if (error == 0) 867 val = vattr.va_bytes / be_lun->cbe_lun.blocksize; 868 } 869 if (strcmp(attrname, "blocksavail") == 0 && 870 !VN_IS_DOOMED(be_lun->vn)) { 871 error = VFS_STATFS(be_lun->vn->v_mount, &statfs); 872 if (error == 0) 873 val = statfs.f_bavail * statfs.f_bsize / 874 be_lun->cbe_lun.blocksize; 875 } 876 VOP_UNLOCK(be_lun->vn); 877 return (val); 878 } 879 880 static void 881 ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun, 882 struct ctl_be_block_io *beio) 883 { 884 struct ctl_be_block_filedata *file_data; 885 union ctl_io *io; 886 struct ctl_ptr_len_flags *ptrlen; 887 struct scsi_unmap_desc *buf, *end; 888 struct mount *mp; 889 off_t off, len; 890 int error; 891 892 io = beio->io; 893 file_data = &be_lun->backend.file; 894 mp = NULL; 895 error = 0; 896 897 binuptime(&beio->ds_t0); 898 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 899 900 (void)vn_start_write(be_lun->vn, &mp, V_WAIT); 901 vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY); 902 if (beio->io_offset == -1) { 903 beio->io_len = 0; 904 ptrlen = (struct ctl_ptr_len_flags *) 905 &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 906 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 907 end = buf + ptrlen->len / sizeof(*buf); 908 for (; buf < end; buf++) { 909 off = (off_t)scsi_8btou64(buf->lba) * 910 be_lun->cbe_lun.blocksize; 911 len = (off_t)scsi_4btoul(buf->length) * 912 be_lun->cbe_lun.blocksize; 913 beio->io_len += len; 914 error = vn_deallocate(be_lun->vn, &off, &len, 915 0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, 916 NOCRED); 917 if (error != 0) 918 break; 919 } 920 } else { 921 /* WRITE_SAME */ 922 off = beio->io_offset; 923 len = beio->io_len; 924 error = vn_deallocate(be_lun->vn, &off, &len, 0, 925 IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED); 926 } 927 VOP_UNLOCK(be_lun->vn); 928 vn_finished_write(mp); 929 930 mtx_lock(&be_lun->io_lock); 931 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 932 beio->ds_tag_type, beio->ds_trans_type, 933 /*now*/ NULL, /*then*/&beio->ds_t0); 934 mtx_unlock(&be_lun->io_lock); 935 936 /* 937 * If we got an error, set the sense data to "MEDIUM ERROR" and 938 * return the I/O to the user. 939 */ 940 switch (error) { 941 case 0: 942 ctl_io_set_success(io); 943 break; 944 case ENOSPC: 945 case EDQUOT: 946 ctl_io_set_space_alloc_fail(io); 947 break; 948 case EROFS: 949 case EACCES: 950 ctl_io_set_hw_write_protected(io); 951 break; 952 default: 953 ctl_be_block_io_error(io, BIO_DELETE, 0); 954 } 955 ctl_complete_beio(beio); 956 } 957 958 static void 959 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, 960 struct ctl_be_block_io *beio) 961 { 962 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 963 union ctl_io *io; 964 struct cdevsw *csw; 965 struct cdev *dev; 966 struct uio xuio; 967 struct iovec *xiovec; 968 int error, flags, i, ref; 969 970 DPRINTF("entered\n"); 971 972 io = beio->io; 973 flags = 0; 974 if (ARGS(io)->flags & CTL_LLF_DPO) 975 flags |= IO_DIRECT; 976 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 977 flags |= IO_SYNC; 978 979 bzero(&xuio, sizeof(xuio)); 980 if (beio->bio_cmd == BIO_READ) { 981 SDT_PROBE0(cbb, , read, file_start); 982 xuio.uio_rw = UIO_READ; 983 } else { 984 SDT_PROBE0(cbb, , write, file_start); 985 xuio.uio_rw = UIO_WRITE; 986 } 987 xuio.uio_offset = beio->io_offset; 988 xuio.uio_resid = beio->io_len; 989 xuio.uio_segflg = UIO_SYSSPACE; 990 xuio.uio_iov = beio->xiovecs; 991 xuio.uio_iovcnt = beio->num_segs; 992 xuio.uio_td = curthread; 993 994 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 995 xiovec->iov_base = beio->sg_segs[i].addr; 996 xiovec->iov_len = beio->sg_segs[i].len; 997 } 998 999 binuptime(&beio->ds_t0); 1000 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 1001 1002 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1003 if (csw) { 1004 if (beio->bio_cmd == BIO_READ) { 1005 if (beio->beio_cont == NULL && 1006 cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT) 1007 ctl_serseq_done(io); 1008 error = csw->d_read(dev, &xuio, flags); 1009 } else 1010 error = csw->d_write(dev, &xuio, flags); 1011 dev_relthread(dev, ref); 1012 } else 1013 error = ENXIO; 1014 1015 if (beio->bio_cmd == BIO_READ) 1016 SDT_PROBE0(cbb, , read, file_done); 1017 else 1018 SDT_PROBE0(cbb, , write, file_done); 1019 1020 mtx_lock(&be_lun->io_lock); 1021 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 1022 beio->ds_tag_type, beio->ds_trans_type, 1023 /*now*/ NULL, /*then*/&beio->ds_t0); 1024 mtx_unlock(&be_lun->io_lock); 1025 1026 /* 1027 * If we got an error, set the sense data to "MEDIUM ERROR" and 1028 * return the I/O to the user. 1029 */ 1030 if (error != 0) { 1031 if (error == ENOSPC || error == EDQUOT) { 1032 ctl_io_set_space_alloc_fail(io); 1033 } else if (error == EROFS || error == EACCES) { 1034 ctl_io_set_hw_write_protected(io); 1035 } else { 1036 ctl_be_block_io_error(io, beio->bio_cmd, 0); 1037 } 1038 ctl_complete_beio(beio); 1039 return; 1040 } 1041 1042 /* 1043 * If this is a write or a verify, we're all done. 1044 * If this is a read, we can now send the data to the user. 1045 */ 1046 if ((beio->bio_cmd == BIO_WRITE) || 1047 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 1048 ctl_io_set_success(io); 1049 ctl_complete_beio(beio); 1050 } else { 1051 if ((ARGS(io)->flags & CTL_LLF_READ) && 1052 beio->beio_cont == NULL) { 1053 ctl_io_set_success(io); 1054 if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT) 1055 ctl_serseq_done(io); 1056 } 1057 ctl_datamove(io); 1058 } 1059 } 1060 1061 static void 1062 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, 1063 struct ctl_be_block_io *beio) 1064 { 1065 union ctl_io *io = beio->io; 1066 struct cdevsw *csw; 1067 struct cdev *dev; 1068 struct ctl_lba_len_flags *lbalen = ARGS(io); 1069 struct scsi_get_lba_status_data *data; 1070 off_t roff, off; 1071 int error, ref, status; 1072 1073 DPRINTF("entered\n"); 1074 1075 CTL_IO_ASSERT(io, SCSI); 1076 1077 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1078 if (csw == NULL) { 1079 status = 0; /* unknown up to the end */ 1080 off = be_lun->size_bytes; 1081 goto done; 1082 } 1083 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 1084 error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD, 1085 curthread); 1086 if (error == 0 && off > roff) 1087 status = 0; /* mapped up to off */ 1088 else { 1089 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD, 1090 curthread); 1091 if (error == 0 && off > roff) 1092 status = 1; /* deallocated up to off */ 1093 else { 1094 status = 0; /* unknown up to the end */ 1095 off = be_lun->size_bytes; 1096 } 1097 } 1098 dev_relthread(dev, ref); 1099 1100 done: 1101 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 1102 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 1103 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 1104 lbalen->lba), data->descr[0].length); 1105 data->descr[0].status = status; 1106 1107 ctl_complete_beio(beio); 1108 } 1109 1110 static void 1111 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 1112 struct ctl_be_block_io *beio) 1113 { 1114 struct bio *bio; 1115 struct cdevsw *csw; 1116 struct cdev *dev; 1117 int ref; 1118 1119 DPRINTF("entered\n"); 1120 1121 /* This can't fail, it's a blocking allocation. */ 1122 bio = g_alloc_bio(); 1123 1124 bio->bio_cmd = BIO_FLUSH; 1125 bio->bio_offset = 0; 1126 bio->bio_data = 0; 1127 bio->bio_done = ctl_be_block_biodone; 1128 bio->bio_caller1 = beio; 1129 bio->bio_pblkno = 0; 1130 1131 /* 1132 * We don't need to acquire the LUN lock here, because we are only 1133 * sending one bio, and so there is no other context to synchronize 1134 * with. 1135 */ 1136 beio->num_bios_sent = 1; 1137 beio->send_complete = 1; 1138 1139 binuptime(&beio->ds_t0); 1140 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1141 1142 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1143 if (csw) { 1144 bio->bio_dev = dev; 1145 csw->d_strategy(bio); 1146 dev_relthread(dev, ref); 1147 } else { 1148 bio->bio_error = ENXIO; 1149 ctl_be_block_biodone(bio); 1150 } 1151 } 1152 1153 static void 1154 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 1155 struct ctl_be_block_io *beio, 1156 uint64_t off, uint64_t len, int last) 1157 { 1158 struct bio *bio; 1159 uint64_t maxlen; 1160 struct cdevsw *csw; 1161 struct cdev *dev; 1162 int ref; 1163 1164 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1165 maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); 1166 while (len > 0) { 1167 bio = g_alloc_bio(); 1168 bio->bio_cmd = BIO_DELETE; 1169 bio->bio_dev = dev; 1170 bio->bio_offset = off; 1171 bio->bio_length = MIN(len, maxlen); 1172 bio->bio_data = 0; 1173 bio->bio_done = ctl_be_block_biodone; 1174 bio->bio_caller1 = beio; 1175 bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; 1176 1177 off += bio->bio_length; 1178 len -= bio->bio_length; 1179 1180 mtx_lock(&be_lun->io_lock); 1181 beio->num_bios_sent++; 1182 if (last && len == 0) 1183 beio->send_complete = 1; 1184 mtx_unlock(&be_lun->io_lock); 1185 1186 if (csw) { 1187 csw->d_strategy(bio); 1188 } else { 1189 bio->bio_error = ENXIO; 1190 ctl_be_block_biodone(bio); 1191 } 1192 } 1193 if (csw) 1194 dev_relthread(dev, ref); 1195 } 1196 1197 static void 1198 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1199 struct ctl_be_block_io *beio) 1200 { 1201 union ctl_io *io; 1202 struct ctl_ptr_len_flags *ptrlen; 1203 struct scsi_unmap_desc *buf, *end; 1204 uint64_t len; 1205 1206 io = beio->io; 1207 1208 DPRINTF("entered\n"); 1209 1210 binuptime(&beio->ds_t0); 1211 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1212 1213 if (beio->io_offset == -1) { 1214 beio->io_len = 0; 1215 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1216 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 1217 end = buf + ptrlen->len / sizeof(*buf); 1218 for (; buf < end; buf++) { 1219 len = (uint64_t)scsi_4btoul(buf->length) * 1220 be_lun->cbe_lun.blocksize; 1221 beio->io_len += len; 1222 ctl_be_block_unmap_dev_range(be_lun, beio, 1223 scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, 1224 len, (end - buf < 2) ? TRUE : FALSE); 1225 } 1226 } else 1227 ctl_be_block_unmap_dev_range(be_lun, beio, 1228 beio->io_offset, beio->io_len, TRUE); 1229 } 1230 1231 static void 1232 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 1233 struct ctl_be_block_io *beio) 1234 { 1235 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 1236 struct bio *bio; 1237 struct cdevsw *csw; 1238 struct cdev *dev; 1239 off_t cur_offset; 1240 int i, max_iosize, ref; 1241 1242 DPRINTF("entered\n"); 1243 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1244 1245 /* 1246 * We have to limit our I/O size to the maximum supported by the 1247 * backend device. 1248 */ 1249 if (csw) { 1250 max_iosize = dev->si_iosize_max; 1251 if (max_iosize <= 0) 1252 max_iosize = DFLTPHYS; 1253 } else 1254 max_iosize = maxphys; 1255 1256 cur_offset = beio->io_offset; 1257 for (i = 0; i < beio->num_segs; i++) { 1258 size_t cur_size; 1259 uint8_t *cur_ptr; 1260 1261 cur_size = beio->sg_segs[i].len; 1262 cur_ptr = beio->sg_segs[i].addr; 1263 1264 while (cur_size > 0) { 1265 /* This can't fail, it's a blocking allocation. */ 1266 bio = g_alloc_bio(); 1267 1268 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 1269 1270 bio->bio_cmd = beio->bio_cmd; 1271 bio->bio_dev = dev; 1272 bio->bio_caller1 = beio; 1273 bio->bio_length = min(cur_size, max_iosize); 1274 bio->bio_offset = cur_offset; 1275 bio->bio_data = cur_ptr; 1276 bio->bio_done = ctl_be_block_biodone; 1277 bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; 1278 1279 cur_offset += bio->bio_length; 1280 cur_ptr += bio->bio_length; 1281 cur_size -= bio->bio_length; 1282 1283 TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 1284 beio->num_bios_sent++; 1285 } 1286 } 1287 beio->send_complete = 1; 1288 binuptime(&beio->ds_t0); 1289 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1290 1291 /* 1292 * Fire off all allocated requests! 1293 */ 1294 while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1295 TAILQ_REMOVE(&queue, bio, bio_queue); 1296 if (csw) 1297 csw->d_strategy(bio); 1298 else { 1299 bio->bio_error = ENXIO; 1300 ctl_be_block_biodone(bio); 1301 } 1302 } 1303 if (csw) 1304 dev_relthread(dev, ref); 1305 } 1306 1307 static uint64_t 1308 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) 1309 { 1310 struct diocgattr_arg arg; 1311 struct cdevsw *csw; 1312 struct cdev *dev; 1313 int error, ref; 1314 1315 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1316 if (csw == NULL) 1317 return (UINT64_MAX); 1318 strlcpy(arg.name, attrname, sizeof(arg.name)); 1319 arg.len = sizeof(arg.value.off); 1320 if (csw->d_ioctl) { 1321 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 1322 curthread); 1323 } else 1324 error = ENODEV; 1325 dev_relthread(dev, ref); 1326 if (error != 0) 1327 return (UINT64_MAX); 1328 return (arg.value.off); 1329 } 1330 1331 static void 1332 ctl_be_block_namespace_data(struct ctl_be_block_lun *be_lun, 1333 union ctl_io *io) 1334 { 1335 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1336 struct nvme_namespace_data *nsdata; 1337 1338 nsdata = (struct nvme_namespace_data *)io->nvmeio.kern_data_ptr; 1339 memset(nsdata, 0, sizeof(*nsdata)); 1340 nsdata->nsze = htole64(be_lun->size_blocks); 1341 nsdata->ncap = nsdata->nsze; 1342 nsdata->nuse = nsdata->nsze; 1343 nsdata->nlbaf = 1 - 1; 1344 nsdata->dlfeat = NVMEM(NVME_NS_DATA_DLFEAT_DWZ) | 1345 NVMEF(NVME_NS_DATA_DLFEAT_READ, NVME_NS_DATA_DLFEAT_READ_00); 1346 nsdata->flbas = NVMEF(NVME_NS_DATA_FLBAS_FORMAT, 0); 1347 nsdata->lbaf[0] = NVMEF(NVME_NS_DATA_LBAF_LBADS, 1348 ffs(cbe_lun->blocksize) - 1); 1349 1350 ctl_lun_nsdata_ids(cbe_lun, nsdata); 1351 ctl_config_read_done(io); 1352 } 1353 1354 static void 1355 ctl_be_block_nvme_ids(struct ctl_be_block_lun *be_lun, 1356 union ctl_io *io) 1357 { 1358 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1359 1360 ctl_lun_nvme_ids(cbe_lun, io->nvmeio.kern_data_ptr); 1361 ctl_config_read_done(io); 1362 } 1363 1364 static void 1365 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, 1366 union ctl_io *io) 1367 { 1368 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1369 struct ctl_be_block_io *beio; 1370 struct ctl_lba_len_flags *lbalen; 1371 1372 DPRINTF("entered\n"); 1373 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1374 lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1375 1376 beio->io_len = lbalen->len * cbe_lun->blocksize; 1377 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1378 beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; 1379 beio->bio_cmd = BIO_FLUSH; 1380 beio->ds_trans_type = DEVSTAT_NO_DATA; 1381 DPRINTF("SYNC\n"); 1382 be_lun->lun_flush(be_lun, beio); 1383 } 1384 1385 static void 1386 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1387 { 1388 union ctl_io *io; 1389 1390 io = beio->io; 1391 ctl_free_beio(beio); 1392 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1393 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1394 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1395 ctl_config_write_done(io); 1396 return; 1397 } 1398 1399 ctl_be_block_config_write(io); 1400 } 1401 1402 static void 1403 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1404 union ctl_io *io) 1405 { 1406 struct ctl_be_block_softc *softc = be_lun->softc; 1407 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1408 struct ctl_be_block_io *beio; 1409 struct ctl_lba_len_flags *lbalen; 1410 uint64_t len_left, lba; 1411 uint32_t pb, pbo, adj; 1412 int i, seglen; 1413 uint8_t *buf, *end; 1414 1415 DPRINTF("entered\n"); 1416 1417 CTL_IO_ASSERT(io, SCSI); 1418 1419 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1420 lbalen = ARGS(io); 1421 1422 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || 1423 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { 1424 ctl_free_beio(beio); 1425 ctl_set_invalid_field(&io->scsiio, 1426 /*sks_valid*/ 1, 1427 /*command*/ 1, 1428 /*field*/ 1, 1429 /*bit_valid*/ 0, 1430 /*bit*/ 0); 1431 ctl_config_write_done(io); 1432 return; 1433 } 1434 1435 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { 1436 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1437 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; 1438 beio->bio_cmd = BIO_DELETE; 1439 beio->ds_trans_type = DEVSTAT_FREE; 1440 1441 be_lun->unmap(be_lun, beio); 1442 return; 1443 } 1444 1445 beio->bio_cmd = BIO_WRITE; 1446 beio->ds_trans_type = DEVSTAT_WRITE; 1447 1448 DPRINTF("WRITE SAME at LBA %jx len %u\n", 1449 (uintmax_t)lbalen->lba, lbalen->len); 1450 1451 pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; 1452 if (be_lun->cbe_lun.pblockoff > 0) 1453 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; 1454 else 1455 pbo = 0; 1456 len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; 1457 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 1458 /* 1459 * Setup the S/G entry for this chunk. 1460 */ 1461 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1462 if (pb > cbe_lun->blocksize) { 1463 adj = ((lbalen->lba + lba) * cbe_lun->blocksize + 1464 seglen - pbo) % pb; 1465 if (seglen > adj) 1466 seglen -= adj; 1467 else 1468 seglen -= seglen % cbe_lun->blocksize; 1469 } else 1470 seglen -= seglen % cbe_lun->blocksize; 1471 ctl_alloc_seg(softc, &beio->sg_segs[i], seglen); 1472 1473 DPRINTF("segment %d addr %p len %zd\n", i, 1474 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1475 1476 beio->num_segs++; 1477 len_left -= seglen; 1478 1479 buf = beio->sg_segs[i].addr; 1480 end = buf + seglen; 1481 for (; buf < end; buf += cbe_lun->blocksize) { 1482 if (lbalen->flags & SWS_NDOB) { 1483 memset(buf, 0, cbe_lun->blocksize); 1484 } else { 1485 memcpy(buf, io->scsiio.kern_data_ptr, 1486 cbe_lun->blocksize); 1487 } 1488 if (lbalen->flags & SWS_LBDATA) 1489 scsi_ulto4b(lbalen->lba + lba, buf); 1490 lba++; 1491 } 1492 } 1493 1494 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1495 beio->io_len = lba * cbe_lun->blocksize; 1496 1497 /* We can not do all in one run. Correct and schedule rerun. */ 1498 if (len_left > 0) { 1499 lbalen->lba += lba; 1500 lbalen->len -= lba; 1501 beio->beio_cont = ctl_be_block_cw_done_ws; 1502 } 1503 1504 be_lun->dispatch(be_lun, beio); 1505 } 1506 1507 static void 1508 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1509 union ctl_io *io) 1510 { 1511 struct ctl_be_block_io *beio; 1512 struct ctl_ptr_len_flags *ptrlen; 1513 1514 DPRINTF("entered\n"); 1515 1516 CTL_IO_ASSERT(io, SCSI); 1517 1518 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1519 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1520 1521 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { 1522 ctl_free_beio(beio); 1523 ctl_set_invalid_field(&io->scsiio, 1524 /*sks_valid*/ 0, 1525 /*command*/ 1, 1526 /*field*/ 0, 1527 /*bit_valid*/ 0, 1528 /*bit*/ 0); 1529 ctl_config_write_done(io); 1530 return; 1531 } 1532 1533 beio->io_len = 0; 1534 beio->io_offset = -1; 1535 beio->bio_cmd = BIO_DELETE; 1536 beio->ds_trans_type = DEVSTAT_FREE; 1537 DPRINTF("UNMAP\n"); 1538 be_lun->unmap(be_lun, beio); 1539 } 1540 1541 static void 1542 ctl_be_block_cw_dispatch_flush(struct ctl_be_block_lun *be_lun, 1543 union ctl_io *io) 1544 { 1545 struct ctl_be_block_io *beio; 1546 1547 DPRINTF("entered\n"); 1548 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1549 1550 beio->io_len = be_lun->size_bytes; 1551 beio->io_offset = 0; 1552 beio->io_arg = 1; 1553 beio->bio_cmd = BIO_FLUSH; 1554 beio->ds_trans_type = DEVSTAT_NO_DATA; 1555 DPRINTF("FLUSH\n"); 1556 be_lun->lun_flush(be_lun, beio); 1557 } 1558 1559 static void 1560 ctl_be_block_cw_dispatch_wu(struct ctl_be_block_lun *be_lun, 1561 union ctl_io *io) 1562 { 1563 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1564 struct ctl_be_block_io *beio; 1565 struct ctl_lba_len_flags *lbalen; 1566 1567 CTL_IO_ASSERT(io, NVME); 1568 1569 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1570 lbalen = ARGS(io); 1571 1572 /* 1573 * XXX: Not quite right as reads will return zeroes rather 1574 * than failing. 1575 */ 1576 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1577 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; 1578 beio->bio_cmd = BIO_DELETE; 1579 beio->ds_trans_type = DEVSTAT_FREE; 1580 1581 be_lun->unmap(be_lun, beio); 1582 } 1583 1584 static void 1585 ctl_be_block_cw_dispatch_wz(struct ctl_be_block_lun *be_lun, 1586 union ctl_io *io) 1587 { 1588 struct ctl_be_block_softc *softc = be_lun->softc; 1589 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1590 struct ctl_be_block_io *beio; 1591 struct ctl_lba_len_flags *lbalen; 1592 uint64_t len_left, lba; 1593 uint32_t pb, pbo, adj; 1594 int i, seglen; 1595 1596 DPRINTF("entered\n"); 1597 1598 CTL_IO_ASSERT(io, NVME); 1599 1600 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1601 lbalen = ARGS(io); 1602 1603 if ((le32toh(io->nvmeio.cmd.cdw12) & (1U << 25)) != 0 && 1604 be_lun->unmap != NULL) { 1605 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1606 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; 1607 beio->bio_cmd = BIO_DELETE; 1608 beio->ds_trans_type = DEVSTAT_FREE; 1609 1610 be_lun->unmap(be_lun, beio); 1611 return; 1612 } 1613 1614 beio->bio_cmd = BIO_WRITE; 1615 beio->ds_trans_type = DEVSTAT_WRITE; 1616 1617 DPRINTF("WRITE ZEROES at LBA %jx len %u\n", 1618 (uintmax_t)lbalen->lba, lbalen->len); 1619 1620 pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; 1621 if (be_lun->cbe_lun.pblockoff > 0) 1622 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; 1623 else 1624 pbo = 0; 1625 len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; 1626 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 1627 /* 1628 * Setup the S/G entry for this chunk. 1629 */ 1630 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1631 if (pb > cbe_lun->blocksize) { 1632 adj = ((lbalen->lba + lba) * cbe_lun->blocksize + 1633 seglen - pbo) % pb; 1634 if (seglen > adj) 1635 seglen -= adj; 1636 else 1637 seglen -= seglen % cbe_lun->blocksize; 1638 } else 1639 seglen -= seglen % cbe_lun->blocksize; 1640 ctl_alloc_seg(softc, &beio->sg_segs[i], seglen); 1641 1642 DPRINTF("segment %d addr %p len %zd\n", i, 1643 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1644 1645 beio->num_segs++; 1646 len_left -= seglen; 1647 1648 memset(beio->sg_segs[i].addr, 0, seglen); 1649 lba += seglen / cbe_lun->blocksize; 1650 } 1651 1652 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1653 beio->io_len = lba * cbe_lun->blocksize; 1654 1655 /* We can not do all in one run. Correct and schedule rerun. */ 1656 if (len_left > 0) { 1657 lbalen->lba += lba; 1658 lbalen->len -= lba; 1659 beio->beio_cont = ctl_be_block_cw_done_ws; 1660 } 1661 1662 be_lun->dispatch(be_lun, beio); 1663 } 1664 1665 static void 1666 ctl_be_block_cw_dispatch_dsm(struct ctl_be_block_lun *be_lun, 1667 union ctl_io *io) 1668 { 1669 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1670 struct ctl_be_block_io *beio; 1671 struct nvme_dsm_range *r; 1672 uint64_t lba; 1673 uint32_t num_blocks; 1674 u_int i, ranges; 1675 1676 CTL_IO_ASSERT(io, NVME); 1677 1678 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1679 1680 if (be_lun->unmap == NULL) { 1681 ctl_free_beio(beio); 1682 ctl_nvme_set_success(&io->nvmeio); 1683 ctl_config_write_done(io); 1684 return; 1685 } 1686 1687 ranges = le32toh(io->nvmeio.cmd.cdw10) & 0xff; 1688 r = (struct nvme_dsm_range *)io->nvmeio.kern_data_ptr; 1689 1690 /* Find the next range to delete. */ 1691 for (i = DSM_RANGE(io); i < ranges; i++) { 1692 if ((le32toh(r[i].attributes) & (1U << 2)) != 0) 1693 break; 1694 } 1695 1696 /* If no range to delete, complete the operation. */ 1697 if (i == ranges) { 1698 ctl_free_beio(beio); 1699 ctl_nvme_set_success(&io->nvmeio); 1700 ctl_config_write_done(io); 1701 return; 1702 } 1703 1704 /* If this is not the last range, request a rerun after this range. */ 1705 if (i + 1 < ranges) { 1706 DSM_RANGE(io) = i + 1; 1707 beio->beio_cont = ctl_be_block_cw_done_ws; 1708 } 1709 1710 lba = le64toh(r[i].starting_lba); 1711 num_blocks = le32toh(r[i].length); 1712 1713 beio->io_offset = lba * cbe_lun->blocksize; 1714 beio->io_len = (uint64_t)num_blocks * cbe_lun->blocksize; 1715 beio->bio_cmd = BIO_DELETE; 1716 beio->ds_trans_type = DEVSTAT_FREE; 1717 1718 be_lun->unmap(be_lun, beio); 1719 } 1720 1721 static void 1722 ctl_be_block_scsi_cr_done(struct ctl_be_block_io *beio) 1723 { 1724 union ctl_io *io; 1725 1726 io = beio->io; 1727 ctl_free_beio(beio); 1728 ctl_config_read_done(io); 1729 } 1730 1731 static void 1732 ctl_be_block_scsi_cr_dispatch(struct ctl_be_block_lun *be_lun, 1733 union ctl_io *io) 1734 { 1735 struct ctl_be_block_io *beio; 1736 struct ctl_be_block_softc *softc; 1737 1738 DPRINTF("entered\n"); 1739 1740 softc = be_lun->softc; 1741 beio = ctl_alloc_beio(softc); 1742 beio->io = io; 1743 beio->lun = be_lun; 1744 beio->beio_cont = ctl_be_block_scsi_cr_done; 1745 PRIV(io)->ptr = (void *)beio; 1746 1747 switch (io->scsiio.cdb[0]) { 1748 case SERVICE_ACTION_IN: /* GET LBA STATUS */ 1749 beio->bio_cmd = -1; 1750 beio->ds_trans_type = DEVSTAT_NO_DATA; 1751 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1752 beio->io_len = 0; 1753 if (be_lun->get_lba_status) 1754 be_lun->get_lba_status(be_lun, beio); 1755 else 1756 ctl_be_block_scsi_cr_done(beio); 1757 break; 1758 default: 1759 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1760 break; 1761 } 1762 } 1763 1764 static void 1765 ctl_be_block_nvme_cr_dispatch(struct ctl_be_block_lun *be_lun, 1766 union ctl_io *io) 1767 { 1768 uint8_t cns; 1769 1770 DPRINTF("entered\n"); 1771 1772 MPASS(io->nvmeio.cmd.opc == NVME_OPC_IDENTIFY); 1773 1774 cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff; 1775 switch (cns) { 1776 case 0: 1777 ctl_be_block_namespace_data(be_lun, io); 1778 break; 1779 case 3: 1780 ctl_be_block_nvme_ids(be_lun, io); 1781 break; 1782 default: 1783 __assert_unreachable(); 1784 } 1785 } 1786 1787 static void 1788 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 1789 union ctl_io *io) 1790 { 1791 switch (io->io_hdr.io_type) { 1792 case CTL_IO_SCSI: 1793 ctl_be_block_scsi_cr_dispatch(be_lun, io); 1794 break; 1795 case CTL_IO_NVME_ADMIN: 1796 ctl_be_block_nvme_cr_dispatch(be_lun, io); 1797 break; 1798 default: 1799 __assert_unreachable(); 1800 } 1801 } 1802 1803 static void 1804 ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1805 { 1806 union ctl_io *io; 1807 1808 io = beio->io; 1809 ctl_free_beio(beio); 1810 ctl_config_write_done(io); 1811 } 1812 1813 static void 1814 ctl_be_block_scsi_cw_dispatch(struct ctl_be_block_lun *be_lun, 1815 union ctl_io *io) 1816 { 1817 struct ctl_be_block_io *beio; 1818 1819 DPRINTF("entered\n"); 1820 1821 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1822 1823 switch (io->scsiio.tag_type) { 1824 case CTL_TAG_ORDERED: 1825 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1826 break; 1827 case CTL_TAG_HEAD_OF_QUEUE: 1828 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1829 break; 1830 case CTL_TAG_UNTAGGED: 1831 case CTL_TAG_SIMPLE: 1832 case CTL_TAG_ACA: 1833 default: 1834 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1835 break; 1836 } 1837 1838 switch (io->scsiio.cdb[0]) { 1839 case SYNCHRONIZE_CACHE: 1840 case SYNCHRONIZE_CACHE_16: 1841 ctl_be_block_cw_dispatch_sync(be_lun, io); 1842 break; 1843 case WRITE_SAME_10: 1844 case WRITE_SAME_16: 1845 ctl_be_block_cw_dispatch_ws(be_lun, io); 1846 break; 1847 case UNMAP: 1848 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1849 break; 1850 default: 1851 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1852 break; 1853 } 1854 } 1855 1856 static void 1857 ctl_be_block_nvme_cw_dispatch(struct ctl_be_block_lun *be_lun, 1858 union ctl_io *io) 1859 { 1860 struct ctl_be_block_io *beio; 1861 1862 DPRINTF("entered\n"); 1863 1864 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1865 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1866 1867 switch (io->nvmeio.cmd.opc) { 1868 case NVME_OPC_FLUSH: 1869 ctl_be_block_cw_dispatch_flush(be_lun, io); 1870 break; 1871 case NVME_OPC_WRITE_UNCORRECTABLE: 1872 ctl_be_block_cw_dispatch_wu(be_lun, io); 1873 break; 1874 case NVME_OPC_WRITE_ZEROES: 1875 ctl_be_block_cw_dispatch_wz(be_lun, io); 1876 break; 1877 case NVME_OPC_DATASET_MANAGEMENT: 1878 ctl_be_block_cw_dispatch_dsm(be_lun, io); 1879 break; 1880 default: 1881 __assert_unreachable(); 1882 } 1883 } 1884 1885 static void 1886 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1887 union ctl_io *io) 1888 { 1889 struct ctl_be_block_io *beio; 1890 struct ctl_be_block_softc *softc; 1891 1892 softc = be_lun->softc; 1893 beio = ctl_alloc_beio(softc); 1894 beio->io = io; 1895 beio->lun = be_lun; 1896 beio->beio_cont = ctl_be_block_cw_done; 1897 PRIV(io)->ptr = (void *)beio; 1898 1899 switch (io->io_hdr.io_type) { 1900 case CTL_IO_SCSI: 1901 ctl_be_block_scsi_cw_dispatch(be_lun, io); 1902 break; 1903 case CTL_IO_NVME: 1904 ctl_be_block_nvme_cw_dispatch(be_lun, io); 1905 break; 1906 default: 1907 __assert_unreachable(); 1908 } 1909 } 1910 1911 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t"); 1912 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t"); 1913 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t"); 1914 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t"); 1915 1916 static void 1917 ctl_be_block_next(struct ctl_be_block_io *beio) 1918 { 1919 struct ctl_be_block_lun *be_lun; 1920 union ctl_io *io; 1921 1922 io = beio->io; 1923 be_lun = beio->lun; 1924 ctl_free_beio(beio); 1925 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1926 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1927 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1928 ctl_data_submit_done(io); 1929 return; 1930 } 1931 1932 io->io_hdr.status &= ~CTL_STATUS_MASK; 1933 io->io_hdr.status |= CTL_STATUS_NONE; 1934 1935 mtx_lock(&be_lun->queue_lock); 1936 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1937 mtx_unlock(&be_lun->queue_lock); 1938 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1939 } 1940 1941 static void 1942 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1943 union ctl_io *io) 1944 { 1945 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1946 struct ctl_be_block_io *beio; 1947 struct ctl_be_block_softc *softc; 1948 struct ctl_lba_len_flags *lbalen; 1949 struct ctl_ptr_len_flags *bptrlen; 1950 uint64_t len_left, lbas; 1951 int i; 1952 1953 softc = be_lun->softc; 1954 1955 DPRINTF("entered\n"); 1956 1957 lbalen = ARGS(io); 1958 if (lbalen->flags & CTL_LLF_WRITE) { 1959 SDT_PROBE0(cbb, , write, start); 1960 } else { 1961 SDT_PROBE0(cbb, , read, start); 1962 } 1963 1964 beio = ctl_alloc_beio(softc); 1965 beio->io = io; 1966 beio->lun = be_lun; 1967 bptrlen = PRIV(io); 1968 bptrlen->ptr = (void *)beio; 1969 1970 switch (io->io_hdr.io_type) { 1971 case CTL_IO_SCSI: 1972 switch (io->scsiio.tag_type) { 1973 case CTL_TAG_ORDERED: 1974 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1975 break; 1976 case CTL_TAG_HEAD_OF_QUEUE: 1977 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1978 break; 1979 case CTL_TAG_UNTAGGED: 1980 case CTL_TAG_SIMPLE: 1981 case CTL_TAG_ACA: 1982 default: 1983 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1984 break; 1985 } 1986 break; 1987 case CTL_IO_NVME: 1988 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1989 break; 1990 default: 1991 __assert_unreachable(); 1992 } 1993 1994 if (lbalen->flags & CTL_LLF_WRITE) { 1995 beio->bio_cmd = BIO_WRITE; 1996 beio->ds_trans_type = DEVSTAT_WRITE; 1997 } else { 1998 beio->bio_cmd = BIO_READ; 1999 beio->ds_trans_type = DEVSTAT_READ; 2000 } 2001 2002 DPRINTF("%s at LBA %jx len %u @%ju\n", 2003 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", 2004 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 2005 lbas = CTLBLK_MAX_IO_SIZE; 2006 if (lbalen->flags & CTL_LLF_COMPARE) { 2007 beio->two_sglists = 1; 2008 lbas /= 2; 2009 } 2010 lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); 2011 beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; 2012 beio->io_len = lbas * cbe_lun->blocksize; 2013 bptrlen->len += lbas; 2014 2015 for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 2016 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 2017 i, CTLBLK_MAX_SEGS)); 2018 2019 /* 2020 * Setup the S/G entry for this chunk. 2021 */ 2022 ctl_alloc_seg(softc, &beio->sg_segs[i], 2023 MIN(CTLBLK_MAX_SEG, len_left)); 2024 2025 DPRINTF("segment %d addr %p len %zd\n", i, 2026 beio->sg_segs[i].addr, beio->sg_segs[i].len); 2027 2028 /* Set up second segment for compare operation. */ 2029 if (beio->two_sglists) { 2030 ctl_alloc_seg(softc, 2031 &beio->sg_segs[i + CTLBLK_HALF_SEGS], 2032 beio->sg_segs[i].len); 2033 } 2034 2035 beio->num_segs++; 2036 len_left -= beio->sg_segs[i].len; 2037 } 2038 if (bptrlen->len < lbalen->len) 2039 beio->beio_cont = ctl_be_block_next; 2040 ctl_set_be_move_done(io, ctl_be_block_move_done); 2041 /* For compare we have separate S/G lists for read and datamove. */ 2042 if (beio->two_sglists) 2043 ctl_set_kern_data_ptr(io, &beio->sg_segs[CTLBLK_HALF_SEGS]); 2044 else 2045 ctl_set_kern_data_ptr(io, beio->sg_segs); 2046 ctl_set_kern_data_len(io, beio->io_len); 2047 ctl_set_kern_sg_entries(io, beio->num_segs); 2048 ctl_set_kern_data_ref(io, ctl_refcnt_beio); 2049 ctl_set_kern_data_arg(io, beio); 2050 io->io_hdr.flags |= CTL_FLAG_ALLOCATED; 2051 2052 /* 2053 * For the read case, we need to read the data into our buffers and 2054 * then we can send it back to the user. For the write case, we 2055 * need to get the data from the user first. 2056 */ 2057 if (beio->bio_cmd == BIO_READ) { 2058 SDT_PROBE0(cbb, , read, alloc_done); 2059 be_lun->dispatch(be_lun, beio); 2060 } else { 2061 SDT_PROBE0(cbb, , write, alloc_done); 2062 ctl_datamove(io); 2063 } 2064 } 2065 2066 static void 2067 ctl_be_block_worker(void *context, int pending) 2068 { 2069 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context; 2070 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2071 union ctl_io *io; 2072 struct ctl_be_block_io *beio; 2073 2074 DPRINTF("entered\n"); 2075 /* 2076 * Fetch and process I/Os from all queues. If we detect LUN 2077 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race, 2078 * so make response maximally opaque to not confuse initiator. 2079 */ 2080 for (;;) { 2081 mtx_lock(&be_lun->queue_lock); 2082 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 2083 if (io != NULL) { 2084 DPRINTF("datamove queue\n"); 2085 STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links); 2086 mtx_unlock(&be_lun->queue_lock); 2087 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 2088 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 2089 ctl_io_set_busy(io); 2090 ctl_complete_beio(beio); 2091 continue; 2092 } 2093 be_lun->dispatch(be_lun, beio); 2094 continue; 2095 } 2096 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 2097 if (io != NULL) { 2098 DPRINTF("config write queue\n"); 2099 STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links); 2100 mtx_unlock(&be_lun->queue_lock); 2101 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 2102 ctl_io_set_busy(io); 2103 ctl_config_write_done(io); 2104 continue; 2105 } 2106 ctl_be_block_cw_dispatch(be_lun, io); 2107 continue; 2108 } 2109 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); 2110 if (io != NULL) { 2111 DPRINTF("config read queue\n"); 2112 STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links); 2113 mtx_unlock(&be_lun->queue_lock); 2114 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 2115 ctl_io_set_busy(io); 2116 ctl_config_read_done(io); 2117 continue; 2118 } 2119 ctl_be_block_cr_dispatch(be_lun, io); 2120 continue; 2121 } 2122 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 2123 if (io != NULL) { 2124 DPRINTF("input queue\n"); 2125 STAILQ_REMOVE_HEAD(&be_lun->input_queue, links); 2126 mtx_unlock(&be_lun->queue_lock); 2127 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 2128 ctl_io_set_busy(io); 2129 ctl_data_submit_done(io); 2130 continue; 2131 } 2132 ctl_be_block_dispatch(be_lun, io); 2133 continue; 2134 } 2135 2136 /* 2137 * If we get here, there is no work left in the queues, so 2138 * just break out and let the task queue go to sleep. 2139 */ 2140 mtx_unlock(&be_lun->queue_lock); 2141 break; 2142 } 2143 } 2144 2145 /* 2146 * Entry point from CTL to the backend for I/O. We queue everything to a 2147 * work thread, so this just puts the I/O on a queue and wakes up the 2148 * thread. 2149 */ 2150 static int 2151 ctl_be_block_submit(union ctl_io *io) 2152 { 2153 struct ctl_be_block_lun *be_lun; 2154 2155 DPRINTF("entered\n"); 2156 2157 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 2158 2159 CTL_IO_ASSERT(io, SCSI, NVME); 2160 2161 PRIV(io)->len = 0; 2162 2163 mtx_lock(&be_lun->queue_lock); 2164 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 2165 mtx_unlock(&be_lun->queue_lock); 2166 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2167 2168 return (CTL_RETVAL_COMPLETE); 2169 } 2170 2171 static int 2172 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 2173 int flag, struct thread *td) 2174 { 2175 struct ctl_be_block_softc *softc = &backend_block_softc; 2176 int error; 2177 2178 error = 0; 2179 switch (cmd) { 2180 case CTL_LUN_REQ: { 2181 struct ctl_lun_req *lun_req; 2182 2183 lun_req = (struct ctl_lun_req *)addr; 2184 2185 switch (lun_req->reqtype) { 2186 case CTL_LUNREQ_CREATE: 2187 error = ctl_be_block_create(softc, lun_req); 2188 break; 2189 case CTL_LUNREQ_RM: 2190 error = ctl_be_block_rm(softc, lun_req); 2191 break; 2192 case CTL_LUNREQ_MODIFY: 2193 error = ctl_be_block_modify(softc, lun_req); 2194 break; 2195 default: 2196 lun_req->status = CTL_LUN_ERROR; 2197 snprintf(lun_req->error_str, sizeof(lun_req->error_str), 2198 "invalid LUN request type %d", 2199 lun_req->reqtype); 2200 break; 2201 } 2202 break; 2203 } 2204 default: 2205 error = ENOTTY; 2206 break; 2207 } 2208 2209 return (error); 2210 } 2211 2212 static int 2213 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2214 { 2215 struct ctl_be_lun *cbe_lun; 2216 struct ctl_be_block_filedata *file_data; 2217 struct ctl_lun_create_params *params; 2218 const char *value; 2219 struct vattr vattr; 2220 off_t ps, pss, po, pos, us, uss, uo, uos; 2221 int error; 2222 long pconf; 2223 2224 cbe_lun = &be_lun->cbe_lun; 2225 file_data = &be_lun->backend.file; 2226 params = &be_lun->params; 2227 2228 be_lun->dev_type = CTL_BE_BLOCK_FILE; 2229 be_lun->dispatch = ctl_be_block_dispatch_file; 2230 be_lun->lun_flush = ctl_be_block_flush_file; 2231 be_lun->get_lba_status = ctl_be_block_gls_file; 2232 be_lun->getattr = ctl_be_block_getattr_file; 2233 be_lun->unmap = ctl_be_block_unmap_file; 2234 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 2235 2236 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 2237 if (error != 0) { 2238 snprintf(req->error_str, sizeof(req->error_str), 2239 "error calling VOP_GETATTR() for file %s", 2240 be_lun->dev_path); 2241 return (error); 2242 } 2243 2244 error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf); 2245 if (error != 0) { 2246 snprintf(req->error_str, sizeof(req->error_str), 2247 "error calling VOP_PATHCONF() for file %s", 2248 be_lun->dev_path); 2249 return (error); 2250 } 2251 if (pconf == 1) 2252 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; 2253 2254 file_data->cred = crhold(curthread->td_ucred); 2255 if (params->lun_size_bytes != 0) 2256 be_lun->size_bytes = params->lun_size_bytes; 2257 else 2258 be_lun->size_bytes = vattr.va_size; 2259 2260 /* 2261 * For files we can use any logical block size. Prefer 512 bytes 2262 * for compatibility reasons. If file's vattr.va_blocksize 2263 * (preferred I/O block size) is bigger and multiple to chosen 2264 * logical block size -- report it as physical block size. 2265 */ 2266 if (params->blocksize_bytes != 0) 2267 cbe_lun->blocksize = params->blocksize_bytes; 2268 else if (cbe_lun->lun_type == T_CDROM) 2269 cbe_lun->blocksize = 2048; 2270 else 2271 cbe_lun->blocksize = 512; 2272 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2273 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2274 0 : (be_lun->size_blocks - 1); 2275 2276 us = ps = vattr.va_blocksize; 2277 uo = po = 0; 2278 2279 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 2280 if (value != NULL) 2281 ctl_expand_number(value, &ps); 2282 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 2283 if (value != NULL) 2284 ctl_expand_number(value, &po); 2285 pss = ps / cbe_lun->blocksize; 2286 pos = po / cbe_lun->blocksize; 2287 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 2288 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 2289 cbe_lun->pblockexp = fls(pss) - 1; 2290 cbe_lun->pblockoff = (pss - pos) % pss; 2291 } 2292 2293 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 2294 if (value != NULL) 2295 ctl_expand_number(value, &us); 2296 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 2297 if (value != NULL) 2298 ctl_expand_number(value, &uo); 2299 uss = us / cbe_lun->blocksize; 2300 uos = uo / cbe_lun->blocksize; 2301 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 2302 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 2303 cbe_lun->ublockexp = fls(uss) - 1; 2304 cbe_lun->ublockoff = (uss - uos) % uss; 2305 } 2306 2307 /* 2308 * Sanity check. The media size has to be at least one 2309 * sector long. 2310 */ 2311 if (be_lun->size_bytes < cbe_lun->blocksize) { 2312 error = EINVAL; 2313 snprintf(req->error_str, sizeof(req->error_str), 2314 "file %s size %ju < block size %u", be_lun->dev_path, 2315 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); 2316 } 2317 2318 cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; 2319 return (error); 2320 } 2321 2322 static int 2323 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2324 { 2325 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2326 struct ctl_lun_create_params *params; 2327 struct cdevsw *csw; 2328 struct cdev *dev; 2329 const char *value; 2330 int error, atomic, maxio, ref, unmap, tmp; 2331 off_t ps, pss, po, pos, us, uss, uo, uos, otmp; 2332 2333 params = &be_lun->params; 2334 2335 be_lun->dev_type = CTL_BE_BLOCK_DEV; 2336 csw = devvn_refthread(be_lun->vn, &dev, &ref); 2337 if (csw == NULL) 2338 return (ENXIO); 2339 if (strcmp(csw->d_name, "zvol") == 0) { 2340 be_lun->dispatch = ctl_be_block_dispatch_zvol; 2341 be_lun->get_lba_status = ctl_be_block_gls_zvol; 2342 atomic = maxio = CTLBLK_MAX_IO_SIZE; 2343 } else { 2344 be_lun->dispatch = ctl_be_block_dispatch_dev; 2345 be_lun->get_lba_status = NULL; 2346 atomic = 0; 2347 maxio = dev->si_iosize_max; 2348 if (maxio <= 0) 2349 maxio = DFLTPHYS; 2350 if (maxio > CTLBLK_MAX_SEG) 2351 maxio = CTLBLK_MAX_SEG; 2352 } 2353 be_lun->lun_flush = ctl_be_block_flush_dev; 2354 be_lun->getattr = ctl_be_block_getattr_dev; 2355 be_lun->unmap = ctl_be_block_unmap_dev; 2356 2357 if (!csw->d_ioctl) { 2358 dev_relthread(dev, ref); 2359 snprintf(req->error_str, sizeof(req->error_str), 2360 "no d_ioctl for device %s!", be_lun->dev_path); 2361 return (ENODEV); 2362 } 2363 2364 error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, 2365 curthread); 2366 if (error) { 2367 dev_relthread(dev, ref); 2368 snprintf(req->error_str, sizeof(req->error_str), 2369 "error %d returned for DIOCGSECTORSIZE ioctl " 2370 "on %s!", error, be_lun->dev_path); 2371 return (error); 2372 } 2373 2374 /* 2375 * If the user has asked for a blocksize that is greater than the 2376 * backing device's blocksize, we can do it only if the blocksize 2377 * the user is asking for is an even multiple of the underlying 2378 * device's blocksize. 2379 */ 2380 if ((params->blocksize_bytes != 0) && 2381 (params->blocksize_bytes >= tmp)) { 2382 if (params->blocksize_bytes % tmp == 0) { 2383 cbe_lun->blocksize = params->blocksize_bytes; 2384 } else { 2385 dev_relthread(dev, ref); 2386 snprintf(req->error_str, sizeof(req->error_str), 2387 "requested blocksize %u is not an even " 2388 "multiple of backing device blocksize %u", 2389 params->blocksize_bytes, tmp); 2390 return (EINVAL); 2391 } 2392 } else if (params->blocksize_bytes != 0) { 2393 dev_relthread(dev, ref); 2394 snprintf(req->error_str, sizeof(req->error_str), 2395 "requested blocksize %u < backing device " 2396 "blocksize %u", params->blocksize_bytes, tmp); 2397 return (EINVAL); 2398 } else if (cbe_lun->lun_type == T_CDROM) 2399 cbe_lun->blocksize = MAX(tmp, 2048); 2400 else 2401 cbe_lun->blocksize = tmp; 2402 2403 error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD, 2404 curthread); 2405 if (error) { 2406 dev_relthread(dev, ref); 2407 snprintf(req->error_str, sizeof(req->error_str), 2408 "error %d returned for DIOCGMEDIASIZE " 2409 " ioctl on %s!", error, 2410 be_lun->dev_path); 2411 return (error); 2412 } 2413 2414 if (params->lun_size_bytes != 0) { 2415 if (params->lun_size_bytes > otmp) { 2416 dev_relthread(dev, ref); 2417 snprintf(req->error_str, sizeof(req->error_str), 2418 "requested LUN size %ju > backing device " 2419 "size %ju", 2420 (uintmax_t)params->lun_size_bytes, 2421 (uintmax_t)otmp); 2422 return (EINVAL); 2423 } 2424 2425 be_lun->size_bytes = params->lun_size_bytes; 2426 } else 2427 be_lun->size_bytes = otmp; 2428 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2429 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2430 0 : (be_lun->size_blocks - 1); 2431 2432 error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, 2433 curthread); 2434 if (error) 2435 ps = po = 0; 2436 else { 2437 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, 2438 FREAD, curthread); 2439 if (error) 2440 po = 0; 2441 } 2442 us = ps; 2443 uo = po; 2444 2445 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 2446 if (value != NULL) 2447 ctl_expand_number(value, &ps); 2448 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 2449 if (value != NULL) 2450 ctl_expand_number(value, &po); 2451 pss = ps / cbe_lun->blocksize; 2452 pos = po / cbe_lun->blocksize; 2453 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 2454 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 2455 cbe_lun->pblockexp = fls(pss) - 1; 2456 cbe_lun->pblockoff = (pss - pos) % pss; 2457 } 2458 2459 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 2460 if (value != NULL) 2461 ctl_expand_number(value, &us); 2462 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 2463 if (value != NULL) 2464 ctl_expand_number(value, &uo); 2465 uss = us / cbe_lun->blocksize; 2466 uos = uo / cbe_lun->blocksize; 2467 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 2468 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 2469 cbe_lun->ublockexp = fls(uss) - 1; 2470 cbe_lun->ublockoff = (uss - uos) % uss; 2471 } 2472 2473 cbe_lun->atomicblock = atomic / cbe_lun->blocksize; 2474 cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; 2475 2476 if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { 2477 unmap = 1; 2478 } else { 2479 struct diocgattr_arg arg; 2480 2481 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 2482 arg.len = sizeof(arg.value.i); 2483 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 2484 curthread); 2485 unmap = (error == 0) ? arg.value.i : 0; 2486 } 2487 value = dnvlist_get_string(cbe_lun->options, "unmap", NULL); 2488 if (value != NULL) 2489 unmap = (strcmp(value, "on") == 0); 2490 if (unmap) 2491 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; 2492 else 2493 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 2494 2495 dev_relthread(dev, ref); 2496 return (0); 2497 } 2498 2499 static int 2500 ctl_be_block_close(struct ctl_be_block_lun *be_lun) 2501 { 2502 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2503 int flags; 2504 2505 if (be_lun->vn) { 2506 flags = FREAD; 2507 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) 2508 flags |= FWRITE; 2509 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 2510 be_lun->vn = NULL; 2511 2512 switch (be_lun->dev_type) { 2513 case CTL_BE_BLOCK_DEV: 2514 break; 2515 case CTL_BE_BLOCK_FILE: 2516 if (be_lun->backend.file.cred != NULL) { 2517 crfree(be_lun->backend.file.cred); 2518 be_lun->backend.file.cred = NULL; 2519 } 2520 break; 2521 case CTL_BE_BLOCK_NONE: 2522 break; 2523 default: 2524 panic("Unexpected backend type %d", be_lun->dev_type); 2525 break; 2526 } 2527 be_lun->dev_type = CTL_BE_BLOCK_NONE; 2528 } 2529 return (0); 2530 } 2531 2532 static int 2533 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2534 { 2535 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2536 struct nameidata nd; 2537 const char *value; 2538 int error, flags; 2539 2540 error = 0; 2541 if (rootvnode == NULL) { 2542 snprintf(req->error_str, sizeof(req->error_str), 2543 "Root filesystem is not mounted"); 2544 return (1); 2545 } 2546 pwd_ensure_dirs(); 2547 2548 value = dnvlist_get_string(cbe_lun->options, "file", NULL); 2549 if (value == NULL) { 2550 snprintf(req->error_str, sizeof(req->error_str), 2551 "no file argument specified"); 2552 return (1); 2553 } 2554 free(be_lun->dev_path, M_CTLBLK); 2555 be_lun->dev_path = strdup(value, M_CTLBLK); 2556 2557 flags = FREAD; 2558 value = dnvlist_get_string(cbe_lun->options, "readonly", NULL); 2559 if (value != NULL) { 2560 if (strcmp(value, "on") != 0) 2561 flags |= FWRITE; 2562 } else if (cbe_lun->lun_type == T_DIRECT) 2563 flags |= FWRITE; 2564 2565 again: 2566 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path); 2567 error = vn_open(&nd, &flags, 0, NULL); 2568 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { 2569 flags &= ~FWRITE; 2570 goto again; 2571 } 2572 if (error) { 2573 /* 2574 * This is the only reasonable guess we can make as far as 2575 * path if the user doesn't give us a fully qualified path. 2576 * If they want to specify a file, they need to specify the 2577 * full path. 2578 */ 2579 if (be_lun->dev_path[0] != '/') { 2580 char *dev_name; 2581 2582 asprintf(&dev_name, M_CTLBLK, "/dev/%s", 2583 be_lun->dev_path); 2584 free(be_lun->dev_path, M_CTLBLK); 2585 be_lun->dev_path = dev_name; 2586 goto again; 2587 } 2588 snprintf(req->error_str, sizeof(req->error_str), 2589 "error opening %s: %d", be_lun->dev_path, error); 2590 return (error); 2591 } 2592 if (flags & FWRITE) 2593 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; 2594 else 2595 cbe_lun->flags |= CTL_LUN_FLAG_READONLY; 2596 2597 NDFREE_PNBUF(&nd); 2598 be_lun->vn = nd.ni_vp; 2599 2600 /* We only support disks and files. */ 2601 if (vn_isdisk_error(be_lun->vn, &error)) { 2602 error = ctl_be_block_open_dev(be_lun, req); 2603 } else if (be_lun->vn->v_type == VREG) { 2604 error = ctl_be_block_open_file(be_lun, req); 2605 } else { 2606 error = EINVAL; 2607 snprintf(req->error_str, sizeof(req->error_str), 2608 "%s is not a disk or plain file", be_lun->dev_path); 2609 } 2610 VOP_UNLOCK(be_lun->vn); 2611 2612 if (error != 0) 2613 ctl_be_block_close(be_lun); 2614 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2615 if (be_lun->dispatch != ctl_be_block_dispatch_dev) 2616 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2617 value = dnvlist_get_string(cbe_lun->options, "serseq", NULL); 2618 if (value != NULL && strcmp(value, "on") == 0) 2619 cbe_lun->serseq = CTL_LUN_SERSEQ_ON; 2620 else if (value != NULL && strcmp(value, "read") == 0) 2621 cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2622 else if (value != NULL && strcmp(value, "soft") == 0) 2623 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2624 else if (value != NULL && strcmp(value, "off") == 0) 2625 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2626 return (0); 2627 } 2628 2629 static int 2630 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2631 { 2632 struct ctl_be_lun *cbe_lun; 2633 struct ctl_be_block_lun *be_lun; 2634 struct ctl_lun_create_params *params; 2635 char num_thread_str[16]; 2636 char tmpstr[32]; 2637 const char *value; 2638 int retval, num_threads; 2639 int tmp_num_threads; 2640 2641 params = &req->reqdata.create; 2642 retval = 0; 2643 req->status = CTL_LUN_OK; 2644 2645 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2646 cbe_lun = &be_lun->cbe_lun; 2647 be_lun->params = req->reqdata.create; 2648 be_lun->softc = softc; 2649 STAILQ_INIT(&be_lun->input_queue); 2650 STAILQ_INIT(&be_lun->config_read_queue); 2651 STAILQ_INIT(&be_lun->config_write_queue); 2652 STAILQ_INIT(&be_lun->datamove_queue); 2653 mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF); 2654 mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF); 2655 cbe_lun->options = nvlist_clone(req->args_nvl); 2656 2657 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 2658 cbe_lun->lun_type = params->device_type; 2659 else 2660 cbe_lun->lun_type = T_DIRECT; 2661 be_lun->flags = 0; 2662 cbe_lun->flags = 0; 2663 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2664 if (value != NULL) { 2665 if (strcmp(value, "primary") == 0) 2666 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2667 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2668 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2669 2670 if (cbe_lun->lun_type == T_DIRECT || 2671 cbe_lun->lun_type == T_CDROM) { 2672 be_lun->size_bytes = params->lun_size_bytes; 2673 if (params->blocksize_bytes != 0) 2674 cbe_lun->blocksize = params->blocksize_bytes; 2675 else if (cbe_lun->lun_type == T_CDROM) 2676 cbe_lun->blocksize = 2048; 2677 else 2678 cbe_lun->blocksize = 512; 2679 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2680 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2681 0 : (be_lun->size_blocks - 1); 2682 2683 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2684 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2685 retval = ctl_be_block_open(be_lun, req); 2686 if (retval != 0) { 2687 retval = 0; 2688 req->status = CTL_LUN_WARNING; 2689 } 2690 } 2691 num_threads = cbb_num_threads; 2692 } else { 2693 num_threads = 1; 2694 } 2695 2696 value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL); 2697 if (value != NULL) { 2698 tmp_num_threads = strtol(value, NULL, 0); 2699 2700 /* 2701 * We don't let the user specify less than one 2702 * thread, but hope he's clueful enough not to 2703 * specify 1000 threads. 2704 */ 2705 if (tmp_num_threads < 1) { 2706 snprintf(req->error_str, sizeof(req->error_str), 2707 "invalid number of threads %s", 2708 num_thread_str); 2709 goto bailout_error; 2710 } 2711 num_threads = tmp_num_threads; 2712 } 2713 2714 if (be_lun->vn == NULL) 2715 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2716 /* Tell the user the blocksize we ended up using */ 2717 params->lun_size_bytes = be_lun->size_bytes; 2718 params->blocksize_bytes = cbe_lun->blocksize; 2719 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2720 cbe_lun->req_lun_id = params->req_lun_id; 2721 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2722 } else 2723 cbe_lun->req_lun_id = 0; 2724 2725 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2726 cbe_lun->be = &ctl_be_block_driver; 2727 2728 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2729 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d", 2730 softc->num_luns); 2731 strncpy((char *)cbe_lun->serial_num, tmpstr, 2732 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2733 2734 /* Tell the user what we used for a serial number */ 2735 strncpy((char *)params->serial_num, tmpstr, 2736 MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2737 } else { 2738 strncpy((char *)cbe_lun->serial_num, params->serial_num, 2739 MIN(sizeof(cbe_lun->serial_num), 2740 sizeof(params->serial_num))); 2741 } 2742 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2743 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns); 2744 strncpy((char *)cbe_lun->device_id, tmpstr, 2745 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2746 2747 /* Tell the user what we used for a device ID */ 2748 strncpy((char *)params->device_id, tmpstr, 2749 MIN(sizeof(params->device_id), sizeof(tmpstr))); 2750 } else { 2751 strncpy((char *)cbe_lun->device_id, params->device_id, 2752 MIN(sizeof(cbe_lun->device_id), 2753 sizeof(params->device_id))); 2754 } 2755 2756 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2757 2758 be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK, 2759 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2760 2761 if (be_lun->io_taskqueue == NULL) { 2762 snprintf(req->error_str, sizeof(req->error_str), 2763 "unable to create taskqueue"); 2764 goto bailout_error; 2765 } 2766 2767 /* 2768 * Note that we start the same number of threads by default for 2769 * both the file case and the block device case. For the file 2770 * case, we need multiple threads to allow concurrency, because the 2771 * vnode interface is designed to be a blocking interface. For the 2772 * block device case, ZFS zvols at least will block the caller's 2773 * context in many instances, and so we need multiple threads to 2774 * overcome that problem. Other block devices don't need as many 2775 * threads, but they shouldn't cause too many problems. 2776 * 2777 * If the user wants to just have a single thread for a block 2778 * device, he can specify that when the LUN is created, or change 2779 * the tunable/sysctl to alter the default number of threads. 2780 */ 2781 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue, 2782 /*num threads*/num_threads, 2783 /*priority*/PUSER, 2784 /*proc*/control_softc->ctl_proc, 2785 /*thread name*/"block"); 2786 2787 if (retval != 0) 2788 goto bailout_error; 2789 2790 be_lun->num_threads = num_threads; 2791 2792 retval = ctl_add_lun(&be_lun->cbe_lun); 2793 if (retval != 0) { 2794 snprintf(req->error_str, sizeof(req->error_str), 2795 "ctl_add_lun() returned error %d, see dmesg for " 2796 "details", retval); 2797 retval = 0; 2798 goto bailout_error; 2799 } 2800 2801 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, 2802 cbe_lun->blocksize, 2803 DEVSTAT_ALL_SUPPORTED, 2804 cbe_lun->lun_type 2805 | DEVSTAT_TYPE_IF_OTHER, 2806 DEVSTAT_PRIORITY_OTHER); 2807 2808 mtx_lock(&softc->lock); 2809 softc->num_luns++; 2810 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); 2811 mtx_unlock(&softc->lock); 2812 2813 params->req_lun_id = cbe_lun->lun_id; 2814 2815 return (retval); 2816 2817 bailout_error: 2818 req->status = CTL_LUN_ERROR; 2819 2820 if (be_lun->io_taskqueue != NULL) 2821 taskqueue_free(be_lun->io_taskqueue); 2822 ctl_be_block_close(be_lun); 2823 if (be_lun->dev_path != NULL) 2824 free(be_lun->dev_path, M_CTLBLK); 2825 nvlist_destroy(cbe_lun->options); 2826 mtx_destroy(&be_lun->queue_lock); 2827 mtx_destroy(&be_lun->io_lock); 2828 free(be_lun, M_CTLBLK); 2829 2830 return (retval); 2831 } 2832 2833 static int 2834 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2835 { 2836 struct ctl_lun_rm_params *params; 2837 struct ctl_be_block_lun *be_lun; 2838 struct ctl_be_lun *cbe_lun; 2839 int retval; 2840 2841 params = &req->reqdata.rm; 2842 2843 sx_xlock(&softc->modify_lock); 2844 mtx_lock(&softc->lock); 2845 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2846 if (be_lun->cbe_lun.lun_id == params->lun_id) { 2847 SLIST_REMOVE(&softc->lun_list, be_lun, 2848 ctl_be_block_lun, links); 2849 softc->num_luns--; 2850 break; 2851 } 2852 } 2853 mtx_unlock(&softc->lock); 2854 sx_xunlock(&softc->modify_lock); 2855 if (be_lun == NULL) { 2856 snprintf(req->error_str, sizeof(req->error_str), 2857 "LUN %u is not managed by the block backend", 2858 params->lun_id); 2859 goto bailout_error; 2860 } 2861 cbe_lun = &be_lun->cbe_lun; 2862 2863 if (be_lun->vn != NULL) { 2864 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2865 ctl_lun_no_media(cbe_lun); 2866 taskqueue_drain_all(be_lun->io_taskqueue); 2867 ctl_be_block_close(be_lun); 2868 } 2869 2870 mtx_lock(&softc->lock); 2871 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2872 mtx_unlock(&softc->lock); 2873 2874 retval = ctl_remove_lun(cbe_lun); 2875 if (retval != 0) { 2876 snprintf(req->error_str, sizeof(req->error_str), 2877 "error %d returned from ctl_remove_lun() for " 2878 "LUN %d", retval, params->lun_id); 2879 mtx_lock(&softc->lock); 2880 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2881 mtx_unlock(&softc->lock); 2882 goto bailout_error; 2883 } 2884 2885 mtx_lock(&softc->lock); 2886 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2887 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0); 2888 if (retval == EINTR) 2889 break; 2890 } 2891 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2892 if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2893 mtx_unlock(&softc->lock); 2894 free(be_lun, M_CTLBLK); 2895 } else { 2896 mtx_unlock(&softc->lock); 2897 return (EINTR); 2898 } 2899 2900 req->status = CTL_LUN_OK; 2901 return (0); 2902 2903 bailout_error: 2904 req->status = CTL_LUN_ERROR; 2905 return (0); 2906 } 2907 2908 static int 2909 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2910 { 2911 struct ctl_lun_modify_params *params; 2912 struct ctl_be_block_lun *be_lun; 2913 struct ctl_be_lun *cbe_lun; 2914 const char *value; 2915 uint64_t oldsize; 2916 int error, wasprim; 2917 2918 params = &req->reqdata.modify; 2919 2920 sx_xlock(&softc->modify_lock); 2921 mtx_lock(&softc->lock); 2922 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2923 if (be_lun->cbe_lun.lun_id == params->lun_id) 2924 break; 2925 } 2926 mtx_unlock(&softc->lock); 2927 if (be_lun == NULL) { 2928 snprintf(req->error_str, sizeof(req->error_str), 2929 "LUN %u is not managed by the block backend", 2930 params->lun_id); 2931 goto bailout_error; 2932 } 2933 cbe_lun = &be_lun->cbe_lun; 2934 2935 if (params->lun_size_bytes != 0) 2936 be_lun->params.lun_size_bytes = params->lun_size_bytes; 2937 2938 if (req->args_nvl != NULL) { 2939 nvlist_destroy(cbe_lun->options); 2940 cbe_lun->options = nvlist_clone(req->args_nvl); 2941 } 2942 2943 wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); 2944 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2945 if (value != NULL) { 2946 if (strcmp(value, "primary") == 0) 2947 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2948 else 2949 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2950 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2951 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2952 else 2953 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2954 if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { 2955 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) 2956 ctl_lun_primary(cbe_lun); 2957 else 2958 ctl_lun_secondary(cbe_lun); 2959 } 2960 2961 oldsize = be_lun->size_blocks; 2962 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2963 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2964 if (be_lun->vn == NULL) 2965 error = ctl_be_block_open(be_lun, req); 2966 else if (vn_isdisk_error(be_lun->vn, &error)) 2967 error = ctl_be_block_open_dev(be_lun, req); 2968 else if (be_lun->vn->v_type == VREG) { 2969 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2970 error = ctl_be_block_open_file(be_lun, req); 2971 VOP_UNLOCK(be_lun->vn); 2972 } else 2973 error = EINVAL; 2974 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) && 2975 be_lun->vn != NULL) { 2976 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2977 ctl_lun_has_media(cbe_lun); 2978 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 && 2979 be_lun->vn == NULL) { 2980 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2981 ctl_lun_no_media(cbe_lun); 2982 } 2983 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2984 } else { 2985 if (be_lun->vn != NULL) { 2986 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2987 ctl_lun_no_media(cbe_lun); 2988 taskqueue_drain_all(be_lun->io_taskqueue); 2989 error = ctl_be_block_close(be_lun); 2990 } else 2991 error = 0; 2992 } 2993 if (be_lun->size_blocks != oldsize) 2994 ctl_lun_capacity_changed(cbe_lun); 2995 2996 /* Tell the user the exact size we ended up using */ 2997 params->lun_size_bytes = be_lun->size_bytes; 2998 2999 sx_xunlock(&softc->modify_lock); 3000 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; 3001 return (0); 3002 3003 bailout_error: 3004 sx_xunlock(&softc->modify_lock); 3005 req->status = CTL_LUN_ERROR; 3006 return (0); 3007 } 3008 3009 static void 3010 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun) 3011 { 3012 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun; 3013 struct ctl_be_block_softc *softc = be_lun->softc; 3014 3015 taskqueue_drain_all(be_lun->io_taskqueue); 3016 taskqueue_free(be_lun->io_taskqueue); 3017 if (be_lun->disk_stats != NULL) 3018 devstat_remove_entry(be_lun->disk_stats); 3019 nvlist_destroy(be_lun->cbe_lun.options); 3020 free(be_lun->dev_path, M_CTLBLK); 3021 mtx_destroy(&be_lun->queue_lock); 3022 mtx_destroy(&be_lun->io_lock); 3023 3024 mtx_lock(&softc->lock); 3025 be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 3026 if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING) 3027 wakeup(be_lun); 3028 else 3029 free(be_lun, M_CTLBLK); 3030 mtx_unlock(&softc->lock); 3031 } 3032 3033 static int 3034 ctl_be_block_scsi_config_write(union ctl_io *io) 3035 { 3036 struct ctl_be_block_lun *be_lun; 3037 struct ctl_be_lun *cbe_lun; 3038 int retval; 3039 3040 DPRINTF("entered\n"); 3041 3042 cbe_lun = CTL_BACKEND_LUN(io); 3043 be_lun = (struct ctl_be_block_lun *)cbe_lun; 3044 3045 retval = 0; 3046 switch (io->scsiio.cdb[0]) { 3047 case SYNCHRONIZE_CACHE: 3048 case SYNCHRONIZE_CACHE_16: 3049 case WRITE_SAME_10: 3050 case WRITE_SAME_16: 3051 case UNMAP: 3052 /* 3053 * The upper level CTL code will filter out any CDBs with 3054 * the immediate bit set and return the proper error. 3055 * 3056 * We don't really need to worry about what LBA range the 3057 * user asked to be synced out. When they issue a sync 3058 * cache command, we'll sync out the whole thing. 3059 */ 3060 mtx_lock(&be_lun->queue_lock); 3061 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 3062 links); 3063 mtx_unlock(&be_lun->queue_lock); 3064 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 3065 break; 3066 case START_STOP_UNIT: { 3067 struct scsi_start_stop_unit *cdb; 3068 struct ctl_lun_req req; 3069 3070 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 3071 if ((cdb->how & SSS_PC_MASK) != 0) { 3072 ctl_set_success(&io->scsiio); 3073 ctl_config_write_done(io); 3074 break; 3075 } 3076 if (cdb->how & SSS_START) { 3077 if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) { 3078 retval = ctl_be_block_open(be_lun, &req); 3079 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 3080 if (retval == 0) { 3081 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 3082 ctl_lun_has_media(cbe_lun); 3083 } else { 3084 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 3085 ctl_lun_no_media(cbe_lun); 3086 } 3087 } 3088 ctl_start_lun(cbe_lun); 3089 } else { 3090 ctl_stop_lun(cbe_lun); 3091 if (cdb->how & SSS_LOEJ) { 3092 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 3093 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED; 3094 ctl_lun_ejected(cbe_lun); 3095 if (be_lun->vn != NULL) 3096 ctl_be_block_close(be_lun); 3097 } 3098 } 3099 3100 ctl_set_success(&io->scsiio); 3101 ctl_config_write_done(io); 3102 break; 3103 } 3104 case PREVENT_ALLOW: 3105 ctl_set_success(&io->scsiio); 3106 ctl_config_write_done(io); 3107 break; 3108 default: 3109 ctl_set_invalid_opcode(&io->scsiio); 3110 ctl_config_write_done(io); 3111 retval = CTL_RETVAL_COMPLETE; 3112 break; 3113 } 3114 3115 return (retval); 3116 } 3117 3118 static int 3119 ctl_be_block_nvme_config_write(union ctl_io *io) 3120 { 3121 struct ctl_be_block_lun *be_lun; 3122 3123 DPRINTF("entered\n"); 3124 3125 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 3126 3127 switch (io->nvmeio.cmd.opc) { 3128 case NVME_OPC_DATASET_MANAGEMENT: 3129 DSM_RANGE(io) = 0; 3130 /* FALLTHROUGH */ 3131 case NVME_OPC_FLUSH: 3132 case NVME_OPC_WRITE_UNCORRECTABLE: 3133 case NVME_OPC_WRITE_ZEROES: 3134 mtx_lock(&be_lun->queue_lock); 3135 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 3136 links); 3137 mtx_unlock(&be_lun->queue_lock); 3138 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 3139 break; 3140 default: 3141 ctl_nvme_set_invalid_opcode(&io->nvmeio); 3142 ctl_config_write_done(io); 3143 break; 3144 } 3145 return (CTL_RETVAL_COMPLETE); 3146 } 3147 3148 static int 3149 ctl_be_block_config_write(union ctl_io *io) 3150 { 3151 switch (io->io_hdr.io_type) { 3152 case CTL_IO_SCSI: 3153 return (ctl_be_block_scsi_config_write(io)); 3154 case CTL_IO_NVME: 3155 return (ctl_be_block_nvme_config_write(io)); 3156 default: 3157 __assert_unreachable(); 3158 } 3159 } 3160 3161 static int 3162 ctl_be_block_scsi_config_read(union ctl_io *io) 3163 { 3164 struct ctl_be_block_lun *be_lun; 3165 int retval = 0; 3166 3167 DPRINTF("entered\n"); 3168 3169 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 3170 3171 switch (io->scsiio.cdb[0]) { 3172 case SERVICE_ACTION_IN: 3173 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 3174 mtx_lock(&be_lun->queue_lock); 3175 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 3176 &io->io_hdr, links); 3177 mtx_unlock(&be_lun->queue_lock); 3178 taskqueue_enqueue(be_lun->io_taskqueue, 3179 &be_lun->io_task); 3180 retval = CTL_RETVAL_QUEUED; 3181 break; 3182 } 3183 ctl_set_invalid_field(&io->scsiio, 3184 /*sks_valid*/ 1, 3185 /*command*/ 1, 3186 /*field*/ 1, 3187 /*bit_valid*/ 1, 3188 /*bit*/ 4); 3189 ctl_config_read_done(io); 3190 retval = CTL_RETVAL_COMPLETE; 3191 break; 3192 default: 3193 ctl_set_invalid_opcode(&io->scsiio); 3194 ctl_config_read_done(io); 3195 retval = CTL_RETVAL_COMPLETE; 3196 break; 3197 } 3198 3199 return (retval); 3200 } 3201 3202 static int 3203 ctl_be_block_nvme_config_read(union ctl_io *io) 3204 { 3205 struct ctl_be_block_lun *be_lun; 3206 3207 DPRINTF("entered\n"); 3208 3209 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 3210 3211 switch (io->nvmeio.cmd.opc) { 3212 case NVME_OPC_IDENTIFY: 3213 { 3214 uint8_t cns; 3215 3216 cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff; 3217 switch (cns) { 3218 case 0: 3219 case 3: 3220 mtx_lock(&be_lun->queue_lock); 3221 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 3222 &io->io_hdr, links); 3223 mtx_unlock(&be_lun->queue_lock); 3224 taskqueue_enqueue(be_lun->io_taskqueue, 3225 &be_lun->io_task); 3226 return (CTL_RETVAL_QUEUED); 3227 default: 3228 ctl_nvme_set_invalid_field(&io->nvmeio); 3229 ctl_config_read_done(io); 3230 break; 3231 } 3232 break; 3233 } 3234 default: 3235 ctl_nvme_set_invalid_opcode(&io->nvmeio); 3236 ctl_config_read_done(io); 3237 break; 3238 } 3239 return (CTL_RETVAL_COMPLETE); 3240 } 3241 3242 static int 3243 ctl_be_block_config_read(union ctl_io *io) 3244 { 3245 switch (io->io_hdr.io_type) { 3246 case CTL_IO_SCSI: 3247 return (ctl_be_block_scsi_config_read(io)); 3248 case CTL_IO_NVME_ADMIN: 3249 return (ctl_be_block_nvme_config_read(io)); 3250 default: 3251 __assert_unreachable(); 3252 } 3253 } 3254 3255 static int 3256 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb) 3257 { 3258 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 3259 int retval; 3260 3261 retval = sbuf_cat(sb, "\t<num_threads>"); 3262 if (retval != 0) 3263 goto bailout; 3264 retval = sbuf_printf(sb, "%d", lun->num_threads); 3265 if (retval != 0) 3266 goto bailout; 3267 retval = sbuf_cat(sb, "</num_threads>\n"); 3268 3269 bailout: 3270 return (retval); 3271 } 3272 3273 static uint64_t 3274 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname) 3275 { 3276 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 3277 3278 if (lun->getattr == NULL) 3279 return (UINT64_MAX); 3280 return (lun->getattr(lun, attrname)); 3281 } 3282 3283 static int 3284 ctl_be_block_init(void) 3285 { 3286 struct ctl_be_block_softc *softc = &backend_block_softc; 3287 3288 sx_init(&softc->modify_lock, "ctlblock modify"); 3289 mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); 3290 softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 3291 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3292 softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG, 3293 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 3294 if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) 3295 softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG, 3296 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 3297 SLIST_INIT(&softc->lun_list); 3298 return (0); 3299 } 3300 3301 static int 3302 ctl_be_block_shutdown(void) 3303 { 3304 struct ctl_be_block_softc *softc = &backend_block_softc; 3305 struct ctl_be_block_lun *lun; 3306 3307 mtx_lock(&softc->lock); 3308 while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) { 3309 SLIST_REMOVE_HEAD(&softc->lun_list, links); 3310 softc->num_luns--; 3311 /* 3312 * Drop our lock here. Since ctl_remove_lun() can call 3313 * back into us, this could potentially lead to a recursive 3314 * lock of the same mutex, which would cause a hang. 3315 */ 3316 mtx_unlock(&softc->lock); 3317 ctl_remove_lun(&lun->cbe_lun); 3318 mtx_lock(&softc->lock); 3319 } 3320 mtx_unlock(&softc->lock); 3321 uma_zdestroy(softc->bufmin_zone); 3322 if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) 3323 uma_zdestroy(softc->bufmax_zone); 3324 uma_zdestroy(softc->beio_zone); 3325 mtx_destroy(&softc->lock); 3326 sx_destroy(&softc->modify_lock); 3327 return (0); 3328 } 3329