1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2003 Silicon Graphics International Corp. 5 * Copyright (c) 2009-2011 Spectra Logic Corporation 6 * Copyright (c) 2012,2021 The FreeBSD Foundation 7 * Copyright (c) 2014-2021 Alexander Motin <mav@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Portions of this software were developed by Edward Tomasz Napierala 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org> 14 * under sponsorship from the FreeBSD Foundation. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions, and the following disclaimer, 21 * without modification. 22 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 23 * substantially similar to the "NO WARRANTY" disclaimer below 24 * ("Disclaimer") and any redistribution must be conditioned upon 25 * including a substantially similar Disclaimer requirement for further 26 * binary redistribution. 27 * 28 * NO WARRANTY 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 33 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 37 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 38 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 * POSSIBILITY OF SUCH DAMAGES. 40 * 41 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 42 */ 43 /* 44 * CAM Target Layer driver backend for block devices. 45 * 46 * Author: Ken Merry <ken@FreeBSD.org> 47 */ 48 #include <sys/cdefs.h> 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/kernel.h> 52 #include <sys/types.h> 53 #include <sys/kthread.h> 54 #include <sys/bio.h> 55 #include <sys/fcntl.h> 56 #include <sys/limits.h> 57 #include <sys/lock.h> 58 #include <sys/mutex.h> 59 #include <sys/condvar.h> 60 #include <sys/malloc.h> 61 #include <sys/conf.h> 62 #include <sys/ioccom.h> 63 #include <sys/queue.h> 64 #include <sys/sbuf.h> 65 #include <sys/endian.h> 66 #include <sys/uio.h> 67 #include <sys/buf.h> 68 #include <sys/taskqueue.h> 69 #include <sys/vnode.h> 70 #include <sys/namei.h> 71 #include <sys/mount.h> 72 #include <sys/disk.h> 73 #include <sys/fcntl.h> 74 #include <sys/filedesc.h> 75 #include <sys/filio.h> 76 #include <sys/proc.h> 77 #include <sys/pcpu.h> 78 #include <sys/module.h> 79 #include <sys/sdt.h> 80 #include <sys/devicestat.h> 81 #include <sys/sysctl.h> 82 #include <sys/nv.h> 83 #include <sys/dnv.h> 84 #include <sys/sx.h> 85 #include <sys/unistd.h> 86 87 #include <geom/geom.h> 88 89 #include <cam/cam.h> 90 #include <cam/scsi/scsi_all.h> 91 #include <cam/scsi/scsi_da.h> 92 #include <cam/ctl/ctl_io.h> 93 #include <cam/ctl/ctl.h> 94 #include <cam/ctl/ctl_backend.h> 95 #include <cam/ctl/ctl_ioctl.h> 96 #include <cam/ctl/ctl_ha.h> 97 
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is to allocate enough S/G space to handle at least 1MB I/Os.
 * On systems with small maxphys it can be 8 128KB segments.  On large systems
 * it can be up to 8 1MB segments.  I/Os larger than that we'll split.
 */
#define	CTLBLK_MAX_SEGS		8
#define	CTLBLK_HALF_SEGS	(CTLBLK_MAX_SEGS / 2)
#define	CTLBLK_MIN_SEG		(128 * 1024)
#define	CTLBLK_MAX_SEG		MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys))
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS)

#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

/* Backend-private per-I/O state stashed in the CTL I/O header. */
#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
/* LBA/length/flags arguments decoded by CTL for this I/O. */
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

SDT_PROVIDER_DEFINE(cbb);

typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;

/* Kind of object backing a LUN: none yet, a device vnode, or a plain file. */
typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_filedata {
	struct ucred *cred;	/* credentials used for file-backed I/O */
};

union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

/* Per-backing-type operation vectors (file vs. device vs. zvol). */
typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
				  const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;		/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;				/* path of backing file/device */
	ctl_be_block_type dev_type;
	struct vnode *vn;			/* backing vnode */
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;		/* read/write handler */
	cbb_dispatch_t lun_flush;		/* SYNCHRONIZE CACHE handler */
	cbb_dispatch_t unmap;			/* UNMAP handler */
	cbb_dispatch_t get_lba_status;		/* GET LBA STATUS handler */
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	SLIST_ENTRY(ctl_be_block_lun) links;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	/* Queues drained by the worker task; protected by queue_lock. */
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;
	struct mtx_padalign queue_lock;
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct sx			 modify_lock;
	struct mtx			 lock;
	int				 num_luns;
	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
	uma_zone_t			 beio_zone;
	uma_zone_t			 bufmin_zone;	/* CTLBLK_MIN_SEG buffers */
	uma_zone_t			 bufmax_zone;	/* CTLBLK_MAX_SEG buffers */
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
	int				refcnt;
	int				bio_cmd;
	int				two_sglists;
	int				num_segs;
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;
	int				first_error;
	uint64_t			first_error_offset;
	struct bintime			ds_t0;
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;
	uint64_t			io_offset;
	int				io_arg;
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

extern struct ctl_softc *control_softc;

static int cbb_num_threads = 32;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
	    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
	   &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
					  const char *attrname);
static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
					 const char *attrname);
static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			      int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
static int ctl_be_block_init(void);
static int ctl_be_block_shutdown(void);

static struct ctl_backend_driver
ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.shutdown = ctl_be_block_shutdown,
	.data_submit = ctl_be_block_submit,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

/*
 * Allocate a data buffer of 'len' bytes for one S/G segment.  Small
 * requests come from the CTLBLK_MIN_SEG zone, larger ones from the
 * CTLBLK_MAX_SEG zone.  May sleep (M_WAITOK); never returns NULL.
 */
static void
ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
    size_t len)
{

	if (len <= CTLBLK_MIN_SEG) {
		sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
	} else {
		KASSERT(len <= CTLBLK_MAX_SEG,
		    ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
		sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
	}
	sg->len = len;
}

/*
 * Return an S/G segment buffer to the zone it was allocated from,
 * keyed off the recorded segment length (must match ctl_alloc_seg()).
 */
static void
ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
{

	if (sg->len <= CTLBLK_MIN_SEG) {
		uma_zfree(softc->bufmin_zone, sg->addr);
	} else {
		KASSERT(sg->len <= CTLBLK_MAX_SEG,
		    ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
		uma_zfree(softc->bufmax_zone, sg->addr);
	}
}

/*
 * Allocate a zeroed per-I/O tracking structure with a single reference.
 * May sleep; never returns NULL.
 */
static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	beio->refcnt = 1;
	return (beio);
}

/*
 * Really free a beio: release every S/G segment buffer (and the second
 * S/G list used by COMPARE, stored in the upper half of sg_segs) and
 * then the beio itself.  Called only when the refcount hits zero.
 */
static void
ctl_real_free_beio(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_softc *softc = beio->softc;
	int i;

	for (i = 0; i < beio->num_segs; i++) {
		ctl_free_seg(softc, &beio->sg_segs[i]);

		/* For compare we had two equal S/G lists. */
		if (beio->two_sglists) {
			ctl_free_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
		}
	}

	uma_zfree(softc->beio_zone, beio);
}

/*
 * Atomically adjust the beio reference count by 'diff'; the last
 * reference dropped triggers the real free.
 */
static void
ctl_refcnt_beio(void *arg, int diff)
{
	struct ctl_be_block_io *beio = arg;

	if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
		ctl_real_free_beio(beio);
}

/* Drop one reference on the beio. */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{

	ctl_refcnt_beio(beio, -1);
}

/*
 * Finish backend processing of an I/O: run the continuation callback if
 * one is set, otherwise release the beio and report completion to CTL.
 */
static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;

	if (beio->beio_cont != NULL) {
		beio->beio_cont(beio);
	} else {
		ctl_free_beio(beio);
		ctl_data_submit_done(io);
	}
}

/*
 * Byte-compare two buffers; returns the index of the first differing
 * byte, or 'size' if they are equal.
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}

/*
 * COMPARE support: check the two S/G lists (expected data in the lower
 * half of sg_segs, read-back data in the upper half) byte by byte.  On
 * mismatch set MISCOMPARE sense with the byte offset in the INFO field;
 * otherwise report success.
 */
static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;
	uint8_t info[8];

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;
	for (i = 0; i < beio->num_segs; i++) {
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		if (res < beio->sg_segs[i].len)
			break;
	}
	if (i < beio->num_segs) {
		scsi_u64to8b(off, info);
		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
		    /*sense_key*/ SSD_KEY_MISCOMPARE,
		    /*asc*/ 0x1D, /*ascq*/ 0x00,
		    /*type*/ SSD_ELEM_INFO,
		    /*size*/ sizeof(info), /*data*/ &info,
		    /*type*/ SSD_ELEM_NONE);
	} else
		ctl_set_success(&io->scsiio);
}

/*
 * Datamove completion callback from CTL.  'samethr' is true when we are
 * still on the thread that started the datamove and may do blocking work
 * directly.
 */
static int
ctl_be_block_move_done(union ctl_io *io, bool samethr)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	DPRINTF("entered\n");
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read and compare commands.
	 */
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
		lbalen = ARGS(io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed successfully.
	 * If we were called synchronously in the original thread then just
	 * dispatch, otherwise we now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
	if (samethr) {
		be_lun->dispatch(be_lun, beio);
	} else {
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
	}
	return (0);
}

/*
 * Completion callback for bios issued to a device-backed LUN.  Records
 * the lowest-offset error seen across all bios of the request; when the
 * last outstanding bio finishes, closes out devstat accounting, maps any
 * error to SCSI sense, and either completes the I/O (write/flush/delete/
 * verify) or starts the datamove back to the initiator (read).
 */
static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio = bio->bio_caller1;
	struct ctl_be_block_lun *be_lun = beio->lun;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	int error;

	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0 &&
	    (beio->first_error == 0 ||
	     bio->bio_offset < beio->first_error_offset)) {
		beio->first_error = error;
		beio->first_error_offset = bio->bio_offset;
	}

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	error = beio->first_error;
	if (error != 0) {
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there is a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * SYNCHRONIZE CACHE on a file-backed LUN: fsync the backing vnode.
 * A nonzero beio->io_arg requests a non-blocking flush (MNT_NOWAIT).
 */
static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
	    LK_RETRY);
	error = VOP_FSYNC(be_lun->vn, beio->io_arg ?
	    MNT_NOWAIT : MNT_WAIT, curthread);
	VOP_UNLOCK(be_lun->vn);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there is a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");

/*
 * READ/WRITE (and VERIFY/COMPARE back-end reads) on a file-backed LUN,
 * done with VOP_READ/VOP_WRITE on the backing vnode using a kernel uio
 * built from the beio's S/G list.  DPO maps to IO_DIRECT, FUA on writes
 * to IO_SYNC.
 */
static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	size_t s;
	int error, flags, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array backing the uio. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		if (beio->beio_cont == NULL &&
		    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
			ctl_serseq_done(io);
		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn);
		SDT_PROBE0(cbb, , read, file_done);
		if (error == 0 && xuio.uio_resid > 0) {
			/*
			 * If we read less than requested (EOF), then
			 * we should clean the rest of the buffer.
			 */
			s = beio->io_len - xuio.uio_resid;
			for (i = 0; i < beio->num_segs; i++) {
				if (s >= beio->sg_segs[i].len) {
					s -= beio->sg_segs[i].len;
					continue;
				}
				bzero((uint8_t *)beio->sg_segs[i].addr + s,
				    beio->sg_segs[i].len - s);
				s = 0;
			}
		}
	} else {
		struct mount *mountpoint;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
		vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
		    be_lun->vn) | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into cache.)
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn);

		vn_finished_write(mountpoint);
		SDT_PROBE0(cbb, , write, file_done);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * GET LBA STATUS on a file-backed LUN: probe the backing file with
 * FIOSEEKHOLE/FIOSEEKDATA to classify the range starting at the
 * requested LBA as mapped (status 0) or deallocated (status 1).
 * When the probes are inconclusive, report "mapped" to the end of
 * the LUN as the safe default.
 */
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
		      struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn);

	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

/*
 * LUN attribute lookup for file-backed LUNs: "blocksused" from the
 * vnode's va_bytes, "blocksavail" from the filesystem's free space.
 * Returns UINT64_MAX when the attribute is unknown or unavailable.
 */
static uint64_t
ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct vattr vattr;
	struct statfs statfs;
	uint64_t val;
	int error;

	val = UINT64_MAX;
	if (be_lun->vn == NULL)
		return (val);
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	if (strcmp(attrname, "blocksused") == 0) {
		error =
		    VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
		if (error == 0)
			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
	}
	if (strcmp(attrname, "blocksavail") == 0 &&
	    !VN_IS_DOOMED(be_lun->vn)) {
		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
		if (error == 0)
			val = statfs.f_bavail * statfs.f_bsize /
			    be_lun->cbe_lun.blocksize;
	}
	VOP_UNLOCK(be_lun->vn);
	return (val);
}

/*
 * UNMAP (and WRITE SAME with the unmap bit) on a file-backed LUN, done
 * with vn_deallocate() hole punching.  io_offset == -1 marks the UNMAP
 * descriptor-list form: the ranges come from the ctl_ptr_len_flags
 * payload; otherwise a single [io_offset, io_len) range is deallocated.
 */
static void
ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	struct mount *mp;
	off_t off, len;
	int error;

	io = beio->io;
	file_data = &be_lun->backend.file;
	mp = NULL;
	error = 0;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	(void)vn_start_write(be_lun->vn, &mp, V_WAIT);
	vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
	if (beio->io_offset == -1) {
		/* Descriptor-list UNMAP; accumulate io_len for devstat. */
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)
		    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			off = (off_t)scsi_8btou64(buf->lba) *
			    be_lun->cbe_lun.blocksize;
			len = (off_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			error = vn_deallocate(be_lun->vn, &off, &len,
			    0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
			    NOCRED);
			if (error != 0)
				break;
		}
	} else {
		/* WRITE_SAME */
		off = beio->io_offset;
		len = beio->io_len;
		error = vn_deallocate(be_lun->vn, &off, &len, 0,
		    IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
	}
	VOP_UNLOCK(be_lun->vn);
	vn_finished_write(mp);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	switch (error) {
	case 0:
		ctl_set_success(&io->scsiio);
		break;
	case ENOSPC:
	case EDQUOT:
		ctl_set_space_alloc_fail(&io->scsiio);
		break;
	case EROFS:
	case EACCES:
		ctl_set_hw_write_protected(&io->scsiio);
		break;
	default:
		ctl_set_medium_error(&io->scsiio, false);
	}
	ctl_complete_beio(beio);
}

/*
 * READ/WRITE on a zvol-backed LUN: go straight to the character device's
 * d_read/d_write entry points with a kernel uio built from the S/G list.
 * DPO maps to IO_DIRECT, FUA on writes to IO_SYNC, as in the file case.
 */
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array backing the uio. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ) {
			if (beio->beio_cont == NULL &&
			    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
			error = csw->d_read(dev, &xuio, flags);
		} else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE0(cbb, , read, file_done);
	else
		SDT_PROBE0(cbb, , write, file_done);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * GET LBA STATUS on a zvol-backed LUN, using the cdev's d_ioctl with
 * FIOSEEKHOLE/FIOSEEKDATA; same classification logic as the file case.
 * If the device has gone away, report "mapped to the end" as the safe
 * default.
 */
static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
		      struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

/*
 * SYNCHRONIZE CACHE on a device-backed LUN: issue a single BIO_FLUSH;
 * completion (including the ENXIO no-device case) is handled by
 * ctl_be_block_biodone().
 */
static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	    = BIO_FLUSH;
	bio->bio_offset	    = 0;
	bio->bio_data	    = 0;
	bio->bio_done	    = ctl_be_block_biodone;
	bio->bio_caller1    = beio;
	bio->bio_pblkno	    = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}

/*
 * Issue BIO_DELETE bios covering [off, off + len) on the backing cdev,
 * splitting at the largest blocksize multiple that fits in LONG_MAX.
 * 'last' marks the final range of the request so send_complete is set
 * together with the last bio (under io_lock, to synchronize with
 * ctl_be_block_biodone()).
 */
static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
			     struct ctl_be_block_io *beio,
			     uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_dev	    = dev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno	    = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
1170 } 1171 } 1172 if (csw) 1173 dev_relthread(dev, ref); 1174 } 1175 1176 static void 1177 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1178 struct ctl_be_block_io *beio) 1179 { 1180 union ctl_io *io; 1181 struct ctl_ptr_len_flags *ptrlen; 1182 struct scsi_unmap_desc *buf, *end; 1183 uint64_t len; 1184 1185 io = beio->io; 1186 1187 DPRINTF("entered\n"); 1188 1189 binuptime(&beio->ds_t0); 1190 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1191 1192 if (beio->io_offset == -1) { 1193 beio->io_len = 0; 1194 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1195 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 1196 end = buf + ptrlen->len / sizeof(*buf); 1197 for (; buf < end; buf++) { 1198 len = (uint64_t)scsi_4btoul(buf->length) * 1199 be_lun->cbe_lun.blocksize; 1200 beio->io_len += len; 1201 ctl_be_block_unmap_dev_range(be_lun, beio, 1202 scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, 1203 len, (end - buf < 2) ? TRUE : FALSE); 1204 } 1205 } else 1206 ctl_be_block_unmap_dev_range(be_lun, beio, 1207 beio->io_offset, beio->io_len, TRUE); 1208 } 1209 1210 static void 1211 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 1212 struct ctl_be_block_io *beio) 1213 { 1214 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 1215 struct bio *bio; 1216 struct cdevsw *csw; 1217 struct cdev *dev; 1218 off_t cur_offset; 1219 int i, max_iosize, ref; 1220 1221 DPRINTF("entered\n"); 1222 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1223 1224 /* 1225 * We have to limit our I/O size to the maximum supported by the 1226 * backend device. 
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize <= 0)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = maxphys;

	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		/* Split each S/G segment into bios of at most max_iosize. */
		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	/* Nothing has been submitted yet, so no completion can race here. */
	beio->send_complete = 1;
	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

/*
 * Fetch a 64-bit GEOM attribute (DIOCGATTR) from the backing cdev.
 * Returns UINT64_MAX when the device, its ioctl entry point, or the
 * attribute is unavailable.
 */
static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct diocgattr_arg arg;
	struct cdevsw *csw;
	struct cdev *dev;
	int error, ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (UINT64_MAX);
	strlcpy(arg.name, attrname, sizeof(arg.name));
	arg.len = sizeof(arg.value.off);
	if (csw->d_ioctl) {
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
	} else
		error = ENODEV;
	dev_relthread(dev, ref);
	if (error != 0)
		return (UINT64_MAX);
	return (arg.value.off);
}

/*
 * Config-write dispatch for SYNCHRONIZE CACHE: convert the LBA range to
 * byte units and invoke the backend's flush method.  io_arg records the
 * state of the SSC_IMMED flag for the flush implementation.
 */
static void
ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
			      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;

	DPRINTF("entered\n");
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	beio->io_len = lbalen->len * cbe_lun->blocksize;
	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
	beio->bio_cmd = BIO_FLUSH;
	beio->ds_trans_type = DEVSTAT_NO_DATA;
	DPRINTF("SYNC\n");
	be_lun->lun_flush(be_lun, beio);
}

/*
 * Continuation for chunked WRITE SAME: free the finished beio and,
 * unless the I/O was aborted or already failed, run another
 * config-write pass for the remaining range.
 */
static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) !=
	      CTL_SUCCESS)) {
		ctl_config_write_done(io);
		return;
	}

	ctl_be_block_config_write(io);
}

/*
 * Config-write dispatch for WRITE SAME(10/16).  Validates the flags,
 * routes UNMAP/ANCHOR variants to the unmap backend, and otherwise
 * synthesizes up to CTLBLK_MAX_SEGS buffers of replicated block data.
 * If the range does not fit in one run, it reschedules itself via
 * ctl_be_block_cw_done_ws.
 */
static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_softc *softc = be_lun->softc;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

	/* Reject unknown flags, and UNMAP/ANCHOR without unmap support. */
	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	/* Physical block size/offset, used to align segment boundaries. */
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			/* Trim the segment to end on a physical-block
			 * boundary if possible. */
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		/* Replicate the single-block pattern across the segment. */
		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			if (lbalen->flags & SWS_NDOB) {
				/* No Data-Out Buffer: write zeroes. */
				memset(buf, 0, cbe_lun->blocksize);
			} else {
				memcpy(buf, io->scsiio.kern_data_ptr,
				    cbe_lun->blocksize);
			}
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We can not do all in one run. Correct and schedule rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}

/*
 * Config-write dispatch for UNMAP: validate the flags and hand the
 * descriptor list to the unmap backend (io_offset == -1 signals
 * "parse the descriptor list").
 */
static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
			       union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_ptr_len_flags *ptrlen;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 0,
				      /*command*/ 1,
				      /*field*/ 0,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	beio->io_len = 0;
	beio->io_offset = -1;	/* unmap backend walks the descriptors */
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;
	DPRINTF("UNMAP\n");
	be_lun->unmap(be_lun, beio);
}

/* Completion hook for config reads: release the beio, finish the I/O. */
static void
ctl_be_block_cr_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_read_done(io);
}

/*
 * Dispatch a config-read request (currently only SERVICE ACTION IN /
 * GET LBA STATUS) to the matching backend method.
 */
static void
ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cr_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
		beio->bio_cmd = -1;
		beio->ds_trans_type = DEVSTAT_NO_DATA;
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		beio->io_len = 0;
		if (be_lun->get_lba_status)
			be_lun->get_lba_status(be_lun, beio);
		else
			ctl_be_block_cr_done(beio);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

/* Completion hook for config writes: release the beio, finish the I/O. */
static void
ctl_be_block_cw_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_write_done(io);
}

/*
 * Dispatch a config-write request (SYNCHRONIZE CACHE, WRITE SAME, UNMAP)
 * to the matching handler, mapping the SCSI tag to a devstat tag first.
 */
static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cw_done;
	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
		ctl_be_block_cw_dispatch_sync(be_lun, io);
		break;
	case WRITE_SAME_10:
	case WRITE_SAME_16:
		ctl_be_block_cw_dispatch_ws(be_lun, io);
		break;
	case UNMAP:
		ctl_be_block_cw_dispatch_unmap(be_lun, io);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");

/*
 * Continuation for oversized READ/WRITE requests: free the finished beio
 * and, unless the I/O was aborted or already failed, requeue it on the
 * input queue so the worker issues the next chunk.
 */
static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	/* Reset the status so the next chunk starts clean. */
	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}

/*
 * Build a beio for one chunk (at most CTLBLK_MAX_IO_SIZE) of a READ/
 * WRITE/COMPARE request, allocate its S/G list, and either dispatch it
 * to the backend (read) or start the datamove to fetch data (write).
 * PRIV(io)->len tracks how many LBAs of the request are covered so far.
 */
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
		      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE0(cbb, , write, start);
	} else {
		SDT_PROBE0(cbb, , read, start);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	lbas = CTLBLK_MAX_IO_SIZE;
	if (lbalen->flags & CTL_LLF_COMPARE) {
		/* COMPARE carries two copies of the data; halve the chunk. */
		beio->two_sglists = 1;
		lbas /= 2;
	}
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		ctl_alloc_seg(softc, &beio->sg_segs[i],
		    MIN(CTLBLK_MAX_SEG, len_left));

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (beio->two_sglists) {
			ctl_alloc_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS],
			    beio->sg_segs[i].len);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	/* More chunks remain: chain another pass via ctl_be_block_next. */
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (beio->two_sglists)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->scsiio.kern_data_ref = ctl_refcnt_beio;
	io->scsiio.kern_data_arg = beio;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, alloc_done);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE0(cbb, , write, alloc_done);
		ctl_datamove(io);
	}
}

/*
 * Taskqueue worker for a LUN: repeatedly drain the four per-LUN queues
 * (datamove, config write, config read, input) in priority order,
 * dispatching each io, until all are empty.
 */
static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect LUN
	 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
	 * so make response maximally opaque to not confuse initiator.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_complete_beio(beio);
				continue;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_config_write_done(io);
				continue;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_config_read_done(io);
				continue;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_data_submit_done(io);
				continue;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}

/*
 * Entry point from CTL to the backend for I/O.  We queue everything to a
 * work thread, so this just puts the I/O on a queue and wakes up the
 * thread.
 */
static int
ctl_be_block_submit(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
	    ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));

	/* PRIV(io)->len tracks chunking progress; start at zero. */
	PRIV(io)->len = 0;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (CTL_RETVAL_COMPLETE);
}

/*
 * Character-device ioctl handler for the block backend: accepts
 * CTL_LUN_REQ create/remove/modify requests from userland.
 */
static int
ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
		   int flag, struct thread *td)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	int error;

	error = 0;
	switch (cmd) {
	case CTL_LUN_REQ: {
		struct ctl_lun_req *lun_req;

		lun_req = (struct ctl_lun_req *)addr;

		switch (lun_req->reqtype) {
		case CTL_LUNREQ_CREATE:
			error = ctl_be_block_create(softc, lun_req);
			break;
		case CTL_LUNREQ_RM:
			error = ctl_be_block_rm(softc, lun_req);
			break;
		case CTL_LUNREQ_MODIFY:
			error = ctl_be_block_modify(softc, lun_req);
			break;
		default:
			lun_req->status = CTL_LUN_ERROR;
			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
				 "invalid LUN request type %d",
				 lun_req->reqtype);
			break;
		}
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

/*
 * Finish opening a regular-file backing store: install the file-backend
 * method pointers, size the LUN, and derive logical/physical/UNMAP block
 * geometry from the vnode attributes and LUN options.  On error, fills
 * req->error_str and returns an errno.
 */
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	const char *value;
	struct vattr vattr;
	off_t ps, pss, po, pos, us, uss, uo, uos;
	int error;
	long pconf;

	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = ctl_be_block_unmap_file;
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	/* Enable UNMAP only if the filesystem supports hole punching. */
	error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_PATHCONF() for file %s",
			 be_lun->dev_path);
		return (error);
	}
	if (pconf == 1)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;

	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger and multiple to chosen
	 * logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = 2048;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	us = ps = vattr.va_blocksize;
	uo = po = 0;

	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/* Accept only power-of-2 multiples of the logical block size. */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.
	 * The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}

/*
 * Finish opening a cdev (disk or zvol) backing store: select dispatch
 * methods (zvols get direct dispatch and GET LBA STATUS support),
 * validate blocksize against the device, size the LUN, and probe
 * stripe/UNMAP capabilities via disk ioctls.  On error, fills
 * req->error_str and returns an errno.
 */
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw *csw;
	struct cdev *dev;
	const char *value;
	int error, atomic, maxio, ref, unmap, tmp;
	off_t ps, pss, po, pos, us, uss, uo, uos, otmp;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	if (strcmp(csw->d_name, "zvol") == 0) {
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_SEG)
			maxio = CTLBLK_MAX_SEG;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
	    curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = MAX(tmp, 2048);
	else
		cbe_lun->blocksize = tmp;

	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
	    curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 " ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/* Stripe geometry seeds the physical/UNMAP block size defaults. */
	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
	    curthread);
	if (error)
		ps = po = 0;
	else {
		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
		    FREAD, curthread);
		if (error)
			po = 0;
	}
	us = ps;
	uo = po;

	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/* Accept only power-of-2 multiples of the logical block size. */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;

	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
		unmap = 1;
	} else {
		struct diocgattr_arg arg;

		/* Ask GEOM whether the provider supports BIO_DELETE. */
		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
		unmap = (error == 0) ? arg.value.i : 0;
	}
	/* The "unmap" LUN option overrides the probed capability. */
	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
	if (value != NULL)
		unmap = (strcmp(value, "on") == 0);
	if (unmap)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	dev_relthread(dev, ref);
	return (0);
}

/*
 * Release the backing vnode and any backend-specific state (the file
 * backend's cached credential).
 */
static int
ctl_be_block_close(struct ctl_be_block_lun *be_lun)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	int flags;

	if (be_lun->vn) {
		flags = FREAD;
		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
			flags |= FWRITE;
		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
		be_lun->vn = NULL;

		switch (be_lun->dev_type) {
		case CTL_BE_BLOCK_DEV:
			break;
		case CTL_BE_BLOCK_FILE:
			if (be_lun->backend.file.cred != NULL) {
				crfree(be_lun->backend.file.cred);
				be_lun->backend.file.cred = NULL;
			}
			break;
		case CTL_BE_BLOCK_NONE:
			break;
		default:
			panic("Unexpected backend type %d", be_lun->dev_type);
			break;
		}
		be_lun->dev_type = CTL_BE_BLOCK_NONE;
	}
	return (0);
}

/*
 * Open the backing store named by the "file" LUN option (device node or
 * regular file), falling back to read-only and to a /dev/ prefix as
 * needed, then delegate to the dev- or file-specific open routine and
 * configure command serialization (serseq).
 */
static int
ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	const char *value;
	int error, flags;

	error = 0;
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "Root filesystem is not mounted");
		return (1);
	}
	pwd_ensure_dirs();

	value = dnvlist_get_string(cbe_lun->options, "file", NULL);
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no file argument specified");
		return (1);
	}
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	flags = FREAD;
	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
	if (value != NULL) {
if (strcmp(value, "on") != 0) 2226 flags |= FWRITE; 2227 } else if (cbe_lun->lun_type == T_DIRECT) 2228 flags |= FWRITE; 2229 2230 again: 2231 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path); 2232 error = vn_open(&nd, &flags, 0, NULL); 2233 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { 2234 flags &= ~FWRITE; 2235 goto again; 2236 } 2237 if (error) { 2238 /* 2239 * This is the only reasonable guess we can make as far as 2240 * path if the user doesn't give us a fully qualified path. 2241 * If they want to specify a file, they need to specify the 2242 * full path. 2243 */ 2244 if (be_lun->dev_path[0] != '/') { 2245 char *dev_name; 2246 2247 asprintf(&dev_name, M_CTLBLK, "/dev/%s", 2248 be_lun->dev_path); 2249 free(be_lun->dev_path, M_CTLBLK); 2250 be_lun->dev_path = dev_name; 2251 goto again; 2252 } 2253 snprintf(req->error_str, sizeof(req->error_str), 2254 "error opening %s: %d", be_lun->dev_path, error); 2255 return (error); 2256 } 2257 if (flags & FWRITE) 2258 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; 2259 else 2260 cbe_lun->flags |= CTL_LUN_FLAG_READONLY; 2261 2262 NDFREE_PNBUF(&nd); 2263 be_lun->vn = nd.ni_vp; 2264 2265 /* We only support disks and files. 
*/ 2266 if (vn_isdisk_error(be_lun->vn, &error)) { 2267 error = ctl_be_block_open_dev(be_lun, req); 2268 } else if (be_lun->vn->v_type == VREG) { 2269 error = ctl_be_block_open_file(be_lun, req); 2270 } else { 2271 error = EINVAL; 2272 snprintf(req->error_str, sizeof(req->error_str), 2273 "%s is not a disk or plain file", be_lun->dev_path); 2274 } 2275 VOP_UNLOCK(be_lun->vn); 2276 2277 if (error != 0) 2278 ctl_be_block_close(be_lun); 2279 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2280 if (be_lun->dispatch != ctl_be_block_dispatch_dev) 2281 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2282 value = dnvlist_get_string(cbe_lun->options, "serseq", NULL); 2283 if (value != NULL && strcmp(value, "on") == 0) 2284 cbe_lun->serseq = CTL_LUN_SERSEQ_ON; 2285 else if (value != NULL && strcmp(value, "read") == 0) 2286 cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2287 else if (value != NULL && strcmp(value, "soft") == 0) 2288 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2289 else if (value != NULL && strcmp(value, "off") == 0) 2290 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2291 return (0); 2292 } 2293 2294 static int 2295 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2296 { 2297 struct ctl_be_lun *cbe_lun; 2298 struct ctl_be_block_lun *be_lun; 2299 struct ctl_lun_create_params *params; 2300 char num_thread_str[16]; 2301 char tmpstr[32]; 2302 const char *value; 2303 int retval, num_threads; 2304 int tmp_num_threads; 2305 2306 params = &req->reqdata.create; 2307 retval = 0; 2308 req->status = CTL_LUN_OK; 2309 2310 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2311 cbe_lun = &be_lun->cbe_lun; 2312 be_lun->params = req->reqdata.create; 2313 be_lun->softc = softc; 2314 STAILQ_INIT(&be_lun->input_queue); 2315 STAILQ_INIT(&be_lun->config_read_queue); 2316 STAILQ_INIT(&be_lun->config_write_queue); 2317 STAILQ_INIT(&be_lun->datamove_queue); 2318 mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF); 2319 mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, 
MTX_DEF); 2320 cbe_lun->options = nvlist_clone(req->args_nvl); 2321 2322 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 2323 cbe_lun->lun_type = params->device_type; 2324 else 2325 cbe_lun->lun_type = T_DIRECT; 2326 be_lun->flags = 0; 2327 cbe_lun->flags = 0; 2328 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2329 if (value != NULL) { 2330 if (strcmp(value, "primary") == 0) 2331 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2332 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2333 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2334 2335 if (cbe_lun->lun_type == T_DIRECT || 2336 cbe_lun->lun_type == T_CDROM) { 2337 be_lun->size_bytes = params->lun_size_bytes; 2338 if (params->blocksize_bytes != 0) 2339 cbe_lun->blocksize = params->blocksize_bytes; 2340 else if (cbe_lun->lun_type == T_CDROM) 2341 cbe_lun->blocksize = 2048; 2342 else 2343 cbe_lun->blocksize = 512; 2344 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2345 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2346 0 : (be_lun->size_blocks - 1); 2347 2348 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2349 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2350 retval = ctl_be_block_open(be_lun, req); 2351 if (retval != 0) { 2352 retval = 0; 2353 req->status = CTL_LUN_WARNING; 2354 } 2355 } 2356 num_threads = cbb_num_threads; 2357 } else { 2358 num_threads = 1; 2359 } 2360 2361 value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL); 2362 if (value != NULL) { 2363 tmp_num_threads = strtol(value, NULL, 0); 2364 2365 /* 2366 * We don't let the user specify less than one 2367 * thread, but hope he's clueful enough not to 2368 * specify 1000 threads. 
2369 */ 2370 if (tmp_num_threads < 1) { 2371 snprintf(req->error_str, sizeof(req->error_str), 2372 "invalid number of threads %s", 2373 num_thread_str); 2374 goto bailout_error; 2375 } 2376 num_threads = tmp_num_threads; 2377 } 2378 2379 if (be_lun->vn == NULL) 2380 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2381 /* Tell the user the blocksize we ended up using */ 2382 params->lun_size_bytes = be_lun->size_bytes; 2383 params->blocksize_bytes = cbe_lun->blocksize; 2384 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2385 cbe_lun->req_lun_id = params->req_lun_id; 2386 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2387 } else 2388 cbe_lun->req_lun_id = 0; 2389 2390 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2391 cbe_lun->be = &ctl_be_block_driver; 2392 2393 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2394 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d", 2395 softc->num_luns); 2396 strncpy((char *)cbe_lun->serial_num, tmpstr, 2397 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2398 2399 /* Tell the user what we used for a serial number */ 2400 strncpy((char *)params->serial_num, tmpstr, 2401 MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2402 } else { 2403 strncpy((char *)cbe_lun->serial_num, params->serial_num, 2404 MIN(sizeof(cbe_lun->serial_num), 2405 sizeof(params->serial_num))); 2406 } 2407 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2408 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns); 2409 strncpy((char *)cbe_lun->device_id, tmpstr, 2410 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2411 2412 /* Tell the user what we used for a device ID */ 2413 strncpy((char *)params->device_id, tmpstr, 2414 MIN(sizeof(params->device_id), sizeof(tmpstr))); 2415 } else { 2416 strncpy((char *)cbe_lun->device_id, params->device_id, 2417 MIN(sizeof(cbe_lun->device_id), 2418 sizeof(params->device_id))); 2419 } 2420 2421 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2422 2423 be_lun->io_taskqueue = 
taskqueue_create("ctlblocktq", M_WAITOK, 2424 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2425 2426 if (be_lun->io_taskqueue == NULL) { 2427 snprintf(req->error_str, sizeof(req->error_str), 2428 "unable to create taskqueue"); 2429 goto bailout_error; 2430 } 2431 2432 /* 2433 * Note that we start the same number of threads by default for 2434 * both the file case and the block device case. For the file 2435 * case, we need multiple threads to allow concurrency, because the 2436 * vnode interface is designed to be a blocking interface. For the 2437 * block device case, ZFS zvols at least will block the caller's 2438 * context in many instances, and so we need multiple threads to 2439 * overcome that problem. Other block devices don't need as many 2440 * threads, but they shouldn't cause too many problems. 2441 * 2442 * If the user wants to just have a single thread for a block 2443 * device, he can specify that when the LUN is created, or change 2444 * the tunable/sysctl to alter the default number of threads. 
2445 */ 2446 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue, 2447 /*num threads*/num_threads, 2448 /*priority*/PUSER, 2449 /*proc*/control_softc->ctl_proc, 2450 /*thread name*/"block"); 2451 2452 if (retval != 0) 2453 goto bailout_error; 2454 2455 be_lun->num_threads = num_threads; 2456 2457 retval = ctl_add_lun(&be_lun->cbe_lun); 2458 if (retval != 0) { 2459 snprintf(req->error_str, sizeof(req->error_str), 2460 "ctl_add_lun() returned error %d, see dmesg for " 2461 "details", retval); 2462 retval = 0; 2463 goto bailout_error; 2464 } 2465 2466 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, 2467 cbe_lun->blocksize, 2468 DEVSTAT_ALL_SUPPORTED, 2469 cbe_lun->lun_type 2470 | DEVSTAT_TYPE_IF_OTHER, 2471 DEVSTAT_PRIORITY_OTHER); 2472 2473 mtx_lock(&softc->lock); 2474 softc->num_luns++; 2475 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); 2476 mtx_unlock(&softc->lock); 2477 2478 params->req_lun_id = cbe_lun->lun_id; 2479 2480 return (retval); 2481 2482 bailout_error: 2483 req->status = CTL_LUN_ERROR; 2484 2485 if (be_lun->io_taskqueue != NULL) 2486 taskqueue_free(be_lun->io_taskqueue); 2487 ctl_be_block_close(be_lun); 2488 if (be_lun->dev_path != NULL) 2489 free(be_lun->dev_path, M_CTLBLK); 2490 nvlist_destroy(cbe_lun->options); 2491 mtx_destroy(&be_lun->queue_lock); 2492 mtx_destroy(&be_lun->io_lock); 2493 free(be_lun, M_CTLBLK); 2494 2495 return (retval); 2496 } 2497 2498 static int 2499 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2500 { 2501 struct ctl_lun_rm_params *params; 2502 struct ctl_be_block_lun *be_lun; 2503 struct ctl_be_lun *cbe_lun; 2504 int retval; 2505 2506 params = &req->reqdata.rm; 2507 2508 sx_xlock(&softc->modify_lock); 2509 mtx_lock(&softc->lock); 2510 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2511 if (be_lun->cbe_lun.lun_id == params->lun_id) { 2512 SLIST_REMOVE(&softc->lun_list, be_lun, 2513 ctl_be_block_lun, links); 2514 softc->num_luns--; 2515 break; 2516 } 2517 } 2518 
	mtx_unlock(&softc->lock);
	sx_xunlock(&softc->modify_lock);
	/* SLIST_FOREACH leaves be_lun NULL when no LUN matched the ID. */
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "LUN %u is not managed by the block backend",
		    params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	/* Quiesce outstanding I/O and close the backing store first. */
	if (be_lun->vn != NULL) {
		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
		ctl_lun_no_media(cbe_lun);
		taskqueue_drain_all(be_lun->io_taskqueue);
		ctl_be_block_close(be_lun);
	}

	/*
	 * Mark ourselves as waiting so the shutdown callback wakes us
	 * instead of freeing be_lun itself.
	 */
	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
	mtx_unlock(&softc->lock);

	retval = ctl_remove_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "error %d returned from ctl_remove_lun() for "
		    "LUN %d", retval, params->lun_id);
		mtx_lock(&softc->lock);
		be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	/*
	 * Wait for ctl_be_block_lun_shutdown() to flag the LUN as
	 * unconfigured; PCATCH allows a signal to interrupt the wait.
	 */
	mtx_lock(&softc->lock);
	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
		if (retval == EINTR)
			break;
	}
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
		mtx_unlock(&softc->lock);
		free(be_lun, M_CTLBLK);
	} else {
		/*
		 * Interrupted before shutdown completed: ownership of
		 * be_lun stays with the shutdown callback, which frees it.
		 */
		mtx_unlock(&softc->lock);
		return (EINTR);
	}

	req->status = CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}

/*
 * Handle a LUN modify request: update options, HA role and size,
 * (re)opening or closing the backing store as the role requires.
 */
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	const char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "LUN %u is not managed by the block backend",
		    params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;

	/* Replace the option list when the caller supplied a new one. */
	if (req->args_nvl != NULL) {
		nvlist_destroy(cbe_lun->options);
		cbe_lun->options = nvlist_clone(req->args_nvl);
	}

	/* Recompute the HA role and notify CTL on a transition. */
	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		/* (Re)open the backing store to pick up new size/options. */
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(be_lun, req);
		else if (vn_isdisk_error(be_lun->vn, &error))
			error = ctl_be_block_open_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG) {
			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
			error = ctl_be_block_open_file(be_lun, req);
			VOP_UNLOCK(be_lun->vn);
		} else
			error = EINVAL;
		/* Propagate media presence changes to CTL. */
		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_has_media(cbe_lun);
		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
		    be_lun->vn == NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
		}
		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
	} else {
		/* Not primary: quiesce I/O and release the backing store. */
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	sx_xunlock(&softc->modify_lock);
	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	sx_xunlock(&softc->modify_lock);
	req->status = CTL_LUN_ERROR;
	return (0);
}

/*
 * CTL callback invoked when a LUN is being removed: drain and free the
 * worker taskqueue and release per-LUN resources.  be_lun itself is
 * freed here unless ctl_be_block_rm() is waiting on it, in which case
 * that thread is woken and frees it instead.
 */
static void
ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
	struct ctl_be_block_softc *softc = be_lun->softc;

	taskqueue_drain_all(be_lun->io_taskqueue);
	taskqueue_free(be_lun->io_taskqueue);
	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);
	nvlist_destroy(be_lun->cbe_lun.options);
	free(be_lun->dev_path, M_CTLBLK);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);

	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
		wakeup(be_lun);
	else
		free(be_lun, M_CTLBLK);
	mtx_unlock(&softc->lock);
}

/*
 * Dispatch configuration (non-data) write commands.  Cache syncs,
 * WRITE SAME and UNMAP are queued to the worker threads; START STOP
 * UNIT and PREVENT ALLOW are completed inline.
 */
static int
ctl_be_block_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	DPRINTF("entered\n");

	cbe_lun = CTL_BACKEND_LUN(io);
	be_lun = (struct ctl_be_block_lun *)cbe_lun;

	retval = 0;
	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
	case WRITE_SAME_10:
	case WRITE_SAME_16:
	case UNMAP:
		/*
		 * The upper level CTL code will filter out any CDBs with
		 * the immediate bit set and return the proper error.
		 *
		 * We don't really need to worry about what LBA range the
		 * user asked to be synced out. When they issue a sync
		 * cache command, we'll sync out the whole thing.
		 */
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
		    links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	case START_STOP_UNIT: {
		struct scsi_start_stop_unit *cdb;
		/* Scratch request; only its error_str may be written. */
		struct ctl_lun_req req;

		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
		/* Power-condition transitions are accepted as no-ops. */
		if ((cdb->how & SSS_PC_MASK) != 0) {
			ctl_set_success(&io->scsiio);
			ctl_config_write_done(io);
			break;
		}
		if (cdb->how & SSS_START) {
			/* START, optionally (re)loading ejected media. */
			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
				retval = ctl_be_block_open(be_lun, &req);
				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
				if (retval == 0) {
					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_has_media(cbe_lun);
				} else {
					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_no_media(cbe_lun);
				}
			}
			ctl_start_lun(cbe_lun);
		} else {
			/* STOP, optionally ejecting the media. */
			ctl_stop_lun(cbe_lun);
			if (cdb->how & SSS_LOEJ) {
				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
				ctl_lun_ejected(cbe_lun);
				if (be_lun->vn != NULL)
					ctl_be_block_close(be_lun);
			}
		}

		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	}
	case PREVENT_ALLOW:
		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_write_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}
/*
 * Dispatch configuration read commands.  The SGLS (GET LBA STATUS)
 * service action is queued to the worker threads; anything else is
 * rejected inline.
 */
static int
ctl_be_block_config_read(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	int retval = 0;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:
		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
			mtx_lock(&be_lun->queue_lock);
			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
			    &io->io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			taskqueue_enqueue(be_lun->io_taskqueue,
			    &be_lun->io_task);
			retval = CTL_RETVAL_QUEUED;
			break;
		}
		/* Unsupported service action: point at the offending field. */
		ctl_set_invalid_field(&io->scsiio,
		    /*sks_valid*/ 1,
		    /*command*/ 1,
		    /*field*/ 1,
		    /*bit_valid*/ 1,
		    /*bit*/ 4);
		ctl_config_read_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_read_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}

/*
 * Emit backend-specific LUN information (the worker thread count) as
 * XML into the supplied sbuf.  Returns the first sbuf_printf() error,
 * or 0 on success.
 */
static int
ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
{
	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
	int retval;

	retval = sbuf_printf(sb, "\t<num_threads>");
	if (retval != 0)
		goto bailout;
	retval = sbuf_printf(sb, "%d", lun->num_threads);
	if (retval != 0)
		goto bailout;
	retval = sbuf_printf(sb, "</num_threads>\n");

bailout:
	return (retval);
}

/*
 * Query a backend-specific LUN attribute.  UINT64_MAX means the backing
 * store provides no attribute callback (or, per the callback's own
 * contract, an unknown attribute).
 */
static uint64_t
ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
{
	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;

	if (lun->getattr == NULL)
		return (UINT64_MAX);
	return (lun->getattr(lun, attrname));
}

/*
 * Backend initialization: set up the locks, the UMA zones for I/O
 * descriptors and data segments, and the LUN list.
 */
static int
ctl_be_block_init(void)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;

	sx_init(&softc->modify_lock, "ctlblock modify");
	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
	softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG,
	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
	/* A separate max-segment zone is needed only if the sizes differ. */
	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
		softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG,
		    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
	SLIST_INIT(&softc->lun_list);
	return (0);
}

/*
 * Backend teardown: remove every remaining LUN, then destroy the zones
 * and locks created by ctl_be_block_init().
 */
static int
ctl_be_block_shutdown(void)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	struct ctl_be_block_lun *lun;

	mtx_lock(&softc->lock);
	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
		SLIST_REMOVE_HEAD(&softc->lun_list, links);
		softc->num_luns--;
		/*
		 * Drop our lock here. Since ctl_remove_lun() can call
		 * back into us, this could potentially lead to a recursive
		 * lock of the same mutex, which would cause a hang.
		 */
		mtx_unlock(&softc->lock);
		ctl_remove_lun(&lun->cbe_lun);
		mtx_lock(&softc->lock);
	}
	mtx_unlock(&softc->lock);
	uma_zdestroy(softc->bufmin_zone);
	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
		uma_zdestroy(softc->bufmax_zone);
	uma_zdestroy(softc->beio_zone);
	mtx_destroy(&softc->lock);
	sx_destroy(&softc->modify_lock);
	return (0);
}