1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003 Silicon Graphics International Corp. 5 * Copyright (c) 2009-2011 Spectra Logic Corporation 6 * Copyright (c) 2012,2021 The FreeBSD Foundation 7 * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Portions of this software were developed by Edward Tomasz Napierala 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org> 14 * under sponsorship from the FreeBSD Foundation. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions, and the following disclaimer, 21 * without modification. 22 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 23 * substantially similar to the "NO WARRANTY" disclaimer below 24 * ("Disclaimer") and any redistribution must be conditioned upon 25 * including a substantially similar Disclaimer requirement for further 26 * binary redistribution. 27 * 28 * NO WARRANTY 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 33 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 37 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 38 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 * POSSIBILITY OF SUCH DAMAGES. 40 * 41 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 42 */ 43 /* 44 * CAM Target Layer driver backend for block devices. 45 * 46 * Author: Ken Merry <ken@FreeBSD.org> 47 */ 48 #include <sys/cdefs.h> 49 __FBSDID("$FreeBSD$"); 50 51 #include <sys/param.h> 52 #include <sys/systm.h> 53 #include <sys/kernel.h> 54 #include <sys/types.h> 55 #include <sys/kthread.h> 56 #include <sys/bio.h> 57 #include <sys/fcntl.h> 58 #include <sys/limits.h> 59 #include <sys/lock.h> 60 #include <sys/mutex.h> 61 #include <sys/condvar.h> 62 #include <sys/malloc.h> 63 #include <sys/conf.h> 64 #include <sys/ioccom.h> 65 #include <sys/queue.h> 66 #include <sys/sbuf.h> 67 #include <sys/endian.h> 68 #include <sys/uio.h> 69 #include <sys/buf.h> 70 #include <sys/taskqueue.h> 71 #include <sys/vnode.h> 72 #include <sys/namei.h> 73 #include <sys/mount.h> 74 #include <sys/disk.h> 75 #include <sys/fcntl.h> 76 #include <sys/filedesc.h> 77 #include <sys/filio.h> 78 #include <sys/proc.h> 79 #include <sys/pcpu.h> 80 #include <sys/module.h> 81 #include <sys/sdt.h> 82 #include <sys/devicestat.h> 83 #include <sys/sysctl.h> 84 #include <sys/nv.h> 85 #include <sys/dnv.h> 86 #include <sys/sx.h> 87 #include <sys/unistd.h> 88 89 #include <geom/geom.h> 90 91 #include <cam/cam.h> 92 #include <cam/scsi/scsi_all.h> 93 #include <cam/scsi/scsi_da.h> 94 #include <cam/ctl/ctl_io.h> 95 #include <cam/ctl/ctl.h> 96 #include <cam/ctl/ctl_backend.h> 97 #include <cam/ctl/ctl_ioctl.h> 98 
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is that we'll allocate enough S/G space to hold a 1MB
 * I/O.  If we get an I/O larger than that, we'll split it.
 *
 * Segments come in two sizes (see ctl_alloc_seg()): buffers of at most
 * CTLBLK_MIN_SEG bytes and buffers of at most CTLBLK_MAX_SEG bytes.
 * CTLBLK_MAX_SEGS sizes the per-I/O S/G array; the second half of that
 * array (starting at CTLBLK_HALF_SEGS) holds the second S/G list used by
 * compare operations.
 */
#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
#define	CTLBLK_MIN_SEG		(128 * 1024)
#define	CTLBLK_MAX_SEG		MIN(CTLBLK_HALF_IO_SIZE, maxphys)
#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MIN_SEG, 1)
#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
#define	CTLBLK_NUM_SEGS		(CTLBLK_MAX_IO_SIZE / CTLBLK_MAX_SEG)

/* Debug tracing; compiles to nothing unless CTLBLK_DEBUG is defined. */
#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

/*
 * Accessors for the per-I/O private areas in the CTL I/O header:
 * PRIV() is the backend slot (its ptr member holds the beio), ARGS() is
 * the LBA/length/flags slot.
 */
#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

SDT_PROVIDER_DEFINE(cbb);

/* Per-LUN state flags. */
typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;

/* Kind of object backing a LUN. */
typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

/* File-backed LUN data: the credential passed to the vnode operations. */
struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

/* Per-backing-type method signatures stored in struct ctl_be_block_lun. */
typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
				  const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;		/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;			/* backing vnode for all I/O */
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;		/* read/write handler */
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_dispatch_t get_lba_status;
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;		/* devstat(9) accounting handle */
	ctl_be_block_lun_flags flags;
	SLIST_ENTRY(ctl_be_block_lun) links;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	/* Held around bio completion accounting and devstat_end_transaction(). */
	struct mtx_padalign io_lock;
	/* Held while inserting into the queues above (see move_done). */
	struct mtx_padalign queue_lock;
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct sx			 modify_lock;
	struct mtx			 lock;
	int				 num_luns;
	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
	uma_zone_t			 beio_zone;	/* struct ctl_be_block_io */
	uma_zone_t			 bufmin_zone;	/* segments <= CTLBLK_MIN_SEG */
	uma_zone_t			 bufmax_zone;	/* segments <= CTLBLK_MAX_SEG */
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
	int				refcnt;		/* freed when it drops to 0 */
	int				bio_cmd;
	int				two_sglists;	/* 2nd list at CTLBLK_HALF_SEGS */
	int				num_segs;
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;	/* all bios have been issued */
	int				first_error;	/* error at lowest failing offset */
	uint64_t			first_error_offset;
	struct bintime			ds_t0;		/* devstat start timestamp */
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;
	uint64_t			io_offset;
	int				io_arg;
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

extern struct ctl_softc *control_softc;

/* Default for the kern.cam.ctl.block.num_threads tunable/sysctl. */
static int cbb_num_threads = 32;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
	    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
           &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
					 const char *attrname);
static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
					 const char *attrname);
static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
				    union ctl_io *io);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				    union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
				   int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
static int ctl_be_block_init(void);
static int ctl_be_block_shutdown(void);

/* Method table registered with CTL for the "block" backend. */
static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.shutdown = ctl_be_block_shutdown,
	.data_submit = ctl_be_block_submit,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

/*
 * Allocate the data buffer for one S/G entry and record its length.
 * Requests of at most CTLBLK_MIN_SEG bytes come from the small-buffer
 * zone, anything larger (up to CTLBLK_MAX_SEG) from the large-buffer
 * zone.  Uses M_WAITOK, so it may sleep.
 */
static void
ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
    size_t len)
{

	if (len <= CTLBLK_MIN_SEG) {
		sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
	} else {
		KASSERT(len <= CTLBLK_MAX_SEG,
		    ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
		sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
	}
	sg->len = len;
}

/*
 * Return an S/G entry's buffer to the zone it was taken from.  The zone
 * is chosen from sg->len with the same threshold ctl_alloc_seg() uses,
 * so sg->len must still hold the length given at allocation time.
 */
static void
ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
{

	if (sg->len <= CTLBLK_MIN_SEG) {
		uma_zfree(softc->bufmin_zone, sg->addr);
	} else {
		KASSERT(sg->len <= CTLBLK_MAX_SEG,
		    ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
		uma_zfree(softc->bufmax_zone, sg->addr);
	}
}

/*
 * Allocate a zeroed per-I/O structure (M_WAITOK, may sleep) holding a
 * single reference.
 */
static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	beio->refcnt = 1;
	return (beio);
}

/*
 * Free all data segments attached to the beio, then the beio itself.
 * Called once the reference count has dropped to zero.
 */
static void
ctl_real_free_beio(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_softc *softc = beio->softc;
	int i;

	for (i = 0; i < beio->num_segs; i++) {
		ctl_free_seg(softc, &beio->sg_segs[i]);

		/* For compare we had two equal S/G lists. */
		if (beio->two_sglists) {
			ctl_free_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
		}
	}

	uma_zfree(softc->beio_zone, beio);
}

/*
 * Atomically add diff to the beio reference count and free the beio if
 * the resulting count is zero.  arg is the struct ctl_be_block_io.
 */
static void
ctl_refcnt_beio(void *arg, int diff)
{
	struct ctl_be_block_io *beio = arg;

	if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
		ctl_real_free_beio(beio);
}

/* Drop one reference on the beio. */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{

	ctl_refcnt_beio(beio, -1);
}

/*
 * Finish backend processing of a beio.  If a continuation is installed it
 * takes over; otherwise our reference is dropped and the I/O is handed
 * back through ctl_data_submit_done().  The io pointer is read before the
 * beio may be freed.
 */
static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;

	if (beio->beio_cont != NULL) {
		beio->beio_cont(beio);
	} else {
		ctl_free_beio(beio);
		ctl_data_submit_done(io);
	}
}

/*
 * Return the index of the first byte at which a and b differ, or size if
 * the first size bytes are identical.
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}

/*
 * Compare the two S/G lists of a compare operation segment by segment
 * (list one at index i, list two at i + CTLBLK_HALF_SEGS).  On the first
 * difference, report MISCOMPARE sense with the byte offset of the
 * difference in the INFO element; otherwise set success status.
 */
static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;
	uint8_t info[8];

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;
	for (i = 0; i < beio->num_segs; i++) {
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		if (res < beio->sg_segs[i].len)
			break;
	}
	/* Leaving the loop early means some segment differed at offset off. */
	if (i < beio->num_segs) {
		scsi_u64to8b(off, info);
		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
		    /*sense_key*/ SSD_KEY_MISCOMPARE,
		    /*asc*/ 0x1D, /*ascq*/ 0x00,
		    /*type*/ SSD_ELEM_INFO,
		    /*size*/ sizeof(info), /*data*/ &info,
		    /*type*/ SSD_ELEM_NONE);
	} else
		ctl_set_success(&io->scsiio);
}

/*
 * Data-move completion callback.  Sets final status for reads and
 * compares, completes the I/O if nothing more is to be done, and otherwise
 * (a write whose data arrived successfully) either dispatches the backend
 * I/O directly or defers it to the LUN's task queue.  Always returns 0.
 */
static int
ctl_be_block_move_done(union ctl_io *io, bool samethr)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	DPRINTF("entered\n");
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read and compare commands.
	 */
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
		lbalen = ARGS(io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed successfully.
	 * If we were called synchronously in the original thread then just
	 * dispatch, otherwise we now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
	if (samethr) {
		be_lun->dispatch(be_lun, beio);
	} else {
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
	}
	return (0);
}

/*
 * bio completion callback (the beio is carried in bio_caller1).  Records
 * the error with the lowest offset, counts the completion, and, once all
 * issued bios have completed, ends the devstat transaction and either
 * reports the error, completes the I/O, or starts the data move for reads.
 */
static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio = bio->bio_caller1;
	struct ctl_be_block_lun *be_lun = beio->lun;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	int error;

	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	/* Keep the error that occurred at the lowest offset. */
	if (error != 0 &&
	    (beio->first_error == 0 ||
	     bio->bio_offset < beio->first_error_offset)) {
		beio->first_error = error;
		beio->first_error_offset = bio->bio_offset;
	}

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	error = beio->first_error;
	if (error != 0) {
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * Flush a file-backed LUN with VOP_FSYNC().  A non-zero beio->io_arg
 * selects MNT_NOWAIT, otherwise MNT_WAIT.  Any fsync error is reported as
 * an internal failure.
 */
static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
	    LK_RETRY);
	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");

/*
 * Perform a read or write against a file-backed LUN using
 * VOP_READ()/VOP_WRITE() with a kernel-space uio built from the beio's
 * S/G list.  CTL_LLF_DPO maps to IO_DIRECT; CTL_LLF_FUA on a write maps to
 * IO_SYNC.  On success a write/verify is completed here, while a read
 * proceeds to ctl_datamove().
 */
static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	size_t s;
	int error, flags, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		if (beio->beio_cont == NULL &&
		    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
			ctl_serseq_done(io);
		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn);
		SDT_PROBE0(cbb, , read, file_done);
		if (error == 0 && xuio.uio_resid > 0) {
			/*
			 * If we read less than requested (EOF), then
			 * we should clean the rest of the buffer.
			 */
			s = beio->io_len - xuio.uio_resid;
			for (i = 0; i < beio->num_segs; i++) {
				/* Skip segments that were filled completely. */
				if (s >= beio->sg_segs[i].len) {
					s -= beio->sg_segs[i].len;
					continue;
				}
				bzero((uint8_t *)beio->sg_segs[i].addr + s,
				    beio->sg_segs[i].len - s);
				s = 0;
			}
		}
	} else {
		struct mount *mountpoint;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
		vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
		    be_lun->vn) | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into cache.)
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn);

		vn_finished_write(mountpoint);
		SDT_PROBE0(cbb, , write, file_done);
        }

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data (space allocation
	 * failure, write protected, or "MEDIUM ERROR" for anything else)
	 * and return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			/* The SOFT case was already signalled before VOP_READ(). */
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * GET LBA STATUS for a file-backed LUN.  Probes the file with
 * FIOSEEKHOLE, then FIOSEEKDATA, starting at the requested LBA and fills
 * in a single descriptor: status 0 for mapped (or unknown) and 1 for
 * deallocated, with the length clamped to UINT32_MAX blocks.
 */
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn);

	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

/*
 * Return a LUN attribute for a file-backed LUN, in LUN blocks:
 * "blocksused" from the vnode's va_bytes, "blocksavail" from the
 * filesystem's f_bavail * f_bsize.  Returns UINT64_MAX when there is no
 * vnode, the attribute name is not recognised, or the lookup fails.
 */
static uint64_t
ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct vattr		vattr;
	struct statfs		statfs;
	uint64_t		val;
	int			error;

	val = UINT64_MAX;
	if (be_lun->vn == NULL)
		return (val);
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	if (strcmp(attrname, "blocksused") == 0) {
		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
		if (error == 0)
			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
	}
	if (strcmp(attrname, "blocksavail") == 0 &&
	    !VN_IS_DOOMED(be_lun->vn)) {
		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
		if (error == 0)
			val = statfs.f_bavail * statfs.f_bsize /
			    be_lun->cbe_lun.blocksize;
	}
	VOP_UNLOCK(be_lun->vn);
	return (val);
}

/*
 * Deallocate space in a file-backed LUN with vn_deallocate().  An
 * io_offset of -1 means the ranges come from a list of UNMAP descriptors
 * stored in the I/O's private area; otherwise a single range
 * (io_offset, io_len) is deallocated.  Processing stops at the first
 * error.
 */
static void
ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
		        struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	struct mount *mp;
	off_t off, len;
	int error;

	io = beio->io;
	file_data = &be_lun->backend.file;
	mp = NULL;
	error = 0;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	(void)vn_start_write(be_lun->vn, &mp, V_WAIT);
	vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
	if (beio->io_offset == -1) {
		/* UNMAP: walk the descriptor list, accumulating io_len. */
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)
		    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			off = (off_t)scsi_8btou64(buf->lba) *
			    be_lun->cbe_lun.blocksize;
			len = (off_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			error = vn_deallocate(be_lun->vn, &off, &len,
			    0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
			    NOCRED);
			if (error != 0)
				break;
		}
	} else {
		/* WRITE_SAME */
		off = beio->io_offset;
		len = beio->io_len;
		error = vn_deallocate(be_lun->vn, &off, &len, 0,
		    IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
	}
	VOP_UNLOCK(be_lun->vn);
	vn_finished_write(mp);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Translate the result into SCSI status: success, space allocation
	 * failure, write protected, or "MEDIUM ERROR" for anything else,
	 * and return the I/O to the user.
	 */
	switch (error) {
	case 0:
		ctl_set_success(&io->scsiio);
		break;
	case ENOSPC:
	case EDQUOT:
		ctl_set_space_alloc_fail(&io->scsiio);
		break;
	case EROFS:
	case EACCES:
		ctl_set_hw_write_protected(&io->scsiio);
		break;
	default:
		ctl_set_medium_error(&io->scsiio, false);
	}
	ctl_complete_beio(beio);
}

/*
 * Perform a read or write through the character device's d_read/d_write
 * entry points with a kernel-space uio, instead of issuing bios.  Mirrors
 * ctl_be_block_dispatch_file(), except that failure to obtain a device
 * reference is reported as ENXIO.
 */
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ) {
			if (beio->beio_cont == NULL &&
			    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
			error = csw->d_read(dev, &xuio, flags);
		} else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE0(cbb, , read, file_done);
	else
		SDT_PROBE0(cbb, , write, file_done);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data (space allocation
	 * failure, write protected, or "MEDIUM ERROR" for anything else)
	 * and return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			/* The SOFT case was already signalled before d_read(). */
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * GET LBA STATUS through the character device's d_ioctl entry point.
 * Same descriptor semantics as ctl_be_block_gls_file(); if no device
 * reference can be obtained the range is reported as status 0 (unknown)
 * up to the end of the LUN.
 */
static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

/*
 * Flush a device-backed LUN by issuing a single BIO_FLUSH.  Completion is
 * handled by ctl_be_block_biodone(); if no device reference can be
 * obtained the bio is completed immediately with ENXIO.
 */
static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	    = BIO_FLUSH;
	bio->bio_offset	    = 0;
	bio->bio_data	    = 0;
	bio->bio_done	    = ctl_be_block_biodone;
	bio->bio_caller1    = beio;
	bio->bio_pblkno	    = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}

/*
 * Issue BIO_DELETE requests covering [off, off + len), split into chunks
 * of at most the largest block-aligned length not exceeding LONG_MAX.
 * When "last" is set, send_complete is raised together with the final
 * chunk so that ctl_be_block_biodone() can finish the I/O.
 *
 * NOTE(review): if len is 0 on entry the loop body never runs, so
 * send_complete is not set even when last is true -- presumably callers
 * never pass a zero-length final range; confirm against the caller.
 */
static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio,
		       uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_dev	    = dev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		/* Account for the bio before it can possibly complete. */
		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

/*
 * Deallocate space on a device-backed LUN.  An io_offset of -1 means the
 * ranges come from a list of UNMAP descriptors stored in the I/O's
 * private area (the final descriptor is flagged as last); otherwise a
 * single range (io_offset, io_len) is issued.
 */
static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	io = beio->io;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			/* (end - buf < 2) is true only for the final descriptor. */
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
			    len, (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}

static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.
1230 */ 1231 if (csw) { 1232 max_iosize = dev->si_iosize_max; 1233 if (max_iosize < PAGE_SIZE) 1234 max_iosize = DFLTPHYS; 1235 } else 1236 max_iosize = DFLTPHYS; 1237 1238 cur_offset = beio->io_offset; 1239 for (i = 0; i < beio->num_segs; i++) { 1240 size_t cur_size; 1241 uint8_t *cur_ptr; 1242 1243 cur_size = beio->sg_segs[i].len; 1244 cur_ptr = beio->sg_segs[i].addr; 1245 1246 while (cur_size > 0) { 1247 /* This can't fail, it's a blocking allocation. */ 1248 bio = g_alloc_bio(); 1249 1250 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 1251 1252 bio->bio_cmd = beio->bio_cmd; 1253 bio->bio_dev = dev; 1254 bio->bio_caller1 = beio; 1255 bio->bio_length = min(cur_size, max_iosize); 1256 bio->bio_offset = cur_offset; 1257 bio->bio_data = cur_ptr; 1258 bio->bio_done = ctl_be_block_biodone; 1259 bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; 1260 1261 cur_offset += bio->bio_length; 1262 cur_ptr += bio->bio_length; 1263 cur_size -= bio->bio_length; 1264 1265 TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 1266 beio->num_bios_sent++; 1267 } 1268 } 1269 beio->send_complete = 1; 1270 binuptime(&beio->ds_t0); 1271 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1272 1273 /* 1274 * Fire off all allocated requests! 
1275 */ 1276 while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1277 TAILQ_REMOVE(&queue, bio, bio_queue); 1278 if (csw) 1279 csw->d_strategy(bio); 1280 else { 1281 bio->bio_error = ENXIO; 1282 ctl_be_block_biodone(bio); 1283 } 1284 } 1285 if (csw) 1286 dev_relthread(dev, ref); 1287 } 1288 1289 static uint64_t 1290 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) 1291 { 1292 struct diocgattr_arg arg; 1293 struct cdevsw *csw; 1294 struct cdev *dev; 1295 int error, ref; 1296 1297 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1298 if (csw == NULL) 1299 return (UINT64_MAX); 1300 strlcpy(arg.name, attrname, sizeof(arg.name)); 1301 arg.len = sizeof(arg.value.off); 1302 if (csw->d_ioctl) { 1303 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 1304 curthread); 1305 } else 1306 error = ENODEV; 1307 dev_relthread(dev, ref); 1308 if (error != 0) 1309 return (UINT64_MAX); 1310 return (arg.value.off); 1311 } 1312 1313 static void 1314 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, 1315 union ctl_io *io) 1316 { 1317 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1318 struct ctl_be_block_io *beio; 1319 struct ctl_lba_len_flags *lbalen; 1320 1321 DPRINTF("entered\n"); 1322 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1323 lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1324 1325 beio->io_len = lbalen->len * cbe_lun->blocksize; 1326 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1327 beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; 1328 beio->bio_cmd = BIO_FLUSH; 1329 beio->ds_trans_type = DEVSTAT_NO_DATA; 1330 DPRINTF("SYNC\n"); 1331 be_lun->lun_flush(be_lun, beio); 1332 } 1333 1334 static void 1335 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1336 { 1337 union ctl_io *io; 1338 1339 io = beio->io; 1340 ctl_free_beio(beio); 1341 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1342 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1343 (io->io_hdr.status & CTL_STATUS_MASK) != 
CTL_SUCCESS)) { 1344 ctl_config_write_done(io); 1345 return; 1346 } 1347 1348 ctl_be_block_config_write(io); 1349 } 1350 1351 static void 1352 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1353 union ctl_io *io) 1354 { 1355 struct ctl_be_block_softc *softc = be_lun->softc; 1356 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1357 struct ctl_be_block_io *beio; 1358 struct ctl_lba_len_flags *lbalen; 1359 uint64_t len_left, lba; 1360 uint32_t pb, pbo, adj; 1361 int i, seglen; 1362 uint8_t *buf, *end; 1363 1364 DPRINTF("entered\n"); 1365 1366 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1367 lbalen = ARGS(io); 1368 1369 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || 1370 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { 1371 ctl_free_beio(beio); 1372 ctl_set_invalid_field(&io->scsiio, 1373 /*sks_valid*/ 1, 1374 /*command*/ 1, 1375 /*field*/ 1, 1376 /*bit_valid*/ 0, 1377 /*bit*/ 0); 1378 ctl_config_write_done(io); 1379 return; 1380 } 1381 1382 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { 1383 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1384 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; 1385 beio->bio_cmd = BIO_DELETE; 1386 beio->ds_trans_type = DEVSTAT_FREE; 1387 1388 be_lun->unmap(be_lun, beio); 1389 return; 1390 } 1391 1392 beio->bio_cmd = BIO_WRITE; 1393 beio->ds_trans_type = DEVSTAT_WRITE; 1394 1395 DPRINTF("WRITE SAME at LBA %jx len %u\n", 1396 (uintmax_t)lbalen->lba, lbalen->len); 1397 1398 pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; 1399 if (be_lun->cbe_lun.pblockoff > 0) 1400 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; 1401 else 1402 pbo = 0; 1403 len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; 1404 for (i = 0, lba = 0; i < CTLBLK_NUM_SEGS && len_left > 0; i++) { 1405 /* 1406 * Setup the S/G entry for this chunk. 
1407 */ 1408 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1409 if (pb > cbe_lun->blocksize) { 1410 adj = ((lbalen->lba + lba) * cbe_lun->blocksize + 1411 seglen - pbo) % pb; 1412 if (seglen > adj) 1413 seglen -= adj; 1414 else 1415 seglen -= seglen % cbe_lun->blocksize; 1416 } else 1417 seglen -= seglen % cbe_lun->blocksize; 1418 ctl_alloc_seg(softc, &beio->sg_segs[i], seglen); 1419 1420 DPRINTF("segment %d addr %p len %zd\n", i, 1421 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1422 1423 beio->num_segs++; 1424 len_left -= seglen; 1425 1426 buf = beio->sg_segs[i].addr; 1427 end = buf + seglen; 1428 for (; buf < end; buf += cbe_lun->blocksize) { 1429 if (lbalen->flags & SWS_NDOB) { 1430 memset(buf, 0, cbe_lun->blocksize); 1431 } else { 1432 memcpy(buf, io->scsiio.kern_data_ptr, 1433 cbe_lun->blocksize); 1434 } 1435 if (lbalen->flags & SWS_LBDATA) 1436 scsi_ulto4b(lbalen->lba + lba, buf); 1437 lba++; 1438 } 1439 } 1440 1441 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1442 beio->io_len = lba * cbe_lun->blocksize; 1443 1444 /* We can not do all in one run. Correct and schedule rerun. 
*/ 1445 if (len_left > 0) { 1446 lbalen->lba += lba; 1447 lbalen->len -= lba; 1448 beio->beio_cont = ctl_be_block_cw_done_ws; 1449 } 1450 1451 be_lun->dispatch(be_lun, beio); 1452 } 1453 1454 static void 1455 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1456 union ctl_io *io) 1457 { 1458 struct ctl_be_block_io *beio; 1459 struct ctl_ptr_len_flags *ptrlen; 1460 1461 DPRINTF("entered\n"); 1462 1463 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1464 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1465 1466 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { 1467 ctl_free_beio(beio); 1468 ctl_set_invalid_field(&io->scsiio, 1469 /*sks_valid*/ 0, 1470 /*command*/ 1, 1471 /*field*/ 0, 1472 /*bit_valid*/ 0, 1473 /*bit*/ 0); 1474 ctl_config_write_done(io); 1475 return; 1476 } 1477 1478 beio->io_len = 0; 1479 beio->io_offset = -1; 1480 beio->bio_cmd = BIO_DELETE; 1481 beio->ds_trans_type = DEVSTAT_FREE; 1482 DPRINTF("UNMAP\n"); 1483 be_lun->unmap(be_lun, beio); 1484 } 1485 1486 static void 1487 ctl_be_block_cr_done(struct ctl_be_block_io *beio) 1488 { 1489 union ctl_io *io; 1490 1491 io = beio->io; 1492 ctl_free_beio(beio); 1493 ctl_config_read_done(io); 1494 } 1495 1496 static void 1497 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 1498 union ctl_io *io) 1499 { 1500 struct ctl_be_block_io *beio; 1501 struct ctl_be_block_softc *softc; 1502 1503 DPRINTF("entered\n"); 1504 1505 softc = be_lun->softc; 1506 beio = ctl_alloc_beio(softc); 1507 beio->io = io; 1508 beio->lun = be_lun; 1509 beio->beio_cont = ctl_be_block_cr_done; 1510 PRIV(io)->ptr = (void *)beio; 1511 1512 switch (io->scsiio.cdb[0]) { 1513 case SERVICE_ACTION_IN: /* GET LBA STATUS */ 1514 beio->bio_cmd = -1; 1515 beio->ds_trans_type = DEVSTAT_NO_DATA; 1516 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1517 beio->io_len = 0; 1518 if (be_lun->get_lba_status) 1519 be_lun->get_lba_status(be_lun, beio); 1520 else 1521 ctl_be_block_cr_done(beio); 
1522 break; 1523 default: 1524 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1525 break; 1526 } 1527 } 1528 1529 static void 1530 ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1531 { 1532 union ctl_io *io; 1533 1534 io = beio->io; 1535 ctl_free_beio(beio); 1536 ctl_config_write_done(io); 1537 } 1538 1539 static void 1540 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1541 union ctl_io *io) 1542 { 1543 struct ctl_be_block_io *beio; 1544 struct ctl_be_block_softc *softc; 1545 1546 DPRINTF("entered\n"); 1547 1548 softc = be_lun->softc; 1549 beio = ctl_alloc_beio(softc); 1550 beio->io = io; 1551 beio->lun = be_lun; 1552 beio->beio_cont = ctl_be_block_cw_done; 1553 switch (io->scsiio.tag_type) { 1554 case CTL_TAG_ORDERED: 1555 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1556 break; 1557 case CTL_TAG_HEAD_OF_QUEUE: 1558 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1559 break; 1560 case CTL_TAG_UNTAGGED: 1561 case CTL_TAG_SIMPLE: 1562 case CTL_TAG_ACA: 1563 default: 1564 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1565 break; 1566 } 1567 PRIV(io)->ptr = (void *)beio; 1568 1569 switch (io->scsiio.cdb[0]) { 1570 case SYNCHRONIZE_CACHE: 1571 case SYNCHRONIZE_CACHE_16: 1572 ctl_be_block_cw_dispatch_sync(be_lun, io); 1573 break; 1574 case WRITE_SAME_10: 1575 case WRITE_SAME_16: 1576 ctl_be_block_cw_dispatch_ws(be_lun, io); 1577 break; 1578 case UNMAP: 1579 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1580 break; 1581 default: 1582 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1583 break; 1584 } 1585 } 1586 1587 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t"); 1588 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t"); 1589 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t"); 1590 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t"); 1591 1592 static void 1593 ctl_be_block_next(struct ctl_be_block_io *beio) 1594 { 1595 struct ctl_be_block_lun *be_lun; 1596 union ctl_io *io; 1597 1598 io = beio->io; 1599 be_lun = beio->lun; 1600 ctl_free_beio(beio); 1601 if 
((io->io_hdr.flags & CTL_FLAG_ABORT) || 1602 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1603 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1604 ctl_data_submit_done(io); 1605 return; 1606 } 1607 1608 io->io_hdr.status &= ~CTL_STATUS_MASK; 1609 io->io_hdr.status |= CTL_STATUS_NONE; 1610 1611 mtx_lock(&be_lun->queue_lock); 1612 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1613 mtx_unlock(&be_lun->queue_lock); 1614 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1615 } 1616 1617 static void 1618 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1619 union ctl_io *io) 1620 { 1621 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1622 struct ctl_be_block_io *beio; 1623 struct ctl_be_block_softc *softc; 1624 struct ctl_lba_len_flags *lbalen; 1625 struct ctl_ptr_len_flags *bptrlen; 1626 uint64_t len_left, lbas; 1627 int i; 1628 1629 softc = be_lun->softc; 1630 1631 DPRINTF("entered\n"); 1632 1633 lbalen = ARGS(io); 1634 if (lbalen->flags & CTL_LLF_WRITE) { 1635 SDT_PROBE0(cbb, , write, start); 1636 } else { 1637 SDT_PROBE0(cbb, , read, start); 1638 } 1639 1640 beio = ctl_alloc_beio(softc); 1641 beio->io = io; 1642 beio->lun = be_lun; 1643 bptrlen = PRIV(io); 1644 bptrlen->ptr = (void *)beio; 1645 1646 switch (io->scsiio.tag_type) { 1647 case CTL_TAG_ORDERED: 1648 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1649 break; 1650 case CTL_TAG_HEAD_OF_QUEUE: 1651 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1652 break; 1653 case CTL_TAG_UNTAGGED: 1654 case CTL_TAG_SIMPLE: 1655 case CTL_TAG_ACA: 1656 default: 1657 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1658 break; 1659 } 1660 1661 if (lbalen->flags & CTL_LLF_WRITE) { 1662 beio->bio_cmd = BIO_WRITE; 1663 beio->ds_trans_type = DEVSTAT_WRITE; 1664 } else { 1665 beio->bio_cmd = BIO_READ; 1666 beio->ds_trans_type = DEVSTAT_READ; 1667 } 1668 1669 DPRINTF("%s at LBA %jx len %u @%ju\n", 1670 (beio->bio_cmd == BIO_READ) ? 
"READ" : "WRITE", 1671 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 1672 if (lbalen->flags & CTL_LLF_COMPARE) { 1673 beio->two_sglists = 1; 1674 lbas = CTLBLK_HALF_IO_SIZE; 1675 } else { 1676 lbas = CTLBLK_MAX_IO_SIZE; 1677 } 1678 lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); 1679 beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; 1680 beio->io_len = lbas * cbe_lun->blocksize; 1681 bptrlen->len += lbas; 1682 1683 for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 1684 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 1685 i, CTLBLK_MAX_SEGS)); 1686 1687 /* 1688 * Setup the S/G entry for this chunk. 1689 */ 1690 ctl_alloc_seg(softc, &beio->sg_segs[i], 1691 MIN(CTLBLK_MAX_SEG, len_left)); 1692 1693 DPRINTF("segment %d addr %p len %zd\n", i, 1694 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1695 1696 /* Set up second segment for compare operation. */ 1697 if (beio->two_sglists) { 1698 ctl_alloc_seg(softc, 1699 &beio->sg_segs[i + CTLBLK_HALF_SEGS], 1700 beio->sg_segs[i].len); 1701 } 1702 1703 beio->num_segs++; 1704 len_left -= beio->sg_segs[i].len; 1705 } 1706 if (bptrlen->len < lbalen->len) 1707 beio->beio_cont = ctl_be_block_next; 1708 io->scsiio.be_move_done = ctl_be_block_move_done; 1709 /* For compare we have separate S/G lists for read and datamove. */ 1710 if (beio->two_sglists) 1711 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; 1712 else 1713 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1714 io->scsiio.kern_data_len = beio->io_len; 1715 io->scsiio.kern_sg_entries = beio->num_segs; 1716 io->scsiio.kern_data_ref = ctl_refcnt_beio; 1717 io->scsiio.kern_data_arg = beio; 1718 io->io_hdr.flags |= CTL_FLAG_ALLOCATED; 1719 1720 /* 1721 * For the read case, we need to read the data into our buffers and 1722 * then we can send it back to the user. For the write case, we 1723 * need to get the data from the user first. 
1724 */ 1725 if (beio->bio_cmd == BIO_READ) { 1726 SDT_PROBE0(cbb, , read, alloc_done); 1727 be_lun->dispatch(be_lun, beio); 1728 } else { 1729 SDT_PROBE0(cbb, , write, alloc_done); 1730 ctl_datamove(io); 1731 } 1732 } 1733 1734 static void 1735 ctl_be_block_worker(void *context, int pending) 1736 { 1737 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context; 1738 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1739 union ctl_io *io; 1740 struct ctl_be_block_io *beio; 1741 1742 DPRINTF("entered\n"); 1743 /* 1744 * Fetch and process I/Os from all queues. If we detect LUN 1745 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race, 1746 * so make response maximally opaque to not confuse initiator. 1747 */ 1748 for (;;) { 1749 mtx_lock(&be_lun->queue_lock); 1750 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1751 if (io != NULL) { 1752 DPRINTF("datamove queue\n"); 1753 STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links); 1754 mtx_unlock(&be_lun->queue_lock); 1755 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1756 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1757 ctl_set_busy(&io->scsiio); 1758 ctl_complete_beio(beio); 1759 continue; 1760 } 1761 be_lun->dispatch(be_lun, beio); 1762 continue; 1763 } 1764 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1765 if (io != NULL) { 1766 DPRINTF("config write queue\n"); 1767 STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links); 1768 mtx_unlock(&be_lun->queue_lock); 1769 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1770 ctl_set_busy(&io->scsiio); 1771 ctl_config_write_done(io); 1772 continue; 1773 } 1774 ctl_be_block_cw_dispatch(be_lun, io); 1775 continue; 1776 } 1777 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); 1778 if (io != NULL) { 1779 DPRINTF("config read queue\n"); 1780 STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links); 1781 mtx_unlock(&be_lun->queue_lock); 1782 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1783 ctl_set_busy(&io->scsiio); 1784 
ctl_config_read_done(io); 1785 continue; 1786 } 1787 ctl_be_block_cr_dispatch(be_lun, io); 1788 continue; 1789 } 1790 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 1791 if (io != NULL) { 1792 DPRINTF("input queue\n"); 1793 STAILQ_REMOVE_HEAD(&be_lun->input_queue, links); 1794 mtx_unlock(&be_lun->queue_lock); 1795 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1796 ctl_set_busy(&io->scsiio); 1797 ctl_data_submit_done(io); 1798 continue; 1799 } 1800 ctl_be_block_dispatch(be_lun, io); 1801 continue; 1802 } 1803 1804 /* 1805 * If we get here, there is no work left in the queues, so 1806 * just break out and let the task queue go to sleep. 1807 */ 1808 mtx_unlock(&be_lun->queue_lock); 1809 break; 1810 } 1811 } 1812 1813 /* 1814 * Entry point from CTL to the backend for I/O. We queue everything to a 1815 * work thread, so this just puts the I/O on a queue and wakes up the 1816 * thread. 1817 */ 1818 static int 1819 ctl_be_block_submit(union ctl_io *io) 1820 { 1821 struct ctl_be_block_lun *be_lun; 1822 1823 DPRINTF("entered\n"); 1824 1825 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 1826 1827 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, 1828 ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type)); 1829 1830 PRIV(io)->len = 0; 1831 1832 mtx_lock(&be_lun->queue_lock); 1833 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1834 mtx_unlock(&be_lun->queue_lock); 1835 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1836 1837 return (CTL_RETVAL_COMPLETE); 1838 } 1839 1840 static int 1841 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 1842 int flag, struct thread *td) 1843 { 1844 struct ctl_be_block_softc *softc = &backend_block_softc; 1845 int error; 1846 1847 error = 0; 1848 switch (cmd) { 1849 case CTL_LUN_REQ: { 1850 struct ctl_lun_req *lun_req; 1851 1852 lun_req = (struct ctl_lun_req *)addr; 1853 1854 switch (lun_req->reqtype) { 1855 case CTL_LUNREQ_CREATE: 1856 error = ctl_be_block_create(softc, lun_req); 1857 
break; 1858 case CTL_LUNREQ_RM: 1859 error = ctl_be_block_rm(softc, lun_req); 1860 break; 1861 case CTL_LUNREQ_MODIFY: 1862 error = ctl_be_block_modify(softc, lun_req); 1863 break; 1864 default: 1865 lun_req->status = CTL_LUN_ERROR; 1866 snprintf(lun_req->error_str, sizeof(lun_req->error_str), 1867 "invalid LUN request type %d", 1868 lun_req->reqtype); 1869 break; 1870 } 1871 break; 1872 } 1873 default: 1874 error = ENOTTY; 1875 break; 1876 } 1877 1878 return (error); 1879 } 1880 1881 static int 1882 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1883 { 1884 struct ctl_be_lun *cbe_lun; 1885 struct ctl_be_block_filedata *file_data; 1886 struct ctl_lun_create_params *params; 1887 const char *value; 1888 struct vattr vattr; 1889 off_t ps, pss, po, pos, us, uss, uo, uos; 1890 int error; 1891 long pconf; 1892 1893 cbe_lun = &be_lun->cbe_lun; 1894 file_data = &be_lun->backend.file; 1895 params = &be_lun->params; 1896 1897 be_lun->dev_type = CTL_BE_BLOCK_FILE; 1898 be_lun->dispatch = ctl_be_block_dispatch_file; 1899 be_lun->lun_flush = ctl_be_block_flush_file; 1900 be_lun->get_lba_status = ctl_be_block_gls_file; 1901 be_lun->getattr = ctl_be_block_getattr_file; 1902 be_lun->unmap = ctl_be_block_unmap_file; 1903 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 1904 1905 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 1906 if (error != 0) { 1907 snprintf(req->error_str, sizeof(req->error_str), 1908 "error calling VOP_GETATTR() for file %s", 1909 be_lun->dev_path); 1910 return (error); 1911 } 1912 1913 error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf); 1914 if (error != 0) { 1915 snprintf(req->error_str, sizeof(req->error_str), 1916 "error calling VOP_PATHCONF() for file %s", 1917 be_lun->dev_path); 1918 return (error); 1919 } 1920 if (pconf == 1) 1921 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; 1922 1923 file_data->cred = crhold(curthread->td_ucred); 1924 if (params->lun_size_bytes != 0) 1925 be_lun->size_bytes = 
params->lun_size_bytes; 1926 else 1927 be_lun->size_bytes = vattr.va_size; 1928 1929 /* 1930 * For files we can use any logical block size. Prefer 512 bytes 1931 * for compatibility reasons. If file's vattr.va_blocksize 1932 * (preferred I/O block size) is bigger and multiple to chosen 1933 * logical block size -- report it as physical block size. 1934 */ 1935 if (params->blocksize_bytes != 0) 1936 cbe_lun->blocksize = params->blocksize_bytes; 1937 else if (cbe_lun->lun_type == T_CDROM) 1938 cbe_lun->blocksize = 2048; 1939 else 1940 cbe_lun->blocksize = 512; 1941 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 1942 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 1943 0 : (be_lun->size_blocks - 1); 1944 1945 us = ps = vattr.va_blocksize; 1946 uo = po = 0; 1947 1948 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 1949 if (value != NULL) 1950 ctl_expand_number(value, &ps); 1951 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 1952 if (value != NULL) 1953 ctl_expand_number(value, &po); 1954 pss = ps / cbe_lun->blocksize; 1955 pos = po / cbe_lun->blocksize; 1956 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 1957 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 1958 cbe_lun->pblockexp = fls(pss) - 1; 1959 cbe_lun->pblockoff = (pss - pos) % pss; 1960 } 1961 1962 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 1963 if (value != NULL) 1964 ctl_expand_number(value, &us); 1965 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 1966 if (value != NULL) 1967 ctl_expand_number(value, &uo); 1968 uss = us / cbe_lun->blocksize; 1969 uos = uo / cbe_lun->blocksize; 1970 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 1971 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 1972 cbe_lun->ublockexp = fls(uss) - 1; 1973 cbe_lun->ublockoff = (uss - uos) % uss; 1974 } 1975 1976 /* 1977 * Sanity check. 
The media size has to be at least one 1978 * sector long. 1979 */ 1980 if (be_lun->size_bytes < cbe_lun->blocksize) { 1981 error = EINVAL; 1982 snprintf(req->error_str, sizeof(req->error_str), 1983 "file %s size %ju < block size %u", be_lun->dev_path, 1984 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); 1985 } 1986 1987 cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; 1988 return (error); 1989 } 1990 1991 static int 1992 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1993 { 1994 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1995 struct ctl_lun_create_params *params; 1996 struct cdevsw *csw; 1997 struct cdev *dev; 1998 const char *value; 1999 int error, atomic, maxio, ref, unmap, tmp; 2000 off_t ps, pss, po, pos, us, uss, uo, uos, otmp; 2001 2002 params = &be_lun->params; 2003 2004 be_lun->dev_type = CTL_BE_BLOCK_DEV; 2005 csw = devvn_refthread(be_lun->vn, &dev, &ref); 2006 if (csw == NULL) 2007 return (ENXIO); 2008 if (strcmp(csw->d_name, "zvol") == 0) { 2009 be_lun->dispatch = ctl_be_block_dispatch_zvol; 2010 be_lun->get_lba_status = ctl_be_block_gls_zvol; 2011 atomic = maxio = CTLBLK_MAX_IO_SIZE; 2012 } else { 2013 be_lun->dispatch = ctl_be_block_dispatch_dev; 2014 be_lun->get_lba_status = NULL; 2015 atomic = 0; 2016 maxio = dev->si_iosize_max; 2017 if (maxio <= 0) 2018 maxio = DFLTPHYS; 2019 if (maxio > CTLBLK_MAX_SEG) 2020 maxio = CTLBLK_MAX_SEG; 2021 } 2022 be_lun->lun_flush = ctl_be_block_flush_dev; 2023 be_lun->getattr = ctl_be_block_getattr_dev; 2024 be_lun->unmap = ctl_be_block_unmap_dev; 2025 2026 if (!csw->d_ioctl) { 2027 dev_relthread(dev, ref); 2028 snprintf(req->error_str, sizeof(req->error_str), 2029 "no d_ioctl for device %s!", be_lun->dev_path); 2030 return (ENODEV); 2031 } 2032 2033 error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, 2034 curthread); 2035 if (error) { 2036 dev_relthread(dev, ref); 2037 snprintf(req->error_str, sizeof(req->error_str), 2038 "error %d returned for 
DIOCGSECTORSIZE ioctl " 2039 "on %s!", error, be_lun->dev_path); 2040 return (error); 2041 } 2042 2043 /* 2044 * If the user has asked for a blocksize that is greater than the 2045 * backing device's blocksize, we can do it only if the blocksize 2046 * the user is asking for is an even multiple of the underlying 2047 * device's blocksize. 2048 */ 2049 if ((params->blocksize_bytes != 0) && 2050 (params->blocksize_bytes >= tmp)) { 2051 if (params->blocksize_bytes % tmp == 0) { 2052 cbe_lun->blocksize = params->blocksize_bytes; 2053 } else { 2054 dev_relthread(dev, ref); 2055 snprintf(req->error_str, sizeof(req->error_str), 2056 "requested blocksize %u is not an even " 2057 "multiple of backing device blocksize %u", 2058 params->blocksize_bytes, tmp); 2059 return (EINVAL); 2060 } 2061 } else if (params->blocksize_bytes != 0) { 2062 dev_relthread(dev, ref); 2063 snprintf(req->error_str, sizeof(req->error_str), 2064 "requested blocksize %u < backing device " 2065 "blocksize %u", params->blocksize_bytes, tmp); 2066 return (EINVAL); 2067 } else if (cbe_lun->lun_type == T_CDROM) 2068 cbe_lun->blocksize = MAX(tmp, 2048); 2069 else 2070 cbe_lun->blocksize = tmp; 2071 2072 error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD, 2073 curthread); 2074 if (error) { 2075 dev_relthread(dev, ref); 2076 snprintf(req->error_str, sizeof(req->error_str), 2077 "error %d returned for DIOCGMEDIASIZE " 2078 " ioctl on %s!", error, 2079 be_lun->dev_path); 2080 return (error); 2081 } 2082 2083 if (params->lun_size_bytes != 0) { 2084 if (params->lun_size_bytes > otmp) { 2085 dev_relthread(dev, ref); 2086 snprintf(req->error_str, sizeof(req->error_str), 2087 "requested LUN size %ju > backing device " 2088 "size %ju", 2089 (uintmax_t)params->lun_size_bytes, 2090 (uintmax_t)otmp); 2091 return (EINVAL); 2092 } 2093 2094 be_lun->size_bytes = params->lun_size_bytes; 2095 } else 2096 be_lun->size_bytes = otmp; 2097 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2098 
cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2099 0 : (be_lun->size_blocks - 1); 2100 2101 error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, 2102 curthread); 2103 if (error) 2104 ps = po = 0; 2105 else { 2106 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, 2107 FREAD, curthread); 2108 if (error) 2109 po = 0; 2110 } 2111 us = ps; 2112 uo = po; 2113 2114 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 2115 if (value != NULL) 2116 ctl_expand_number(value, &ps); 2117 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 2118 if (value != NULL) 2119 ctl_expand_number(value, &po); 2120 pss = ps / cbe_lun->blocksize; 2121 pos = po / cbe_lun->blocksize; 2122 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 2123 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 2124 cbe_lun->pblockexp = fls(pss) - 1; 2125 cbe_lun->pblockoff = (pss - pos) % pss; 2126 } 2127 2128 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 2129 if (value != NULL) 2130 ctl_expand_number(value, &us); 2131 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 2132 if (value != NULL) 2133 ctl_expand_number(value, &uo); 2134 uss = us / cbe_lun->blocksize; 2135 uos = uo / cbe_lun->blocksize; 2136 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 2137 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 2138 cbe_lun->ublockexp = fls(uss) - 1; 2139 cbe_lun->ublockoff = (uss - uos) % uss; 2140 } 2141 2142 cbe_lun->atomicblock = atomic / cbe_lun->blocksize; 2143 cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; 2144 2145 if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { 2146 unmap = 1; 2147 } else { 2148 struct diocgattr_arg arg; 2149 2150 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 2151 arg.len = sizeof(arg.value.i); 2152 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 2153 curthread); 2154 unmap = (error == 0) ? 
arg.value.i : 0; 2155 } 2156 value = dnvlist_get_string(cbe_lun->options, "unmap", NULL); 2157 if (value != NULL) 2158 unmap = (strcmp(value, "on") == 0); 2159 if (unmap) 2160 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; 2161 else 2162 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 2163 2164 dev_relthread(dev, ref); 2165 return (0); 2166 } 2167 2168 static int 2169 ctl_be_block_close(struct ctl_be_block_lun *be_lun) 2170 { 2171 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2172 int flags; 2173 2174 if (be_lun->vn) { 2175 flags = FREAD; 2176 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) 2177 flags |= FWRITE; 2178 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 2179 be_lun->vn = NULL; 2180 2181 switch (be_lun->dev_type) { 2182 case CTL_BE_BLOCK_DEV: 2183 break; 2184 case CTL_BE_BLOCK_FILE: 2185 if (be_lun->backend.file.cred != NULL) { 2186 crfree(be_lun->backend.file.cred); 2187 be_lun->backend.file.cred = NULL; 2188 } 2189 break; 2190 case CTL_BE_BLOCK_NONE: 2191 break; 2192 default: 2193 panic("Unexpected backend type %d", be_lun->dev_type); 2194 break; 2195 } 2196 be_lun->dev_type = CTL_BE_BLOCK_NONE; 2197 } 2198 return (0); 2199 } 2200 2201 static int 2202 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2203 { 2204 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2205 struct nameidata nd; 2206 const char *value; 2207 int error, flags; 2208 2209 error = 0; 2210 if (rootvnode == NULL) { 2211 snprintf(req->error_str, sizeof(req->error_str), 2212 "Root filesystem is not mounted"); 2213 return (1); 2214 } 2215 pwd_ensure_dirs(); 2216 2217 value = dnvlist_get_string(cbe_lun->options, "file", NULL); 2218 if (value == NULL) { 2219 snprintf(req->error_str, sizeof(req->error_str), 2220 "no file argument specified"); 2221 return (1); 2222 } 2223 free(be_lun->dev_path, M_CTLBLK); 2224 be_lun->dev_path = strdup(value, M_CTLBLK); 2225 2226 flags = FREAD; 2227 value = dnvlist_get_string(cbe_lun->options, "readonly", NULL); 2228 if (value != NULL) { 2229 
if (strcmp(value, "on") != 0) 2230 flags |= FWRITE; 2231 } else if (cbe_lun->lun_type == T_DIRECT) 2232 flags |= FWRITE; 2233 2234 again: 2235 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path); 2236 error = vn_open(&nd, &flags, 0, NULL); 2237 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { 2238 flags &= ~FWRITE; 2239 goto again; 2240 } 2241 if (error) { 2242 /* 2243 * This is the only reasonable guess we can make as far as 2244 * path if the user doesn't give us a fully qualified path. 2245 * If they want to specify a file, they need to specify the 2246 * full path. 2247 */ 2248 if (be_lun->dev_path[0] != '/') { 2249 char *dev_name; 2250 2251 asprintf(&dev_name, M_CTLBLK, "/dev/%s", 2252 be_lun->dev_path); 2253 free(be_lun->dev_path, M_CTLBLK); 2254 be_lun->dev_path = dev_name; 2255 goto again; 2256 } 2257 snprintf(req->error_str, sizeof(req->error_str), 2258 "error opening %s: %d", be_lun->dev_path, error); 2259 return (error); 2260 } 2261 if (flags & FWRITE) 2262 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; 2263 else 2264 cbe_lun->flags |= CTL_LUN_FLAG_READONLY; 2265 2266 NDFREE(&nd, NDF_ONLY_PNBUF); 2267 be_lun->vn = nd.ni_vp; 2268 2269 /* We only support disks and files. 
*/ 2270 if (vn_isdisk_error(be_lun->vn, &error)) { 2271 error = ctl_be_block_open_dev(be_lun, req); 2272 } else if (be_lun->vn->v_type == VREG) { 2273 error = ctl_be_block_open_file(be_lun, req); 2274 } else { 2275 error = EINVAL; 2276 snprintf(req->error_str, sizeof(req->error_str), 2277 "%s is not a disk or plain file", be_lun->dev_path); 2278 } 2279 VOP_UNLOCK(be_lun->vn); 2280 2281 if (error != 0) 2282 ctl_be_block_close(be_lun); 2283 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2284 if (be_lun->dispatch != ctl_be_block_dispatch_dev) 2285 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2286 value = dnvlist_get_string(cbe_lun->options, "serseq", NULL); 2287 if (value != NULL && strcmp(value, "on") == 0) 2288 cbe_lun->serseq = CTL_LUN_SERSEQ_ON; 2289 else if (value != NULL && strcmp(value, "read") == 0) 2290 cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2291 else if (value != NULL && strcmp(value, "soft") == 0) 2292 cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT; 2293 else if (value != NULL && strcmp(value, "off") == 0) 2294 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2295 return (0); 2296 } 2297 2298 static int 2299 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2300 { 2301 struct ctl_be_lun *cbe_lun; 2302 struct ctl_be_block_lun *be_lun; 2303 struct ctl_lun_create_params *params; 2304 char num_thread_str[16]; 2305 char tmpstr[32]; 2306 const char *value; 2307 int retval, num_threads; 2308 int tmp_num_threads; 2309 2310 params = &req->reqdata.create; 2311 retval = 0; 2312 req->status = CTL_LUN_OK; 2313 2314 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2315 cbe_lun = &be_lun->cbe_lun; 2316 be_lun->params = req->reqdata.create; 2317 be_lun->softc = softc; 2318 STAILQ_INIT(&be_lun->input_queue); 2319 STAILQ_INIT(&be_lun->config_read_queue); 2320 STAILQ_INIT(&be_lun->config_write_queue); 2321 STAILQ_INIT(&be_lun->datamove_queue); 2322 mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF); 2323 mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, 
MTX_DEF); 2324 cbe_lun->options = nvlist_clone(req->args_nvl); 2325 2326 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 2327 cbe_lun->lun_type = params->device_type; 2328 else 2329 cbe_lun->lun_type = T_DIRECT; 2330 be_lun->flags = 0; 2331 cbe_lun->flags = 0; 2332 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2333 if (value != NULL) { 2334 if (strcmp(value, "primary") == 0) 2335 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2336 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2337 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2338 2339 if (cbe_lun->lun_type == T_DIRECT || 2340 cbe_lun->lun_type == T_CDROM) { 2341 be_lun->size_bytes = params->lun_size_bytes; 2342 if (params->blocksize_bytes != 0) 2343 cbe_lun->blocksize = params->blocksize_bytes; 2344 else if (cbe_lun->lun_type == T_CDROM) 2345 cbe_lun->blocksize = 2048; 2346 else 2347 cbe_lun->blocksize = 512; 2348 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2349 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2350 0 : (be_lun->size_blocks - 1); 2351 2352 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2353 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2354 retval = ctl_be_block_open(be_lun, req); 2355 if (retval != 0) { 2356 retval = 0; 2357 req->status = CTL_LUN_WARNING; 2358 } 2359 } 2360 num_threads = cbb_num_threads; 2361 } else { 2362 num_threads = 1; 2363 } 2364 2365 value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL); 2366 if (value != NULL) { 2367 tmp_num_threads = strtol(value, NULL, 0); 2368 2369 /* 2370 * We don't let the user specify less than one 2371 * thread, but hope he's clueful enough not to 2372 * specify 1000 threads. 
2373 */ 2374 if (tmp_num_threads < 1) { 2375 snprintf(req->error_str, sizeof(req->error_str), 2376 "invalid number of threads %s", 2377 num_thread_str); 2378 goto bailout_error; 2379 } 2380 num_threads = tmp_num_threads; 2381 } 2382 2383 if (be_lun->vn == NULL) 2384 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2385 /* Tell the user the blocksize we ended up using */ 2386 params->lun_size_bytes = be_lun->size_bytes; 2387 params->blocksize_bytes = cbe_lun->blocksize; 2388 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2389 cbe_lun->req_lun_id = params->req_lun_id; 2390 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2391 } else 2392 cbe_lun->req_lun_id = 0; 2393 2394 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2395 cbe_lun->be = &ctl_be_block_driver; 2396 2397 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2398 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d", 2399 softc->num_luns); 2400 strncpy((char *)cbe_lun->serial_num, tmpstr, 2401 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2402 2403 /* Tell the user what we used for a serial number */ 2404 strncpy((char *)params->serial_num, tmpstr, 2405 MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2406 } else { 2407 strncpy((char *)cbe_lun->serial_num, params->serial_num, 2408 MIN(sizeof(cbe_lun->serial_num), 2409 sizeof(params->serial_num))); 2410 } 2411 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2412 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns); 2413 strncpy((char *)cbe_lun->device_id, tmpstr, 2414 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2415 2416 /* Tell the user what we used for a device ID */ 2417 strncpy((char *)params->device_id, tmpstr, 2418 MIN(sizeof(params->device_id), sizeof(tmpstr))); 2419 } else { 2420 strncpy((char *)cbe_lun->device_id, params->device_id, 2421 MIN(sizeof(cbe_lun->device_id), 2422 sizeof(params->device_id))); 2423 } 2424 2425 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2426 2427 be_lun->io_taskqueue = 
taskqueue_create("ctlblocktq", M_WAITOK, 2428 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2429 2430 if (be_lun->io_taskqueue == NULL) { 2431 snprintf(req->error_str, sizeof(req->error_str), 2432 "unable to create taskqueue"); 2433 goto bailout_error; 2434 } 2435 2436 /* 2437 * Note that we start the same number of threads by default for 2438 * both the file case and the block device case. For the file 2439 * case, we need multiple threads to allow concurrency, because the 2440 * vnode interface is designed to be a blocking interface. For the 2441 * block device case, ZFS zvols at least will block the caller's 2442 * context in many instances, and so we need multiple threads to 2443 * overcome that problem. Other block devices don't need as many 2444 * threads, but they shouldn't cause too many problems. 2445 * 2446 * If the user wants to just have a single thread for a block 2447 * device, he can specify that when the LUN is created, or change 2448 * the tunable/sysctl to alter the default number of threads. 
2449 */ 2450 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue, 2451 /*num threads*/num_threads, 2452 /*priority*/PUSER, 2453 /*proc*/control_softc->ctl_proc, 2454 /*thread name*/"block"); 2455 2456 if (retval != 0) 2457 goto bailout_error; 2458 2459 be_lun->num_threads = num_threads; 2460 2461 retval = ctl_add_lun(&be_lun->cbe_lun); 2462 if (retval != 0) { 2463 snprintf(req->error_str, sizeof(req->error_str), 2464 "ctl_add_lun() returned error %d, see dmesg for " 2465 "details", retval); 2466 retval = 0; 2467 goto bailout_error; 2468 } 2469 2470 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, 2471 cbe_lun->blocksize, 2472 DEVSTAT_ALL_SUPPORTED, 2473 cbe_lun->lun_type 2474 | DEVSTAT_TYPE_IF_OTHER, 2475 DEVSTAT_PRIORITY_OTHER); 2476 2477 mtx_lock(&softc->lock); 2478 softc->num_luns++; 2479 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); 2480 mtx_unlock(&softc->lock); 2481 2482 params->req_lun_id = cbe_lun->lun_id; 2483 2484 return (retval); 2485 2486 bailout_error: 2487 req->status = CTL_LUN_ERROR; 2488 2489 if (be_lun->io_taskqueue != NULL) 2490 taskqueue_free(be_lun->io_taskqueue); 2491 ctl_be_block_close(be_lun); 2492 if (be_lun->dev_path != NULL) 2493 free(be_lun->dev_path, M_CTLBLK); 2494 nvlist_destroy(cbe_lun->options); 2495 mtx_destroy(&be_lun->queue_lock); 2496 mtx_destroy(&be_lun->io_lock); 2497 free(be_lun, M_CTLBLK); 2498 2499 return (retval); 2500 } 2501 2502 static int 2503 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2504 { 2505 struct ctl_lun_rm_params *params; 2506 struct ctl_be_block_lun *be_lun; 2507 struct ctl_be_lun *cbe_lun; 2508 int retval; 2509 2510 params = &req->reqdata.rm; 2511 2512 sx_xlock(&softc->modify_lock); 2513 mtx_lock(&softc->lock); 2514 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2515 if (be_lun->cbe_lun.lun_id == params->lun_id) { 2516 SLIST_REMOVE(&softc->lun_list, be_lun, 2517 ctl_be_block_lun, links); 2518 softc->num_luns--; 2519 break; 2520 } 2521 } 2522 
mtx_unlock(&softc->lock); 2523 sx_xunlock(&softc->modify_lock); 2524 if (be_lun == NULL) { 2525 snprintf(req->error_str, sizeof(req->error_str), 2526 "LUN %u is not managed by the block backend", 2527 params->lun_id); 2528 goto bailout_error; 2529 } 2530 cbe_lun = &be_lun->cbe_lun; 2531 2532 if (be_lun->vn != NULL) { 2533 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2534 ctl_lun_no_media(cbe_lun); 2535 taskqueue_drain_all(be_lun->io_taskqueue); 2536 ctl_be_block_close(be_lun); 2537 } 2538 2539 mtx_lock(&softc->lock); 2540 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2541 mtx_unlock(&softc->lock); 2542 2543 retval = ctl_remove_lun(cbe_lun); 2544 if (retval != 0) { 2545 snprintf(req->error_str, sizeof(req->error_str), 2546 "error %d returned from ctl_remove_lun() for " 2547 "LUN %d", retval, params->lun_id); 2548 mtx_lock(&softc->lock); 2549 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2550 mtx_unlock(&softc->lock); 2551 goto bailout_error; 2552 } 2553 2554 mtx_lock(&softc->lock); 2555 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2556 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0); 2557 if (retval == EINTR) 2558 break; 2559 } 2560 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2561 if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2562 mtx_unlock(&softc->lock); 2563 free(be_lun, M_CTLBLK); 2564 } else { 2565 mtx_unlock(&softc->lock); 2566 return (EINTR); 2567 } 2568 2569 req->status = CTL_LUN_OK; 2570 return (0); 2571 2572 bailout_error: 2573 req->status = CTL_LUN_ERROR; 2574 return (0); 2575 } 2576 2577 static int 2578 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2579 { 2580 struct ctl_lun_modify_params *params; 2581 struct ctl_be_block_lun *be_lun; 2582 struct ctl_be_lun *cbe_lun; 2583 const char *value; 2584 uint64_t oldsize; 2585 int error, wasprim; 2586 2587 params = &req->reqdata.modify; 2588 2589 sx_xlock(&softc->modify_lock); 2590 mtx_lock(&softc->lock); 2591 SLIST_FOREACH(be_lun, &softc->lun_list, 
links) { 2592 if (be_lun->cbe_lun.lun_id == params->lun_id) 2593 break; 2594 } 2595 mtx_unlock(&softc->lock); 2596 if (be_lun == NULL) { 2597 snprintf(req->error_str, sizeof(req->error_str), 2598 "LUN %u is not managed by the block backend", 2599 params->lun_id); 2600 goto bailout_error; 2601 } 2602 cbe_lun = &be_lun->cbe_lun; 2603 2604 if (params->lun_size_bytes != 0) 2605 be_lun->params.lun_size_bytes = params->lun_size_bytes; 2606 2607 if (req->args_nvl != NULL) { 2608 nvlist_destroy(cbe_lun->options); 2609 cbe_lun->options = nvlist_clone(req->args_nvl); 2610 } 2611 2612 wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); 2613 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2614 if (value != NULL) { 2615 if (strcmp(value, "primary") == 0) 2616 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2617 else 2618 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2619 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2620 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2621 else 2622 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2623 if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { 2624 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) 2625 ctl_lun_primary(cbe_lun); 2626 else 2627 ctl_lun_secondary(cbe_lun); 2628 } 2629 2630 oldsize = be_lun->size_blocks; 2631 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2632 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2633 if (be_lun->vn == NULL) 2634 error = ctl_be_block_open(be_lun, req); 2635 else if (vn_isdisk_error(be_lun->vn, &error)) 2636 error = ctl_be_block_open_dev(be_lun, req); 2637 else if (be_lun->vn->v_type == VREG) { 2638 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2639 error = ctl_be_block_open_file(be_lun, req); 2640 VOP_UNLOCK(be_lun->vn); 2641 } else 2642 error = EINVAL; 2643 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) && 2644 be_lun->vn != NULL) { 2645 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2646 ctl_lun_has_media(cbe_lun); 2647 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 && 2648 be_lun->vn == NULL) { 
2649 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2650 ctl_lun_no_media(cbe_lun); 2651 } 2652 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2653 } else { 2654 if (be_lun->vn != NULL) { 2655 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2656 ctl_lun_no_media(cbe_lun); 2657 taskqueue_drain_all(be_lun->io_taskqueue); 2658 error = ctl_be_block_close(be_lun); 2659 } else 2660 error = 0; 2661 } 2662 if (be_lun->size_blocks != oldsize) 2663 ctl_lun_capacity_changed(cbe_lun); 2664 2665 /* Tell the user the exact size we ended up using */ 2666 params->lun_size_bytes = be_lun->size_bytes; 2667 2668 sx_xunlock(&softc->modify_lock); 2669 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; 2670 return (0); 2671 2672 bailout_error: 2673 sx_xunlock(&softc->modify_lock); 2674 req->status = CTL_LUN_ERROR; 2675 return (0); 2676 } 2677 2678 static void 2679 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun) 2680 { 2681 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun; 2682 struct ctl_be_block_softc *softc = be_lun->softc; 2683 2684 taskqueue_drain_all(be_lun->io_taskqueue); 2685 taskqueue_free(be_lun->io_taskqueue); 2686 if (be_lun->disk_stats != NULL) 2687 devstat_remove_entry(be_lun->disk_stats); 2688 nvlist_destroy(be_lun->cbe_lun.options); 2689 free(be_lun->dev_path, M_CTLBLK); 2690 mtx_destroy(&be_lun->queue_lock); 2691 mtx_destroy(&be_lun->io_lock); 2692 2693 mtx_lock(&softc->lock); 2694 be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2695 if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2696 wakeup(be_lun); 2697 else 2698 free(be_lun, M_CTLBLK); 2699 mtx_unlock(&softc->lock); 2700 } 2701 2702 static int 2703 ctl_be_block_config_write(union ctl_io *io) 2704 { 2705 struct ctl_be_block_lun *be_lun; 2706 struct ctl_be_lun *cbe_lun; 2707 int retval; 2708 2709 DPRINTF("entered\n"); 2710 2711 cbe_lun = CTL_BACKEND_LUN(io); 2712 be_lun = (struct ctl_be_block_lun *)cbe_lun; 2713 2714 retval = 0; 2715 switch (io->scsiio.cdb[0]) { 2716 case SYNCHRONIZE_CACHE: 2717 case 
SYNCHRONIZE_CACHE_16: 2718 case WRITE_SAME_10: 2719 case WRITE_SAME_16: 2720 case UNMAP: 2721 /* 2722 * The upper level CTL code will filter out any CDBs with 2723 * the immediate bit set and return the proper error. 2724 * 2725 * We don't really need to worry about what LBA range the 2726 * user asked to be synced out. When they issue a sync 2727 * cache command, we'll sync out the whole thing. 2728 */ 2729 mtx_lock(&be_lun->queue_lock); 2730 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2731 links); 2732 mtx_unlock(&be_lun->queue_lock); 2733 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2734 break; 2735 case START_STOP_UNIT: { 2736 struct scsi_start_stop_unit *cdb; 2737 struct ctl_lun_req req; 2738 2739 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2740 if ((cdb->how & SSS_PC_MASK) != 0) { 2741 ctl_set_success(&io->scsiio); 2742 ctl_config_write_done(io); 2743 break; 2744 } 2745 if (cdb->how & SSS_START) { 2746 if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) { 2747 retval = ctl_be_block_open(be_lun, &req); 2748 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2749 if (retval == 0) { 2750 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2751 ctl_lun_has_media(cbe_lun); 2752 } else { 2753 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2754 ctl_lun_no_media(cbe_lun); 2755 } 2756 } 2757 ctl_start_lun(cbe_lun); 2758 } else { 2759 ctl_stop_lun(cbe_lun); 2760 if (cdb->how & SSS_LOEJ) { 2761 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2762 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED; 2763 ctl_lun_ejected(cbe_lun); 2764 if (be_lun->vn != NULL) 2765 ctl_be_block_close(be_lun); 2766 } 2767 } 2768 2769 ctl_set_success(&io->scsiio); 2770 ctl_config_write_done(io); 2771 break; 2772 } 2773 case PREVENT_ALLOW: 2774 ctl_set_success(&io->scsiio); 2775 ctl_config_write_done(io); 2776 break; 2777 default: 2778 ctl_set_invalid_opcode(&io->scsiio); 2779 ctl_config_write_done(io); 2780 retval = CTL_RETVAL_COMPLETE; 2781 break; 2782 } 2783 2784 return (retval); 2785 } 2786 2787 
static int 2788 ctl_be_block_config_read(union ctl_io *io) 2789 { 2790 struct ctl_be_block_lun *be_lun; 2791 int retval = 0; 2792 2793 DPRINTF("entered\n"); 2794 2795 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 2796 2797 switch (io->scsiio.cdb[0]) { 2798 case SERVICE_ACTION_IN: 2799 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 2800 mtx_lock(&be_lun->queue_lock); 2801 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 2802 &io->io_hdr, links); 2803 mtx_unlock(&be_lun->queue_lock); 2804 taskqueue_enqueue(be_lun->io_taskqueue, 2805 &be_lun->io_task); 2806 retval = CTL_RETVAL_QUEUED; 2807 break; 2808 } 2809 ctl_set_invalid_field(&io->scsiio, 2810 /*sks_valid*/ 1, 2811 /*command*/ 1, 2812 /*field*/ 1, 2813 /*bit_valid*/ 1, 2814 /*bit*/ 4); 2815 ctl_config_read_done(io); 2816 retval = CTL_RETVAL_COMPLETE; 2817 break; 2818 default: 2819 ctl_set_invalid_opcode(&io->scsiio); 2820 ctl_config_read_done(io); 2821 retval = CTL_RETVAL_COMPLETE; 2822 break; 2823 } 2824 2825 return (retval); 2826 } 2827 2828 static int 2829 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb) 2830 { 2831 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 2832 int retval; 2833 2834 retval = sbuf_printf(sb, "\t<num_threads>"); 2835 if (retval != 0) 2836 goto bailout; 2837 retval = sbuf_printf(sb, "%d", lun->num_threads); 2838 if (retval != 0) 2839 goto bailout; 2840 retval = sbuf_printf(sb, "</num_threads>\n"); 2841 2842 bailout: 2843 return (retval); 2844 } 2845 2846 static uint64_t 2847 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname) 2848 { 2849 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 2850 2851 if (lun->getattr == NULL) 2852 return (UINT64_MAX); 2853 return (lun->getattr(lun, attrname)); 2854 } 2855 2856 static int 2857 ctl_be_block_init(void) 2858 { 2859 struct ctl_be_block_softc *softc = &backend_block_softc; 2860 2861 sx_init(&softc->modify_lock, "ctlblock modify"); 2862 mtx_init(&softc->lock, 
"ctlblock", NULL, MTX_DEF); 2863 softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2864 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2865 softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG, 2866 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2867 if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) 2868 softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG, 2869 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2870 SLIST_INIT(&softc->lun_list); 2871 return (0); 2872 } 2873 2874 static int 2875 ctl_be_block_shutdown(void) 2876 { 2877 struct ctl_be_block_softc *softc = &backend_block_softc; 2878 struct ctl_be_block_lun *lun; 2879 2880 mtx_lock(&softc->lock); 2881 while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) { 2882 SLIST_REMOVE_HEAD(&softc->lun_list, links); 2883 softc->num_luns--; 2884 /* 2885 * Drop our lock here. Since ctl_remove_lun() can call 2886 * back into us, this could potentially lead to a recursive 2887 * lock of the same mutex, which would cause a hang. 2888 */ 2889 mtx_unlock(&softc->lock); 2890 ctl_remove_lun(&lun->cbe_lun); 2891 mtx_lock(&softc->lock); 2892 } 2893 mtx_unlock(&softc->lock); 2894 uma_zdestroy(softc->bufmin_zone); 2895 if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) 2896 uma_zdestroy(softc->bufmax_zone); 2897 uma_zdestroy(softc->beio_zone); 2898 mtx_destroy(&softc->lock); 2899 sx_destroy(&softc->modify_lock); 2900 return (0); 2901 } 2902