1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003 Silicon Graphics International Corp. 5 * Copyright (c) 2009-2011 Spectra Logic Corporation 6 * Copyright (c) 2012 The FreeBSD Foundation 7 * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Portions of this software were developed by Edward Tomasz Napierala 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions, and the following disclaimer, 18 * without modification. 19 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 20 * substantially similar to the "NO WARRANTY" disclaimer below 21 * ("Disclaimer") and any redistribution must be conditioned upon 22 * including a substantially similar Disclaimer requirement for further 23 * binary redistribution. 24 * 25 * NO WARRANTY 26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 34 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGES. 
37 * 38 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 39 */ 40 /* 41 * CAM Target Layer driver backend for block devices. 42 * 43 * Author: Ken Merry <ken@FreeBSD.org> 44 */ 45 #include <sys/cdefs.h> 46 __FBSDID("$FreeBSD$"); 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/kernel.h> 51 #include <sys/types.h> 52 #include <sys/kthread.h> 53 #include <sys/bio.h> 54 #include <sys/fcntl.h> 55 #include <sys/limits.h> 56 #include <sys/lock.h> 57 #include <sys/mutex.h> 58 #include <sys/condvar.h> 59 #include <sys/malloc.h> 60 #include <sys/conf.h> 61 #include <sys/ioccom.h> 62 #include <sys/queue.h> 63 #include <sys/sbuf.h> 64 #include <sys/endian.h> 65 #include <sys/uio.h> 66 #include <sys/buf.h> 67 #include <sys/taskqueue.h> 68 #include <sys/vnode.h> 69 #include <sys/namei.h> 70 #include <sys/mount.h> 71 #include <sys/disk.h> 72 #include <sys/fcntl.h> 73 #include <sys/filedesc.h> 74 #include <sys/filio.h> 75 #include <sys/proc.h> 76 #include <sys/pcpu.h> 77 #include <sys/module.h> 78 #include <sys/sdt.h> 79 #include <sys/devicestat.h> 80 #include <sys/sysctl.h> 81 #include <sys/nv.h> 82 #include <sys/dnv.h> 83 #include <sys/sx.h> 84 85 #include <geom/geom.h> 86 87 #include <cam/cam.h> 88 #include <cam/scsi/scsi_all.h> 89 #include <cam/scsi/scsi_da.h> 90 #include <cam/ctl/ctl_io.h> 91 #include <cam/ctl/ctl.h> 92 #include <cam/ctl/ctl_backend.h> 93 #include <cam/ctl/ctl_ioctl.h> 94 #include <cam/ctl/ctl_ha.h> 95 #include <cam/ctl/ctl_scsi_all.h> 96 #include <cam/ctl/ctl_private.h> 97 #include <cam/ctl/ctl_error.h> 98 99 /* 100 * The idea here is that we'll allocate enough S/G space to hold a 1MB 101 * I/O. If we get an I/O larger than that, we'll split it. 
/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 *
 * One instance describes a single backing store (the enum
 * ctl_be_block_type distinguishes "none", device and file) together with
 * the per-LUN work queues and the taskqueue that services them.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;		/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;
	ctl_be_block_type dev_type;
	/* Backing vnode: target of the VOP_*() and devvn_refthread() calls. */
	struct vnode *vn;
	/* Type-specific data; the file variant carries the I/O credential. */
	union ctl_be_block_bedata backend;
	/*
	 * Per-backing-store method table.
	 * NOTE(review): presumably filled in with the _file/_dev/_zvol
	 * handlers when the backing store is opened -- the assignment is
	 * not part of this chunk, confirm there.
	 */
	cbb_dispatch_t dispatch;
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_dispatch_t get_lba_status;
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	/* Used by the GET LBA STATUS handlers as the "end of LUN" offset. */
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	/* devstat handle passed to devstat_{start,end}_transaction(). */
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	/* Linkage for the softc's lun_list. */
	SLIST_ENTRY(ctl_be_block_lun) links;
	/* io_task is enqueued on io_taskqueue whenever work is queued. */
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	/* Writes whose data-out transfer finished, awaiting backend I/O. */
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	/* Held around per-beio bio accounting and devstat completion. */
	struct mtx_padalign io_lock;
	/* Held while inserting into the queues above (see move_done). */
	struct mtx_padalign queue_lock;
};
/*
 * Per-I/O information.
 *
 * One of these is allocated (zeroed) from the softc's beio_zone for every
 * request handed to the backing store and released by ctl_free_beio().
 */
struct ctl_be_block_io {
	/* The CTL I/O this backend request belongs to. */
	union ctl_io *io;
	/*
	 * Data segments.  Entries [0, num_segs) are in use; when
	 * two_sglists is set a second list of equal length starts at
	 * index CTLBLK_HALF_SEGS (used for compare).
	 */
	struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS];
	/* iovec view of sg_segs, filled in by the uio-based dispatchers. */
	struct iovec xiovecs[CTLBLK_MAX_SEGS];
	/* BIO_READ / BIO_WRITE / BIO_FLUSH / BIO_DELETE. */
	int bio_cmd;
	int two_sglists;
	int num_segs;
	/*
	 * bio completion accounting: the request is finished once
	 * send_complete is set and num_bios_done has caught up with
	 * num_bios_sent (see ctl_be_block_biodone()).
	 */
	int num_bios_sent;
	int num_bios_done;
	int send_complete;
	/* errno and offset of the failing bio with the lowest offset. */
	int first_error;
	uint64_t first_error_offset;
	/* devstat bookkeeping: start time, tag type and transfer type. */
	struct bintime ds_t0;
	devstat_tag_type ds_tag_type;
	devstat_trans_flags ds_trans_type;
	/* Length and starting offset of the request, in bytes. */
	uint64_t io_len;
	uint64_t io_offset;
	/* Command-specific argument; the file flush treats non-zero as "no wait". */
	int io_arg;
	struct ctl_be_block_softc *softc;
	struct ctl_be_block_lun *lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 249 struct ctl_be_block_io *beio); 250 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, 251 const char *attrname); 252 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 253 union ctl_io *io); 254 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 255 union ctl_io *io); 256 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 257 union ctl_io *io); 258 static void ctl_be_block_worker(void *context, int pending); 259 static int ctl_be_block_submit(union ctl_io *io); 260 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 261 int flag, struct thread *td); 262 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, 263 struct ctl_lun_req *req); 264 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, 265 struct ctl_lun_req *req); 266 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); 267 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun, 268 struct ctl_lun_req *req); 269 static int ctl_be_block_create(struct ctl_be_block_softc *softc, 270 struct ctl_lun_req *req); 271 static int ctl_be_block_rm(struct ctl_be_block_softc *softc, 272 struct ctl_lun_req *req); 273 static int ctl_be_block_modify(struct ctl_be_block_softc *softc, 274 struct ctl_lun_req *req); 275 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun); 276 static int ctl_be_block_config_write(union ctl_io *io); 277 static int ctl_be_block_config_read(union ctl_io *io); 278 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb); 279 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname); 280 static int ctl_be_block_init(void); 281 static int ctl_be_block_shutdown(void); 282 283 static struct ctl_backend_driver ctl_be_block_driver = 284 { 285 .name = "block", 286 .flags = CTL_BE_FLAG_HAS_CONFIG, 287 .init = ctl_be_block_init, 288 .shutdown = 
/*
 * Return the length of the common prefix of two byte buffers.
 *
 * Both buffers must be at least `size` bytes long.  The result is the index
 * of the first byte that differs, or `size` when the buffers are identical
 * over the whole range (including the trivial case size == 0).
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t matched = 0;

	while (matched < size && a[matched] == b[matched])
		matched++;

	return (matched);
}
370 if (res < beio->sg_segs[i].len) 371 break; 372 } 373 if (i < beio->num_segs) { 374 scsi_u64to8b(off, info); 375 ctl_set_sense(&io->scsiio, /*current_error*/ 1, 376 /*sense_key*/ SSD_KEY_MISCOMPARE, 377 /*asc*/ 0x1D, /*ascq*/ 0x00, 378 /*type*/ SSD_ELEM_INFO, 379 /*size*/ sizeof(info), /*data*/ &info, 380 /*type*/ SSD_ELEM_NONE); 381 } else 382 ctl_set_success(&io->scsiio); 383 } 384 385 static int 386 ctl_be_block_move_done(union ctl_io *io) 387 { 388 struct ctl_be_block_io *beio; 389 struct ctl_be_block_lun *be_lun; 390 struct ctl_lba_len_flags *lbalen; 391 #ifdef CTL_TIME_IO 392 struct bintime cur_bt; 393 #endif 394 395 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 396 be_lun = beio->lun; 397 398 DPRINTF("entered\n"); 399 400 #ifdef CTL_TIME_IO 401 getbinuptime(&cur_bt); 402 bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); 403 bintime_add(&io->io_hdr.dma_bt, &cur_bt); 404 #endif 405 io->io_hdr.num_dmas++; 406 io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; 407 408 /* 409 * We set status at this point for read commands, and write 410 * commands with errors. 
411 */ 412 if (io->io_hdr.flags & CTL_FLAG_ABORT) { 413 ; 414 } else if ((io->io_hdr.port_status != 0) && 415 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || 416 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { 417 ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, 418 /*retry_count*/ io->io_hdr.port_status); 419 } else if (io->scsiio.kern_data_resid != 0 && 420 (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT && 421 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || 422 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { 423 ctl_set_invalid_field_ciu(&io->scsiio); 424 } else if ((io->io_hdr.port_status == 0) && 425 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { 426 lbalen = ARGS(beio->io); 427 if (lbalen->flags & CTL_LLF_READ) { 428 ctl_set_success(&io->scsiio); 429 } else if (lbalen->flags & CTL_LLF_COMPARE) { 430 /* We have two data blocks ready for comparison. */ 431 ctl_be_block_compare(io); 432 } 433 } 434 435 /* 436 * If this is a read, or a write with errors, it is done. 437 */ 438 if ((beio->bio_cmd == BIO_READ) 439 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 440 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 441 ctl_complete_beio(beio); 442 return (0); 443 } 444 445 /* 446 * At this point, we have a write and the DMA completed 447 * successfully. We now have to queue it to the task queue to 448 * execute the backend I/O. That is because we do blocking 449 * memory allocations, and in the file backing case, blocking I/O. 450 * This move done routine is generally called in the SIM's 451 * interrupt context, and therefore we cannot block. 
452 */ 453 mtx_lock(&be_lun->queue_lock); 454 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 455 mtx_unlock(&be_lun->queue_lock); 456 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 457 458 return (0); 459 } 460 461 static void 462 ctl_be_block_biodone(struct bio *bio) 463 { 464 struct ctl_be_block_io *beio; 465 struct ctl_be_block_lun *be_lun; 466 union ctl_io *io; 467 int error; 468 469 beio = bio->bio_caller1; 470 be_lun = beio->lun; 471 io = beio->io; 472 473 DPRINTF("entered\n"); 474 475 error = bio->bio_error; 476 mtx_lock(&be_lun->io_lock); 477 if (error != 0 && 478 (beio->first_error == 0 || 479 bio->bio_offset < beio->first_error_offset)) { 480 beio->first_error = error; 481 beio->first_error_offset = bio->bio_offset; 482 } 483 484 beio->num_bios_done++; 485 486 /* 487 * XXX KDM will this cause WITNESS to complain? Holding a lock 488 * during the free might cause it to complain. 489 */ 490 g_destroy_bio(bio); 491 492 /* 493 * If the send complete bit isn't set, or we aren't the last I/O to 494 * complete, then we're done. 495 */ 496 if ((beio->send_complete == 0) 497 || (beio->num_bios_done < beio->num_bios_sent)) { 498 mtx_unlock(&be_lun->io_lock); 499 return; 500 } 501 502 /* 503 * At this point, we've verified that we are the last I/O to 504 * complete, so it's safe to drop the lock. 505 */ 506 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 507 beio->ds_tag_type, beio->ds_trans_type, 508 /*now*/ NULL, /*then*/&beio->ds_t0); 509 mtx_unlock(&be_lun->io_lock); 510 511 /* 512 * If there are any errors from the backing device, we fail the 513 * entire I/O with a medium error. 
514 */ 515 error = beio->first_error; 516 if (error != 0) { 517 if (error == EOPNOTSUPP) { 518 ctl_set_invalid_opcode(&io->scsiio); 519 } else if (error == ENOSPC || error == EDQUOT) { 520 ctl_set_space_alloc_fail(&io->scsiio); 521 } else if (error == EROFS || error == EACCES) { 522 ctl_set_hw_write_protected(&io->scsiio); 523 } else if (beio->bio_cmd == BIO_FLUSH) { 524 /* XXX KDM is there is a better error here? */ 525 ctl_set_internal_failure(&io->scsiio, 526 /*sks_valid*/ 1, 527 /*retry_count*/ 0xbad2); 528 } else { 529 ctl_set_medium_error(&io->scsiio, 530 beio->bio_cmd == BIO_READ); 531 } 532 ctl_complete_beio(beio); 533 return; 534 } 535 536 /* 537 * If this is a write, a flush, a delete or verify, we're all done. 538 * If this is a read, we can now send the data to the user. 539 */ 540 if ((beio->bio_cmd == BIO_WRITE) 541 || (beio->bio_cmd == BIO_FLUSH) 542 || (beio->bio_cmd == BIO_DELETE) 543 || (ARGS(io)->flags & CTL_LLF_VERIFY)) { 544 ctl_set_success(&io->scsiio); 545 ctl_complete_beio(beio); 546 } else { 547 if ((ARGS(io)->flags & CTL_LLF_READ) && 548 beio->beio_cont == NULL) { 549 ctl_set_success(&io->scsiio); 550 ctl_serseq_done(io); 551 } 552 #ifdef CTL_TIME_IO 553 getbinuptime(&io->io_hdr.dma_start_bt); 554 #endif 555 ctl_datamove(io); 556 } 557 } 558 559 static void 560 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 561 struct ctl_be_block_io *beio) 562 { 563 union ctl_io *io = beio->io; 564 struct mount *mountpoint; 565 int error, lock_flags; 566 567 DPRINTF("entered\n"); 568 569 binuptime(&beio->ds_t0); 570 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 571 572 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 573 574 if (MNT_SHARED_WRITES(mountpoint) || 575 ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 576 lock_flags = LK_SHARED; 577 else 578 lock_flags = LK_EXCLUSIVE; 579 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 580 error = VOP_FSYNC(be_lun->vn, beio->io_arg ? 
MNT_NOWAIT : MNT_WAIT, 581 curthread); 582 VOP_UNLOCK(be_lun->vn); 583 584 vn_finished_write(mountpoint); 585 586 mtx_lock(&be_lun->io_lock); 587 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 588 beio->ds_tag_type, beio->ds_trans_type, 589 /*now*/ NULL, /*then*/&beio->ds_t0); 590 mtx_unlock(&be_lun->io_lock); 591 592 if (error == 0) 593 ctl_set_success(&io->scsiio); 594 else { 595 /* XXX KDM is there is a better error here? */ 596 ctl_set_internal_failure(&io->scsiio, 597 /*sks_valid*/ 1, 598 /*retry_count*/ 0xbad1); 599 } 600 601 ctl_complete_beio(beio); 602 } 603 604 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t"); 605 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t"); 606 SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t"); 607 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t"); 608 609 static void 610 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 611 struct ctl_be_block_io *beio) 612 { 613 struct ctl_be_block_filedata *file_data; 614 union ctl_io *io; 615 struct uio xuio; 616 struct iovec *xiovec; 617 size_t s; 618 int error, flags, i; 619 620 DPRINTF("entered\n"); 621 622 file_data = &be_lun->backend.file; 623 io = beio->io; 624 flags = 0; 625 if (ARGS(io)->flags & CTL_LLF_DPO) 626 flags |= IO_DIRECT; 627 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 628 flags |= IO_SYNC; 629 630 bzero(&xuio, sizeof(xuio)); 631 if (beio->bio_cmd == BIO_READ) { 632 SDT_PROBE0(cbb, , read, file_start); 633 xuio.uio_rw = UIO_READ; 634 } else { 635 SDT_PROBE0(cbb, , write, file_start); 636 xuio.uio_rw = UIO_WRITE; 637 } 638 xuio.uio_offset = beio->io_offset; 639 xuio.uio_resid = beio->io_len; 640 xuio.uio_segflg = UIO_SYSSPACE; 641 xuio.uio_iov = beio->xiovecs; 642 xuio.uio_iovcnt = beio->num_segs; 643 xuio.uio_td = curthread; 644 645 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 646 xiovec->iov_base = beio->sg_segs[i].addr; 647 xiovec->iov_len = beio->sg_segs[i].len; 648 } 649 650 
binuptime(&beio->ds_t0); 651 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 652 653 if (beio->bio_cmd == BIO_READ) { 654 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 655 656 /* 657 * UFS pays attention to IO_DIRECT for reads. If the 658 * DIRECTIO option is configured into the kernel, it calls 659 * ffs_rawread(). But that only works for single-segment 660 * uios with user space addresses. In our case, with a 661 * kernel uio, it still reads into the buffer cache, but it 662 * will just try to release the buffer from the cache later 663 * on in ffs_read(). 664 * 665 * ZFS does not pay attention to IO_DIRECT for reads. 666 * 667 * UFS does not pay attention to IO_SYNC for reads. 668 * 669 * ZFS pays attention to IO_SYNC (which translates into the 670 * Solaris define FRSYNC for zfs_read()) for reads. It 671 * attempts to sync the file before reading. 672 */ 673 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); 674 675 VOP_UNLOCK(be_lun->vn); 676 SDT_PROBE0(cbb, , read, file_done); 677 if (error == 0 && xuio.uio_resid > 0) { 678 /* 679 * If we red less then requested (EOF), then 680 * we should clean the rest of the buffer. 681 */ 682 s = beio->io_len - xuio.uio_resid; 683 for (i = 0; i < beio->num_segs; i++) { 684 if (s >= beio->sg_segs[i].len) { 685 s -= beio->sg_segs[i].len; 686 continue; 687 } 688 bzero((uint8_t *)beio->sg_segs[i].addr + s, 689 beio->sg_segs[i].len - s); 690 s = 0; 691 } 692 } 693 } else { 694 struct mount *mountpoint; 695 int lock_flags; 696 697 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 698 699 if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) 700 && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 701 lock_flags = LK_SHARED; 702 else 703 lock_flags = LK_EXCLUSIVE; 704 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 705 706 /* 707 * UFS pays attention to IO_DIRECT for writes. The write 708 * is done asynchronously. (Normally the write would just 709 * get put into cache. 
710 * 711 * UFS pays attention to IO_SYNC for writes. It will 712 * attempt to write the buffer out synchronously if that 713 * flag is set. 714 * 715 * ZFS does not pay attention to IO_DIRECT for writes. 716 * 717 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 718 * for writes. It will flush the transaction from the 719 * cache before returning. 720 */ 721 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); 722 VOP_UNLOCK(be_lun->vn); 723 724 vn_finished_write(mountpoint); 725 SDT_PROBE0(cbb, , write, file_done); 726 } 727 728 mtx_lock(&be_lun->io_lock); 729 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 730 beio->ds_tag_type, beio->ds_trans_type, 731 /*now*/ NULL, /*then*/&beio->ds_t0); 732 mtx_unlock(&be_lun->io_lock); 733 734 /* 735 * If we got an error, set the sense data to "MEDIUM ERROR" and 736 * return the I/O to the user. 737 */ 738 if (error != 0) { 739 if (error == ENOSPC || error == EDQUOT) { 740 ctl_set_space_alloc_fail(&io->scsiio); 741 } else if (error == EROFS || error == EACCES) { 742 ctl_set_hw_write_protected(&io->scsiio); 743 } else { 744 ctl_set_medium_error(&io->scsiio, 745 beio->bio_cmd == BIO_READ); 746 } 747 ctl_complete_beio(beio); 748 return; 749 } 750 751 /* 752 * If this is a write or a verify, we're all done. 753 * If this is a read, we can now send the data to the user. 
754 */ 755 if ((beio->bio_cmd == BIO_WRITE) || 756 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 757 ctl_set_success(&io->scsiio); 758 ctl_complete_beio(beio); 759 } else { 760 if ((ARGS(io)->flags & CTL_LLF_READ) && 761 beio->beio_cont == NULL) { 762 ctl_set_success(&io->scsiio); 763 ctl_serseq_done(io); 764 } 765 #ifdef CTL_TIME_IO 766 getbinuptime(&io->io_hdr.dma_start_bt); 767 #endif 768 ctl_datamove(io); 769 } 770 } 771 772 static void 773 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 774 struct ctl_be_block_io *beio) 775 { 776 union ctl_io *io = beio->io; 777 struct ctl_lba_len_flags *lbalen = ARGS(io); 778 struct scsi_get_lba_status_data *data; 779 off_t roff, off; 780 int error, status; 781 782 DPRINTF("entered\n"); 783 784 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 785 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 786 error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 787 0, curthread->td_ucred, curthread); 788 if (error == 0 && off > roff) 789 status = 0; /* mapped up to off */ 790 else { 791 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 792 0, curthread->td_ucred, curthread); 793 if (error == 0 && off > roff) 794 status = 1; /* deallocated up to off */ 795 else { 796 status = 0; /* unknown up to the end */ 797 off = be_lun->size_bytes; 798 } 799 } 800 VOP_UNLOCK(be_lun->vn); 801 802 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 803 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 804 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 805 lbalen->lba), data->descr[0].length); 806 data->descr[0].status = status; 807 808 ctl_complete_beio(beio); 809 } 810 811 static uint64_t 812 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) 813 { 814 struct vattr vattr; 815 struct statfs statfs; 816 uint64_t val; 817 int error; 818 819 val = UINT64_MAX; 820 if (be_lun->vn == NULL) 821 return (val); 822 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 823 if (strcmp(attrname, "blocksused") == 0) 
{ 824 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 825 if (error == 0) 826 val = vattr.va_bytes / be_lun->cbe_lun.blocksize; 827 } 828 if (strcmp(attrname, "blocksavail") == 0 && 829 !VN_IS_DOOMED(be_lun->vn)) { 830 error = VFS_STATFS(be_lun->vn->v_mount, &statfs); 831 if (error == 0) 832 val = statfs.f_bavail * statfs.f_bsize / 833 be_lun->cbe_lun.blocksize; 834 } 835 VOP_UNLOCK(be_lun->vn); 836 return (val); 837 } 838 839 static void 840 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, 841 struct ctl_be_block_io *beio) 842 { 843 union ctl_io *io; 844 struct cdevsw *csw; 845 struct cdev *dev; 846 struct uio xuio; 847 struct iovec *xiovec; 848 int error, flags, i, ref; 849 850 DPRINTF("entered\n"); 851 852 io = beio->io; 853 flags = 0; 854 if (ARGS(io)->flags & CTL_LLF_DPO) 855 flags |= IO_DIRECT; 856 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 857 flags |= IO_SYNC; 858 859 bzero(&xuio, sizeof(xuio)); 860 if (beio->bio_cmd == BIO_READ) { 861 SDT_PROBE0(cbb, , read, file_start); 862 xuio.uio_rw = UIO_READ; 863 } else { 864 SDT_PROBE0(cbb, , write, file_start); 865 xuio.uio_rw = UIO_WRITE; 866 } 867 xuio.uio_offset = beio->io_offset; 868 xuio.uio_resid = beio->io_len; 869 xuio.uio_segflg = UIO_SYSSPACE; 870 xuio.uio_iov = beio->xiovecs; 871 xuio.uio_iovcnt = beio->num_segs; 872 xuio.uio_td = curthread; 873 874 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 875 xiovec->iov_base = beio->sg_segs[i].addr; 876 xiovec->iov_len = beio->sg_segs[i].len; 877 } 878 879 binuptime(&beio->ds_t0); 880 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 881 882 csw = devvn_refthread(be_lun->vn, &dev, &ref); 883 if (csw) { 884 if (beio->bio_cmd == BIO_READ) 885 error = csw->d_read(dev, &xuio, flags); 886 else 887 error = csw->d_write(dev, &xuio, flags); 888 dev_relthread(dev, ref); 889 } else 890 error = ENXIO; 891 892 if (beio->bio_cmd == BIO_READ) 893 SDT_PROBE0(cbb, , read, file_done); 894 else 
895 SDT_PROBE0(cbb, , write, file_done); 896 897 mtx_lock(&be_lun->io_lock); 898 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 899 beio->ds_tag_type, beio->ds_trans_type, 900 /*now*/ NULL, /*then*/&beio->ds_t0); 901 mtx_unlock(&be_lun->io_lock); 902 903 /* 904 * If we got an error, set the sense data to "MEDIUM ERROR" and 905 * return the I/O to the user. 906 */ 907 if (error != 0) { 908 if (error == ENOSPC || error == EDQUOT) { 909 ctl_set_space_alloc_fail(&io->scsiio); 910 } else if (error == EROFS || error == EACCES) { 911 ctl_set_hw_write_protected(&io->scsiio); 912 } else { 913 ctl_set_medium_error(&io->scsiio, 914 beio->bio_cmd == BIO_READ); 915 } 916 ctl_complete_beio(beio); 917 return; 918 } 919 920 /* 921 * If this is a write or a verify, we're all done. 922 * If this is a read, we can now send the data to the user. 923 */ 924 if ((beio->bio_cmd == BIO_WRITE) || 925 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 926 ctl_set_success(&io->scsiio); 927 ctl_complete_beio(beio); 928 } else { 929 if ((ARGS(io)->flags & CTL_LLF_READ) && 930 beio->beio_cont == NULL) { 931 ctl_set_success(&io->scsiio); 932 ctl_serseq_done(io); 933 } 934 #ifdef CTL_TIME_IO 935 getbinuptime(&io->io_hdr.dma_start_bt); 936 #endif 937 ctl_datamove(io); 938 } 939 } 940 941 static void 942 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, 943 struct ctl_be_block_io *beio) 944 { 945 union ctl_io *io = beio->io; 946 struct cdevsw *csw; 947 struct cdev *dev; 948 struct ctl_lba_len_flags *lbalen = ARGS(io); 949 struct scsi_get_lba_status_data *data; 950 off_t roff, off; 951 int error, ref, status; 952 953 DPRINTF("entered\n"); 954 955 csw = devvn_refthread(be_lun->vn, &dev, &ref); 956 if (csw == NULL) { 957 status = 0; /* unknown up to the end */ 958 off = be_lun->size_bytes; 959 goto done; 960 } 961 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 962 error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD, 963 curthread); 964 if (error == 0 && off > roff) 965 
status = 0; /* mapped up to off */ 966 else { 967 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD, 968 curthread); 969 if (error == 0 && off > roff) 970 status = 1; /* deallocated up to off */ 971 else { 972 status = 0; /* unknown up to the end */ 973 off = be_lun->size_bytes; 974 } 975 } 976 dev_relthread(dev, ref); 977 978 done: 979 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 980 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 981 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 982 lbalen->lba), data->descr[0].length); 983 data->descr[0].status = status; 984 985 ctl_complete_beio(beio); 986 } 987 988 static void 989 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 990 struct ctl_be_block_io *beio) 991 { 992 struct bio *bio; 993 struct cdevsw *csw; 994 struct cdev *dev; 995 int ref; 996 997 DPRINTF("entered\n"); 998 999 /* This can't fail, it's a blocking allocation. */ 1000 bio = g_alloc_bio(); 1001 1002 bio->bio_cmd = BIO_FLUSH; 1003 bio->bio_offset = 0; 1004 bio->bio_data = 0; 1005 bio->bio_done = ctl_be_block_biodone; 1006 bio->bio_caller1 = beio; 1007 bio->bio_pblkno = 0; 1008 1009 /* 1010 * We don't need to acquire the LUN lock here, because we are only 1011 * sending one bio, and so there is no other context to synchronize 1012 * with. 
1013 */ 1014 beio->num_bios_sent = 1; 1015 beio->send_complete = 1; 1016 1017 binuptime(&beio->ds_t0); 1018 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1019 1020 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1021 if (csw) { 1022 bio->bio_dev = dev; 1023 csw->d_strategy(bio); 1024 dev_relthread(dev, ref); 1025 } else { 1026 bio->bio_error = ENXIO; 1027 ctl_be_block_biodone(bio); 1028 } 1029 } 1030 1031 static void 1032 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 1033 struct ctl_be_block_io *beio, 1034 uint64_t off, uint64_t len, int last) 1035 { 1036 struct bio *bio; 1037 uint64_t maxlen; 1038 struct cdevsw *csw; 1039 struct cdev *dev; 1040 int ref; 1041 1042 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1043 maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); 1044 while (len > 0) { 1045 bio = g_alloc_bio(); 1046 bio->bio_cmd = BIO_DELETE; 1047 bio->bio_dev = dev; 1048 bio->bio_offset = off; 1049 bio->bio_length = MIN(len, maxlen); 1050 bio->bio_data = 0; 1051 bio->bio_done = ctl_be_block_biodone; 1052 bio->bio_caller1 = beio; 1053 bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; 1054 1055 off += bio->bio_length; 1056 len -= bio->bio_length; 1057 1058 mtx_lock(&be_lun->io_lock); 1059 beio->num_bios_sent++; 1060 if (last && len == 0) 1061 beio->send_complete = 1; 1062 mtx_unlock(&be_lun->io_lock); 1063 1064 if (csw) { 1065 csw->d_strategy(bio); 1066 } else { 1067 bio->bio_error = ENXIO; 1068 ctl_be_block_biodone(bio); 1069 } 1070 } 1071 if (csw) 1072 dev_relthread(dev, ref); 1073 } 1074 1075 static void 1076 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1077 struct ctl_be_block_io *beio) 1078 { 1079 union ctl_io *io; 1080 struct ctl_ptr_len_flags *ptrlen; 1081 struct scsi_unmap_desc *buf, *end; 1082 uint64_t len; 1083 1084 io = beio->io; 1085 1086 DPRINTF("entered\n"); 1087 1088 binuptime(&beio->ds_t0); 1089 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1090 1091 if (beio->io_offset == -1) { 1092 
beio->io_len = 0; 1093 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1094 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 1095 end = buf + ptrlen->len / sizeof(*buf); 1096 for (; buf < end; buf++) { 1097 len = (uint64_t)scsi_4btoul(buf->length) * 1098 be_lun->cbe_lun.blocksize; 1099 beio->io_len += len; 1100 ctl_be_block_unmap_dev_range(be_lun, beio, 1101 scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, 1102 len, (end - buf < 2) ? TRUE : FALSE); 1103 } 1104 } else 1105 ctl_be_block_unmap_dev_range(be_lun, beio, 1106 beio->io_offset, beio->io_len, TRUE); 1107 } 1108 1109 static void 1110 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 1111 struct ctl_be_block_io *beio) 1112 { 1113 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 1114 struct bio *bio; 1115 struct cdevsw *csw; 1116 struct cdev *dev; 1117 off_t cur_offset; 1118 int i, max_iosize, ref; 1119 1120 DPRINTF("entered\n"); 1121 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1122 1123 /* 1124 * We have to limit our I/O size to the maximum supported by the 1125 * backend device. Hopefully it is MAXPHYS. If the driver doesn't 1126 * set it properly, use DFLTPHYS. 1127 */ 1128 if (csw) { 1129 max_iosize = dev->si_iosize_max; 1130 if (max_iosize < PAGE_SIZE) 1131 max_iosize = DFLTPHYS; 1132 } else 1133 max_iosize = DFLTPHYS; 1134 1135 cur_offset = beio->io_offset; 1136 for (i = 0; i < beio->num_segs; i++) { 1137 size_t cur_size; 1138 uint8_t *cur_ptr; 1139 1140 cur_size = beio->sg_segs[i].len; 1141 cur_ptr = beio->sg_segs[i].addr; 1142 1143 while (cur_size > 0) { 1144 /* This can't fail, it's a blocking allocation. 
*/ 1145 bio = g_alloc_bio(); 1146 1147 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 1148 1149 bio->bio_cmd = beio->bio_cmd; 1150 bio->bio_dev = dev; 1151 bio->bio_caller1 = beio; 1152 bio->bio_length = min(cur_size, max_iosize); 1153 bio->bio_offset = cur_offset; 1154 bio->bio_data = cur_ptr; 1155 bio->bio_done = ctl_be_block_biodone; 1156 bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; 1157 1158 cur_offset += bio->bio_length; 1159 cur_ptr += bio->bio_length; 1160 cur_size -= bio->bio_length; 1161 1162 TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 1163 beio->num_bios_sent++; 1164 } 1165 } 1166 beio->send_complete = 1; 1167 binuptime(&beio->ds_t0); 1168 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1169 1170 /* 1171 * Fire off all allocated requests! 1172 */ 1173 while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1174 TAILQ_REMOVE(&queue, bio, bio_queue); 1175 if (csw) 1176 csw->d_strategy(bio); 1177 else { 1178 bio->bio_error = ENXIO; 1179 ctl_be_block_biodone(bio); 1180 } 1181 } 1182 if (csw) 1183 dev_relthread(dev, ref); 1184 } 1185 1186 static uint64_t 1187 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) 1188 { 1189 struct diocgattr_arg arg; 1190 struct cdevsw *csw; 1191 struct cdev *dev; 1192 int error, ref; 1193 1194 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1195 if (csw == NULL) 1196 return (UINT64_MAX); 1197 strlcpy(arg.name, attrname, sizeof(arg.name)); 1198 arg.len = sizeof(arg.value.off); 1199 if (csw->d_ioctl) { 1200 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 1201 curthread); 1202 } else 1203 error = ENODEV; 1204 dev_relthread(dev, ref); 1205 if (error != 0) 1206 return (UINT64_MAX); 1207 return (arg.value.off); 1208 } 1209 1210 static void 1211 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, 1212 union ctl_io *io) 1213 { 1214 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1215 struct ctl_be_block_io *beio; 1216 struct ctl_lba_len_flags *lbalen; 1217 1218 
DPRINTF("entered\n"); 1219 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1220 lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1221 1222 beio->io_len = lbalen->len * cbe_lun->blocksize; 1223 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1224 beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; 1225 beio->bio_cmd = BIO_FLUSH; 1226 beio->ds_trans_type = DEVSTAT_NO_DATA; 1227 DPRINTF("SYNC\n"); 1228 be_lun->lun_flush(be_lun, beio); 1229 } 1230 1231 static void 1232 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1233 { 1234 union ctl_io *io; 1235 1236 io = beio->io; 1237 ctl_free_beio(beio); 1238 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1239 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1240 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1241 ctl_config_write_done(io); 1242 return; 1243 } 1244 1245 ctl_be_block_config_write(io); 1246 } 1247 1248 static void 1249 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1250 union ctl_io *io) 1251 { 1252 struct ctl_be_block_softc *softc = be_lun->softc; 1253 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1254 struct ctl_be_block_io *beio; 1255 struct ctl_lba_len_flags *lbalen; 1256 uint64_t len_left, lba; 1257 uint32_t pb, pbo, adj; 1258 int i, seglen; 1259 uint8_t *buf, *end; 1260 1261 DPRINTF("entered\n"); 1262 1263 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1264 lbalen = ARGS(beio->io); 1265 1266 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || 1267 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { 1268 ctl_free_beio(beio); 1269 ctl_set_invalid_field(&io->scsiio, 1270 /*sks_valid*/ 1, 1271 /*command*/ 1, 1272 /*field*/ 1, 1273 /*bit_valid*/ 0, 1274 /*bit*/ 0); 1275 ctl_config_write_done(io); 1276 return; 1277 } 1278 1279 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { 1280 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1281 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; 1282 
beio->bio_cmd = BIO_DELETE; 1283 beio->ds_trans_type = DEVSTAT_FREE; 1284 1285 be_lun->unmap(be_lun, beio); 1286 return; 1287 } 1288 1289 beio->bio_cmd = BIO_WRITE; 1290 beio->ds_trans_type = DEVSTAT_WRITE; 1291 1292 DPRINTF("WRITE SAME at LBA %jx len %u\n", 1293 (uintmax_t)lbalen->lba, lbalen->len); 1294 1295 pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; 1296 if (be_lun->cbe_lun.pblockoff > 0) 1297 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; 1298 else 1299 pbo = 0; 1300 len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; 1301 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 1302 1303 /* 1304 * Setup the S/G entry for this chunk. 1305 */ 1306 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1307 if (pb > cbe_lun->blocksize) { 1308 adj = ((lbalen->lba + lba) * cbe_lun->blocksize + 1309 seglen - pbo) % pb; 1310 if (seglen > adj) 1311 seglen -= adj; 1312 else 1313 seglen -= seglen % cbe_lun->blocksize; 1314 } else 1315 seglen -= seglen % cbe_lun->blocksize; 1316 beio->sg_segs[i].len = seglen; 1317 beio->sg_segs[i].addr = uma_zalloc(softc->buf_zone, M_WAITOK); 1318 1319 DPRINTF("segment %d addr %p len %zd\n", i, 1320 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1321 1322 beio->num_segs++; 1323 len_left -= seglen; 1324 1325 buf = beio->sg_segs[i].addr; 1326 end = buf + seglen; 1327 for (; buf < end; buf += cbe_lun->blocksize) { 1328 if (lbalen->flags & SWS_NDOB) { 1329 memset(buf, 0, cbe_lun->blocksize); 1330 } else { 1331 memcpy(buf, io->scsiio.kern_data_ptr, 1332 cbe_lun->blocksize); 1333 } 1334 if (lbalen->flags & SWS_LBDATA) 1335 scsi_ulto4b(lbalen->lba + lba, buf); 1336 lba++; 1337 } 1338 } 1339 1340 beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1341 beio->io_len = lba * cbe_lun->blocksize; 1342 1343 /* We can not do all in one run. Correct and schedule rerun. 
*/ 1344 if (len_left > 0) { 1345 lbalen->lba += lba; 1346 lbalen->len -= lba; 1347 beio->beio_cont = ctl_be_block_cw_done_ws; 1348 } 1349 1350 be_lun->dispatch(be_lun, beio); 1351 } 1352 1353 static void 1354 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1355 union ctl_io *io) 1356 { 1357 struct ctl_be_block_io *beio; 1358 struct ctl_ptr_len_flags *ptrlen; 1359 1360 DPRINTF("entered\n"); 1361 1362 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1363 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1364 1365 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { 1366 ctl_free_beio(beio); 1367 ctl_set_invalid_field(&io->scsiio, 1368 /*sks_valid*/ 0, 1369 /*command*/ 1, 1370 /*field*/ 0, 1371 /*bit_valid*/ 0, 1372 /*bit*/ 0); 1373 ctl_config_write_done(io); 1374 return; 1375 } 1376 1377 beio->io_len = 0; 1378 beio->io_offset = -1; 1379 beio->bio_cmd = BIO_DELETE; 1380 beio->ds_trans_type = DEVSTAT_FREE; 1381 DPRINTF("UNMAP\n"); 1382 be_lun->unmap(be_lun, beio); 1383 } 1384 1385 static void 1386 ctl_be_block_cr_done(struct ctl_be_block_io *beio) 1387 { 1388 union ctl_io *io; 1389 1390 io = beio->io; 1391 ctl_free_beio(beio); 1392 ctl_config_read_done(io); 1393 } 1394 1395 static void 1396 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 1397 union ctl_io *io) 1398 { 1399 struct ctl_be_block_io *beio; 1400 struct ctl_be_block_softc *softc; 1401 1402 DPRINTF("entered\n"); 1403 1404 softc = be_lun->softc; 1405 beio = ctl_alloc_beio(softc); 1406 beio->io = io; 1407 beio->lun = be_lun; 1408 beio->beio_cont = ctl_be_block_cr_done; 1409 PRIV(io)->ptr = (void *)beio; 1410 1411 switch (io->scsiio.cdb[0]) { 1412 case SERVICE_ACTION_IN: /* GET LBA STATUS */ 1413 beio->bio_cmd = -1; 1414 beio->ds_trans_type = DEVSTAT_NO_DATA; 1415 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1416 beio->io_len = 0; 1417 if (be_lun->get_lba_status) 1418 be_lun->get_lba_status(be_lun, beio); 1419 else 1420 ctl_be_block_cr_done(beio); 
1421 break; 1422 default: 1423 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1424 break; 1425 } 1426 } 1427 1428 static void 1429 ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1430 { 1431 union ctl_io *io; 1432 1433 io = beio->io; 1434 ctl_free_beio(beio); 1435 ctl_config_write_done(io); 1436 } 1437 1438 static void 1439 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1440 union ctl_io *io) 1441 { 1442 struct ctl_be_block_io *beio; 1443 struct ctl_be_block_softc *softc; 1444 1445 DPRINTF("entered\n"); 1446 1447 softc = be_lun->softc; 1448 beio = ctl_alloc_beio(softc); 1449 beio->io = io; 1450 beio->lun = be_lun; 1451 beio->beio_cont = ctl_be_block_cw_done; 1452 switch (io->scsiio.tag_type) { 1453 case CTL_TAG_ORDERED: 1454 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1455 break; 1456 case CTL_TAG_HEAD_OF_QUEUE: 1457 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1458 break; 1459 case CTL_TAG_UNTAGGED: 1460 case CTL_TAG_SIMPLE: 1461 case CTL_TAG_ACA: 1462 default: 1463 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1464 break; 1465 } 1466 PRIV(io)->ptr = (void *)beio; 1467 1468 switch (io->scsiio.cdb[0]) { 1469 case SYNCHRONIZE_CACHE: 1470 case SYNCHRONIZE_CACHE_16: 1471 ctl_be_block_cw_dispatch_sync(be_lun, io); 1472 break; 1473 case WRITE_SAME_10: 1474 case WRITE_SAME_16: 1475 ctl_be_block_cw_dispatch_ws(be_lun, io); 1476 break; 1477 case UNMAP: 1478 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1479 break; 1480 default: 1481 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1482 break; 1483 } 1484 } 1485 1486 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t"); 1487 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t"); 1488 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t"); 1489 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t"); 1490 1491 static void 1492 ctl_be_block_next(struct ctl_be_block_io *beio) 1493 { 1494 struct ctl_be_block_lun *be_lun; 1495 union ctl_io *io; 1496 1497 io = beio->io; 1498 be_lun = beio->lun; 1499 ctl_free_beio(beio); 1500 if 
((io->io_hdr.flags & CTL_FLAG_ABORT) || 1501 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1502 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1503 ctl_data_submit_done(io); 1504 return; 1505 } 1506 1507 io->io_hdr.status &= ~CTL_STATUS_MASK; 1508 io->io_hdr.status |= CTL_STATUS_NONE; 1509 1510 mtx_lock(&be_lun->queue_lock); 1511 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1512 mtx_unlock(&be_lun->queue_lock); 1513 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1514 } 1515 1516 static void 1517 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1518 union ctl_io *io) 1519 { 1520 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1521 struct ctl_be_block_io *beio; 1522 struct ctl_be_block_softc *softc; 1523 struct ctl_lba_len_flags *lbalen; 1524 struct ctl_ptr_len_flags *bptrlen; 1525 uint64_t len_left, lbas; 1526 int i; 1527 1528 softc = be_lun->softc; 1529 1530 DPRINTF("entered\n"); 1531 1532 lbalen = ARGS(io); 1533 if (lbalen->flags & CTL_LLF_WRITE) { 1534 SDT_PROBE0(cbb, , write, start); 1535 } else { 1536 SDT_PROBE0(cbb, , read, start); 1537 } 1538 1539 beio = ctl_alloc_beio(softc); 1540 beio->io = io; 1541 beio->lun = be_lun; 1542 bptrlen = PRIV(io); 1543 bptrlen->ptr = (void *)beio; 1544 1545 switch (io->scsiio.tag_type) { 1546 case CTL_TAG_ORDERED: 1547 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1548 break; 1549 case CTL_TAG_HEAD_OF_QUEUE: 1550 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1551 break; 1552 case CTL_TAG_UNTAGGED: 1553 case CTL_TAG_SIMPLE: 1554 case CTL_TAG_ACA: 1555 default: 1556 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1557 break; 1558 } 1559 1560 if (lbalen->flags & CTL_LLF_WRITE) { 1561 beio->bio_cmd = BIO_WRITE; 1562 beio->ds_trans_type = DEVSTAT_WRITE; 1563 } else { 1564 beio->bio_cmd = BIO_READ; 1565 beio->ds_trans_type = DEVSTAT_READ; 1566 } 1567 1568 DPRINTF("%s at LBA %jx len %u @%ju\n", 1569 (beio->bio_cmd == BIO_READ) ? 
"READ" : "WRITE", 1570 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 1571 if (lbalen->flags & CTL_LLF_COMPARE) { 1572 beio->two_sglists = 1; 1573 lbas = CTLBLK_HALF_IO_SIZE; 1574 } else { 1575 lbas = CTLBLK_MAX_IO_SIZE; 1576 } 1577 lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); 1578 beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; 1579 beio->io_len = lbas * cbe_lun->blocksize; 1580 bptrlen->len += lbas; 1581 1582 for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 1583 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 1584 i, CTLBLK_MAX_SEGS)); 1585 1586 /* 1587 * Setup the S/G entry for this chunk. 1588 */ 1589 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); 1590 beio->sg_segs[i].addr = uma_zalloc(softc->buf_zone, M_WAITOK); 1591 1592 DPRINTF("segment %d addr %p len %zd\n", i, 1593 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1594 1595 /* Set up second segment for compare operation. */ 1596 if (beio->two_sglists) { 1597 beio->sg_segs[i + CTLBLK_HALF_SEGS].len = 1598 beio->sg_segs[i].len; 1599 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = 1600 uma_zalloc(softc->buf_zone, M_WAITOK); 1601 } 1602 1603 beio->num_segs++; 1604 len_left -= beio->sg_segs[i].len; 1605 } 1606 if (bptrlen->len < lbalen->len) 1607 beio->beio_cont = ctl_be_block_next; 1608 io->scsiio.be_move_done = ctl_be_block_move_done; 1609 /* For compare we have separate S/G lists for read and datamove. */ 1610 if (beio->two_sglists) 1611 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; 1612 else 1613 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1614 io->scsiio.kern_data_len = beio->io_len; 1615 io->scsiio.kern_sg_entries = beio->num_segs; 1616 io->io_hdr.flags |= CTL_FLAG_ALLOCATED; 1617 1618 /* 1619 * For the read case, we need to read the data into our buffers and 1620 * then we can send it back to the user. For the write case, we 1621 * need to get the data from the user first. 
1622 */ 1623 if (beio->bio_cmd == BIO_READ) { 1624 SDT_PROBE0(cbb, , read, alloc_done); 1625 be_lun->dispatch(be_lun, beio); 1626 } else { 1627 SDT_PROBE0(cbb, , write, alloc_done); 1628 #ifdef CTL_TIME_IO 1629 getbinuptime(&io->io_hdr.dma_start_bt); 1630 #endif 1631 ctl_datamove(io); 1632 } 1633 } 1634 1635 static void 1636 ctl_be_block_worker(void *context, int pending) 1637 { 1638 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context; 1639 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1640 union ctl_io *io; 1641 struct ctl_be_block_io *beio; 1642 1643 DPRINTF("entered\n"); 1644 /* 1645 * Fetch and process I/Os from all queues. If we detect LUN 1646 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race, 1647 * so make response maximally opaque to not confuse initiator. 1648 */ 1649 for (;;) { 1650 mtx_lock(&be_lun->queue_lock); 1651 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1652 if (io != NULL) { 1653 DPRINTF("datamove queue\n"); 1654 STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, 1655 ctl_io_hdr, links); 1656 mtx_unlock(&be_lun->queue_lock); 1657 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1658 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1659 ctl_set_busy(&io->scsiio); 1660 ctl_complete_beio(beio); 1661 return; 1662 } 1663 be_lun->dispatch(be_lun, beio); 1664 continue; 1665 } 1666 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1667 if (io != NULL) { 1668 DPRINTF("config write queue\n"); 1669 STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, 1670 ctl_io_hdr, links); 1671 mtx_unlock(&be_lun->queue_lock); 1672 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1673 ctl_set_busy(&io->scsiio); 1674 ctl_config_write_done(io); 1675 return; 1676 } 1677 ctl_be_block_cw_dispatch(be_lun, io); 1678 continue; 1679 } 1680 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); 1681 if (io != NULL) { 1682 DPRINTF("config read queue\n"); 1683 STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr, 1684 
ctl_io_hdr, links); 1685 mtx_unlock(&be_lun->queue_lock); 1686 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1687 ctl_set_busy(&io->scsiio); 1688 ctl_config_read_done(io); 1689 return; 1690 } 1691 ctl_be_block_cr_dispatch(be_lun, io); 1692 continue; 1693 } 1694 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 1695 if (io != NULL) { 1696 DPRINTF("input queue\n"); 1697 STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, 1698 ctl_io_hdr, links); 1699 mtx_unlock(&be_lun->queue_lock); 1700 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1701 ctl_set_busy(&io->scsiio); 1702 ctl_data_submit_done(io); 1703 return; 1704 } 1705 ctl_be_block_dispatch(be_lun, io); 1706 continue; 1707 } 1708 1709 /* 1710 * If we get here, there is no work left in the queues, so 1711 * just break out and let the task queue go to sleep. 1712 */ 1713 mtx_unlock(&be_lun->queue_lock); 1714 break; 1715 } 1716 } 1717 1718 /* 1719 * Entry point from CTL to the backend for I/O. We queue everything to a 1720 * work thread, so this just puts the I/O on a queue and wakes up the 1721 * thread. 1722 */ 1723 static int 1724 ctl_be_block_submit(union ctl_io *io) 1725 { 1726 struct ctl_be_block_lun *be_lun; 1727 1728 DPRINTF("entered\n"); 1729 1730 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 1731 1732 /* 1733 * Make sure we only get SCSI I/O. 
1734 */ 1735 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " 1736 "%#x) encountered", io->io_hdr.io_type)); 1737 1738 PRIV(io)->len = 0; 1739 1740 mtx_lock(&be_lun->queue_lock); 1741 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1742 mtx_unlock(&be_lun->queue_lock); 1743 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1744 1745 return (CTL_RETVAL_COMPLETE); 1746 } 1747 1748 static int 1749 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 1750 int flag, struct thread *td) 1751 { 1752 struct ctl_be_block_softc *softc = &backend_block_softc; 1753 int error; 1754 1755 error = 0; 1756 switch (cmd) { 1757 case CTL_LUN_REQ: { 1758 struct ctl_lun_req *lun_req; 1759 1760 lun_req = (struct ctl_lun_req *)addr; 1761 1762 switch (lun_req->reqtype) { 1763 case CTL_LUNREQ_CREATE: 1764 error = ctl_be_block_create(softc, lun_req); 1765 break; 1766 case CTL_LUNREQ_RM: 1767 error = ctl_be_block_rm(softc, lun_req); 1768 break; 1769 case CTL_LUNREQ_MODIFY: 1770 error = ctl_be_block_modify(softc, lun_req); 1771 break; 1772 default: 1773 lun_req->status = CTL_LUN_ERROR; 1774 snprintf(lun_req->error_str, sizeof(lun_req->error_str), 1775 "invalid LUN request type %d", 1776 lun_req->reqtype); 1777 break; 1778 } 1779 break; 1780 } 1781 default: 1782 error = ENOTTY; 1783 break; 1784 } 1785 1786 return (error); 1787 } 1788 1789 static int 1790 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1791 { 1792 struct ctl_be_lun *cbe_lun; 1793 struct ctl_be_block_filedata *file_data; 1794 struct ctl_lun_create_params *params; 1795 const char *value; 1796 struct vattr vattr; 1797 off_t ps, pss, po, pos, us, uss, uo, uos; 1798 int error; 1799 1800 cbe_lun = &be_lun->cbe_lun; 1801 file_data = &be_lun->backend.file; 1802 params = &be_lun->params; 1803 1804 be_lun->dev_type = CTL_BE_BLOCK_FILE; 1805 be_lun->dispatch = ctl_be_block_dispatch_file; 1806 be_lun->lun_flush = ctl_be_block_flush_file; 1807 
be_lun->get_lba_status = ctl_be_block_gls_file; 1808 be_lun->getattr = ctl_be_block_getattr_file; 1809 be_lun->unmap = NULL; 1810 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 1811 1812 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 1813 if (error != 0) { 1814 snprintf(req->error_str, sizeof(req->error_str), 1815 "error calling VOP_GETATTR() for file %s", 1816 be_lun->dev_path); 1817 return (error); 1818 } 1819 1820 file_data->cred = crhold(curthread->td_ucred); 1821 if (params->lun_size_bytes != 0) 1822 be_lun->size_bytes = params->lun_size_bytes; 1823 else 1824 be_lun->size_bytes = vattr.va_size; 1825 1826 /* 1827 * For files we can use any logical block size. Prefer 512 bytes 1828 * for compatibility reasons. If file's vattr.va_blocksize 1829 * (preferred I/O block size) is bigger and multiple to chosen 1830 * logical block size -- report it as physical block size. 1831 */ 1832 if (params->blocksize_bytes != 0) 1833 cbe_lun->blocksize = params->blocksize_bytes; 1834 else if (cbe_lun->lun_type == T_CDROM) 1835 cbe_lun->blocksize = 2048; 1836 else 1837 cbe_lun->blocksize = 512; 1838 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 1839 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 
1840 0 : (be_lun->size_blocks - 1); 1841 1842 us = ps = vattr.va_blocksize; 1843 uo = po = 0; 1844 1845 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 1846 if (value != NULL) 1847 ctl_expand_number(value, &ps); 1848 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 1849 if (value != NULL) 1850 ctl_expand_number(value, &po); 1851 pss = ps / cbe_lun->blocksize; 1852 pos = po / cbe_lun->blocksize; 1853 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 1854 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 1855 cbe_lun->pblockexp = fls(pss) - 1; 1856 cbe_lun->pblockoff = (pss - pos) % pss; 1857 } 1858 1859 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 1860 if (value != NULL) 1861 ctl_expand_number(value, &us); 1862 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 1863 if (value != NULL) 1864 ctl_expand_number(value, &uo); 1865 uss = us / cbe_lun->blocksize; 1866 uos = uo / cbe_lun->blocksize; 1867 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 1868 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 1869 cbe_lun->ublockexp = fls(uss) - 1; 1870 cbe_lun->ublockoff = (uss - uos) % uss; 1871 } 1872 1873 /* 1874 * Sanity check. The media size has to be at least one 1875 * sector long. 
1876 */ 1877 if (be_lun->size_bytes < cbe_lun->blocksize) { 1878 error = EINVAL; 1879 snprintf(req->error_str, sizeof(req->error_str), 1880 "file %s size %ju < block size %u", be_lun->dev_path, 1881 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); 1882 } 1883 1884 cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; 1885 return (error); 1886 } 1887 1888 static int 1889 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1890 { 1891 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1892 struct ctl_lun_create_params *params; 1893 struct cdevsw *csw; 1894 struct cdev *dev; 1895 const char *value; 1896 int error, atomic, maxio, ref, unmap, tmp; 1897 off_t ps, pss, po, pos, us, uss, uo, uos, otmp; 1898 1899 params = &be_lun->params; 1900 1901 be_lun->dev_type = CTL_BE_BLOCK_DEV; 1902 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1903 if (csw == NULL) 1904 return (ENXIO); 1905 if (strcmp(csw->d_name, "zvol") == 0) { 1906 be_lun->dispatch = ctl_be_block_dispatch_zvol; 1907 be_lun->get_lba_status = ctl_be_block_gls_zvol; 1908 atomic = maxio = CTLBLK_MAX_IO_SIZE; 1909 } else { 1910 be_lun->dispatch = ctl_be_block_dispatch_dev; 1911 be_lun->get_lba_status = NULL; 1912 atomic = 0; 1913 maxio = dev->si_iosize_max; 1914 if (maxio <= 0) 1915 maxio = DFLTPHYS; 1916 if (maxio > CTLBLK_MAX_IO_SIZE) 1917 maxio = CTLBLK_MAX_IO_SIZE; 1918 } 1919 be_lun->lun_flush = ctl_be_block_flush_dev; 1920 be_lun->getattr = ctl_be_block_getattr_dev; 1921 be_lun->unmap = ctl_be_block_unmap_dev; 1922 1923 if (!csw->d_ioctl) { 1924 dev_relthread(dev, ref); 1925 snprintf(req->error_str, sizeof(req->error_str), 1926 "no d_ioctl for device %s!", be_lun->dev_path); 1927 return (ENODEV); 1928 } 1929 1930 error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, 1931 curthread); 1932 if (error) { 1933 dev_relthread(dev, ref); 1934 snprintf(req->error_str, sizeof(req->error_str), 1935 "error %d returned for DIOCGSECTORSIZE ioctl " 1936 "on %s!", error, 
be_lun->dev_path); 1937 return (error); 1938 } 1939 1940 /* 1941 * If the user has asked for a blocksize that is greater than the 1942 * backing device's blocksize, we can do it only if the blocksize 1943 * the user is asking for is an even multiple of the underlying 1944 * device's blocksize. 1945 */ 1946 if ((params->blocksize_bytes != 0) && 1947 (params->blocksize_bytes >= tmp)) { 1948 if (params->blocksize_bytes % tmp == 0) { 1949 cbe_lun->blocksize = params->blocksize_bytes; 1950 } else { 1951 dev_relthread(dev, ref); 1952 snprintf(req->error_str, sizeof(req->error_str), 1953 "requested blocksize %u is not an even " 1954 "multiple of backing device blocksize %u", 1955 params->blocksize_bytes, tmp); 1956 return (EINVAL); 1957 } 1958 } else if (params->blocksize_bytes != 0) { 1959 dev_relthread(dev, ref); 1960 snprintf(req->error_str, sizeof(req->error_str), 1961 "requested blocksize %u < backing device " 1962 "blocksize %u", params->blocksize_bytes, tmp); 1963 return (EINVAL); 1964 } else if (cbe_lun->lun_type == T_CDROM) 1965 cbe_lun->blocksize = MAX(tmp, 2048); 1966 else 1967 cbe_lun->blocksize = tmp; 1968 1969 error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD, 1970 curthread); 1971 if (error) { 1972 dev_relthread(dev, ref); 1973 snprintf(req->error_str, sizeof(req->error_str), 1974 "error %d returned for DIOCGMEDIASIZE " 1975 " ioctl on %s!", error, 1976 be_lun->dev_path); 1977 return (error); 1978 } 1979 1980 if (params->lun_size_bytes != 0) { 1981 if (params->lun_size_bytes > otmp) { 1982 dev_relthread(dev, ref); 1983 snprintf(req->error_str, sizeof(req->error_str), 1984 "requested LUN size %ju > backing device " 1985 "size %ju", 1986 (uintmax_t)params->lun_size_bytes, 1987 (uintmax_t)otmp); 1988 return (EINVAL); 1989 } 1990 1991 be_lun->size_bytes = params->lun_size_bytes; 1992 } else 1993 be_lun->size_bytes = otmp; 1994 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 1995 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 
1996 0 : (be_lun->size_blocks - 1); 1997 1998 error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, 1999 curthread); 2000 if (error) 2001 ps = po = 0; 2002 else { 2003 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, 2004 FREAD, curthread); 2005 if (error) 2006 po = 0; 2007 } 2008 us = ps; 2009 uo = po; 2010 2011 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); 2012 if (value != NULL) 2013 ctl_expand_number(value, &ps); 2014 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); 2015 if (value != NULL) 2016 ctl_expand_number(value, &po); 2017 pss = ps / cbe_lun->blocksize; 2018 pos = po / cbe_lun->blocksize; 2019 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 2020 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 2021 cbe_lun->pblockexp = fls(pss) - 1; 2022 cbe_lun->pblockoff = (pss - pos) % pss; 2023 } 2024 2025 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); 2026 if (value != NULL) 2027 ctl_expand_number(value, &us); 2028 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); 2029 if (value != NULL) 2030 ctl_expand_number(value, &uo); 2031 uss = us / cbe_lun->blocksize; 2032 uos = uo / cbe_lun->blocksize; 2033 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 2034 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 2035 cbe_lun->ublockexp = fls(uss) - 1; 2036 cbe_lun->ublockoff = (uss - uos) % uss; 2037 } 2038 2039 cbe_lun->atomicblock = atomic / cbe_lun->blocksize; 2040 cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; 2041 2042 if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { 2043 unmap = 1; 2044 } else { 2045 struct diocgattr_arg arg; 2046 2047 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 2048 arg.len = sizeof(arg.value.i); 2049 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 2050 curthread); 2051 unmap = (error == 0) ? 
/*
 * Close the vnode backing a LUN, if one is open.
 *
 * The vnode is closed with FREAD, plus FWRITE unless the LUN is flagged
 * read-only.  For file-backed LUNs the credential reference held in
 * backend.file.cred is dropped as well.  Afterwards the LUN is left with
 * vn == NULL and dev_type == CTL_BE_BLOCK_NONE.  Calling this with no
 * vnode open is a no-op.
 *
 * Always returns 0.
 */
static int
ctl_be_block_close(struct ctl_be_block_lun *be_lun)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	int flags;

	if (be_lun->vn) {
		/* Rebuild the open mode from the read-only flag. */
		flags = FREAD;
		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
			flags |= FWRITE;
		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
		be_lun->vn = NULL;

		switch (be_lun->dev_type) {
		case CTL_BE_BLOCK_DEV:
			break;
		case CTL_BE_BLOCK_FILE:
			if (be_lun->backend.file.cred != NULL) {
				crfree(be_lun->backend.file.cred);
				be_lun->backend.file.cred = NULL;
			}
			break;
		case CTL_BE_BLOCK_NONE:
			break;
		default:
			panic("Unexpected backend type %d", be_lun->dev_type);
			break;
		}
		be_lun->dev_type = CTL_BE_BLOCK_NONE;
	}
	return (0);
}

/*
 * Open the backing store named by the LUN's "file" option and set the
 * LUN up for it.
 *
 * The path is opened for reading, and also for writing unless the
 * "readonly" option is "on" (when the option is absent, only T_DIRECT
 * LUNs are opened for writing).  The open is retried
 *  - without FWRITE if it failed with EROFS or EACCES, and
 *  - with "/dev/" prepended if it failed and the path is not absolute.
 * CTL_LUN_FLAG_READONLY ends up reflecting whether FWRITE survived.
 *
 * Depending on the vnode type, ctl_be_block_open_dev() or
 * ctl_be_block_open_file() completes the setup; anything that is neither
 * a disk nor a regular file is rejected.  Finally the "serseq" option
 * ("on", "read" or "off") may override the default serialization mode.
 *
 * Return value: 1 if the root filesystem is not mounted or no "file"
 * option was given, the vn_open() error if the path cannot be opened,
 * and 0 otherwise.  Note that 0 is also returned when the type-specific
 * open fails: the vnode is closed again and the only trace of the failure
 * seen here is req->error_str, where the failing path filled it in.
 */
static int
ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	const char *value;
	int error, flags;

	error = 0;
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "Root filesystem is not mounted");
		return (1);
	}
	pwd_ensure_dirs();

	value = dnvlist_get_string(cbe_lun->options, "file", NULL);
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no file argument specified");
		return (1);
	}
	/* Replace any path remembered from an earlier open. */
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	flags = FREAD;
	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
	if (value != NULL) {
		if (strcmp(value, "on") != 0)
			flags |= FWRITE;
	} else if (cbe_lun->lun_type == T_DIRECT)
		flags |= FWRITE;

again:
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
	error = vn_open(&nd, &flags, 0, NULL);
	/* Not writable: fall back to a read-only open. */
	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
		flags &= ~FWRITE;
		goto again;
	}
	if (error) {
		/*
		 * This is the only reasonable guess we can make as far as
		 * path if the user doesn't give us a fully qualified path.
		 * If they want to specify a file, they need to specify the
		 * full path.
		 *
		 * The retry keeps "flags" as left by the attempts above, so
		 * a dropped FWRITE is not restored for the /dev/ path.
		 */
		if (be_lun->dev_path[0] != '/') {
			char *dev_name;

			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
				be_lun->dev_path);
			free(be_lun->dev_path, M_CTLBLK);
			be_lun->dev_path = dev_name;
			goto again;
		}
		snprintf(req->error_str, sizeof(req->error_str),
		    "error opening %s: %d", be_lun->dev_path, error);
		return (error);
	}
	if (flags & FWRITE)
		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
	else
		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;

	NDFREE(&nd, NDF_ONLY_PNBUF);
	be_lun->vn = nd.ni_vp;

	/* We only support disks and files. */
	if (vn_isdisk(be_lun->vn, &error)) {
		error = ctl_be_block_open_dev(be_lun, req);
	} else if (be_lun->vn->v_type == VREG) {
		error = ctl_be_block_open_file(be_lun, req);
	} else {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "%s is not a disk or plain file", be_lun->dev_path);
	}
	VOP_UNLOCK(be_lun->vn);

	/* Setup failed: drop the vnode again (the function still returns 0). */
	if (error != 0)
		ctl_be_block_close(be_lun);
	/*
	 * Default serialization: off for the plain device dispatch method,
	 * "read" for every other dispatch method; "serseq" overrides it.
	 */
	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
	if (value != NULL && strcmp(value, "on") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
	else if (value != NULL && strcmp(value, "read") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	else if (value != NULL && strcmp(value, "off") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	return (0);
}
CTL_LUN_FLAG_DEV_TYPE) 2222 cbe_lun->lun_type = params->device_type; 2223 else 2224 cbe_lun->lun_type = T_DIRECT; 2225 be_lun->flags = 0; 2226 cbe_lun->flags = 0; 2227 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2228 if (value != NULL) { 2229 if (strcmp(value, "primary") == 0) 2230 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2231 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2232 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2233 2234 if (cbe_lun->lun_type == T_DIRECT || 2235 cbe_lun->lun_type == T_CDROM) { 2236 be_lun->size_bytes = params->lun_size_bytes; 2237 if (params->blocksize_bytes != 0) 2238 cbe_lun->blocksize = params->blocksize_bytes; 2239 else if (cbe_lun->lun_type == T_CDROM) 2240 cbe_lun->blocksize = 2048; 2241 else 2242 cbe_lun->blocksize = 512; 2243 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2244 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2245 0 : (be_lun->size_blocks - 1); 2246 2247 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2248 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2249 retval = ctl_be_block_open(be_lun, req); 2250 if (retval != 0) { 2251 retval = 0; 2252 req->status = CTL_LUN_WARNING; 2253 } 2254 } 2255 num_threads = cbb_num_threads; 2256 } else { 2257 num_threads = 1; 2258 } 2259 2260 value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL); 2261 if (value != NULL) { 2262 tmp_num_threads = strtol(value, NULL, 0); 2263 2264 /* 2265 * We don't let the user specify less than one 2266 * thread, but hope he's clueful enough not to 2267 * specify 1000 threads. 
2268 */ 2269 if (tmp_num_threads < 1) { 2270 snprintf(req->error_str, sizeof(req->error_str), 2271 "invalid number of threads %s", 2272 num_thread_str); 2273 goto bailout_error; 2274 } 2275 num_threads = tmp_num_threads; 2276 } 2277 2278 if (be_lun->vn == NULL) 2279 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2280 /* Tell the user the blocksize we ended up using */ 2281 params->lun_size_bytes = be_lun->size_bytes; 2282 params->blocksize_bytes = cbe_lun->blocksize; 2283 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2284 cbe_lun->req_lun_id = params->req_lun_id; 2285 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2286 } else 2287 cbe_lun->req_lun_id = 0; 2288 2289 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2290 cbe_lun->be = &ctl_be_block_driver; 2291 2292 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2293 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d", 2294 softc->num_luns); 2295 strncpy((char *)cbe_lun->serial_num, tmpstr, 2296 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2297 2298 /* Tell the user what we used for a serial number */ 2299 strncpy((char *)params->serial_num, tmpstr, 2300 MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2301 } else { 2302 strncpy((char *)cbe_lun->serial_num, params->serial_num, 2303 MIN(sizeof(cbe_lun->serial_num), 2304 sizeof(params->serial_num))); 2305 } 2306 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2307 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns); 2308 strncpy((char *)cbe_lun->device_id, tmpstr, 2309 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2310 2311 /* Tell the user what we used for a device ID */ 2312 strncpy((char *)params->device_id, tmpstr, 2313 MIN(sizeof(params->device_id), sizeof(tmpstr))); 2314 } else { 2315 strncpy((char *)cbe_lun->device_id, params->device_id, 2316 MIN(sizeof(cbe_lun->device_id), 2317 sizeof(params->device_id))); 2318 } 2319 2320 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2321 2322 be_lun->io_taskqueue = 
taskqueue_create("ctlblocktq", M_WAITOK, 2323 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2324 2325 if (be_lun->io_taskqueue == NULL) { 2326 snprintf(req->error_str, sizeof(req->error_str), 2327 "unable to create taskqueue"); 2328 goto bailout_error; 2329 } 2330 2331 /* 2332 * Note that we start the same number of threads by default for 2333 * both the file case and the block device case. For the file 2334 * case, we need multiple threads to allow concurrency, because the 2335 * vnode interface is designed to be a blocking interface. For the 2336 * block device case, ZFS zvols at least will block the caller's 2337 * context in many instances, and so we need multiple threads to 2338 * overcome that problem. Other block devices don't need as many 2339 * threads, but they shouldn't cause too many problems. 2340 * 2341 * If the user wants to just have a single thread for a block 2342 * device, he can specify that when the LUN is created, or change 2343 * the tunable/sysctl to alter the default number of threads. 
2344 */ 2345 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue, 2346 /*num threads*/num_threads, 2347 /*priority*/PUSER, 2348 /*proc*/control_softc->ctl_proc, 2349 /*thread name*/"block"); 2350 2351 if (retval != 0) 2352 goto bailout_error; 2353 2354 be_lun->num_threads = num_threads; 2355 2356 retval = ctl_add_lun(&be_lun->cbe_lun); 2357 if (retval != 0) { 2358 snprintf(req->error_str, sizeof(req->error_str), 2359 "ctl_add_lun() returned error %d, see dmesg for " 2360 "details", retval); 2361 retval = 0; 2362 goto bailout_error; 2363 } 2364 2365 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, 2366 cbe_lun->blocksize, 2367 DEVSTAT_ALL_SUPPORTED, 2368 cbe_lun->lun_type 2369 | DEVSTAT_TYPE_IF_OTHER, 2370 DEVSTAT_PRIORITY_OTHER); 2371 2372 mtx_lock(&softc->lock); 2373 softc->num_luns++; 2374 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); 2375 mtx_unlock(&softc->lock); 2376 2377 params->req_lun_id = cbe_lun->lun_id; 2378 2379 return (retval); 2380 2381 bailout_error: 2382 req->status = CTL_LUN_ERROR; 2383 2384 if (be_lun->io_taskqueue != NULL) 2385 taskqueue_free(be_lun->io_taskqueue); 2386 ctl_be_block_close(be_lun); 2387 if (be_lun->dev_path != NULL) 2388 free(be_lun->dev_path, M_CTLBLK); 2389 nvlist_destroy(cbe_lun->options); 2390 mtx_destroy(&be_lun->queue_lock); 2391 mtx_destroy(&be_lun->io_lock); 2392 free(be_lun, M_CTLBLK); 2393 2394 return (retval); 2395 } 2396 2397 static int 2398 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2399 { 2400 struct ctl_lun_rm_params *params; 2401 struct ctl_be_block_lun *be_lun; 2402 struct ctl_be_lun *cbe_lun; 2403 int retval; 2404 2405 params = &req->reqdata.rm; 2406 2407 sx_xlock(&softc->modify_lock); 2408 mtx_lock(&softc->lock); 2409 SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2410 if (be_lun->cbe_lun.lun_id == params->lun_id) { 2411 SLIST_REMOVE(&softc->lun_list, be_lun, 2412 ctl_be_block_lun, links); 2413 softc->num_luns--; 2414 break; 2415 } 2416 } 2417 
mtx_unlock(&softc->lock); 2418 sx_xunlock(&softc->modify_lock); 2419 if (be_lun == NULL) { 2420 snprintf(req->error_str, sizeof(req->error_str), 2421 "LUN %u is not managed by the block backend", 2422 params->lun_id); 2423 goto bailout_error; 2424 } 2425 cbe_lun = &be_lun->cbe_lun; 2426 2427 if (be_lun->vn != NULL) { 2428 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2429 ctl_lun_no_media(cbe_lun); 2430 taskqueue_drain_all(be_lun->io_taskqueue); 2431 ctl_be_block_close(be_lun); 2432 } 2433 2434 mtx_lock(&softc->lock); 2435 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2436 mtx_unlock(&softc->lock); 2437 2438 retval = ctl_remove_lun(cbe_lun); 2439 if (retval != 0) { 2440 snprintf(req->error_str, sizeof(req->error_str), 2441 "error %d returned from ctl_remove_lun() for " 2442 "LUN %d", retval, params->lun_id); 2443 mtx_lock(&softc->lock); 2444 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2445 mtx_unlock(&softc->lock); 2446 goto bailout_error; 2447 } 2448 2449 mtx_lock(&softc->lock); 2450 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2451 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0); 2452 if (retval == EINTR) 2453 break; 2454 } 2455 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2456 if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2457 mtx_unlock(&softc->lock); 2458 free(be_lun, M_CTLBLK); 2459 } else { 2460 mtx_unlock(&softc->lock); 2461 return (EINTR); 2462 } 2463 2464 req->status = CTL_LUN_OK; 2465 return (0); 2466 2467 bailout_error: 2468 req->status = CTL_LUN_ERROR; 2469 return (0); 2470 } 2471 2472 static int 2473 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2474 { 2475 struct ctl_lun_modify_params *params; 2476 struct ctl_be_block_lun *be_lun; 2477 struct ctl_be_lun *cbe_lun; 2478 const char *value; 2479 uint64_t oldsize; 2480 int error, wasprim; 2481 2482 params = &req->reqdata.modify; 2483 2484 sx_xlock(&softc->modify_lock); 2485 mtx_lock(&softc->lock); 2486 SLIST_FOREACH(be_lun, &softc->lun_list, 
links) { 2487 if (be_lun->cbe_lun.lun_id == params->lun_id) 2488 break; 2489 } 2490 mtx_unlock(&softc->lock); 2491 if (be_lun == NULL) { 2492 snprintf(req->error_str, sizeof(req->error_str), 2493 "LUN %u is not managed by the block backend", 2494 params->lun_id); 2495 goto bailout_error; 2496 } 2497 cbe_lun = &be_lun->cbe_lun; 2498 2499 if (params->lun_size_bytes != 0) 2500 be_lun->params.lun_size_bytes = params->lun_size_bytes; 2501 2502 if (req->args_nvl != NULL) { 2503 nvlist_destroy(cbe_lun->options); 2504 cbe_lun->options = nvlist_clone(req->args_nvl); 2505 } 2506 2507 wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); 2508 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); 2509 if (value != NULL) { 2510 if (strcmp(value, "primary") == 0) 2511 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2512 else 2513 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2514 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2515 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2516 else 2517 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2518 if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { 2519 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) 2520 ctl_lun_primary(cbe_lun); 2521 else 2522 ctl_lun_secondary(cbe_lun); 2523 } 2524 2525 oldsize = be_lun->size_blocks; 2526 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2527 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2528 if (be_lun->vn == NULL) 2529 error = ctl_be_block_open(be_lun, req); 2530 else if (vn_isdisk(be_lun->vn, &error)) 2531 error = ctl_be_block_open_dev(be_lun, req); 2532 else if (be_lun->vn->v_type == VREG) { 2533 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2534 error = ctl_be_block_open_file(be_lun, req); 2535 VOP_UNLOCK(be_lun->vn); 2536 } else 2537 error = EINVAL; 2538 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) && 2539 be_lun->vn != NULL) { 2540 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2541 ctl_lun_has_media(cbe_lun); 2542 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 && 2543 be_lun->vn == NULL) { 2544 
cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2545 ctl_lun_no_media(cbe_lun); 2546 } 2547 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2548 } else { 2549 if (be_lun->vn != NULL) { 2550 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2551 ctl_lun_no_media(cbe_lun); 2552 taskqueue_drain_all(be_lun->io_taskqueue); 2553 error = ctl_be_block_close(be_lun); 2554 } else 2555 error = 0; 2556 } 2557 if (be_lun->size_blocks != oldsize) 2558 ctl_lun_capacity_changed(cbe_lun); 2559 2560 /* Tell the user the exact size we ended up using */ 2561 params->lun_size_bytes = be_lun->size_bytes; 2562 2563 sx_xunlock(&softc->modify_lock); 2564 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; 2565 return (0); 2566 2567 bailout_error: 2568 sx_xunlock(&softc->modify_lock); 2569 req->status = CTL_LUN_ERROR; 2570 return (0); 2571 } 2572 2573 static void 2574 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun) 2575 { 2576 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun; 2577 struct ctl_be_block_softc *softc = be_lun->softc; 2578 2579 taskqueue_drain_all(be_lun->io_taskqueue); 2580 taskqueue_free(be_lun->io_taskqueue); 2581 if (be_lun->disk_stats != NULL) 2582 devstat_remove_entry(be_lun->disk_stats); 2583 nvlist_destroy(be_lun->cbe_lun.options); 2584 free(be_lun->dev_path, M_CTLBLK); 2585 mtx_destroy(&be_lun->queue_lock); 2586 mtx_destroy(&be_lun->io_lock); 2587 2588 mtx_lock(&softc->lock); 2589 be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2590 if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2591 wakeup(be_lun); 2592 else 2593 free(be_lun, M_CTLBLK); 2594 mtx_unlock(&softc->lock); 2595 } 2596 2597 static int 2598 ctl_be_block_config_write(union ctl_io *io) 2599 { 2600 struct ctl_be_block_lun *be_lun; 2601 struct ctl_be_lun *cbe_lun; 2602 int retval; 2603 2604 DPRINTF("entered\n"); 2605 2606 cbe_lun = CTL_BACKEND_LUN(io); 2607 be_lun = (struct ctl_be_block_lun *)cbe_lun; 2608 2609 retval = 0; 2610 switch (io->scsiio.cdb[0]) { 2611 case SYNCHRONIZE_CACHE: 2612 case 
SYNCHRONIZE_CACHE_16: 2613 case WRITE_SAME_10: 2614 case WRITE_SAME_16: 2615 case UNMAP: 2616 /* 2617 * The upper level CTL code will filter out any CDBs with 2618 * the immediate bit set and return the proper error. 2619 * 2620 * We don't really need to worry about what LBA range the 2621 * user asked to be synced out. When they issue a sync 2622 * cache command, we'll sync out the whole thing. 2623 */ 2624 mtx_lock(&be_lun->queue_lock); 2625 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2626 links); 2627 mtx_unlock(&be_lun->queue_lock); 2628 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2629 break; 2630 case START_STOP_UNIT: { 2631 struct scsi_start_stop_unit *cdb; 2632 struct ctl_lun_req req; 2633 2634 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2635 if ((cdb->how & SSS_PC_MASK) != 0) { 2636 ctl_set_success(&io->scsiio); 2637 ctl_config_write_done(io); 2638 break; 2639 } 2640 if (cdb->how & SSS_START) { 2641 if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) { 2642 retval = ctl_be_block_open(be_lun, &req); 2643 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2644 if (retval == 0) { 2645 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2646 ctl_lun_has_media(cbe_lun); 2647 } else { 2648 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2649 ctl_lun_no_media(cbe_lun); 2650 } 2651 } 2652 ctl_start_lun(cbe_lun); 2653 } else { 2654 ctl_stop_lun(cbe_lun); 2655 if (cdb->how & SSS_LOEJ) { 2656 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2657 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED; 2658 ctl_lun_ejected(cbe_lun); 2659 if (be_lun->vn != NULL) 2660 ctl_be_block_close(be_lun); 2661 } 2662 } 2663 2664 ctl_set_success(&io->scsiio); 2665 ctl_config_write_done(io); 2666 break; 2667 } 2668 case PREVENT_ALLOW: 2669 ctl_set_success(&io->scsiio); 2670 ctl_config_write_done(io); 2671 break; 2672 default: 2673 ctl_set_invalid_opcode(&io->scsiio); 2674 ctl_config_write_done(io); 2675 retval = CTL_RETVAL_COMPLETE; 2676 break; 2677 } 2678 2679 return (retval); 2680 } 2681 2682 
static int 2683 ctl_be_block_config_read(union ctl_io *io) 2684 { 2685 struct ctl_be_block_lun *be_lun; 2686 int retval = 0; 2687 2688 DPRINTF("entered\n"); 2689 2690 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); 2691 2692 switch (io->scsiio.cdb[0]) { 2693 case SERVICE_ACTION_IN: 2694 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 2695 mtx_lock(&be_lun->queue_lock); 2696 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 2697 &io->io_hdr, links); 2698 mtx_unlock(&be_lun->queue_lock); 2699 taskqueue_enqueue(be_lun->io_taskqueue, 2700 &be_lun->io_task); 2701 retval = CTL_RETVAL_QUEUED; 2702 break; 2703 } 2704 ctl_set_invalid_field(&io->scsiio, 2705 /*sks_valid*/ 1, 2706 /*command*/ 1, 2707 /*field*/ 1, 2708 /*bit_valid*/ 1, 2709 /*bit*/ 4); 2710 ctl_config_read_done(io); 2711 retval = CTL_RETVAL_COMPLETE; 2712 break; 2713 default: 2714 ctl_set_invalid_opcode(&io->scsiio); 2715 ctl_config_read_done(io); 2716 retval = CTL_RETVAL_COMPLETE; 2717 break; 2718 } 2719 2720 return (retval); 2721 } 2722 2723 static int 2724 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb) 2725 { 2726 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 2727 int retval; 2728 2729 retval = sbuf_printf(sb, "\t<num_threads>"); 2730 if (retval != 0) 2731 goto bailout; 2732 retval = sbuf_printf(sb, "%d", lun->num_threads); 2733 if (retval != 0) 2734 goto bailout; 2735 retval = sbuf_printf(sb, "</num_threads>\n"); 2736 2737 bailout: 2738 return (retval); 2739 } 2740 2741 static uint64_t 2742 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname) 2743 { 2744 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; 2745 2746 if (lun->getattr == NULL) 2747 return (UINT64_MAX); 2748 return (lun->getattr(lun, attrname)); 2749 } 2750 2751 static int 2752 ctl_be_block_init(void) 2753 { 2754 struct ctl_be_block_softc *softc = &backend_block_softc; 2755 2756 sx_init(&softc->modify_lock, "ctlblock modify"); 2757 mtx_init(&softc->lock, 
"ctlblock", NULL, MTX_DEF); 2758 softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2759 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2760 softc->buf_zone = uma_zcreate("ctlblock", CTLBLK_MAX_SEG, 2761 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2762 SLIST_INIT(&softc->lun_list); 2763 return (0); 2764 } 2765 2766 2767 static int 2768 ctl_be_block_shutdown(void) 2769 { 2770 struct ctl_be_block_softc *softc = &backend_block_softc; 2771 struct ctl_be_block_lun *lun; 2772 2773 mtx_lock(&softc->lock); 2774 while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) { 2775 SLIST_REMOVE_HEAD(&softc->lun_list, links); 2776 softc->num_luns--; 2777 /* 2778 * Drop our lock here. Since ctl_remove_lun() can call 2779 * back into us, this could potentially lead to a recursive 2780 * lock of the same mutex, which would cause a hang. 2781 */ 2782 mtx_unlock(&softc->lock); 2783 ctl_remove_lun(&lun->cbe_lun); 2784 mtx_lock(&softc->lock); 2785 } 2786 mtx_unlock(&softc->lock); 2787 uma_zdestroy(softc->buf_zone); 2788 uma_zdestroy(softc->beio_zone); 2789 mtx_destroy(&softc->lock); 2790 sx_destroy(&softc->modify_lock); 2791 return (0); 2792 } 2793