1 /* SPDX-License-Identifier: MIT */ 2 /* 3 * Description: uring_cmd based ublk 4 */ 5 6 #include <linux/fs.h> 7 #include "kublk.h" 8 9 #define MAX_NR_TGT_ARG 64 10 11 unsigned int ublk_dbg_mask = UBLK_LOG; 12 static const struct ublk_tgt_ops *tgt_ops_list[] = { 13 &null_tgt_ops, 14 &loop_tgt_ops, 15 &stripe_tgt_ops, 16 &fault_inject_tgt_ops, 17 }; 18 19 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) 20 { 21 int i; 22 23 if (name == NULL) 24 return NULL; 25 26 for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) 27 if (strcmp(tgt_ops_list[i]->name, name) == 0) 28 return tgt_ops_list[i]; 29 return NULL; 30 } 31 32 static inline int ublk_setup_ring(struct io_uring *r, int depth, 33 int cq_depth, unsigned flags) 34 { 35 struct io_uring_params p; 36 37 memset(&p, 0, sizeof(p)); 38 p.flags = flags | IORING_SETUP_CQSIZE; 39 p.cq_entries = cq_depth; 40 41 return io_uring_queue_init_params(depth, r, &p); 42 } 43 44 static void ublk_ctrl_init_cmd(struct ublk_dev *dev, 45 struct io_uring_sqe *sqe, 46 struct ublk_ctrl_cmd_data *data) 47 { 48 struct ublksrv_ctrl_dev_info *info = &dev->dev_info; 49 struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe); 50 51 sqe->fd = dev->ctrl_fd; 52 sqe->opcode = IORING_OP_URING_CMD; 53 sqe->ioprio = 0; 54 55 if (data->flags & CTRL_CMD_HAS_BUF) { 56 cmd->addr = data->addr; 57 cmd->len = data->len; 58 } 59 60 if (data->flags & CTRL_CMD_HAS_DATA) 61 cmd->data[0] = data->data[0]; 62 63 cmd->dev_id = info->dev_id; 64 cmd->queue_id = -1; 65 66 ublk_set_sqe_cmd_op(sqe, data->cmd_op); 67 68 io_uring_sqe_set_data(sqe, cmd); 69 } 70 71 static int __ublk_ctrl_cmd(struct ublk_dev *dev, 72 struct ublk_ctrl_cmd_data *data) 73 { 74 struct io_uring_sqe *sqe; 75 struct io_uring_cqe *cqe; 76 int ret = -EINVAL; 77 78 sqe = io_uring_get_sqe(&dev->ring); 79 if (!sqe) { 80 ublk_err("%s: can't get sqe ret %d\n", __func__, ret); 81 return ret; 82 } 83 84 ublk_ctrl_init_cmd(dev, sqe, data); 85 86 ret = io_uring_submit(&dev->ring); 87 if (ret < 0) { 88 ublk_err("uring submit ret %d\n", ret); 89 return ret; 90 } 91 92 ret = io_uring_wait_cqe(&dev->ring, &cqe); 93 if (ret < 0) { 94 ublk_err("wait cqe: %s\n", strerror(-ret)); 95 return ret; 96 } 97 io_uring_cqe_seen(&dev->ring, cqe); 98 99 return cqe->res; 100 } 101 102 static int ublk_ctrl_stop_dev(struct ublk_dev *dev) 103 { 104 struct ublk_ctrl_cmd_data data = { 105 .cmd_op = UBLK_U_CMD_STOP_DEV, 106 }; 107 108 return __ublk_ctrl_cmd(dev, &data); 109 } 110 111 static int ublk_ctrl_try_stop_dev(struct ublk_dev *dev) 112 { 113 struct ublk_ctrl_cmd_data data = { 114 .cmd_op = UBLK_U_CMD_TRY_STOP_DEV, 115 }; 116 117 return __ublk_ctrl_cmd(dev, &data); 118 } 119 120 static int ublk_ctrl_start_dev(struct ublk_dev *dev, 121 int daemon_pid) 122 { 123 struct ublk_ctrl_cmd_data data = { 124 .cmd_op = UBLK_U_CMD_START_DEV, 125 .flags = CTRL_CMD_HAS_DATA, 126 }; 127 128 dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; 129 130 return __ublk_ctrl_cmd(dev, &data); 131 } 132 133 static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev) 134 { 135 struct ublk_ctrl_cmd_data data = { 136 .cmd_op = UBLK_U_CMD_START_USER_RECOVERY, 137 }; 138 139 return __ublk_ctrl_cmd(dev, &data); 140 } 141 142 static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid) 143 { 144 struct ublk_ctrl_cmd_data data = { 145 .cmd_op = UBLK_U_CMD_END_USER_RECOVERY, 146 .flags = CTRL_CMD_HAS_DATA, 147 }; 148 149 dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; 150 151 return __ublk_ctrl_cmd(dev, &data); 152 } 153 154 static int ublk_ctrl_add_dev(struct ublk_dev *dev) 155 { 156 struct ublk_ctrl_cmd_data data = { 157 .cmd_op = UBLK_U_CMD_ADD_DEV, 158 .flags = CTRL_CMD_HAS_BUF, 159 .addr = (__u64) (uintptr_t) &dev->dev_info, 160 .len = sizeof(struct ublksrv_ctrl_dev_info), 161 }; 162 163 return __ublk_ctrl_cmd(dev, &data); 164 } 165 166 static int ublk_ctrl_del_dev(struct ublk_dev *dev) 167 { 168 struct ublk_ctrl_cmd_data data = { 169 .cmd_op = UBLK_U_CMD_DEL_DEV, 170 .flags = 0, 171 }; 172 173 return __ublk_ctrl_cmd(dev, &data); 174 } 175 176 static int ublk_ctrl_get_info(struct ublk_dev *dev) 177 { 178 struct ublk_ctrl_cmd_data data = { 179 .cmd_op = UBLK_U_CMD_GET_DEV_INFO, 180 .flags = CTRL_CMD_HAS_BUF, 181 .addr = (__u64) (uintptr_t) &dev->dev_info, 182 .len = sizeof(struct ublksrv_ctrl_dev_info), 183 }; 184 185 return __ublk_ctrl_cmd(dev, &data); 186 } 187 188 static int ublk_ctrl_set_params(struct ublk_dev *dev, 189 struct ublk_params *params) 190 { 191 struct ublk_ctrl_cmd_data data = { 192 .cmd_op = UBLK_U_CMD_SET_PARAMS, 193 .flags = CTRL_CMD_HAS_BUF, 194 .addr = (__u64) (uintptr_t) params, 195 .len = sizeof(*params), 196 }; 197 params->len = sizeof(*params); 198 return __ublk_ctrl_cmd(dev, &data); 199 } 200 201 static int ublk_ctrl_get_params(struct ublk_dev *dev, 202 struct ublk_params *params) 203 { 204 struct ublk_ctrl_cmd_data data = { 205 .cmd_op = UBLK_U_CMD_GET_PARAMS, 206 .flags = CTRL_CMD_HAS_BUF, 207 .addr = (__u64)params, 208 .len = sizeof(*params), 209 }; 210 211 params->len = sizeof(*params); 212 213 return __ublk_ctrl_cmd(dev, &data); 214 } 215 216 static int ublk_ctrl_get_features(struct ublk_dev *dev, 217 __u64 *features) 218 { 219 struct ublk_ctrl_cmd_data data = { 220 .cmd_op = UBLK_U_CMD_GET_FEATURES, 221 .flags = CTRL_CMD_HAS_BUF, 222 .addr = (__u64) (uintptr_t) features, 223 .len = sizeof(*features), 224 }; 225 226 return __ublk_ctrl_cmd(dev, &data); 227 } 228 229 static int ublk_ctrl_update_size(struct ublk_dev *dev, 230 __u64 nr_sects) 231 { 232 struct ublk_ctrl_cmd_data data = { 233 .cmd_op = UBLK_U_CMD_UPDATE_SIZE, 234 .flags = CTRL_CMD_HAS_DATA, 235 }; 236 237 data.data[0] = nr_sects; 238 return __ublk_ctrl_cmd(dev, &data); 239 } 240 241 static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev, 242 unsigned int timeout_ms) 243 { 244 struct ublk_ctrl_cmd_data data = { 245 .cmd_op = UBLK_U_CMD_QUIESCE_DEV, 246 .flags = CTRL_CMD_HAS_DATA, 247 }; 248 249 data.data[0] = timeout_ms; 250 return __ublk_ctrl_cmd(dev, &data); 251 } 252 253 static const char *ublk_dev_state_desc(struct ublk_dev *dev) 254 { 255 switch (dev->dev_info.state) { 256 case UBLK_S_DEV_DEAD: 257 return "DEAD"; 258 case UBLK_S_DEV_LIVE: 259 return "LIVE"; 260 case UBLK_S_DEV_QUIESCED: 261 return "QUIESCED"; 262 default: 263 return "UNKNOWN"; 264 }; 265 } 266 267 static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len) 268 { 269 unsigned done = 0; 270 int i; 271 272 for (i = 0; i < CPU_SETSIZE; i++) { 273 if (CPU_ISSET(i, set)) 274 done += snprintf(&buf[done], len - done, "%d ", i); 275 } 276 } 277 278 static void ublk_adjust_affinity(cpu_set_t *set) 279 { 280 int j, updated = 0; 281 282 /* 283 * Just keep the 1st CPU now. 284 * 285 * In future, auto affinity selection can be tried. 286 */ 287 for (j = 0; j < CPU_SETSIZE; j++) { 288 if (CPU_ISSET(j, set)) { 289 if (!updated) { 290 updated = 1; 291 continue; 292 } 293 CPU_CLR(j, set); 294 } 295 } 296 } 297 298 /* Caller must free the allocated buffer */ 299 static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf) 300 { 301 struct ublk_ctrl_cmd_data data = { 302 .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY, 303 .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF, 304 }; 305 cpu_set_t *buf; 306 int i, ret; 307 308 buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues); 309 if (!buf) 310 return -ENOMEM; 311 312 for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) { 313 data.data[0] = i; 314 data.len = sizeof(cpu_set_t); 315 data.addr = (__u64)&buf[i]; 316 317 ret = __ublk_ctrl_cmd(ctrl_dev, &data); 318 if (ret < 0) { 319 free(buf); 320 return ret; 321 } 322 ublk_adjust_affinity(&buf[i]); 323 } 324 325 *ptr_buf = buf; 326 return 0; 327 } 328 329 static void ublk_ctrl_dump(struct ublk_dev *dev) 330 { 331 struct ublksrv_ctrl_dev_info *info = &dev->dev_info; 332 struct ublk_params p; 333 cpu_set_t *affinity; 334 int ret; 335 336 ret = ublk_ctrl_get_params(dev, &p); 337 if (ret < 0) { 338 ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); 339 return; 340 } 341 342 ret = ublk_ctrl_get_affinity(dev, &affinity); 343 if (ret < 0) { 344 ublk_err("failed to get affinity %m\n"); 345 return; 346 } 347 348 ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", 349 info->dev_id, info->nr_hw_queues, info->queue_depth, 350 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); 351 ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", 352 info->max_io_buf_bytes, info->ublksrv_pid, info->flags, 353 ublk_dev_state_desc(dev)); 354 355 if (affinity) { 356 char buf[512]; 357 int i; 358 359 for (i = 0; i < info->nr_hw_queues; i++) { 360 ublk_print_cpu_set(&affinity[i], buf, sizeof(buf)); 361 printf("\tqueue %u: affinity(%s)\n", 362 i, buf); 363 } 364 free(affinity); 365 } 366 367 fflush(stdout); 368 } 369 370 static void ublk_ctrl_deinit(struct ublk_dev *dev) 371 { 372 close(dev->ctrl_fd); 373 free(dev); 374 } 375 376 static struct ublk_dev *ublk_ctrl_init(void) 377 { 378 struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev)); 379 struct ublksrv_ctrl_dev_info *info = &dev->dev_info; 380 int ret; 381 382 dev->ctrl_fd = open(CTRL_DEV, O_RDWR); 383 if (dev->ctrl_fd < 0) { 384 free(dev); 385 return NULL; 386 } 387 388 info->max_io_buf_bytes = UBLK_IO_MAX_BYTES; 389 390 ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH, 391 UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128); 392 if (ret < 0) { 393 ublk_err("queue_init: %s\n", strerror(-ret)); 394 free(dev); 395 return NULL; 396 } 397 dev->nr_fds = 1; 398 399 return dev; 400 } 401 402 static int __ublk_queue_cmd_buf_sz(unsigned depth) 403 { 404 int size = depth * sizeof(struct ublksrv_io_desc); 405 unsigned int page_sz = getpagesize(); 406 407 return round_up(size, page_sz); 408 } 409 410 static int ublk_queue_max_cmd_buf_sz(void) 411 { 412 return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH); 413 } 414 415 static int ublk_queue_cmd_buf_sz(struct ublk_queue *q) 416 { 417 return __ublk_queue_cmd_buf_sz(q->q_depth); 418 } 419 420 static void ublk_queue_deinit(struct ublk_queue *q) 421 { 422 int i; 423 int nr_ios = q->q_depth; 424 425 if (q->io_cmd_buf) 426 munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q)); 427 428 for (i = 0; i < nr_ios; i++) { 429 free(q->ios[i].buf_addr); 430 free(q->ios[i].integrity_buf); 431 } 432 } 433 434 static void ublk_thread_deinit(struct ublk_thread *t) 435 { 436 io_uring_unregister_buffers(&t->ring); 437 438 ublk_batch_free_buf(t); 439 440 io_uring_unregister_ring_fd(&t->ring); 441 442 if (t->ring.ring_fd > 0) { 443 io_uring_unregister_files(&t->ring); 444 close(t->ring.ring_fd); 445 t->ring.ring_fd = -1; 446 } 447 } 448 449 static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags, 450 __u8 metadata_size) 451 { 452 struct ublk_dev *dev = q->dev; 453 int depth = dev->dev_info.queue_depth; 454 int i; 455 int cmd_buf_size, io_buf_size, integrity_size; 456 unsigned long off; 457 458 pthread_spin_init(&q->lock, PTHREAD_PROCESS_PRIVATE); 459 q->tgt_ops = dev->tgt.ops; 460 q->flags = 0; 461 q->q_depth = depth; 462 q->flags = dev->dev_info.flags; 463 q->flags |= extra_flags; 464 q->metadata_size = metadata_size; 465 466 /* Cache fd in queue for fast path access */ 467 q->ublk_fd = dev->fds[0]; 468 469 cmd_buf_size = ublk_queue_cmd_buf_sz(q); 470 off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); 471 q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ, 472 MAP_SHARED | MAP_POPULATE, dev->fds[0], off); 473 if (q->io_cmd_buf == MAP_FAILED) { 474 ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n", 475 q->dev->dev_info.dev_id, q->q_id); 476 goto fail; 477 } 478 479 io_buf_size = dev->dev_info.max_io_buf_bytes; 480 integrity_size = ublk_integrity_len(q, io_buf_size); 481 for (i = 0; i < q->q_depth; i++) { 482 q->ios[i].buf_addr = NULL; 483 q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE; 484 q->ios[i].tag = i; 485 486 if (integrity_size) { 487 q->ios[i].integrity_buf = malloc(integrity_size); 488 if (!q->ios[i].integrity_buf) { 489 ublk_err("ublk dev %d queue %d io %d malloc(%d) failed: %m\n", 490 dev->dev_info.dev_id, q->q_id, i, 491 integrity_size); 492 goto fail; 493 } 494 } 495 496 497 if (ublk_queue_no_buf(q)) 498 continue; 499 500 if (posix_memalign((void **)&q->ios[i].buf_addr, 501 getpagesize(), io_buf_size)) { 502 ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n", 503 dev->dev_info.dev_id, q->q_id, i); 504 goto fail; 505 } 506 } 507 508 return 0; 509 fail: 510 ublk_queue_deinit(q); 511 ublk_err("ublk dev %d queue %d failed\n", 512 dev->dev_info.dev_id, q->q_id); 513 return -ENOMEM; 514 } 515 516 static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags) 517 { 518 struct ublk_dev *dev = t->dev; 519 unsigned long long flags = dev->dev_info.flags | extra_flags; 520 int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth; 521 int ret; 522 523 /* FETCH_IO_CMDS is multishot, so increase cq depth for BATCH_IO */ 524 if (ublk_dev_batch_io(dev)) 525 cq_depth += dev->dev_info.queue_depth * 2; 526 527 ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth, 528 IORING_SETUP_COOP_TASKRUN | 529 IORING_SETUP_SINGLE_ISSUER | 530 IORING_SETUP_DEFER_TASKRUN); 531 if (ret < 0) { 532 ublk_err("ublk dev %d thread %d setup io_uring failed %d\n", 533 dev->dev_info.dev_id, t->idx, ret); 534 goto fail; 535 } 536 537 if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { 538 unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues; 539 unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads; 540 max_nr_ios_per_thread += !!(nr_ios % dev->nthreads); 541 542 t->nr_bufs = max_nr_ios_per_thread; 543 } else { 544 t->nr_bufs = 0; 545 } 546 547 if (ublk_dev_batch_io(dev)) 548 ublk_batch_prepare(t); 549 550 if (t->nr_bufs) { 551 ret = io_uring_register_buffers_sparse(&t->ring, t->nr_bufs); 552 if (ret) { 553 ublk_err("ublk dev %d thread %d register spare buffers failed %d\n", 554 dev->dev_info.dev_id, t->idx, ret); 555 goto fail; 556 } 557 } 558 559 if (ublk_dev_batch_io(dev)) { 560 ret = ublk_batch_alloc_buf(t); 561 if (ret) { 562 ublk_err("ublk dev %d thread %d alloc batch buf failed %d\n", 563 dev->dev_info.dev_id, t->idx, ret); 564 goto fail; 565 } 566 } 567 568 io_uring_register_ring_fd(&t->ring); 569 570 if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) { 571 /* Register only backing files starting from index 1, exclude ublk control device */ 572 if (dev->nr_fds > 1) { 573 ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1); 574 } else { 575 /* No backing files to register, skip file registration */ 576 ret = 0; 577 } 578 } else { 579 ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); 580 } 581 if (ret) { 582 ublk_err("ublk dev %d thread %d register files failed %d\n", 583 t->dev->dev_info.dev_id, t->idx, ret); 584 goto fail; 585 } 586 587 return 0; 588 fail: 589 ublk_thread_deinit(t); 590 ublk_err("ublk dev %d thread %d init failed\n", 591 dev->dev_info.dev_id, t->idx); 592 return -ENOMEM; 593 } 594 595 #define WAIT_USEC 100000 596 #define MAX_WAIT_USEC (3 * 1000000) 597 static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev) 598 { 599 int dev_id = dev->dev_info.dev_id; 600 unsigned int wait_usec = 0; 601 int ret = 0, fd = -1; 602 char buf[64]; 603 604 snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id); 605 606 while (wait_usec < MAX_WAIT_USEC) { 607 fd = open(buf, O_RDWR); 608 if (fd >= 0) 609 break; 610 usleep(WAIT_USEC); 611 wait_usec += WAIT_USEC; 612 } 613 if (fd < 0) { 614 ublk_err("can't open %s %s\n", buf, strerror(errno)); 615 return -1; 616 } 617 618 dev->fds[0] = fd; 619 if (dev->tgt.ops->init_tgt) 620 ret = dev->tgt.ops->init_tgt(ctx, dev); 621 if (ret) 622 close(dev->fds[0]); 623 return ret; 624 } 625 626 static void ublk_dev_unprep(struct ublk_dev *dev) 627 { 628 if (dev->tgt.ops->deinit_tgt) 629 dev->tgt.ops->deinit_tgt(dev); 630 close(dev->fds[0]); 631 } 632 633 static void ublk_set_auto_buf_reg(const struct ublk_thread *t, 634 const struct ublk_queue *q, 635 struct io_uring_sqe *sqe, 636 unsigned short tag) 637 { 638 struct ublk_auto_buf_reg buf = {}; 639 640 if (q->tgt_ops->buf_index) 641 buf.index = q->tgt_ops->buf_index(t, q, tag); 642 else 643 buf.index = ublk_io_buf_idx(t, q, tag); 644 645 if (ublk_queue_auto_zc_fallback(q)) 646 buf.flags = UBLK_AUTO_BUF_REG_FALLBACK; 647 648 sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); 649 } 650 651 /* Copy in pieces to test the buffer offset logic */ 652 #define UBLK_USER_COPY_LEN 2048 653 654 static void ublk_user_copy(const struct ublk_io *io, __u8 match_ublk_op) 655 { 656 const struct ublk_queue *q = ublk_io_to_queue(io); 657 const struct ublksrv_io_desc *iod = ublk_get_iod(q, io->tag); 658 __u64 off = ublk_user_copy_offset(q->q_id, io->tag); 659 __u8 ublk_op = ublksrv_get_op(iod); 660 __u32 len = iod->nr_sectors << 9; 661 void *addr = io->buf_addr; 662 ssize_t copied; 663 664 if (ublk_op != match_ublk_op) 665 return; 666 667 while (len) { 668 __u32 copy_len = min(len, UBLK_USER_COPY_LEN); 669 670 if (ublk_op == UBLK_IO_OP_WRITE) 671 copied = pread(q->ublk_fd, addr, copy_len, off); 672 else if (ublk_op == UBLK_IO_OP_READ) 673 copied = pwrite(q->ublk_fd, addr, copy_len, off); 674 else 675 assert(0); 676 assert(copied == (ssize_t)copy_len); 677 addr += copy_len; 678 off += copy_len; 679 len -= copy_len; 680 } 681 682 if (!(iod->op_flags & UBLK_IO_F_INTEGRITY)) 683 return; 684 685 len = ublk_integrity_len(q, iod->nr_sectors << 9); 686 off = ublk_user_copy_offset(q->q_id, io->tag); 687 off |= UBLKSRV_IO_INTEGRITY_FLAG; 688 if (ublk_op == UBLK_IO_OP_WRITE) 689 copied = pread(q->ublk_fd, io->integrity_buf, len, off); 690 else if (ublk_op == UBLK_IO_OP_READ) 691 copied = pwrite(q->ublk_fd, io->integrity_buf, len, off); 692 else 693 assert(0); 694 assert(copied == (ssize_t)len); 695 } 696 697 int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) 698 { 699 struct ublk_queue *q = ublk_io_to_queue(io); 700 struct ublksrv_io_cmd *cmd; 701 struct io_uring_sqe *sqe[1]; 702 unsigned int cmd_op = 0; 703 __u64 user_data; 704 705 /* only freed io can be issued */ 706 if (!(io->flags & UBLKS_IO_FREE)) 707 return 0; 708 709 /* 710 * we issue because we need either fetching or committing or 711 * getting data 712 */ 713 if (!(io->flags & 714 (UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA))) 715 return 0; 716 717 if (io->flags & UBLKS_IO_NEED_GET_DATA) 718 cmd_op = UBLK_U_IO_NEED_GET_DATA; 719 else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) { 720 if (ublk_queue_use_user_copy(q)) 721 ublk_user_copy(io, UBLK_IO_OP_READ); 722 723 cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; 724 } else if (io->flags & UBLKS_IO_NEED_FETCH_RQ) 725 cmd_op = UBLK_U_IO_FETCH_REQ; 726 727 if (io_uring_sq_space_left(&t->ring) < 1) 728 io_uring_submit(&t->ring); 729 730 ublk_io_alloc_sqes(t, sqe, 1); 731 if (!sqe[0]) { 732 ublk_err("%s: run out of sqe. thread %u, tag %d\n", 733 __func__, t->idx, io->tag); 734 return -1; 735 } 736 737 cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]); 738 739 if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ) 740 cmd->result = io->result; 741 742 /* These fields should be written once, never change */ 743 ublk_set_sqe_cmd_op(sqe[0], cmd_op); 744 sqe[0]->fd = ublk_get_registered_fd(q, 0); /* dev->fds[0] */ 745 sqe[0]->opcode = IORING_OP_URING_CMD; 746 if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) 747 sqe[0]->flags = 0; /* Use raw FD, not fixed file */ 748 else 749 sqe[0]->flags = IOSQE_FIXED_FILE; 750 sqe[0]->rw_flags = 0; 751 cmd->tag = io->tag; 752 cmd->q_id = q->q_id; 753 if (!ublk_queue_no_buf(q) && !ublk_queue_use_user_copy(q)) 754 cmd->addr = (__u64) (uintptr_t) io->buf_addr; 755 else 756 cmd->addr = 0; 757 758 if (ublk_queue_use_auto_zc(q)) 759 ublk_set_auto_buf_reg(t, q, sqe[0], io->tag); 760 761 user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0); 762 io_uring_sqe_set_data64(sqe[0], user_data); 763 764 io->flags = 0; 765 766 t->cmd_inflight += 1; 767 768 ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n", 769 __func__, t->idx, q->q_id, io->tag, cmd_op, 770 io->flags, !!(t->state & UBLKS_T_STOPPING)); 771 return 1; 772 } 773 774 static void ublk_submit_fetch_commands(struct ublk_thread *t) 775 { 776 struct ublk_queue *q; 777 struct ublk_io *io; 778 int i = 0, j = 0; 779 780 if (t->dev->per_io_tasks) { 781 /* 782 * Lexicographically order all the (qid,tag) pairs, with 783 * qid taking priority (so (1,0) > (0,1)). Then make 784 * this thread the daemon for every Nth entry in this 785 * list (N is the number of threads), starting at this 786 * thread's index. This ensures that each queue is 787 * handled by as many ublk server threads as possible, 788 * so that load that is concentrated on one or a few 789 * queues can make use of all ublk server threads. 790 */ 791 const struct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info; 792 int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth; 793 for (i = t->idx; i < nr_ios; i += t->dev->nthreads) { 794 int q_id = i / dinfo->queue_depth; 795 int tag = i % dinfo->queue_depth; 796 q = &t->dev->q[q_id]; 797 io = &q->ios[tag]; 798 io->buf_index = j++; 799 ublk_queue_io_cmd(t, io); 800 } 801 } else { 802 /* 803 * Service exclusively the queue whose q_id matches our 804 * thread index. 805 */ 806 struct ublk_queue *q = &t->dev->q[t->idx]; 807 for (i = 0; i < q->q_depth; i++) { 808 io = &q->ios[i]; 809 io->buf_index = i; 810 ublk_queue_io_cmd(t, io); 811 } 812 } 813 } 814 815 static int ublk_thread_is_idle(struct ublk_thread *t) 816 { 817 return !io_uring_sq_ready(&t->ring) && !t->io_inflight; 818 } 819 820 static int ublk_thread_is_done(struct ublk_thread *t) 821 { 822 return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t) && !t->cmd_inflight; 823 } 824 825 static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t, 826 struct ublk_queue *q, 827 struct io_uring_cqe *cqe) 828 { 829 if (cqe->res < 0 && cqe->res != -EAGAIN) 830 ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", 831 __func__, cqe->res, q->q_id, 832 user_data_to_tag(cqe->user_data), 833 user_data_to_op(cqe->user_data)); 834 835 if (q->tgt_ops->tgt_io_done) 836 q->tgt_ops->tgt_io_done(t, q, cqe); 837 } 838 839 static void ublk_handle_uring_cmd(struct ublk_thread *t, 840 struct ublk_queue *q, 841 const struct io_uring_cqe *cqe) 842 { 843 int fetch = (cqe->res != UBLK_IO_RES_ABORT) && 844 !(t->state & UBLKS_T_STOPPING); 845 unsigned tag = user_data_to_tag(cqe->user_data); 846 struct ublk_io *io = &q->ios[tag]; 847 848 t->cmd_inflight--; 849 850 if (!fetch) { 851 t->state |= UBLKS_T_STOPPING; 852 io->flags &= ~UBLKS_IO_NEED_FETCH_RQ; 853 } 854 855 if (cqe->res == UBLK_IO_RES_OK) { 856 ublk_assert(tag < q->q_depth); 857 858 if (ublk_queue_use_user_copy(q)) 859 ublk_user_copy(io, UBLK_IO_OP_WRITE); 860 861 if (q->tgt_ops->queue_io) 862 q->tgt_ops->queue_io(t, q, tag); 863 } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { 864 io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE; 865 ublk_queue_io_cmd(t, io); 866 } else { 867 /* 868 * COMMIT_REQ will be completed immediately since no fetching 869 * piggyback is required. 870 * 871 * Marking IO_FREE only, then this io won't be issued since 872 * we only issue io with (UBLKS_IO_FREE | UBLKSRV_NEED_*) 873 * 874 * */ 875 io->flags = UBLKS_IO_FREE; 876 } 877 } 878 879 static void ublk_handle_cqe(struct ublk_thread *t, 880 struct io_uring_cqe *cqe, void *data) 881 { 882 struct ublk_dev *dev = t->dev; 883 unsigned q_id = user_data_to_q_id(cqe->user_data); 884 unsigned cmd_op = user_data_to_op(cqe->user_data); 885 886 if (cqe->res < 0 && cqe->res != -ENODEV && cqe->res != -ENOBUFS) 887 ublk_err("%s: res %d userdata %llx thread state %x\n", __func__, 888 cqe->res, cqe->user_data, t->state); 889 890 ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (thread %d qid %d tag %u cmd_op %x " 891 "data %lx target %d/%d) stopping %d\n", 892 __func__, cqe->res, t->idx, q_id, 893 user_data_to_tag(cqe->user_data), 894 cmd_op, cqe->user_data, is_target_io(cqe->user_data), 895 user_data_to_tgt_data(cqe->user_data), 896 (t->state & UBLKS_T_STOPPING)); 897 898 /* Don't retrieve io in case of target io */ 899 if (is_target_io(cqe->user_data)) { 900 ublksrv_handle_tgt_cqe(t, &dev->q[q_id], cqe); 901 return; 902 } 903 904 if (ublk_thread_batch_io(t)) 905 ublk_batch_compl_cmd(t, cqe); 906 else 907 ublk_handle_uring_cmd(t, &dev->q[q_id], cqe); 908 } 909 910 static int ublk_reap_events_uring(struct ublk_thread *t) 911 { 912 struct io_uring_cqe *cqe; 913 unsigned head; 914 int count = 0; 915 916 io_uring_for_each_cqe(&t->ring, head, cqe) { 917 ublk_handle_cqe(t, cqe, NULL); 918 count += 1; 919 } 920 io_uring_cq_advance(&t->ring, count); 921 922 return count; 923 } 924 925 static int ublk_process_io(struct ublk_thread *t) 926 { 927 int ret, reapped; 928 929 ublk_dbg(UBLK_DBG_THREAD, "dev%d-t%u: to_submit %d inflight cmd %u stopping %d\n", 930 t->dev->dev_info.dev_id, 931 t->idx, io_uring_sq_ready(&t->ring), 932 t->cmd_inflight, 933 (t->state & UBLKS_T_STOPPING)); 934 935 if (ublk_thread_is_done(t)) 936 return -ENODEV; 937 938 ret = io_uring_submit_and_wait(&t->ring, 1); 939 if (ublk_thread_batch_io(t)) { 940 ublk_batch_prep_commit(t); 941 reapped = ublk_reap_events_uring(t); 942 ublk_batch_commit_io_cmds(t); 943 } else { 944 reapped = ublk_reap_events_uring(t); 945 } 946 947 ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n", 948 ret, reapped, (t->state & UBLKS_T_STOPPING), 949 (t->state & UBLKS_T_IDLE)); 950 951 return reapped; 952 } 953 954 struct ublk_thread_info { 955 struct ublk_dev *dev; 956 pthread_t thread; 957 unsigned idx; 958 sem_t *ready; 959 cpu_set_t *affinity; 960 unsigned long long extra_flags; 961 unsigned char (*q_thread_map)[UBLK_MAX_QUEUES]; 962 }; 963 964 static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info) 965 { 966 if (pthread_setaffinity_np(pthread_self(), sizeof(*info->affinity), info->affinity) < 0) 967 ublk_err("ublk dev %u thread %u set affinity failed", 968 info->dev->dev_info.dev_id, info->idx); 969 } 970 971 static void ublk_batch_setup_queues(struct ublk_thread *t) 972 { 973 int i; 974 975 for (i = 0; i < t->dev->dev_info.nr_hw_queues; i++) { 976 struct ublk_queue *q = &t->dev->q[i]; 977 int ret; 978 979 /* 980 * Only prepare io commands in the mapped thread context, 981 * otherwise io command buffer index may not work as expected 982 */ 983 if (t->q_map[i] == 0) 984 continue; 985 986 ret = ublk_batch_queue_prep_io_cmds(t, q); 987 ublk_assert(ret >= 0); 988 } 989 } 990 991 static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_info *info) 992 { 993 struct ublk_thread t = { 994 .dev = info->dev, 995 .idx = info->idx, 996 }; 997 int dev_id = info->dev->dev_info.dev_id; 998 int ret; 999 1000 /* Copy per-thread queue mapping into thread-local variable */ 1001 if (info->q_thread_map) 1002 memcpy(t.q_map, info->q_thread_map[info->idx], sizeof(t.q_map)); 1003 1004 ret = ublk_thread_init(&t, info->extra_flags); 1005 if (ret) { 1006 ublk_err("ublk dev %d thread %u init failed\n", 1007 dev_id, t.idx); 1008 return ret; 1009 } 1010 sem_post(info->ready); 1011 1012 ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n", 1013 gettid(), dev_id, t.idx); 1014 1015 if (!ublk_thread_batch_io(&t)) { 1016 /* submit all io commands to ublk driver */ 1017 ublk_submit_fetch_commands(&t); 1018 } else { 1019 ublk_batch_setup_queues(&t); 1020 ublk_batch_start_fetch(&t); 1021 } 1022 1023 do { 1024 if (ublk_process_io(&t) < 0) 1025 break; 1026 } while (1); 1027 1028 ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n", 1029 gettid(), dev_id, t.idx); 1030 ublk_thread_deinit(&t); 1031 return 0; 1032 } 1033 1034 static void *ublk_io_handler_fn(void *data) 1035 { 1036 struct ublk_thread_info *info = data; 1037 1038 /* 1039 * IO perf is sensitive with queue pthread affinity on NUMA machine 1040 * 1041 * Set sched_affinity at beginning, so following allocated memory/pages 1042 * could be CPU/NUMA aware. 1043 */ 1044 if (info->affinity) 1045 ublk_thread_set_sched_affinity(info); 1046 1047 __ublk_io_handler_fn(info); 1048 1049 return NULL; 1050 } 1051 1052 static void ublk_set_parameters(struct ublk_dev *dev) 1053 { 1054 int ret; 1055 1056 ret = ublk_ctrl_set_params(dev, &dev->tgt.params); 1057 if (ret) 1058 ublk_err("dev %d set basic parameter failed %d\n", 1059 dev->dev_info.dev_id, ret); 1060 } 1061 1062 static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id) 1063 { 1064 uint64_t id; 1065 int evtfd = ctx->_evtfd; 1066 1067 if (evtfd < 0) 1068 return -EBADF; 1069 1070 if (dev_id >= 0) 1071 id = dev_id + 1; 1072 else 1073 id = ERROR_EVTFD_DEVID; 1074 1075 if (dev && ctx->shadow_dev) 1076 memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q)); 1077 1078 if (write(evtfd, &id, sizeof(id)) != sizeof(id)) 1079 return -EINVAL; 1080 1081 close(evtfd); 1082 shmdt(ctx->shadow_dev); 1083 1084 return 0; 1085 } 1086 1087 1088 static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) 1089 { 1090 const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; 1091 struct ublk_thread_info *tinfo; 1092 unsigned long long extra_flags = 0; 1093 cpu_set_t *affinity_buf; 1094 unsigned char (*q_thread_map)[UBLK_MAX_QUEUES] = NULL; 1095 void *thread_ret; 1096 sem_t ready; 1097 int ret, i; 1098 1099 ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); 1100 1101 tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads); 1102 if (!tinfo) 1103 return -ENOMEM; 1104 1105 sem_init(&ready, 0, 0); 1106 ret = ublk_dev_prep(ctx, dev); 1107 if (ret) 1108 return ret; 1109 1110 ret = ublk_ctrl_get_affinity(dev, &affinity_buf); 1111 if (ret) 1112 return ret; 1113 1114 if (ublk_dev_batch_io(dev)) { 1115 q_thread_map = calloc(dev->nthreads, sizeof(*q_thread_map)); 1116 if (!q_thread_map) { 1117 ret = -ENOMEM; 1118 goto fail; 1119 } 1120 ublk_batch_setup_map(q_thread_map, dev->nthreads, 1121 dinfo->nr_hw_queues); 1122 } 1123 1124 if (ctx->auto_zc_fallback) 1125 extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; 1126 if (ctx->no_ublk_fixed_fd) 1127 extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD; 1128 1129 for (i = 0; i < dinfo->nr_hw_queues; i++) { 1130 dev->q[i].dev = dev; 1131 dev->q[i].q_id = i; 1132 1133 ret = ublk_queue_init(&dev->q[i], extra_flags, 1134 ctx->metadata_size); 1135 if (ret) { 1136 ublk_err("ublk dev %d queue %d init queue failed\n", 1137 dinfo->dev_id, i); 1138 goto fail; 1139 } 1140 } 1141 1142 for (i = 0; i < dev->nthreads; i++) { 1143 tinfo[i].dev = dev; 1144 tinfo[i].idx = i; 1145 tinfo[i].ready = &ready; 1146 tinfo[i].extra_flags = extra_flags; 1147 tinfo[i].q_thread_map = q_thread_map; 1148 1149 /* 1150 * If threads are not tied 1:1 to queues, setting thread 1151 * affinity based on queue affinity makes little sense. 1152 * However, thread CPU affinity has significant impact 1153 * on performance, so to compare fairly, we'll still set 1154 * thread CPU affinity based on queue affinity where 1155 * possible. 1156 */ 1157 if (dev->nthreads == dinfo->nr_hw_queues) 1158 tinfo[i].affinity = &affinity_buf[i]; 1159 pthread_create(&tinfo[i].thread, NULL, 1160 ublk_io_handler_fn, 1161 &tinfo[i]); 1162 } 1163 1164 for (i = 0; i < dev->nthreads; i++) 1165 sem_wait(&ready); 1166 free(affinity_buf); 1167 free(q_thread_map); 1168 1169 /* everything is fine now, start us */ 1170 if (ctx->recovery) 1171 ret = ublk_ctrl_end_user_recovery(dev, getpid()); 1172 else { 1173 ublk_set_parameters(dev); 1174 ret = ublk_ctrl_start_dev(dev, getpid()); 1175 } 1176 if (ret < 0) { 1177 ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); 1178 /* stop device so that inflight uring_cmd can be cancelled */ 1179 ublk_ctrl_stop_dev(dev); 1180 goto fail_start; 1181 } 1182 1183 ublk_ctrl_get_info(dev); 1184 if (ctx->fg) 1185 ublk_ctrl_dump(dev); 1186 else 1187 ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id); 1188 fail_start: 1189 /* wait until we are terminated */ 1190 for (i = 0; i < dev->nthreads; i++) 1191 pthread_join(tinfo[i].thread, &thread_ret); 1192 free(tinfo); 1193 fail: 1194 for (i = 0; i < dinfo->nr_hw_queues; i++) 1195 ublk_queue_deinit(&dev->q[i]); 1196 ublk_dev_unprep(dev); 1197 ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__); 1198 1199 return ret; 1200 } 1201 1202 static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout) 1203 { 1204 #define EV_SIZE (sizeof(struct inotify_event)) 1205 #define EV_BUF_LEN (128 * (EV_SIZE + 16)) 1206 struct pollfd pfd; 1207 int fd, wd; 1208 int ret = -EINVAL; 1209 const char *dev_name = basename(path); 1210 1211 fd = inotify_init(); 1212 if (fd < 0) { 1213 ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__); 1214 return fd; 1215 } 1216 1217 wd = inotify_add_watch(fd, "/dev", evt_mask); 1218 if (wd == -1) { 1219 ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__); 1220 goto fail; 1221 } 1222 1223 pfd.fd = fd; 1224 pfd.events = POLL_IN; 1225 while (1) { 1226 int i = 0; 1227 char buffer[EV_BUF_LEN]; 1228 ret = poll(&pfd, 1, 1000 * timeout); 1229 1230 if (ret == -1) { 1231 ublk_err("%s: poll inotify failed: %d\n", __func__, ret); 1232 goto rm_watch; 1233 } else if (ret == 0) { 1234 ublk_err("%s: poll inotify timeout\n", __func__); 1235 ret = -ETIMEDOUT; 1236 goto rm_watch; 1237 } 1238 1239 ret = read(fd, buffer, EV_BUF_LEN); 1240 if (ret < 0) { 1241 ublk_err("%s: read inotify fd failed\n", __func__); 1242 goto rm_watch; 1243 } 1244 1245 while (i < ret) { 1246 struct inotify_event *event = (struct inotify_event *)&buffer[i]; 1247 1248 ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n", 1249 __func__, event->mask, event->name); 1250 if (event->mask & evt_mask) { 1251 if (!strcmp(event->name, dev_name)) { 1252 ret = 0; 1253 goto rm_watch; 1254 } 1255 } 1256 i += EV_SIZE + event->len; 1257 } 1258 } 1259 rm_watch: 1260 inotify_rm_watch(fd, wd); 1261 fail: 1262 close(fd); 1263 return ret; 1264 } 1265 1266 static int ublk_stop_io_daemon(const struct ublk_dev *dev) 1267 { 1268 int daemon_pid = dev->dev_info.ublksrv_pid; 1269 int dev_id = dev->dev_info.dev_id; 1270 char ublkc[64]; 1271 int ret = 0; 1272 1273 if (daemon_pid < 0) 1274 return 0; 1275 1276 /* daemon may be dead already */ 1277 if (kill(daemon_pid, 0) < 0) 1278 goto wait; 1279 1280 snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id); 1281 1282 /* ublk char device may be gone already */ 1283 if (access(ublkc, F_OK) != 0) 1284 goto wait; 1285 1286 /* Wait until ublk char device is closed, when the daemon is shutdown */ 1287 ret = wait_ublk_dev(ublkc, IN_CLOSE, 10); 1288 /* double check and since it may be closed before starting inotify */ 1289 if (ret == -ETIMEDOUT) 1290 ret = kill(daemon_pid, 0) < 0; 1291 wait: 1292 waitpid(daemon_pid, NULL, 0); 1293 ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n", 1294 __func__, daemon_pid, dev_id, ret); 1295 1296 return ret; 1297 } 1298 1299 static int __cmd_dev_add(const struct dev_ctx *ctx) 1300 { 1301 unsigned nthreads = ctx->nthreads; 1302 unsigned nr_queues = ctx->nr_hw_queues; 1303 const char *tgt_type = ctx->tgt_type; 1304 unsigned depth = ctx->queue_depth; 1305 __u64 features; 1306 const struct ublk_tgt_ops *ops; 1307 struct ublksrv_ctrl_dev_info *info; 1308 struct ublk_dev *dev = NULL; 1309 int dev_id = ctx->dev_id; 1310 int ret, i; 1311 1312 ops = ublk_find_tgt(tgt_type); 1313 if (!ops) { 1314 ublk_err("%s: no such tgt type, type %s\n", 1315 __func__, tgt_type); 1316 ret = -ENODEV; 1317 goto fail; 1318 } 1319 1320 if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) { 1321 ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n", 1322 __func__, nr_queues, depth); 1323 ret = -EINVAL; 1324 goto fail; 1325 } 1326 1327 /* default to 1:1 threads:queues if nthreads is unspecified */ 1328 if (!nthreads) 1329 nthreads = nr_queues; 1330 1331 if (nthreads > UBLK_MAX_THREADS) { 1332 ublk_err("%s: %u is too many threads (max %u)\n", 1333 __func__, nthreads, UBLK_MAX_THREADS); 1334 ret = -EINVAL; 1335 goto fail; 1336 } 1337 1338 if (nthreads != nr_queues && (!ctx->per_io_tasks && 1339 !(ctx->flags & UBLK_F_BATCH_IO))) { 1340 ublk_err("%s: threads %u must be same as queues %u if " 1341 "not using per_io_tasks\n", 1342 __func__, nthreads, nr_queues); 1343 ret = -EINVAL; 1344 goto fail; 1345 } 1346 1347 dev = ublk_ctrl_init(); 1348 if (!dev) { 1349 ublk_err("%s: can't alloc dev id %d, type %s\n", 1350 __func__, dev_id, tgt_type); 1351 ret = -ENOMEM; 1352 goto fail; 1353 } 1354 1355 /* kernel doesn't support get_features */ 1356 ret = ublk_ctrl_get_features(dev, &features); 1357 if (ret < 0) { 1358 ret = -EINVAL; 1359 goto fail; 1360 } 1361 1362 if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) { 1363 ret = -ENOTSUP; 1364 goto fail; 1365 } 1366 1367 info = &dev->dev_info; 1368 info->dev_id = ctx->dev_id; 1369 info->nr_hw_queues = nr_queues; 1370 info->queue_depth = depth; 1371 info->flags = ctx->flags; 1372 if ((features & UBLK_F_QUIESCE) && 1373 (info->flags & UBLK_F_USER_RECOVERY)) 1374 info->flags |= UBLK_F_QUIESCE; 1375 dev->nthreads = nthreads; 1376 dev->per_io_tasks = ctx->per_io_tasks; 1377 dev->tgt.ops = ops; 1378 dev->tgt.sq_depth = depth; 1379 dev->tgt.cq_depth = depth; 1380 1381 for (i = 0; i < MAX_BACK_FILES; i++) { 1382 if (ctx->files[i]) { 1383 strcpy(dev->tgt.backing_file[i], ctx->files[i]); 1384 dev->tgt.nr_backing_files++; 1385 } 1386 } 1387 1388 if (ctx->recovery) 1389 ret = ublk_ctrl_start_user_recovery(dev); 1390 else 1391 ret = ublk_ctrl_add_dev(dev); 1392 if (ret < 0) { 1393 ublk_err("%s: can't add dev id %d, type %s ret %d\n", 1394 __func__, dev_id, tgt_type, ret); 1395 goto fail; 1396 } 1397 1398 ret = ublk_start_daemon(ctx, dev); 1399 ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret); 1400 if (ret < 0) 1401 ublk_ctrl_del_dev(dev); 1402 1403 fail: 1404 if (ret < 0) 1405 ublk_send_dev_event(ctx, dev, -1); 1406 if (dev) 1407 ublk_ctrl_deinit(dev); 1408 return ret; 1409 } 1410 1411 static int __cmd_dev_list(struct dev_ctx *ctx); 1412 1413 static int cmd_dev_add(struct dev_ctx *ctx) 1414 { 1415 int res; 1416 1417 if (ctx->fg) 1418 goto run; 1419 1420 ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666); 1421 if (ctx->_shmid < 0) { 1422 ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno)); 1423 exit(-1); 1424 } 1425 ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0); 1426 if (ctx->shadow_dev == (struct ublk_dev *)-1) { 1427 ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno)); 1428 exit(-1); 1429 } 1430 ctx->_evtfd = eventfd(0, 0); 1431 if (ctx->_evtfd < 0) { 1432 ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno)); 1433 exit(-1); 1434 } 1435 1436 res = fork(); 1437 if (res == 0) { 1438 int res2; 1439 1440 setsid(); 1441 res2 = fork(); 1442 if (res2 == 0) { 1443 /* prepare for detaching */ 1444 close(STDIN_FILENO); 1445 close(STDOUT_FILENO); 1446 close(STDERR_FILENO); 1447 run: 1448 res = __cmd_dev_add(ctx); 1449 return res; 1450 } else { 1451 /* detached from the foreground task */ 1452 exit(EXIT_SUCCESS); 1453 } 1454 } else if (res > 0) { 1455 uint64_t id; 1456 int exit_code = EXIT_FAILURE; 1457 1458 res = read(ctx->_evtfd, &id, sizeof(id)); 1459 close(ctx->_evtfd); 1460 if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) { 1461 ctx->dev_id = id - 1; 1462 if (__cmd_dev_list(ctx) >= 0) 1463 exit_code = EXIT_SUCCESS; 1464 } 1465 shmdt(ctx->shadow_dev); 1466 shmctl(ctx->_shmid, IPC_RMID, NULL); 1467 /* wait for child and detach from it */ 1468 wait(NULL); 1469 if (exit_code == EXIT_FAILURE) 1470 ublk_err("%s: command failed\n", __func__); 1471 exit(exit_code); 1472 } else { 1473 exit(EXIT_FAILURE); 1474 } 1475 } 1476 1477 static int __cmd_dev_del(struct dev_ctx *ctx) 1478 { 1479 int number = ctx->dev_id; 1480 struct ublk_dev *dev; 1481 int ret; 1482 1483 dev = ublk_ctrl_init(); 1484 dev->dev_info.dev_id = number; 1485 1486 ret = ublk_ctrl_get_info(dev); 1487 if (ret < 0) 1488 goto fail; 1489 1490 ret = ublk_ctrl_stop_dev(dev); 1491 if (ret < 0) 1492 ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret); 1493 1494 ret = ublk_stop_io_daemon(dev); 1495 if (ret < 0) 1496 ublk_err("%s: stop daemon id %d dev %d, ret %d\n", 1497 __func__, dev->dev_info.ublksrv_pid, number, ret); 1498 ublk_ctrl_del_dev(dev); 1499 fail: 1500 ublk_ctrl_deinit(dev); 1501 1502 return (ret >= 0) ? 0 : ret; 1503 } 1504 1505 static int cmd_dev_del(struct dev_ctx *ctx) 1506 { 1507 int i; 1508 1509 if (ctx->dev_id >= 0 || !ctx->all) 1510 return __cmd_dev_del(ctx); 1511 1512 for (i = 0; i < 255; i++) { 1513 ctx->dev_id = i; 1514 __cmd_dev_del(ctx); 1515 } 1516 return 0; 1517 } 1518 1519 static int cmd_dev_stop(struct dev_ctx *ctx) 1520 { 1521 int number = ctx->dev_id; 1522 struct ublk_dev *dev; 1523 int ret; 1524 1525 if (number < 0) { 1526 ublk_err("%s: device id is required\n", __func__); 1527 return -EINVAL; 1528 } 1529 1530 dev = ublk_ctrl_init(); 1531 dev->dev_info.dev_id = number; 1532 1533 ret = ublk_ctrl_get_info(dev); 1534 if (ret < 0) 1535 goto fail; 1536 1537 if (ctx->safe_stop) { 1538 ret = ublk_ctrl_try_stop_dev(dev); 1539 if (ret < 0) 1540 ublk_err("%s: try_stop dev %d failed ret %d\n", 1541 __func__, number, ret); 1542 } else { 1543 ret = ublk_ctrl_stop_dev(dev); 1544 if (ret < 0) 1545 ublk_err("%s: stop dev %d failed ret %d\n", 1546 __func__, number, ret); 1547 } 1548 1549 fail: 1550 ublk_ctrl_deinit(dev); 1551 1552 return ret; 1553 } 1554 1555 static int __cmd_dev_list(struct dev_ctx *ctx) 1556 { 1557 struct ublk_dev *dev = ublk_ctrl_init(); 1558 int ret; 1559 1560 if (!dev) 1561 return -ENODEV; 1562 1563 dev->dev_info.dev_id = ctx->dev_id; 1564 1565 ret = ublk_ctrl_get_info(dev); 1566 if (ret < 0) { 1567 if (ctx->logging) 1568 ublk_err("%s: can't get dev info from %d: %d\n", 1569 __func__, ctx->dev_id, ret); 1570 } else { 1571 if (ctx->shadow_dev) 1572 memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q)); 1573 1574 ublk_ctrl_dump(dev); 1575 } 1576 1577 ublk_ctrl_deinit(dev); 1578 1579 return ret; 1580 } 1581 1582 static int cmd_dev_list(struct dev_ctx *ctx) 1583 { 1584 int i; 1585 1586 if (ctx->dev_id >= 0 || !ctx->all) 1587 return __cmd_dev_list(ctx); 1588 1589 ctx->logging = false; 1590 for (i = 0; i < 255; i++) { 1591 ctx->dev_id = i; 1592 __cmd_dev_list(ctx); 1593 } 1594 return 0; 1595 } 1596 1597 static int cmd_dev_get_features(void) 1598 { 1599 #define const_ilog2(x) (63 - __builtin_clzll(x)) 1600 #define FEAT_NAME(f) [const_ilog2(f)] = #f 1601 static const char *feat_map[] = { 1602 FEAT_NAME(UBLK_F_SUPPORT_ZERO_COPY), 1603 FEAT_NAME(UBLK_F_URING_CMD_COMP_IN_TASK), 1604 FEAT_NAME(UBLK_F_NEED_GET_DATA), 1605 FEAT_NAME(UBLK_F_USER_RECOVERY), 1606 FEAT_NAME(UBLK_F_USER_RECOVERY_REISSUE), 1607 FEAT_NAME(UBLK_F_UNPRIVILEGED_DEV), 1608 FEAT_NAME(UBLK_F_CMD_IOCTL_ENCODE), 1609 FEAT_NAME(UBLK_F_USER_COPY), 1610 FEAT_NAME(UBLK_F_ZONED), 1611 FEAT_NAME(UBLK_F_USER_RECOVERY_FAIL_IO), 1612 FEAT_NAME(UBLK_F_UPDATE_SIZE), 1613 FEAT_NAME(UBLK_F_AUTO_BUF_REG), 1614 FEAT_NAME(UBLK_F_QUIESCE), 1615 FEAT_NAME(UBLK_F_PER_IO_DAEMON), 1616 FEAT_NAME(UBLK_F_BUF_REG_OFF_DAEMON), 1617 FEAT_NAME(UBLK_F_INTEGRITY), 1618 FEAT_NAME(UBLK_F_SAFE_STOP_DEV), 1619 FEAT_NAME(UBLK_F_BATCH_IO), 1620 FEAT_NAME(UBLK_F_NO_AUTO_PART_SCAN), 1621 }; 1622 struct ublk_dev *dev; 1623 __u64 features = 0; 1624 int ret; 1625 1626 dev = ublk_ctrl_init(); 1627 if (!dev) { 1628 fprintf(stderr, "ublksrv_ctrl_init failed id\n"); 1629 return -EOPNOTSUPP; 1630 } 1631 1632 ret = ublk_ctrl_get_features(dev, &features); 1633 if (!ret) { 1634 int i; 1635 1636 printf("ublk_drv features: 0x%llx\n", features); 1637 1638 for (i = 0; i < sizeof(features) * 8; i++) { 1639 const char *feat; 1640 1641 if (!((1ULL << i) & features)) 1642 continue; 1643 if (i < ARRAY_SIZE(feat_map)) 1644 feat = feat_map[i]; 1645 else 1646 feat = "unknown"; 1647 printf("0x%-16llx: %s\n", 1ULL << i, feat); 1648 } 1649 } 1650 1651 return ret; 1652 } 1653 1654 static int cmd_dev_update_size(struct dev_ctx *ctx) 1655 { 1656 struct ublk_dev *dev = ublk_ctrl_init(); 1657 struct ublk_params p; 1658 int ret = -EINVAL; 1659 1660 if (!dev) 1661 return -ENODEV; 1662 1663 if (ctx->dev_id < 0) { 1664 fprintf(stderr, "device id isn't provided\n"); 1665 goto out; 1666 } 1667 1668 dev->dev_info.dev_id = ctx->dev_id; 1669 ret = ublk_ctrl_get_params(dev, &p); 1670 if (ret < 0) { 1671 ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); 1672 goto out; 1673 } 1674 1675 if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) { 1676 ublk_err("size isn't aligned with logical block size\n"); 1677 ret = -EINVAL; 1678 goto out; 1679 } 1680 1681 ret = ublk_ctrl_update_size(dev, ctx->size >> 9); 1682 out: 1683 ublk_ctrl_deinit(dev); 1684 return ret; 1685 } 1686 1687 static int cmd_dev_quiesce(struct dev_ctx *ctx) 1688 { 1689 struct ublk_dev *dev = ublk_ctrl_init(); 1690 int ret = -EINVAL; 1691 1692 if (!dev) 1693 return -ENODEV; 1694 1695 if (ctx->dev_id < 0) { 1696 fprintf(stderr, "device id isn't provided for quiesce\n"); 1697 goto out; 1698 } 1699 dev->dev_info.dev_id = ctx->dev_id; 1700 ret = ublk_ctrl_quiesce_dev(dev, 10000); 1701 1702 out: 1703 ublk_ctrl_deinit(dev); 1704 return ret; 1705 } 1706 1707 static void __cmd_create_help(char *exe, bool recovery) 1708 { 1709 int i; 1710 1711 printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", 1712 exe, recovery ? "recover" : "add"); 1713 printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1] [-g] [-u]\n"); 1714 printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n"); 1715 printf("\t[--nthreads threads] [--per_io_tasks]\n"); 1716 printf("\t[--integrity_capable] [--integrity_reftag] [--metadata_size SIZE] " 1717 "[--pi_offset OFFSET] [--csum_type ip|t10dif|nvme] [--tag_size SIZE]\n"); 1718 printf("\t[--batch|-b] [--no_auto_part_scan]\n"); 1719 printf("\t[target options] [backfile1] [backfile2] ...\n"); 1720 printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); 1721 printf("\tdefault: nthreads=nr_queues"); 1722 1723 for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) { 1724 const struct ublk_tgt_ops *ops = tgt_ops_list[i]; 1725 1726 if (ops->usage) 1727 ops->usage(ops); 1728 } 1729 } 1730 1731 static void cmd_add_help(char *exe) 1732 { 1733 __cmd_create_help(exe, false); 1734 printf("\n"); 1735 } 1736 1737 static void cmd_recover_help(char *exe) 1738 { 1739 __cmd_create_help(exe, true); 1740 printf("\tPlease provide exact command line for creating this device with real dev_id\n"); 1741 printf("\n"); 1742 } 1743 1744 static int cmd_dev_help(char *exe) 1745 { 1746 cmd_add_help(exe); 1747 cmd_recover_help(exe); 1748 1749 printf("%s del [-n dev_id] -a \n", exe); 1750 printf("\t -a delete all devices -n delete specified device\n\n"); 1751 printf("%s stop -n dev_id [--safe]\n", exe); 1752 printf("\t --safe only stop if device has no active openers\n\n"); 1753 printf("%s list [-n dev_id] -a \n", exe); 1754 printf("\t -a list all devices, -n list specified device, default -a \n\n"); 1755 printf("%s features\n", exe); 1756 printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe); 1757 printf("%s quiesce -n dev_id\n", exe); 1758 return 0; 1759 } 1760 1761 int main(int argc, char *argv[]) 1762 { 1763 static const struct option longopts[] = { 1764 { "all", 0, NULL, 'a' }, 1765 { "type", 1, NULL, 't' }, 1766 { "number", 1, NULL, 'n' }, 1767 { "queues", 1, NULL, 'q' }, 1768 { "depth", 1, NULL, 'd' }, 1769 { "debug_mask", 1, NULL, 0 }, 1770 { "quiet", 0, NULL, 0 }, 1771 { "zero_copy", 0, NULL, 'z' }, 1772 { "foreground", 0, NULL, 0 }, 1773 { "recovery", 1, NULL, 'r' }, 1774 { "recovery_fail_io", 1, NULL, 'e'}, 1775 { "recovery_reissue", 1, NULL, 'i'}, 1776 { "get_data", 1, NULL, 'g'}, 1777 { "auto_zc", 0, NULL, 0 }, 1778 { "auto_zc_fallback", 0, NULL, 0 }, 1779 { "user_copy", 0, NULL, 'u'}, 1780 { "size", 1, NULL, 's'}, 1781 { "nthreads", 1, NULL, 0 }, 1782 { "per_io_tasks", 0, NULL, 0 }, 1783 { "no_ublk_fixed_fd", 0, NULL, 0 }, 1784 { "integrity_capable", 0, NULL, 0 }, 1785 { "integrity_reftag", 0, NULL, 0 }, 1786 { "metadata_size", 1, NULL, 0 }, 1787 { "pi_offset", 1, NULL, 0 }, 1788 { "csum_type", 1, NULL, 0 }, 1789 { "tag_size", 1, NULL, 0 }, 1790 { "safe", 0, NULL, 0 }, 1791 { "batch", 0, NULL, 'b'}, 1792 { "no_auto_part_scan", 0, NULL, 0 }, 1793 { 0, 0, 0, 0 } 1794 }; 1795 const struct ublk_tgt_ops *ops = NULL; 1796 int option_idx, opt; 1797 const char *cmd = argv[1]; 1798 struct dev_ctx ctx = { 1799 ._evtfd = -1, 1800 .queue_depth = 128, 1801 .nr_hw_queues = 2, 1802 .dev_id = -1, 1803 .tgt_type = "unknown", 1804 .csum_type = LBMD_PI_CSUM_NONE, 1805 }; 1806 int ret = -EINVAL, i; 1807 int tgt_argc = 1; 1808 char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; 1809 int value; 1810 1811 if (argc == 1) 1812 return ret; 1813 1814 opterr = 0; 1815 optind = 2; 1816 while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gazub", 1817 longopts, &option_idx)) != -1) { 1818 switch (opt) { 1819 case 'a': 1820 ctx.all = 1; 1821 break; 1822 case 'b': 1823 ctx.flags |= UBLK_F_BATCH_IO; 1824 break; 1825 case 'n': 1826 ctx.dev_id = strtol(optarg, NULL, 10); 1827 break; 1828 case 't': 1829 if (strlen(optarg) < sizeof(ctx.tgt_type)) 1830 strcpy(ctx.tgt_type, optarg); 1831 break; 1832 case 'q': 1833 ctx.nr_hw_queues = strtol(optarg, NULL, 10); 1834 break; 1835 case 'd': 1836 ctx.queue_depth = strtol(optarg, NULL, 10); 1837 break; 1838 case 'z': 1839 ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY; 1840 break; 1841 case 'r': 1842 value = strtol(optarg, NULL, 10); 1843 if (value) 1844 ctx.flags |= UBLK_F_USER_RECOVERY; 1845 break; 1846 case 'e': 1847 value = strtol(optarg, NULL, 10); 1848 if (value) 1849 ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO; 1850 break; 1851 case 'i': 1852 value = strtol(optarg, NULL, 10); 1853 if (value) 1854 ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; 1855 break; 1856 case 'g': 1857 ctx.flags |= UBLK_F_NEED_GET_DATA; 1858 break; 1859 case 'u': 1860 ctx.flags |= UBLK_F_USER_COPY; 1861 break; 1862 case 's': 1863 ctx.size = strtoull(optarg, NULL, 10); 1864 break; 1865 case 0: 1866 if (!strcmp(longopts[option_idx].name, "debug_mask")) 1867 ublk_dbg_mask = strtol(optarg, NULL, 16); 1868 if (!strcmp(longopts[option_idx].name, "quiet")) 1869 ublk_dbg_mask = 0; 1870 if (!strcmp(longopts[option_idx].name, "foreground")) 1871 ctx.fg = 1; 1872 if (!strcmp(longopts[option_idx].name, "auto_zc")) 1873 ctx.flags |= UBLK_F_AUTO_BUF_REG; 1874 if (!strcmp(longopts[option_idx].name, "auto_zc_fallback")) 1875 ctx.auto_zc_fallback = 1; 1876 if (!strcmp(longopts[option_idx].name, "nthreads")) 1877 ctx.nthreads = strtol(optarg, NULL, 10); 1878 if (!strcmp(longopts[option_idx].name, "per_io_tasks")) 1879 ctx.per_io_tasks = 1; 1880 if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd")) 1881 ctx.no_ublk_fixed_fd = 1; 1882 if (!strcmp(longopts[option_idx].name, "integrity_capable")) 1883 ctx.integrity_flags |= LBMD_PI_CAP_INTEGRITY; 1884 if (!strcmp(longopts[option_idx].name, "integrity_reftag")) 1885 ctx.integrity_flags |= LBMD_PI_CAP_REFTAG; 1886 if (!strcmp(longopts[option_idx].name, "metadata_size")) 1887 ctx.metadata_size = strtoul(optarg, NULL, 0); 1888 if (!strcmp(longopts[option_idx].name, "pi_offset")) 1889 ctx.pi_offset = strtoul(optarg, NULL, 0); 1890 if (!strcmp(longopts[option_idx].name, "csum_type")) { 1891 if (!strcmp(optarg, "ip")) { 1892 ctx.csum_type = LBMD_PI_CSUM_IP; 1893 } else if (!strcmp(optarg, "t10dif")) { 1894 ctx.csum_type = LBMD_PI_CSUM_CRC16_T10DIF; 1895 } else if (!strcmp(optarg, "nvme")) { 1896 ctx.csum_type = LBMD_PI_CSUM_CRC64_NVME; 1897 } else { 1898 ublk_err("invalid csum_type: %s\n", optarg); 1899 return -EINVAL; 1900 } 1901 } 1902 if (!strcmp(longopts[option_idx].name, "tag_size")) 1903 ctx.tag_size = strtoul(optarg, NULL, 0); 1904 if (!strcmp(longopts[option_idx].name, "safe")) 1905 ctx.safe_stop = 1; 1906 if (!strcmp(longopts[option_idx].name, "no_auto_part_scan")) 1907 ctx.flags |= UBLK_F_NO_AUTO_PART_SCAN; 1908 break; 1909 case '?': 1910 /* 1911 * target requires every option must have argument 1912 */ 1913 if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') { 1914 fprintf(stderr, "every target option requires argument: %s %s\n", 1915 argv[optind - 1], argv[optind]); 1916 exit(EXIT_FAILURE); 1917 } 1918 1919 if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) { 1920 tgt_argv[tgt_argc++] = argv[optind - 1]; 1921 tgt_argv[tgt_argc++] = argv[optind]; 1922 } else { 1923 fprintf(stderr, "too many target options\n"); 1924 exit(EXIT_FAILURE); 1925 } 1926 optind += 1; 1927 break; 1928 } 1929 } 1930 1931 if (ctx.per_io_tasks && (ctx.flags & UBLK_F_BATCH_IO)) { 1932 ublk_err("per_io_task and F_BATCH_IO conflict\n"); 1933 return -EINVAL; 1934 } 1935 1936 /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */ 1937 if (ctx.auto_zc_fallback && 1938 !((ctx.flags & UBLK_F_AUTO_BUF_REG) && 1939 (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) { 1940 ublk_err("%s: auto_zc_fallback is set but neither " 1941 "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n", 1942 __func__); 1943 return -EINVAL; 1944 } 1945 1946 if (!!(ctx.flags & UBLK_F_NEED_GET_DATA) + 1947 !!(ctx.flags & UBLK_F_USER_COPY) + 1948 (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY && !ctx.auto_zc_fallback) + 1949 (ctx.flags & UBLK_F_AUTO_BUF_REG && !ctx.auto_zc_fallback) + 1950 ctx.auto_zc_fallback > 1) { 1951 fprintf(stderr, "too many data copy modes specified\n"); 1952 return -EINVAL; 1953 } 1954 1955 if (ctx.metadata_size) { 1956 if (!(ctx.flags & UBLK_F_USER_COPY)) { 1957 ublk_err("integrity requires user_copy\n"); 1958 return -EINVAL; 1959 } 1960 1961 ctx.flags |= UBLK_F_INTEGRITY; 1962 } else if (ctx.integrity_flags || 1963 ctx.pi_offset || 1964 ctx.csum_type != LBMD_PI_CSUM_NONE || 1965 ctx.tag_size) { 1966 ublk_err("integrity parameters require metadata_size\n"); 1967 return -EINVAL; 1968 } 1969 1970 if ((ctx.flags & UBLK_F_AUTO_BUF_REG) && 1971 (ctx.flags & UBLK_F_BATCH_IO) && 1972 (ctx.nthreads > ctx.nr_hw_queues)) { 1973 ublk_err("too many threads for F_AUTO_BUF_REG & F_BATCH_IO\n"); 1974 return -EINVAL; 1975 } 1976 1977 i = optind; 1978 while (i < argc && ctx.nr_files < MAX_BACK_FILES) { 1979 ctx.files[ctx.nr_files++] = argv[i++]; 1980 } 1981 1982 ops = ublk_find_tgt(ctx.tgt_type); 1983 if (ops && ops->parse_cmd_line) { 1984 optind = 0; 1985 1986 tgt_argv[0] = ctx.tgt_type; 1987 ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv); 1988 } 1989 1990 if (!strcmp(cmd, "add")) 1991 ret = cmd_dev_add(&ctx); 1992 else if (!strcmp(cmd, "recover")) { 1993 if (ctx.dev_id < 0) { 1994 fprintf(stderr, "device id isn't provided for recovering\n"); 1995 ret = -EINVAL; 1996 } else { 1997 ctx.recovery = 1; 1998 ret = cmd_dev_add(&ctx); 1999 } 2000 } else if (!strcmp(cmd, "del")) 2001 ret = cmd_dev_del(&ctx); 2002 else if (!strcmp(cmd, "stop")) 2003 ret = cmd_dev_stop(&ctx); 2004 else if (!strcmp(cmd, "list")) { 2005 ctx.all = 1; 2006 ret = cmd_dev_list(&ctx); 2007 } else if (!strcmp(cmd, "help")) 2008 ret = cmd_dev_help(argv[0]); 2009 else if (!strcmp(cmd, "features")) 2010 ret = cmd_dev_get_features(); 2011 else if (!strcmp(cmd, "update_size")) 2012 ret = cmd_dev_update_size(&ctx); 2013 else if (!strcmp(cmd, "quiesce")) 2014 ret = cmd_dev_quiesce(&ctx); 2015 else 2016 cmd_dev_help(argv[0]); 2017 2018 return ret; 2019 } 2020