/* SPDX-License-Identifier: MIT */
/*
 * Description: uring_cmd based ublk
 */

#include "kublk.h"

#define MAX_NR_TGT_ARG 64

unsigned int ublk_dbg_mask = UBLK_LOG;
static const struct ublk_tgt_ops *tgt_ops_list[] = {
	&null_tgt_ops,
	&loop_tgt_ops,
	&stripe_tgt_ops,
	&fault_inject_tgt_ops,
};

static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
	int i;

	if (name == NULL)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
		if (strcmp(tgt_ops_list[i]->name, name) == 0)
			return tgt_ops_list[i];
	return NULL;
}

static inline int ublk_setup_ring(struct io_uring *r, int depth,
				  int cq_depth, unsigned flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags | IORING_SETUP_CQSIZE;
	p.cq_entries = cq_depth;

	return io_uring_queue_init_params(depth, r, &p);
}

static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
		struct io_uring_sqe *sqe,
		struct ublk_ctrl_cmd_data *data)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);

	sqe->fd = dev->ctrl_fd;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->ioprio = 0;

	if (data->flags & CTRL_CMD_HAS_BUF) {
		cmd->addr = data->addr;
		cmd->len = data->len;
	}

	if (data->flags & CTRL_CMD_HAS_DATA)
		cmd->data[0] = data->data[0];

	cmd->dev_id = info->dev_id;
	cmd->queue_id = -1;

	ublk_set_sqe_cmd_op(sqe, data->cmd_op);

	io_uring_sqe_set_data(sqe, cmd);
}

static int __ublk_ctrl_cmd(struct ublk_dev *dev,
		struct ublk_ctrl_cmd_data *data)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret = -EINVAL;

	sqe = io_uring_get_sqe(&dev->ring);
	if (!sqe) {
		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
		return ret;
	}

	ublk_ctrl_init_cmd(dev, sqe, data);

	ret = io_uring_submit(&dev->ring);
	if (ret < 0) {
		ublk_err("uring submit ret %d\n", ret);
		return ret;
	}

	ret = io_uring_wait_cqe(&dev->ring, &cqe);
	if (ret < 0) {
		ublk_err("wait cqe: %s\n", strerror(-ret));
		return ret;
	}
	io_uring_cqe_seen(&dev->ring, cqe);

	return cqe->res;
}

static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_STOP_DEV,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_start_dev(struct ublk_dev *dev,
		int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_START_DEV,
		.flags = CTRL_CMD_HAS_DATA,
	};

	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
		.flags = CTRL_CMD_HAS_DATA,
	};

	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_ADD_DEV,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) &dev->dev_info,
		.len = sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}
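
/*
 * All of the ublk_ctrl_*() helpers above and below follow the same
 * pattern: fill a struct ublk_ctrl_cmd_data with a UBLK_U_CMD_* opcode
 * plus an optional buffer (CTRL_CMD_HAS_BUF) and/or inline payload
 * (CTRL_CMD_HAS_DATA), then let __ublk_ctrl_cmd() submit it as a single
 * IORING_OP_URING_CMD on the SQE128 control ring and hand back cqe->res.
 *
 * Illustrative sketch (not part of this file) of adding another control
 * helper on top of __ublk_ctrl_cmd(), assuming a hypothetical
 * UBLK_U_CMD_FOO opcode that carries one u64 argument:
 *
 *	static int ublk_ctrl_foo(struct ublk_dev *dev, __u64 arg)
 *	{
 *		struct ublk_ctrl_cmd_data data = {
 *			.cmd_op = UBLK_U_CMD_FOO,
 *			.flags = CTRL_CMD_HAS_DATA,
 *		};
 *
 *		data.data[0] = arg;
 *		return __ublk_ctrl_cmd(dev, &data);
 *	}
 */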

static int ublk_ctrl_del_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_DEL_DEV,
		.flags = 0,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_info(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_DEV_INFO,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) &dev->dev_info,
		.len = sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_set_params(struct ublk_dev *dev,
		struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_SET_PARAMS,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) params,
		.len = sizeof(*params),
	};

	params->len = sizeof(*params);
	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_params(struct ublk_dev *dev,
		struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_PARAMS,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) params,
		.len = sizeof(*params),
	};

	params->len = sizeof(*params);

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_features(struct ublk_dev *dev,
		__u64 *features)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_FEATURES,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) features,
		.len = sizeof(*features),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_update_size(struct ublk_dev *dev,
		__u64 nr_sects)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_UPDATE_SIZE,
		.flags = CTRL_CMD_HAS_DATA,
	};

	data.data[0] = nr_sects;
	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev,
		unsigned int timeout_ms)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_QUIESCE_DEV,
		.flags = CTRL_CMD_HAS_DATA,
	};

	data.data[0] = timeout_ms;
	return __ublk_ctrl_cmd(dev, &data);
}

static const char *ublk_dev_state_desc(struct ublk_dev *dev)
{
	switch (dev->dev_info.state) {
	case UBLK_S_DEV_DEAD:
		return "DEAD";
	case UBLK_S_DEV_LIVE:
		return "LIVE";
	case UBLK_S_DEV_QUIESCED:
		return "QUIESCED";
	default:
		return "UNKNOWN";
	}
}

static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
{
	unsigned done = 0;
	int i;

	for (i = 0; i < CPU_SETSIZE; i++) {
		if (CPU_ISSET(i, set))
			done += snprintf(&buf[done], len - done, "%d ", i);
	}
}

static void ublk_adjust_affinity(cpu_set_t *set)
{
	int j, updated = 0;

	/*
	 * Just keep the 1st CPU now.
	 *
	 * In future, auto affinity selection can be tried.
	 */
	for (j = 0; j < CPU_SETSIZE; j++) {
		if (CPU_ISSET(j, set)) {
			if (!updated) {
				updated = 1;
				continue;
			}
			CPU_CLR(j, set);
		}
	}
}

/* Caller must free the allocated buffer */
static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
		.flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
	};
	cpu_set_t *buf;
	int i, ret;

	buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
	if (!buf)
		return -ENOMEM;

	for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
		data.data[0] = i;
		data.len = sizeof(cpu_set_t);
		data.addr = (__u64) (uintptr_t) &buf[i];

		ret = __ublk_ctrl_cmd(ctrl_dev, &data);
		if (ret < 0) {
			free(buf);
			return ret;
		}
		ublk_adjust_affinity(&buf[i]);
	}

	*ptr_buf = buf;
	return 0;
}

static void ublk_ctrl_dump(struct ublk_dev *dev)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublk_params p;
	cpu_set_t *affinity;
	int ret;

	ret = ublk_ctrl_get_params(dev, &p);
	if (ret < 0) {
		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
		return;
	}

	ret = ublk_ctrl_get_affinity(dev, &affinity);
	if (ret < 0) {
		ublk_err("failed to get affinity %m\n");
		return;
	}

	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
			info->dev_id, info->nr_hw_queues, info->queue_depth,
			1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
			info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
			ublk_dev_state_desc(dev));

	if (affinity) {
		char buf[512];
		int i;

		for (i = 0; i < info->nr_hw_queues; i++) {
			ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
			printf("\tqueue %u: affinity(%s)\n", i, buf);
		}
		free(affinity);
	}

	fflush(stdout);
}

static void ublk_ctrl_deinit(struct ublk_dev *dev)
{
	close(dev->ctrl_fd);
	free(dev);
}

static struct ublk_dev *ublk_ctrl_init(void)
{
	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
	struct ublksrv_ctrl_dev_info *info;
	int ret;

	if (!dev)
		return NULL;

	info = &dev->dev_info;
	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
	if (dev->ctrl_fd < 0) {
		free(dev);
		return NULL;
	}

	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;

	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
	if (ret < 0) {
		ublk_err("queue_init: %s\n", strerror(-ret));
		close(dev->ctrl_fd);
		free(dev);
		return NULL;
	}
	dev->nr_fds = 1;

	return dev;
}

static int __ublk_queue_cmd_buf_sz(unsigned depth)
{
	int size = depth * sizeof(struct ublksrv_io_desc);
	unsigned int page_sz = getpagesize();

	return round_up(size, page_sz);
}

static int ublk_queue_max_cmd_buf_sz(void)
{
	return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
}

static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
{
	return __ublk_queue_cmd_buf_sz(q->q_depth);
}
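
/*
 * Layout of the per-queue I/O descriptor buffer, as used by
 * ublk_queue_init() below: queue q_id has room for q_depth
 * struct ublksrv_io_desc entries, rounded up to the page size, and is
 * mmap()ed read-only from the ublkc char device at
 *
 *	UBLKSRV_CMD_BUF_OFFSET + q_id * ublk_queue_max_cmd_buf_sz()
 *
 * i.e. every queue gets a slot sized for UBLK_MAX_QUEUE_DEPTH so the
 * offsets stay fixed regardless of the actual queue depth.  Rough
 * sketch of reading one descriptor (illustrative only; the harness
 * provides its own accessor):
 *
 *	const struct ublksrv_io_desc *iod =
 *		(const struct ublksrv_io_desc *)q->io_cmd_buf + tag;
 */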

static void ublk_queue_deinit(struct ublk_queue *q)
{
	int i;
	int nr_ios = q->q_depth;

	if (q->io_cmd_buf)
		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));

	for (i = 0; i < nr_ios; i++)
		free(q->ios[i].buf_addr);
}

static void ublk_thread_deinit(struct ublk_thread *t)
{
	io_uring_unregister_buffers(&t->ring);

	io_uring_unregister_ring_fd(&t->ring);

	if (t->ring.ring_fd > 0) {
		io_uring_unregister_files(&t->ring);
		close(t->ring.ring_fd);
		t->ring.ring_fd = -1;
	}
}

static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
{
	struct ublk_dev *dev = q->dev;
	int depth = dev->dev_info.queue_depth;
	int i;
	int cmd_buf_size, io_buf_size;
	unsigned long off;

	q->tgt_ops = dev->tgt.ops;
	q->flags = 0;
	q->q_depth = depth;
	q->flags = dev->dev_info.flags;
	q->flags |= extra_flags;

	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
	q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
	if (q->io_cmd_buf == MAP_FAILED) {
		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
				q->dev->dev_info.dev_id, q->q_id);
		goto fail;
	}

	io_buf_size = dev->dev_info.max_io_buf_bytes;
	for (i = 0; i < q->q_depth; i++) {
		q->ios[i].buf_addr = NULL;
		q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE;
		q->ios[i].tag = i;

		if (ublk_queue_no_buf(q))
			continue;

		if (posix_memalign((void **)&q->ios[i].buf_addr,
					getpagesize(), io_buf_size)) {
			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
					dev->dev_info.dev_id, q->q_id, i);
			goto fail;
		}
	}

	return 0;
fail:
	ublk_queue_deinit(q);
	ublk_err("ublk dev %d queue %d failed\n",
			dev->dev_info.dev_id, q->q_id);
	return -ENOMEM;
}

static int ublk_thread_init(struct ublk_thread *t)
{
	struct ublk_dev *dev = t->dev;
	int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
	int ret;

	ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth,
			IORING_SETUP_COOP_TASKRUN |
			IORING_SETUP_SINGLE_ISSUER |
			IORING_SETUP_DEFER_TASKRUN);
	if (ret < 0) {
		ublk_err("ublk dev %d thread %d setup io_uring failed %d\n",
				dev->dev_info.dev_id, t->idx, ret);
		goto fail;
	}

	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
		unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues;
		unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads;

		max_nr_ios_per_thread += !!(nr_ios % dev->nthreads);
		ret = io_uring_register_buffers_sparse(
			&t->ring, max_nr_ios_per_thread);
		if (ret) {
			ublk_err("ublk dev %d thread %d register sparse buffers failed %d\n",
					dev->dev_info.dev_id, t->idx, ret);
			goto fail;
		}
	}

	io_uring_register_ring_fd(&t->ring);

	ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
	if (ret) {
		ublk_err("ublk dev %d thread %d register files failed %d\n",
				t->dev->dev_info.dev_id, t->idx, ret);
		goto fail;
	}

	return 0;
fail:
	ublk_thread_deinit(t);
	ublk_err("ublk dev %d thread %d init failed\n",
			dev->dev_info.dev_id, t->idx);
	return -ENOMEM;
}
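
/*
 * Per-thread io_uring setup note (ublk_thread_init() above): with
 * UBLK_F_SUPPORT_ZERO_COPY or UBLK_F_AUTO_BUF_REG each thread registers
 * a sparse fixed-buffer table sized for its share of the device's I/Os,
 * computed as a ceiling division:
 *
 *	max_nr_ios_per_thread =
 *		(nr_hw_queues * queue_depth + nthreads - 1) / nthreads
 *
 * For example (illustrative numbers only): 2 queues x depth 128 served
 * by 3 threads gives 256 / 3 rounded up, i.e. 86 buffer slots per thread.
 */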

#define WAIT_USEC 100000
#define MAX_WAIT_USEC (3 * 1000000)
static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	int dev_id = dev->dev_info.dev_id;
	unsigned int wait_usec = 0;
	int ret = 0, fd = -1;
	char buf[64];

	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);

	while (wait_usec < MAX_WAIT_USEC) {
		fd = open(buf, O_RDWR);
		if (fd >= 0)
			break;
		usleep(WAIT_USEC);
		wait_usec += WAIT_USEC;
	}
	if (fd < 0) {
		ublk_err("can't open %s %s\n", buf, strerror(errno));
		return -1;
	}

	dev->fds[0] = fd;
	if (dev->tgt.ops->init_tgt)
		ret = dev->tgt.ops->init_tgt(ctx, dev);
	if (ret)
		close(dev->fds[0]);
	return ret;
}

static void ublk_dev_unprep(struct ublk_dev *dev)
{
	if (dev->tgt.ops->deinit_tgt)
		dev->tgt.ops->deinit_tgt(dev);
	close(dev->fds[0]);
}

static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
		struct io_uring_sqe *sqe,
		unsigned short tag)
{
	struct ublk_auto_buf_reg buf = {};

	if (q->tgt_ops->buf_index)
		buf.index = q->tgt_ops->buf_index(q, tag);
	else
		buf.index = q->ios[tag].buf_index;

	if (ublk_queue_auto_zc_fallback(q))
		buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;

	sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
}

int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
{
	struct ublk_queue *q = ublk_io_to_queue(io);
	struct ublksrv_io_cmd *cmd;
	struct io_uring_sqe *sqe[1];
	unsigned int cmd_op = 0;
	__u64 user_data;

	/* only freed io can be issued */
	if (!(io->flags & UBLKS_IO_FREE))
		return 0;

	/*
	 * we issue because we need either fetching or committing or
	 * getting data
	 */
	if (!(io->flags &
		(UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA)))
		return 0;

	if (io->flags & UBLKS_IO_NEED_GET_DATA)
		cmd_op = UBLK_U_IO_NEED_GET_DATA;
	else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP)
		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
	else if (io->flags & UBLKS_IO_NEED_FETCH_RQ)
		cmd_op = UBLK_U_IO_FETCH_REQ;

	if (io_uring_sq_space_left(&t->ring) < 1)
		io_uring_submit(&t->ring);

	ublk_io_alloc_sqes(t, sqe, 1);
	if (!sqe[0]) {
		ublk_err("%s: run out of sqe. thread %u, tag %d\n",
				__func__, t->idx, io->tag);
		return -1;
	}

	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);

	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
		cmd->result = io->result;

	/* These fields should be written once, never change */
	ublk_set_sqe_cmd_op(sqe[0], cmd_op);
	sqe[0]->fd = 0;	/* dev->fds[0] */
	sqe[0]->opcode = IORING_OP_URING_CMD;
	sqe[0]->flags = IOSQE_FIXED_FILE;
	sqe[0]->rw_flags = 0;
	cmd->tag = io->tag;
	cmd->q_id = q->q_id;
	if (!ublk_queue_no_buf(q))
		cmd->addr = (__u64) (uintptr_t) io->buf_addr;
	else
		cmd->addr = 0;

	if (ublk_queue_use_auto_zc(q))
		ublk_set_auto_buf_reg(q, sqe[0], io->tag);

	user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0);
	io_uring_sqe_set_data64(sqe[0], user_data);

	io->flags = 0;

	t->cmd_inflight += 1;

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n",
			__func__, t->idx, q->q_id, io->tag, cmd_op,
			io->flags, !!(t->state & UBLKS_T_STOPPING));
	return 1;
}
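
/*
 * The io command state machine driven by ublk_queue_io_cmd() above: a
 * tag starts as (UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE) and is fetched
 * with UBLK_U_IO_FETCH_REQ; once the target has served the request, the
 * result is sent back with UBLK_U_IO_COMMIT_AND_FETCH_REQ, which also
 * re-arms the tag for the next request.  UBLK_U_IO_NEED_GET_DATA is
 * only used when UBLK_F_NEED_GET_DATA asks the driver to copy write
 * data into the daemon buffer first.  The CQE is matched back to its
 * tag and queue via build_user_data(tag, op, tgt_data, q_id,
 * is_target_io), decoded later by the user_data_to_*() helpers in
 * ublk_handle_cqe().
 */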

static void ublk_submit_fetch_commands(struct ublk_thread *t)
{
	struct ublk_queue *q;
	struct ublk_io *io;
	int i = 0, j = 0;

	if (t->dev->per_io_tasks) {
		/*
		 * Lexicographically order all the (qid,tag) pairs, with
		 * qid taking priority (so (1,0) > (0,1)). Then make
		 * this thread the daemon for every Nth entry in this
		 * list (N is the number of threads), starting at this
		 * thread's index. This ensures that each queue is
		 * handled by as many ublk server threads as possible,
		 * so that load that is concentrated on one or a few
		 * queues can make use of all ublk server threads.
		 */
		const struct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info;
		int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth;

		for (i = t->idx; i < nr_ios; i += t->dev->nthreads) {
			int q_id = i / dinfo->queue_depth;
			int tag = i % dinfo->queue_depth;

			q = &t->dev->q[q_id];
			io = &q->ios[tag];
			io->buf_index = j++;
			ublk_queue_io_cmd(t, io);
		}
	} else {
		/*
		 * Service exclusively the queue whose q_id matches our
		 * thread index.
		 */
		struct ublk_queue *q = &t->dev->q[t->idx];

		for (i = 0; i < q->q_depth; i++) {
			io = &q->ios[i];
			io->buf_index = i;
			ublk_queue_io_cmd(t, io);
		}
	}
}
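
/*
 * Worked example for the per_io_tasks mapping above (illustrative
 * numbers only): with 2 queues of depth 4 and 3 server threads, the
 * flattened index i = q_id * queue_depth + tag runs over
 * (0,0)..(0,3),(1,0)..(1,3), and thread k takes every 3rd entry
 * starting at k:
 *
 *	thread 0: (0,0) (0,3) (1,2)
 *	thread 1: (0,1) (1,0) (1,3)
 *	thread 2: (0,2) (1,1)
 *
 * so every queue ends up spread across all threads.  The per-thread
 * counter j hands out dense fixed-buffer slots in the order the tags
 * are claimed.
 */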

static int ublk_thread_is_idle(struct ublk_thread *t)
{
	return !io_uring_sq_ready(&t->ring) && !t->io_inflight;
}

static int ublk_thread_is_done(struct ublk_thread *t)
{
	return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t);
}

static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t,
		struct ublk_queue *q,
		struct io_uring_cqe *cqe)
{
	if (cqe->res < 0 && cqe->res != -EAGAIN)
		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
			__func__, cqe->res, q->q_id,
			user_data_to_tag(cqe->user_data),
			user_data_to_op(cqe->user_data));

	if (q->tgt_ops->tgt_io_done)
		q->tgt_ops->tgt_io_done(t, q, cqe);
}

static void ublk_handle_uring_cmd(struct ublk_thread *t,
		struct ublk_queue *q,
		const struct io_uring_cqe *cqe)
{
	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
		!(t->state & UBLKS_T_STOPPING);
	unsigned tag = user_data_to_tag(cqe->user_data);
	struct ublk_io *io = &q->ios[tag];

	if (!fetch) {
		t->state |= UBLKS_T_STOPPING;
		io->flags &= ~UBLKS_IO_NEED_FETCH_RQ;
	}

	if (cqe->res == UBLK_IO_RES_OK) {
		assert(tag < q->q_depth);
		if (q->tgt_ops->queue_io)
			q->tgt_ops->queue_io(t, q, tag);
	} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
		io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE;
		ublk_queue_io_cmd(t, io);
	} else {
		/*
		 * COMMIT_REQ will be completed immediately since no fetching
		 * piggyback is required.
		 *
		 * Marking IO_FREE only, then this io won't be issued since
		 * we only issue io with (UBLKS_IO_FREE | UBLKS_IO_NEED_*)
		 */
		io->flags = UBLKS_IO_FREE;
	}
}

static void ublk_handle_cqe(struct ublk_thread *t,
		struct io_uring_cqe *cqe, void *data)
{
	struct ublk_dev *dev = t->dev;
	unsigned q_id = user_data_to_q_id(cqe->user_data);
	struct ublk_queue *q = &dev->q[q_id];
	unsigned cmd_op = user_data_to_op(cqe->user_data);

	if (cqe->res < 0 && cqe->res != -ENODEV)
		ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
				cqe->res, cqe->user_data, q->flags);

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
			__func__, cqe->res, q->q_id, user_data_to_tag(cqe->user_data),
			cmd_op, is_target_io(cqe->user_data),
			user_data_to_tgt_data(cqe->user_data),
			(t->state & UBLKS_T_STOPPING));

	/* Don't retrieve io in case of target io */
	if (is_target_io(cqe->user_data)) {
		ublksrv_handle_tgt_cqe(t, q, cqe);
		return;
	}

	t->cmd_inflight--;

	ublk_handle_uring_cmd(t, q, cqe);
}

static int ublk_reap_events_uring(struct ublk_thread *t)
{
	struct io_uring_cqe *cqe;
	unsigned head;
	int count = 0;

	io_uring_for_each_cqe(&t->ring, head, cqe) {
		ublk_handle_cqe(t, cqe, NULL);
		count += 1;
	}
	io_uring_cq_advance(&t->ring, count);

	return count;
}

static int ublk_process_io(struct ublk_thread *t)
{
	int ret, reapped;

	ublk_dbg(UBLK_DBG_THREAD, "dev%d-t%u: to_submit %d inflight cmd %u stopping %d\n",
			t->dev->dev_info.dev_id,
			t->idx, io_uring_sq_ready(&t->ring),
			t->cmd_inflight,
			(t->state & UBLKS_T_STOPPING));

	if (ublk_thread_is_done(t))
		return -ENODEV;

	ret = io_uring_submit_and_wait(&t->ring, 1);
	reapped = ublk_reap_events_uring(t);

	ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n",
			ret, reapped, (t->state & UBLKS_T_STOPPING),
			(t->state & UBLKS_T_IDLE));

	return reapped;
}

static void ublk_thread_set_sched_affinity(const struct ublk_thread *t,
		cpu_set_t *cpuset)
{
	if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
		ublk_err("ublk dev %u thread %u set affinity failed\n",
				t->dev->dev_info.dev_id, t->idx);
}

struct ublk_thread_info {
	struct ublk_dev *dev;
	unsigned idx;
	sem_t *ready;
	cpu_set_t *affinity;
};

static void *ublk_io_handler_fn(void *data)
{
	struct ublk_thread_info *info = data;
	struct ublk_thread *t = &info->dev->threads[info->idx];
	int dev_id = info->dev->dev_info.dev_id;
	int ret;

	t->dev = info->dev;
	t->idx = info->idx;

	ret = ublk_thread_init(t);
	if (ret) {
		ublk_err("ublk dev %d thread %u init failed\n",
				dev_id, t->idx);
		return NULL;
	}
	/* IO performance is sensitive to pthread affinity on NUMA machines */
	if (info->affinity)
		ublk_thread_set_sched_affinity(t, info->affinity);
	sem_post(info->ready);

	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
			gettid(), dev_id, t->idx);

	/* submit all io commands to ublk driver */
	ublk_submit_fetch_commands(t);
	do {
		if (ublk_process_io(t) < 0)
			break;
	} while (1);

	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u exiting\n",
			gettid(), dev_id, t->idx);
	ublk_thread_deinit(t);
	return NULL;
}
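
/*
 * Per-thread event loop summary: ublk_io_handler_fn() queues the
 * initial FETCH_REQ commands and then alternates
 * io_uring_submit_and_wait() with ublk_reap_events_uring().  Each CQE
 * is dispatched by ublk_handle_cqe(): target I/O completions (tagged
 * via is_target_io() in the user_data) go to the target's tgt_io_done()
 * callback, while ublk uring_cmd completions go to
 * ublk_handle_uring_cmd(), which either calls the target's queue_io()
 * or marks the tag free.  The loop exits once the thread is stopping
 * and both the SQ and the in-flight work have drained.
 */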

static void ublk_set_parameters(struct ublk_dev *dev)
{
	int ret;

	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
	if (ret)
		ublk_err("dev %d set basic parameter failed %d\n",
				dev->dev_info.dev_id, ret);
}

static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
{
	uint64_t id;
	int evtfd = ctx->_evtfd;

	if (evtfd < 0)
		return -EBADF;

	if (dev_id >= 0)
		id = dev_id + 1;
	else
		id = ERROR_EVTFD_DEVID;

	if (dev && ctx->shadow_dev)
		memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));

	if (write(evtfd, &id, sizeof(id)) != sizeof(id))
		return -EINVAL;

	close(evtfd);
	shmdt(ctx->shadow_dev);

	return 0;
}
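
/*
 * Daemonization handshake: cmd_dev_add() (further below) creates an
 * eventfd and a shared-memory "shadow" copy of struct ublk_dev before
 * forking.  Once the daemon has started the device, it reports the
 * allocated dev_id back through ublk_send_dev_event() above (dev_id + 1
 * on success, ERROR_EVTFD_DEVID on failure) and snapshots its queue
 * state into the shared segment, so the foreground parent can print the
 * same information via __cmd_dev_list() without talking to the daemon.
 */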

static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
	struct ublk_thread_info *tinfo;
	unsigned long long extra_flags = 0;
	cpu_set_t *affinity_buf;
	void *thread_ret;
	sem_t ready;
	int ret, i;

	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);

	tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads);
	if (!tinfo)
		return -ENOMEM;

	sem_init(&ready, 0, 0);
	ret = ublk_dev_prep(ctx, dev);
	if (ret) {
		free(tinfo);
		return ret;
	}

	ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
	if (ret) {
		free(tinfo);
		return ret;
	}

	if (ctx->auto_zc_fallback)
		extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK;

	for (i = 0; i < dinfo->nr_hw_queues; i++) {
		dev->q[i].dev = dev;
		dev->q[i].q_id = i;

		ret = ublk_queue_init(&dev->q[i], extra_flags);
		if (ret) {
			ublk_err("ublk dev %d queue %d init queue failed\n",
					dinfo->dev_id, i);
			goto fail;
		}
	}

	for (i = 0; i < dev->nthreads; i++) {
		tinfo[i].dev = dev;
		tinfo[i].idx = i;
		tinfo[i].ready = &ready;

		/*
		 * If threads are not tied 1:1 to queues, setting thread
		 * affinity based on queue affinity makes little sense.
		 * However, thread CPU affinity has significant impact
		 * on performance, so to compare fairly, we'll still set
		 * thread CPU affinity based on queue affinity where
		 * possible.
		 */
		if (dev->nthreads == dinfo->nr_hw_queues)
			tinfo[i].affinity = &affinity_buf[i];
		pthread_create(&dev->threads[i].thread, NULL,
				ublk_io_handler_fn,
				&tinfo[i]);
	}

	for (i = 0; i < dev->nthreads; i++)
		sem_wait(&ready);
	free(tinfo);
	free(affinity_buf);

	/* everything is fine now, start us */
	if (ctx->recovery)
		ret = ublk_ctrl_end_user_recovery(dev, getpid());
	else {
		ublk_set_parameters(dev);
		ret = ublk_ctrl_start_dev(dev, getpid());
	}
	if (ret < 0) {
		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
		goto fail;
	}

	ublk_ctrl_get_info(dev);
	if (ctx->fg)
		ublk_ctrl_dump(dev);
	else
		ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);

	/* wait until we are terminated */
	for (i = 0; i < dev->nthreads; i++)
		pthread_join(dev->threads[i].thread, &thread_ret);
fail:
	for (i = 0; i < dinfo->nr_hw_queues; i++)
		ublk_queue_deinit(&dev->q[i]);
	ublk_dev_unprep(dev);
	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);

	return ret;
}

static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
{
#define EV_SIZE (sizeof(struct inotify_event))
#define EV_BUF_LEN (128 * (EV_SIZE + 16))
	struct pollfd pfd;
	int fd, wd;
	int ret = -EINVAL;
	const char *dev_name = basename(path);

	fd = inotify_init();
	if (fd < 0) {
		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
		return fd;
	}

	wd = inotify_add_watch(fd, "/dev", evt_mask);
	if (wd == -1) {
		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
		goto fail;
	}

	pfd.fd = fd;
	pfd.events = POLLIN;
	while (1) {
		int i = 0;
		char buffer[EV_BUF_LEN];

		ret = poll(&pfd, 1, 1000 * timeout);
		if (ret == -1) {
			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
			goto rm_watch;
		} else if (ret == 0) {
			ublk_err("%s: poll inotify timeout\n", __func__);
			ret = -ETIMEDOUT;
			goto rm_watch;
		}

		ret = read(fd, buffer, EV_BUF_LEN);
		if (ret < 0) {
			ublk_err("%s: read inotify fd failed\n", __func__);
			goto rm_watch;
		}

		while (i < ret) {
			struct inotify_event *event = (struct inotify_event *)&buffer[i];

			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
					__func__, event->mask, event->name);
			if (event->mask & evt_mask) {
				if (!strcmp(event->name, dev_name)) {
					ret = 0;
					goto rm_watch;
				}
			}
			i += EV_SIZE + event->len;
		}
	}
rm_watch:
	inotify_rm_watch(fd, wd);
fail:
	close(fd);
	return ret;
}
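
/*
 * Teardown helper note: ublk_stop_io_daemon() below relies on
 * wait_ublk_dev() to watch /dev with inotify for an IN_CLOSE event on
 * the ublkc<N> char device, which fires when the daemon drops its last
 * reference after UBLK_U_CMD_STOP_DEV.  Because the device may already
 * have been closed before the watch is installed, a timeout is treated
 * as "check again with kill(pid, 0)" rather than as a hard failure.
 */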

static int ublk_stop_io_daemon(const struct ublk_dev *dev)
{
	int daemon_pid = dev->dev_info.ublksrv_pid;
	int dev_id = dev->dev_info.dev_id;
	char ublkc[64];
	int ret = 0;

	if (daemon_pid < 0)
		return 0;

	/* daemon may be dead already */
	if (kill(daemon_pid, 0) < 0)
		goto wait;

	snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);

	/* ublk char device may be gone already */
	if (access(ublkc, F_OK) != 0)
		goto wait;

	/* wait until the ublk char device is closed, i.e. the daemon has shut down */
	ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
	/* double check, since the device may have been closed before inotify was armed */
	if (ret == -ETIMEDOUT)
		ret = kill(daemon_pid, 0) < 0;
wait:
	waitpid(daemon_pid, NULL, 0);
	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
			__func__, daemon_pid, dev_id, ret);

	return ret;
}

static int __cmd_dev_add(const struct dev_ctx *ctx)
{
	unsigned nthreads = ctx->nthreads;
	unsigned nr_queues = ctx->nr_hw_queues;
	const char *tgt_type = ctx->tgt_type;
	unsigned depth = ctx->queue_depth;
	__u64 features;
	const struct ublk_tgt_ops *ops;
	struct ublksrv_ctrl_dev_info *info;
	struct ublk_dev *dev = NULL;
	int dev_id = ctx->dev_id;
	int ret, i;

	ops = ublk_find_tgt(tgt_type);
	if (!ops) {
		ublk_err("%s: no such tgt type, type %s\n",
				__func__, tgt_type);
		ret = -ENODEV;
		goto fail;
	}

	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
				__func__, nr_queues, depth);
		ret = -EINVAL;
		goto fail;
	}

	/* default to 1:1 threads:queues if nthreads is unspecified */
	if (!nthreads)
		nthreads = nr_queues;

	if (nthreads > UBLK_MAX_THREADS) {
		ublk_err("%s: %u is too many threads (max %u)\n",
				__func__, nthreads, UBLK_MAX_THREADS);
		ret = -EINVAL;
		goto fail;
	}

	if (nthreads != nr_queues && !ctx->per_io_tasks) {
		ublk_err("%s: threads %u must be the same as queues %u if "
				"not using per_io_tasks\n",
				__func__, nthreads, nr_queues);
		ret = -EINVAL;
		goto fail;
	}

	dev = ublk_ctrl_init();
	if (!dev) {
		ublk_err("%s: can't alloc dev id %d, type %s\n",
				__func__, dev_id, tgt_type);
		ret = -ENOMEM;
		goto fail;
	}

	/* bail out early if the kernel doesn't support GET_FEATURES */
	ret = ublk_ctrl_get_features(dev, &features);
	if (ret < 0) {
		ret = -EINVAL;
		goto fail;
	}

	if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) {
		ret = -ENOTSUP;
		goto fail;
	}

	info = &dev->dev_info;
	info->dev_id = ctx->dev_id;
	info->nr_hw_queues = nr_queues;
	info->queue_depth = depth;
	info->flags = ctx->flags;
	if ((features & UBLK_F_QUIESCE) &&
			(info->flags & UBLK_F_USER_RECOVERY))
		info->flags |= UBLK_F_QUIESCE;
	dev->nthreads = nthreads;
	dev->per_io_tasks = ctx->per_io_tasks;
	dev->tgt.ops = ops;
	dev->tgt.sq_depth = depth;
	dev->tgt.cq_depth = depth;

	for (i = 0; i < MAX_BACK_FILES; i++) {
		if (ctx->files[i]) {
			strcpy(dev->tgt.backing_file[i], ctx->files[i]);
			dev->tgt.nr_backing_files++;
		}
	}

	if (ctx->recovery)
		ret = ublk_ctrl_start_user_recovery(dev);
	else
		ret = ublk_ctrl_add_dev(dev);
	if (ret < 0) {
		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
				__func__, dev_id, tgt_type, ret);
		goto fail;
	}

	ret = ublk_start_daemon(ctx, dev);
	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
	if (ret < 0)
		ublk_ctrl_del_dev(dev);

fail:
	if (ret < 0)
		ublk_send_dev_event(ctx, dev, -1);
	if (dev)
		ublk_ctrl_deinit(dev);
	return ret;
}
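
/*
 * Overall device creation sequence, as implemented by __cmd_dev_add()
 * above and ublk_start_daemon():
 *
 *	1. UBLK_U_CMD_ADD_DEV (or START_USER_RECOVERY) registers the
 *	   device and allocates /dev/ublkc<N>;
 *	2. the daemon opens /dev/ublkc<N>, mmaps the descriptor buffers
 *	   and starts one io_uring per server thread;
 *	3. UBLK_U_CMD_SET_PARAMS then UBLK_U_CMD_START_DEV (or
 *	   END_USER_RECOVERY) makes the block device visible to users.
 *
 * Failures after ADD_DEV fall through to UBLK_U_CMD_DEL_DEV so no
 * half-initialized device is left behind.
 */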

static int __cmd_dev_list(struct dev_ctx *ctx);

static int cmd_dev_add(struct dev_ctx *ctx)
{
	int res;

	if (ctx->fg)
		goto run;

	ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
	if (ctx->_shmid < 0) {
		ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
		exit(-1);
	}
	ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
	if (ctx->shadow_dev == (struct ublk_dev *)-1) {
		ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
		exit(-1);
	}
	ctx->_evtfd = eventfd(0, 0);
	if (ctx->_evtfd < 0) {
		ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
		exit(-1);
	}

	res = fork();
	if (res == 0) {
		int res2;

		setsid();
		res2 = fork();
		if (res2 == 0) {
			/* prepare for detaching */
			close(STDIN_FILENO);
			close(STDOUT_FILENO);
			close(STDERR_FILENO);
run:
			res = __cmd_dev_add(ctx);
			return res;
		} else {
			/* detached from the foreground task */
			exit(EXIT_SUCCESS);
		}
	} else if (res > 0) {
		uint64_t id;
		int exit_code = EXIT_FAILURE;

		res = read(ctx->_evtfd, &id, sizeof(id));
		close(ctx->_evtfd);
		if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
			ctx->dev_id = id - 1;
			if (__cmd_dev_list(ctx) >= 0)
				exit_code = EXIT_SUCCESS;
		}
		shmdt(ctx->shadow_dev);
		shmctl(ctx->_shmid, IPC_RMID, NULL);
		/* wait for child and detach from it */
		wait(NULL);
		if (exit_code == EXIT_FAILURE)
			ublk_err("%s: command failed\n", __func__);
		exit(exit_code);
	} else {
		exit(EXIT_FAILURE);
	}
}
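
/*
 * Device removal mirrors creation: __cmd_dev_del() below issues
 * UBLK_U_CMD_STOP_DEV, waits for the io daemon to exit (see
 * ublk_stop_io_daemon()), and only then sends UBLK_U_CMD_DEL_DEV.
 * "del -a" simply walks dev ids 0..254 and ignores ids that do not
 * exist, so it is safe to run with no devices present.
 */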

static int __cmd_dev_del(struct dev_ctx *ctx)
{
	int number = ctx->dev_id;
	struct ublk_dev *dev;
	int ret;

	dev = ublk_ctrl_init();
	if (!dev)
		return -ENODEV;

	dev->dev_info.dev_id = number;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0)
		goto fail;

	ret = ublk_ctrl_stop_dev(dev);
	if (ret < 0)
		ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);

	ret = ublk_stop_io_daemon(dev);
	if (ret < 0)
		ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
				__func__, dev->dev_info.ublksrv_pid, number, ret);
	ublk_ctrl_del_dev(dev);
fail:
	ublk_ctrl_deinit(dev);

	return (ret >= 0) ? 0 : ret;
}

static int cmd_dev_del(struct dev_ctx *ctx)
{
	int i;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_del(ctx);

	for (i = 0; i < 255; i++) {
		ctx->dev_id = i;
		__cmd_dev_del(ctx);
	}
	return 0;
}

static int __cmd_dev_list(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	int ret;

	if (!dev)
		return -ENODEV;

	dev->dev_info.dev_id = ctx->dev_id;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0) {
		if (ctx->logging)
			ublk_err("%s: can't get dev info from %d: %d\n",
					__func__, ctx->dev_id, ret);
	} else {
		if (ctx->shadow_dev)
			memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));

		ublk_ctrl_dump(dev);
	}

	ublk_ctrl_deinit(dev);

	return ret;
}

static int cmd_dev_list(struct dev_ctx *ctx)
{
	int i;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_list(ctx);

	ctx->logging = false;
	for (i = 0; i < 255; i++) {
		ctx->dev_id = i;
		__cmd_dev_list(ctx);
	}
	return 0;
}

static int cmd_dev_get_features(void)
{
#define const_ilog2(x) (63 - __builtin_clzll(x))
	static const char *feat_map[] = {
		[const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY",
		[const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK",
		[const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA",
		[const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY",
		[const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE",
		[const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV",
		[const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
		[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
		[const_ilog2(UBLK_F_ZONED)] = "ZONED",
		[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
		[const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE",
		[const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
		[const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE",
		[const_ilog2(UBLK_F_PER_IO_DAEMON)] = "PER_IO_DAEMON",
	};
	struct ublk_dev *dev;
	__u64 features = 0;
	int ret;

	dev = ublk_ctrl_init();
	if (!dev) {
		fprintf(stderr, "ublk_ctrl_init failed\n");
		return -EOPNOTSUPP;
	}

	ret = ublk_ctrl_get_features(dev, &features);
	if (!ret) {
		int i;

		printf("ublk_drv features: 0x%llx\n", features);

		for (i = 0; i < sizeof(features) * 8; i++) {
			const char *feat;

			if (!((1ULL << i) & features))
				continue;
			if (i < sizeof(feat_map) / sizeof(feat_map[0]))
				feat = feat_map[i];
			else
				feat = "unknown";
			printf("\t%-20s: 0x%llx\n", feat, 1ULL << i);
		}
	}

	ublk_ctrl_deinit(dev);

	return ret;
}
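
/*
 * Note on units for the update_size command below: the -s/--size value
 * is taken in bytes, validated against the device's logical block size,
 * and passed to UBLK_U_CMD_UPDATE_SIZE as 512-byte sectors (size >> 9),
 * matching dev_sectors in struct ublk_params.  For example, a 1 GiB
 * resize is sent as 2097152 sectors.
 */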

static int cmd_dev_update_size(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	struct ublk_params p;
	int ret = -EINVAL;

	if (!dev)
		return -ENODEV;

	if (ctx->dev_id < 0) {
		fprintf(stderr, "device id isn't provided\n");
		goto out;
	}

	dev->dev_info.dev_id = ctx->dev_id;
	ret = ublk_ctrl_get_params(dev, &p);
	if (ret < 0) {
		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
		goto out;
	}

	if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) {
		ublk_err("size isn't aligned with logical block size\n");
		ret = -EINVAL;
		goto out;
	}

	ret = ublk_ctrl_update_size(dev, ctx->size >> 9);
out:
	ublk_ctrl_deinit(dev);
	return ret;
}

static int cmd_dev_quiesce(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	int ret = -EINVAL;

	if (!dev)
		return -ENODEV;

	if (ctx->dev_id < 0) {
		fprintf(stderr, "device id isn't provided for quiesce\n");
		goto out;
	}
	dev->dev_info.dev_id = ctx->dev_id;
	ret = ublk_ctrl_quiesce_dev(dev, 10000);

out:
	ublk_ctrl_deinit(dev);
	return ret;
}

static void __cmd_create_help(char *exe, bool recovery)
{
	int i;

	printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
			exe, recovery ? "recover" : "add");
	printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
	printf("\t[-e 0|1 ] [-i 0|1]\n");
	printf("\t[--nthreads threads] [--per_io_tasks]\n");
	printf("\t[target options] [backfile1] [backfile2] ...\n");
	printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
	printf("\tdefault: nthreads=nr_queues\n");

	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
		const struct ublk_tgt_ops *ops = tgt_ops_list[i];

		if (ops->usage)
			ops->usage(ops);
	}
}

static void cmd_add_help(char *exe)
{
	__cmd_create_help(exe, false);
	printf("\n");
}

static void cmd_recover_help(char *exe)
{
	__cmd_create_help(exe, true);
	printf("\tPlease provide the exact command line used to create this device, with the real dev_id\n");
	printf("\n");
}

static int cmd_dev_help(char *exe)
{
	cmd_add_help(exe);
	cmd_recover_help(exe);

	printf("%s del [-n dev_id] -a\n", exe);
	printf("\t -a delete all devices, -n delete specified device\n\n");
	printf("%s list [-n dev_id] -a\n", exe);
	printf("\t -a list all devices, -n list specified device, default -a\n\n");
	printf("%s features\n", exe);
	printf("%s update_size -n dev_id -s|--size size_in_bytes\n", exe);
	printf("%s quiesce -n dev_id\n", exe);
	return 0;
}
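
/*
 * Example invocations (paths and sizes are illustrative only):
 *
 *	kublk add -t null -q 2 -d 128
 *	kublk add -t loop -q 1 ./disk.img
 *	kublk update_size -n 0 --size 1073741824
 *	kublk del -n 0
 *
 * The exact binary name depends on how the selftest is built; the
 * option set matches cmd_dev_help() above.
 */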

int main(int argc, char *argv[])
{
	static const struct option longopts[] = {
		{ "all", 0, NULL, 'a' },
		{ "type", 1, NULL, 't' },
		{ "number", 1, NULL, 'n' },
		{ "queues", 1, NULL, 'q' },
		{ "depth", 1, NULL, 'd' },
		{ "debug_mask", 1, NULL, 0 },
		{ "quiet", 0, NULL, 0 },
		{ "zero_copy", 0, NULL, 'z' },
		{ "foreground", 0, NULL, 0 },
		{ "recovery", 1, NULL, 'r' },
		{ "recovery_fail_io", 1, NULL, 'e' },
		{ "recovery_reissue", 1, NULL, 'i' },
		{ "get_data", 1, NULL, 'g' },
		{ "auto_zc", 0, NULL, 0 },
		{ "auto_zc_fallback", 0, NULL, 0 },
		{ "size", 1, NULL, 's' },
		{ "nthreads", 1, NULL, 0 },
		{ "per_io_tasks", 0, NULL, 0 },
		{ 0, 0, 0, 0 }
	};
	const struct ublk_tgt_ops *ops = NULL;
	int option_idx, opt;
	const char *cmd = argv[1];
	struct dev_ctx ctx = {
		.queue_depth = 128,
		.nr_hw_queues = 2,
		.dev_id = -1,
		.tgt_type = "unknown",
	};
	int ret = -EINVAL, i;
	int tgt_argc = 1;
	char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
	int value;

	if (argc == 1)
		return ret;

	opterr = 0;
	optind = 2;
	while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz",
				longopts, &option_idx)) != -1) {
		switch (opt) {
		case 'a':
			ctx.all = 1;
			break;
		case 'n':
			ctx.dev_id = strtol(optarg, NULL, 10);
			break;
		case 't':
			if (strlen(optarg) < sizeof(ctx.tgt_type))
				strcpy(ctx.tgt_type, optarg);
			break;
		case 'q':
			ctx.nr_hw_queues = strtol(optarg, NULL, 10);
			break;
		case 'd':
			ctx.queue_depth = strtol(optarg, NULL, 10);
			break;
		case 'z':
			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
			break;
		case 'r':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY;
			break;
		case 'e':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
			break;
		case 'i':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
			break;
		case 'g':
			ctx.flags |= UBLK_F_NEED_GET_DATA;
			break;
		case 's':
			ctx.size = strtoull(optarg, NULL, 10);
			break;
		case 0:
			if (!strcmp(longopts[option_idx].name, "debug_mask"))
				ublk_dbg_mask = strtol(optarg, NULL, 16);
			if (!strcmp(longopts[option_idx].name, "quiet"))
				ublk_dbg_mask = 0;
			if (!strcmp(longopts[option_idx].name, "foreground"))
				ctx.fg = 1;
			if (!strcmp(longopts[option_idx].name, "auto_zc"))
				ctx.flags |= UBLK_F_AUTO_BUF_REG;
			if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
				ctx.auto_zc_fallback = 1;
			if (!strcmp(longopts[option_idx].name, "nthreads"))
				ctx.nthreads = strtol(optarg, NULL, 10);
			if (!strcmp(longopts[option_idx].name, "per_io_tasks"))
				ctx.per_io_tasks = 1;
			break;
		case '?':
			/*
			 * target options always require an argument
			 */
			if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
				fprintf(stderr, "every target option requires argument: %s %s\n",
						argv[optind - 1], argv[optind]);
				exit(EXIT_FAILURE);
			}

			if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
				tgt_argv[tgt_argc++] = argv[optind - 1];
				tgt_argv[tgt_argc++] = argv[optind];
			} else {
				fprintf(stderr, "too many target options\n");
				exit(EXIT_FAILURE);
			}
			optind += 1;
			break;
		}
	}

	/* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
	if (ctx.auto_zc_fallback &&
			!((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
				(ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
		ublk_err("%s: auto_zc_fallback requires both "
				"F_AUTO_BUF_REG and F_SUPPORT_ZERO_COPY to be enabled\n",
				__func__);
		return -EINVAL;
	}

	i = optind;
	while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
		ctx.files[ctx.nr_files++] = argv[i++];
	}

	ops = ublk_find_tgt(ctx.tgt_type);
	if (ops && ops->parse_cmd_line) {
		optind = 0;

		tgt_argv[0] = ctx.tgt_type;
		ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
	}

	if (!strcmp(cmd, "add"))
		ret = cmd_dev_add(&ctx);
	else if (!strcmp(cmd, "recover")) {
		if (ctx.dev_id < 0) {
			fprintf(stderr, "device id isn't provided for recovering\n");
			ret = -EINVAL;
		} else {
			ctx.recovery = 1;
			ret = cmd_dev_add(&ctx);
		}
	} else if (!strcmp(cmd, "del"))
		ret = cmd_dev_del(&ctx);
	else if (!strcmp(cmd, "list")) {
		ctx.all = 1;
		ret = cmd_dev_list(&ctx);
	} else if (!strcmp(cmd, "help"))
		ret = cmd_dev_help(argv[0]);
	else if (!strcmp(cmd, "features"))
		ret = cmd_dev_get_features();
	else if (!strcmp(cmd, "update_size"))
		ret = cmd_dev_update_size(&ctx);
	else if (!strcmp(cmd, "quiesce"))
		ret = cmd_dev_quiesce(&ctx);
	else
		cmd_dev_help(argv[0]);

	return ret;
}