/* SPDX-License-Identifier: MIT */
/*
 * Description: uring_cmd based ublk
 */

#include "kublk.h"

#define MAX_NR_TGT_ARG 64

unsigned int ublk_dbg_mask = UBLK_LOG;
static const struct ublk_tgt_ops *tgt_ops_list[] = {
	&null_tgt_ops,
	&loop_tgt_ops,
	&stripe_tgt_ops,
	&fault_inject_tgt_ops,
};

static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
	int i;

	if (name == NULL)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
		if (strcmp(tgt_ops_list[i]->name, name) == 0)
			return tgt_ops_list[i];
	return NULL;
}

static inline int ublk_setup_ring(struct io_uring *r, int depth,
				  int cq_depth, unsigned flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags | IORING_SETUP_CQSIZE;
	p.cq_entries = cq_depth;

	return io_uring_queue_init_params(depth, r, &p);
}

static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
			       struct io_uring_sqe *sqe,
			       struct ublk_ctrl_cmd_data *data)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);

	sqe->fd = dev->ctrl_fd;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->ioprio = 0;

	if (data->flags & CTRL_CMD_HAS_BUF) {
		cmd->addr = data->addr;
		cmd->len = data->len;
	}

	if (data->flags & CTRL_CMD_HAS_DATA)
		cmd->data[0] = data->data[0];

	cmd->dev_id = info->dev_id;
	cmd->queue_id = -1;

	ublk_set_sqe_cmd_op(sqe, data->cmd_op);

	io_uring_sqe_set_data(sqe, cmd);
}

static int __ublk_ctrl_cmd(struct ublk_dev *dev,
			   struct ublk_ctrl_cmd_data *data)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret = -EINVAL;

	sqe = io_uring_get_sqe(&dev->ring);
	if (!sqe) {
		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
		return ret;
	}

	ublk_ctrl_init_cmd(dev, sqe, data);

	ret = io_uring_submit(&dev->ring);
	if (ret < 0) {
		ublk_err("uring submit ret %d\n", ret);
		return ret;
	}

	ret = io_uring_wait_cqe(&dev->ring, &cqe);
	if (ret < 0) {
		ublk_err("wait cqe: %s\n", strerror(-ret));
		return ret;
	}
	io_uring_cqe_seen(&dev->ring, cqe);

	return cqe->res;
}

static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_STOP_DEV,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_start_dev(struct ublk_dev *dev, int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_START_DEV,
		.flags = CTRL_CMD_HAS_DATA,
	};

	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
		.flags = CTRL_CMD_HAS_DATA,
	};

	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}

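/*
 * ADD_DEV passes the whole ublksrv_ctrl_dev_info both ways: the requested
 * queue/depth/flags go in, and the kernel is expected to fill in the
 * allocated dev_id (when it was left as -1) on completion.
 */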
static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_ADD_DEV,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) &dev->dev_info,
		.len = sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_del_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_DEL_DEV,
		.flags = 0,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_info(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_DEV_INFO,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) &dev->dev_info,
		.len = sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_set_params(struct ublk_dev *dev,
				struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_SET_PARAMS,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) params,
		.len = sizeof(*params),
	};
	params->len = sizeof(*params);
	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_params(struct ublk_dev *dev,
				struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_PARAMS,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) params,
		.len = sizeof(*params),
	};

	params->len = sizeof(*params);

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_features(struct ublk_dev *dev, __u64 *features)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_FEATURES,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) features,
		.len = sizeof(*features),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_update_size(struct ublk_dev *dev, __u64 nr_sects)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_UPDATE_SIZE,
		.flags = CTRL_CMD_HAS_DATA,
	};

	data.data[0] = nr_sects;
	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev, unsigned int timeout_ms)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_QUIESCE_DEV,
		.flags = CTRL_CMD_HAS_DATA,
	};

	data.data[0] = timeout_ms;
	return __ublk_ctrl_cmd(dev, &data);
}

static const char *ublk_dev_state_desc(struct ublk_dev *dev)
{
	switch (dev->dev_info.state) {
	case UBLK_S_DEV_DEAD:
		return "DEAD";
	case UBLK_S_DEV_LIVE:
		return "LIVE";
	case UBLK_S_DEV_QUIESCED:
		return "QUIESCED";
	default:
		return "UNKNOWN";
	}
}

static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
{
	unsigned done = 0;
	int i;

	for (i = 0; i < CPU_SETSIZE; i++) {
		if (CPU_ISSET(i, set))
			done += snprintf(&buf[done], len - done, "%d ", i);
	}
}

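/*
 * The kernel reports a full per-queue CPU mask via GET_QUEUE_AFFINITY;
 * trim it to the first CPU in the mask, which is later used to pin the
 * corresponding worker thread (see ublk_thread_set_sched_affinity()).
 */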
static void ublk_adjust_affinity(cpu_set_t *set)
{
	int j, updated = 0;

	/*
	 * Just keep the 1st CPU now.
	 *
	 * In future, auto affinity selection can be tried.
	 */
	for (j = 0; j < CPU_SETSIZE; j++) {
		if (CPU_ISSET(j, set)) {
			if (!updated) {
				updated = 1;
				continue;
			}
			CPU_CLR(j, set);
		}
	}
}

/* Caller must free the allocated buffer */
static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
		.flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
	};
	cpu_set_t *buf;
	int i, ret;

	buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
	if (!buf)
		return -ENOMEM;

	for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
		data.data[0] = i;
		data.len = sizeof(cpu_set_t);
		data.addr = (__u64) (uintptr_t) &buf[i];

		ret = __ublk_ctrl_cmd(ctrl_dev, &data);
		if (ret < 0) {
			free(buf);
			return ret;
		}
		ublk_adjust_affinity(&buf[i]);
	}

	*ptr_buf = buf;
	return 0;
}

static void ublk_ctrl_dump(struct ublk_dev *dev)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublk_params p;
	cpu_set_t *affinity;
	int ret;

	ret = ublk_ctrl_get_params(dev, &p);
	if (ret < 0) {
		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
		return;
	}

	ret = ublk_ctrl_get_affinity(dev, &affinity);
	if (ret < 0) {
		ublk_err("failed to get affinity %d %s\n", ret, strerror(-ret));
		return;
	}

	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
			info->dev_id, info->nr_hw_queues, info->queue_depth,
			1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
			info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
			ublk_dev_state_desc(dev));

	if (affinity) {
		char buf[512];
		int i;

		for (i = 0; i < info->nr_hw_queues; i++) {
			ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
			printf("\tqueue %u: affinity(%s)\n", i, buf);
		}
		free(affinity);
	}

	fflush(stdout);
}

static void ublk_ctrl_deinit(struct ublk_dev *dev)
{
	close(dev->ctrl_fd);
	free(dev);
}

static struct ublk_dev *ublk_ctrl_init(void)
{
	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
	struct ublksrv_ctrl_dev_info *info;
	int ret;

	if (!dev)
		return NULL;

	info = &dev->dev_info;
	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
	if (dev->ctrl_fd < 0) {
		free(dev);
		return NULL;
	}

	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;

	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
			      UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
	if (ret < 0) {
		ublk_err("queue_init: %s\n", strerror(-ret));
		close(dev->ctrl_fd);
		free(dev);
		return NULL;
	}
	dev->nr_fds = 1;

	return dev;
}

static int __ublk_queue_cmd_buf_sz(unsigned depth)
{
	int size = depth * sizeof(struct ublksrv_io_desc);
	unsigned int page_sz = getpagesize();

	return round_up(size, page_sz);
}

static int ublk_queue_max_cmd_buf_sz(void)
{
	return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
}

static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
{
	return __ublk_queue_cmd_buf_sz(q->q_depth);
}

static void ublk_queue_deinit(struct ublk_queue *q)
{
	int i;
	int nr_ios = q->q_depth;

	if (q->io_cmd_buf)
		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));

	for (i = 0; i < nr_ios; i++)
		free(q->ios[i].buf_addr);
}

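/*
 * Per-thread teardown: each worker owns its own io_uring, so unregister
 * the sparse buffer table, the ring fd and the fixed files before closing
 * the ring (see ublk_thread_init() for the matching setup).
 */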
static void ublk_thread_deinit(struct ublk_thread *t)
{
	io_uring_unregister_buffers(&t->ring);

	io_uring_unregister_ring_fd(&t->ring);

	if (t->ring.ring_fd > 0) {
		io_uring_unregister_files(&t->ring);
		close(t->ring.ring_fd);
		t->ring.ring_fd = -1;
	}
}

static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
{
	struct ublk_dev *dev = q->dev;
	int depth = dev->dev_info.queue_depth;
	int i;
	int cmd_buf_size, io_buf_size;
	unsigned long off;

	q->tgt_ops = dev->tgt.ops;
	q->state = 0;
	q->q_depth = depth;

	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
		q->state |= UBLKSRV_NO_BUF;
		if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
			q->state |= UBLKSRV_ZC;
		if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
			q->state |= UBLKSRV_AUTO_BUF_REG;
	}
	q->state |= extra_flags;

	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
	q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
			     MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
	if (q->io_cmd_buf == MAP_FAILED) {
		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
				q->dev->dev_info.dev_id, q->q_id);
		goto fail;
	}

	io_buf_size = dev->dev_info.max_io_buf_bytes;
	for (i = 0; i < q->q_depth; i++) {
		q->ios[i].buf_addr = NULL;
		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
		q->ios[i].tag = i;

		if (q->state & UBLKSRV_NO_BUF)
			continue;

		if (posix_memalign((void **)&q->ios[i].buf_addr,
				   getpagesize(), io_buf_size)) {
			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
					dev->dev_info.dev_id, q->q_id, i);
			goto fail;
		}
	}

	return 0;
fail:
	ublk_queue_deinit(q);
	ublk_err("ublk dev %d queue %d failed\n",
			dev->dev_info.dev_id, q->q_id);
	return -ENOMEM;
}

static int ublk_thread_init(struct ublk_thread *t)
{
	struct ublk_dev *dev = t->dev;
	int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
	int ret;

	ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth,
			      IORING_SETUP_COOP_TASKRUN |
			      IORING_SETUP_SINGLE_ISSUER |
			      IORING_SETUP_DEFER_TASKRUN);
	if (ret < 0) {
		ublk_err("ublk dev %d thread %d setup io_uring failed %d\n",
				dev->dev_info.dev_id, t->idx, ret);
		goto fail;
	}

	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
		unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues;
		unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads;

		max_nr_ios_per_thread += !!(nr_ios % dev->nthreads);
		ret = io_uring_register_buffers_sparse(&t->ring,
				max_nr_ios_per_thread);
		if (ret) {
			ublk_err("ublk dev %d thread %d register sparse buffers failed %d\n",
					dev->dev_info.dev_id, t->idx, ret);
			goto fail;
		}
	}

	io_uring_register_ring_fd(&t->ring);

	ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
	if (ret) {
		ublk_err("ublk dev %d thread %d register files failed %d\n",
				t->dev->dev_info.dev_id, t->idx, ret);
		goto fail;
	}

	return 0;
fail:
	ublk_thread_deinit(t);
	ublk_err("ublk dev %d thread %d init failed\n",
			dev->dev_info.dev_id, t->idx);
	return -ENOMEM;
}

#define WAIT_USEC 100000
#define MAX_WAIT_USEC (3 * 1000000)

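/*
 * Open the per-device char node (/dev/ublkcN). The node is created
 * asynchronously after ADD_DEV (typically by udev), so poll for up to
 * MAX_WAIT_USEC before giving up, then let the target initialize itself.
 */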
static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	int dev_id = dev->dev_info.dev_id;
	unsigned int wait_usec = 0;
	int ret = 0, fd = -1;
	char buf[64];

	snprintf(buf, sizeof(buf), "%s%d", UBLKC_DEV, dev_id);

	while (wait_usec < MAX_WAIT_USEC) {
		fd = open(buf, O_RDWR);
		if (fd >= 0)
			break;
		usleep(WAIT_USEC);
		wait_usec += WAIT_USEC;
	}
	if (fd < 0) {
		ublk_err("can't open %s %s\n", buf, strerror(errno));
		return -1;
	}

	dev->fds[0] = fd;
	if (dev->tgt.ops->init_tgt)
		ret = dev->tgt.ops->init_tgt(ctx, dev);
	if (ret)
		close(dev->fds[0]);
	return ret;
}

static void ublk_dev_unprep(struct ublk_dev *dev)
{
	if (dev->tgt.ops->deinit_tgt)
		dev->tgt.ops->deinit_tgt(dev);
	close(dev->fds[0]);
}

static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
				  struct io_uring_sqe *sqe,
				  unsigned short tag)
{
	struct ublk_auto_buf_reg buf = {};

	if (q->tgt_ops->buf_index)
		buf.index = q->tgt_ops->buf_index(q, tag);
	else
		buf.index = q->ios[tag].buf_index;

	if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
		buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;

	sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
}

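/*
 * (Re)issue the io command for one tag. The io must be FREE and need at
 * least one of FETCH_RQ, COMMIT_RQ_COMP or GET_DATA; the matching
 * FETCH_REQ / COMMIT_AND_FETCH_REQ / NEED_GET_DATA uring_cmd is queued on
 * this thread's ring. Returns 1 if an SQE was queued, 0 if there was
 * nothing to do, and -1 when no SQE could be allocated.
 */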
int ublk_queue_io_cmd(struct ublk_io *io)
{
	struct ublk_thread *t = io->t;
	struct ublk_queue *q = ublk_io_to_queue(io);
	struct ublksrv_io_cmd *cmd;
	struct io_uring_sqe *sqe[1];
	unsigned int cmd_op = 0;
	__u64 user_data;

	/* only freed io can be issued */
	if (!(io->flags & UBLKSRV_IO_FREE))
		return 0;

	/*
	 * Only issue the command when the io needs to fetch a request,
	 * commit a completion, or get data.
	 */
	if (!(io->flags &
	     (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
		return 0;

	if (io->flags & UBLKSRV_NEED_GET_DATA)
		cmd_op = UBLK_U_IO_NEED_GET_DATA;
	else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
		cmd_op = UBLK_U_IO_FETCH_REQ;

	if (io_uring_sq_space_left(&t->ring) < 1)
		io_uring_submit(&t->ring);

	ublk_io_alloc_sqes(io, sqe, 1);
	if (!sqe[0]) {
		ublk_err("%s: run out of sqe. thread %u, tag %d\n",
				__func__, t->idx, io->tag);
		return -1;
	}

	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);

	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
		cmd->result = io->result;

	/* These fields should be written once, never change */
	ublk_set_sqe_cmd_op(sqe[0], cmd_op);
	sqe[0]->fd = 0;	/* dev->fds[0] */
	sqe[0]->opcode = IORING_OP_URING_CMD;
	sqe[0]->flags = IOSQE_FIXED_FILE;
	sqe[0]->rw_flags = 0;
	cmd->tag = io->tag;
	cmd->q_id = q->q_id;
	if (!(q->state & UBLKSRV_NO_BUF))
		cmd->addr = (__u64) (uintptr_t) io->buf_addr;
	else
		cmd->addr = 0;

	if (q->state & UBLKSRV_AUTO_BUF_REG)
		ublk_set_auto_buf_reg(q, sqe[0], io->tag);

	user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0);
	io_uring_sqe_set_data64(sqe[0], user_data);

	io->flags = 0;

	t->cmd_inflight += 1;

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n",
			__func__, t->idx, q->q_id, io->tag, cmd_op,
			io->flags, !!(t->state & UBLKSRV_THREAD_STOPPING));
	return 1;
}

static void ublk_submit_fetch_commands(struct ublk_thread *t)
{
	struct ublk_queue *q;
	struct ublk_io *io;
	int i = 0, j = 0;

	if (t->dev->per_io_tasks) {
		/*
		 * Lexicographically order all the (qid,tag) pairs, with
		 * qid taking priority (so (1,0) > (0,1)). Then make
		 * this thread the daemon for every Nth entry in this
		 * list (N is the number of threads), starting at this
		 * thread's index. This ensures that each queue is
		 * handled by as many ublk server threads as possible,
		 * so that load that is concentrated on one or a few
		 * queues can make use of all ublk server threads.
		 */
		const struct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info;
		int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth;

		for (i = t->idx; i < nr_ios; i += t->dev->nthreads) {
			int q_id = i / dinfo->queue_depth;
			int tag = i % dinfo->queue_depth;

			q = &t->dev->q[q_id];
			io = &q->ios[tag];
			io->t = t;
			io->buf_index = j++;
			ublk_queue_io_cmd(io);
		}
	} else {
		/*
		 * Service exclusively the queue whose q_id matches our
		 * thread index.
		 */
		struct ublk_queue *q = &t->dev->q[t->idx];

		for (i = 0; i < q->q_depth; i++) {
			io = &q->ios[i];
			io->t = t;
			io->buf_index = i;
			ublk_queue_io_cmd(io);
		}
	}
}

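/*
 * A thread is idle once it has no SQEs waiting for submission and no
 * inflight target I/O; it is done when it is both stopping and idle.
 */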
static int ublk_thread_is_idle(struct ublk_thread *t)
{
	return !io_uring_sq_ready(&t->ring) && !t->io_inflight;
}

static int ublk_thread_is_done(struct ublk_thread *t)
{
	return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t);
}

static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
					  struct io_uring_cqe *cqe)
{
	unsigned tag = user_data_to_tag(cqe->user_data);

	if (cqe->res < 0 && cqe->res != -EAGAIN)
		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
				__func__, cqe->res, q->q_id,
				user_data_to_tag(cqe->user_data),
				user_data_to_op(cqe->user_data));

	if (q->tgt_ops->tgt_io_done)
		q->tgt_ops->tgt_io_done(q, tag, cqe);
}

static void ublk_handle_cqe(struct ublk_thread *t,
			    struct io_uring_cqe *cqe, void *data)
{
	struct ublk_dev *dev = t->dev;
	unsigned q_id = user_data_to_q_id(cqe->user_data);
	struct ublk_queue *q = &dev->q[q_id];
	unsigned tag = user_data_to_tag(cqe->user_data);
	unsigned cmd_op = user_data_to_op(cqe->user_data);
	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
		!(t->state & UBLKSRV_THREAD_STOPPING);
	struct ublk_io *io;

	if (cqe->res < 0 && cqe->res != -ENODEV)
		ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
				cqe->res, cqe->user_data, q->state);

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
			__func__, cqe->res, q->q_id, tag, cmd_op,
			is_target_io(cqe->user_data),
			user_data_to_tgt_data(cqe->user_data),
			(t->state & UBLKSRV_THREAD_STOPPING));

	/* Don't retrieve io in case of target io */
	if (is_target_io(cqe->user_data)) {
		ublksrv_handle_tgt_cqe(q, cqe);
		return;
	}

	io = &q->ios[tag];
	t->cmd_inflight--;

	if (!fetch) {
		t->state |= UBLKSRV_THREAD_STOPPING;
		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
	}

	if (cqe->res == UBLK_IO_RES_OK) {
		assert(tag < q->q_depth);
		if (q->tgt_ops->queue_io)
			q->tgt_ops->queue_io(q, tag);
	} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
		io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
		ublk_queue_io_cmd(io);
	} else {
		/*
		 * COMMIT_REQ will be completed immediately since no fetching
		 * piggyback is required.
		 *
		 * Marking IO_FREE only, then this io won't be issued since
		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
		 */
		io->flags = UBLKSRV_IO_FREE;
	}
}

static int ublk_reap_events_uring(struct ublk_thread *t)
{
	struct io_uring_cqe *cqe;
	unsigned head;
	int count = 0;

	io_uring_for_each_cqe(&t->ring, head, cqe) {
		ublk_handle_cqe(t, cqe, NULL);
		count += 1;
	}
	io_uring_cq_advance(&t->ring, count);

	return count;
}

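/*
 * One iteration of the worker loop: submit whatever is queued, block for
 * at least one completion, then drain the CQ ring. Returns the number of
 * reaped CQEs, or -ENODEV once the thread is stopping and fully idle.
 */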
static int ublk_process_io(struct ublk_thread *t)
{
	int ret, reapped;

	ublk_dbg(UBLK_DBG_THREAD, "dev%d-t%u: to_submit %d inflight cmd %u stopping %d\n",
			t->dev->dev_info.dev_id,
			t->idx, io_uring_sq_ready(&t->ring),
			t->cmd_inflight,
			(t->state & UBLKSRV_THREAD_STOPPING));

	if (ublk_thread_is_done(t))
		return -ENODEV;

	ret = io_uring_submit_and_wait(&t->ring, 1);
	reapped = ublk_reap_events_uring(t);

	ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n",
			ret, reapped, (t->state & UBLKSRV_THREAD_STOPPING),
			(t->state & UBLKSRV_THREAD_IDLE));

	return reapped;
}

static void ublk_thread_set_sched_affinity(const struct ublk_thread *t,
		cpu_set_t *cpuset)
{
	if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
		ublk_err("ublk dev %u thread %u set affinity failed\n",
				t->dev->dev_info.dev_id, t->idx);
}

struct ublk_thread_info {
	struct ublk_dev *dev;
	unsigned idx;
	sem_t *ready;
	cpu_set_t *affinity;
};

static void *ublk_io_handler_fn(void *data)
{
	struct ublk_thread_info *info = data;
	struct ublk_thread *t = &info->dev->threads[info->idx];
	int dev_id = info->dev->dev_info.dev_id;
	int ret;

	t->dev = info->dev;
	t->idx = info->idx;

	ret = ublk_thread_init(t);
	if (ret) {
		ublk_err("ublk dev %d thread %u init failed\n",
				dev_id, t->idx);
		return NULL;
	}
	/* IO performance is sensitive to worker thread CPU affinity on NUMA machines */
	if (info->affinity)
		ublk_thread_set_sched_affinity(t, info->affinity);
	sem_post(info->ready);

	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
			gettid(), dev_id, t->idx);

	/* submit all io commands to ublk driver */
	ublk_submit_fetch_commands(t);
	do {
		if (ublk_process_io(t) < 0)
			break;
	} while (1);

	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u exiting\n",
			gettid(), dev_id, t->idx);
	ublk_thread_deinit(t);
	return NULL;
}

static void ublk_set_parameters(struct ublk_dev *dev)
{
	int ret;

	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
	if (ret)
		ublk_err("dev %d set basic parameter failed %d\n",
				dev->dev_info.dev_id, ret);
}

static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
{
	uint64_t id;
	int evtfd = ctx->_evtfd;

	if (evtfd < 0)
		return -EBADF;

	if (dev_id >= 0)
		id = dev_id + 1;
	else
		id = ERROR_EVTFD_DEVID;

	if (dev && ctx->shadow_dev)
		memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));

	if (write(evtfd, &id, sizeof(id)) != sizeof(id))
		return -EINVAL;

	close(evtfd);
	shmdt(ctx->shadow_dev);

	return 0;
}

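/*
 * Daemon-side bring-up: open /dev/ublkcN, initialize all queues, start one
 * worker pthread per thread slot and wait until each has set up its ring,
 * then either START_DEV (normal path) or END_USER_RECOVERY (recovery path),
 * and finally block until the workers exit.
 */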
static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
	struct ublk_thread_info *tinfo;
	unsigned extra_flags = 0;
	cpu_set_t *affinity_buf;
	void *thread_ret;
	sem_t ready;
	int ret, i;

	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);

	tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads);
	if (!tinfo)
		return -ENOMEM;

	sem_init(&ready, 0, 0);
	ret = ublk_dev_prep(ctx, dev);
	if (ret) {
		free(tinfo);
		return ret;
	}

	ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
	if (ret) {
		free(tinfo);
		return ret;
	}

	if (ctx->auto_zc_fallback)
		extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;

	for (i = 0; i < dinfo->nr_hw_queues; i++) {
		dev->q[i].dev = dev;
		dev->q[i].q_id = i;

		ret = ublk_queue_init(&dev->q[i], extra_flags);
		if (ret) {
			ublk_err("ublk dev %d queue %d init queue failed\n",
					dinfo->dev_id, i);
			goto fail;
		}
	}

	for (i = 0; i < dev->nthreads; i++) {
		tinfo[i].dev = dev;
		tinfo[i].idx = i;
		tinfo[i].ready = &ready;

		/*
		 * If threads are not tied 1:1 to queues, setting thread
		 * affinity based on queue affinity makes little sense.
		 * However, thread CPU affinity has significant impact
		 * on performance, so to compare fairly, we'll still set
		 * thread CPU affinity based on queue affinity where
		 * possible.
		 */
		if (dev->nthreads == dinfo->nr_hw_queues)
			tinfo[i].affinity = &affinity_buf[i];
		pthread_create(&dev->threads[i].thread, NULL,
				ublk_io_handler_fn,
				&tinfo[i]);
	}

	for (i = 0; i < dev->nthreads; i++)
		sem_wait(&ready);
	free(tinfo);
	free(affinity_buf);

	/* everything is fine now, start us */
	if (ctx->recovery)
		ret = ublk_ctrl_end_user_recovery(dev, getpid());
	else {
		ublk_set_parameters(dev);
		ret = ublk_ctrl_start_dev(dev, getpid());
	}
	if (ret < 0) {
		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
		goto fail;
	}

	ublk_ctrl_get_info(dev);
	if (ctx->fg)
		ublk_ctrl_dump(dev);
	else
		ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);

	/* wait until we are terminated */
	for (i = 0; i < dev->nthreads; i++)
		pthread_join(dev->threads[i].thread, &thread_ret);
fail:
	for (i = 0; i < dinfo->nr_hw_queues; i++)
		ublk_queue_deinit(&dev->q[i]);
	ublk_dev_unprep(dev);
	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);

	return ret;
}

static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
{
#define EV_SIZE (sizeof(struct inotify_event))
#define EV_BUF_LEN (128 * (EV_SIZE + 16))
	struct pollfd pfd;
	int fd, wd;
	int ret = -EINVAL;
	const char *dev_name = basename(path);

	fd = inotify_init();
	if (fd < 0) {
		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
		return fd;
	}

	wd = inotify_add_watch(fd, "/dev", evt_mask);
	if (wd == -1) {
		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
		goto fail;
	}

	pfd.fd = fd;
	pfd.events = POLLIN;
	while (1) {
		int i = 0;
		char buffer[EV_BUF_LEN];

		ret = poll(&pfd, 1, 1000 * timeout);
		if (ret == -1) {
			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
			goto rm_watch;
		} else if (ret == 0) {
			ublk_err("%s: poll inotify timeout\n", __func__);
			ret = -ETIMEDOUT;
			goto rm_watch;
		}

		ret = read(fd, buffer, EV_BUF_LEN);
		if (ret < 0) {
			ublk_err("%s: read inotify fd failed\n", __func__);
			goto rm_watch;
		}

		while (i < ret) {
			struct inotify_event *event = (struct inotify_event *)&buffer[i];

			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
					__func__, event->mask, event->name);
			if (event->mask & evt_mask) {
				if (!strcmp(event->name, dev_name)) {
					ret = 0;
					goto rm_watch;
				}
			}
			i += EV_SIZE + event->len;
		}
	}
rm_watch:
	inotify_rm_watch(fd, wd);
fail:
	close(fd);
	return ret;
}

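/*
 * After STOP_DEV the daemon is expected to exit on its own; wait (via
 * inotify) for it to close /dev/ublkcN, fall back to a liveness check on
 * timeout, and finally reap the daemon process with waitpid().
 */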
static int ublk_stop_io_daemon(const struct ublk_dev *dev)
{
	int daemon_pid = dev->dev_info.ublksrv_pid;
	int dev_id = dev->dev_info.dev_id;
	char ublkc[64];
	int ret = 0;

	if (daemon_pid < 0)
		return 0;

	/* daemon may be dead already */
	if (kill(daemon_pid, 0) < 0)
		goto wait;

	snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);

	/* ublk char device may be gone already */
	if (access(ublkc, F_OK) != 0)
		goto wait;

	/* Wait until the ublk char device is closed, i.e. the daemon has shut down */
	ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
	/* double check, since the device may have been closed before inotify started */
	if (ret == -ETIMEDOUT)
		ret = kill(daemon_pid, 0) < 0;
wait:
	waitpid(daemon_pid, NULL, 0);
	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
			__func__, daemon_pid, dev_id, ret);

	return ret;
}

static int __cmd_dev_add(const struct dev_ctx *ctx)
{
	unsigned nthreads = ctx->nthreads;
	unsigned nr_queues = ctx->nr_hw_queues;
	const char *tgt_type = ctx->tgt_type;
	unsigned depth = ctx->queue_depth;
	__u64 features;
	const struct ublk_tgt_ops *ops;
	struct ublksrv_ctrl_dev_info *info;
	struct ublk_dev *dev = NULL;
	int dev_id = ctx->dev_id;
	int ret, i;

	ops = ublk_find_tgt(tgt_type);
	if (!ops) {
		ublk_err("%s: no such tgt type, type %s\n",
				__func__, tgt_type);
		ret = -ENODEV;
		goto fail;
	}

	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
		ublk_err("%s: invalid nr_queues %u or depth %u\n",
				__func__, nr_queues, depth);
		ret = -EINVAL;
		goto fail;
	}

	/* default to 1:1 threads:queues if nthreads is unspecified */
	if (!nthreads)
		nthreads = nr_queues;

	if (nthreads > UBLK_MAX_THREADS) {
		ublk_err("%s: %u is too many threads (max %u)\n",
				__func__, nthreads, UBLK_MAX_THREADS);
		ret = -EINVAL;
		goto fail;
	}

	if (nthreads != nr_queues && !ctx->per_io_tasks) {
		ublk_err("%s: threads %u must be the same as queues %u if "
				"not using per_io_tasks\n",
				__func__, nthreads, nr_queues);
		ret = -EINVAL;
		goto fail;
	}

	dev = ublk_ctrl_init();
	if (!dev) {
		ublk_err("%s: can't alloc dev id %d, type %s\n",
				__func__, dev_id, tgt_type);
		ret = -ENOMEM;
		goto fail;
	}

	/* fail if the kernel doesn't support GET_FEATURES */
	ret = ublk_ctrl_get_features(dev, &features);
	if (ret < 0) {
		ret = -EINVAL;
		goto fail;
	}

	if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) {
		ret = -ENOTSUP;
		goto fail;
	}

	info = &dev->dev_info;
	info->dev_id = ctx->dev_id;
	info->nr_hw_queues = nr_queues;
	info->queue_depth = depth;
	info->flags = ctx->flags;
	if ((features & UBLK_F_QUIESCE) &&
	    (info->flags & UBLK_F_USER_RECOVERY))
		info->flags |= UBLK_F_QUIESCE;
	dev->nthreads = nthreads;
	dev->per_io_tasks = ctx->per_io_tasks;
	dev->tgt.ops = ops;
	dev->tgt.sq_depth = depth;
	dev->tgt.cq_depth = depth;

	for (i = 0; i < MAX_BACK_FILES; i++) {
		if (ctx->files[i]) {
			strcpy(dev->tgt.backing_file[i], ctx->files[i]);
			dev->tgt.nr_backing_files++;
		}
	}

	if (ctx->recovery)
		ret = ublk_ctrl_start_user_recovery(dev);
	else
		ret = ublk_ctrl_add_dev(dev);
	if (ret < 0) {
		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
				__func__, dev_id, tgt_type, ret);
		goto fail;
	}

	ret = ublk_start_daemon(ctx, dev);
	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
	if (ret < 0)
		ublk_ctrl_del_dev(dev);

fail:
	if (ret < 0)
		ublk_send_dev_event(ctx, dev, -1);
	if (dev)
		ublk_ctrl_deinit(dev);
	return ret;
}

static int __cmd_dev_list(struct dev_ctx *ctx);

static int cmd_dev_add(struct dev_ctx *ctx)
{
	int res;

	if (ctx->fg)
		goto run;

	ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
	if (ctx->_shmid < 0) {
		ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
		exit(-1);
	}
	ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
	if (ctx->shadow_dev == (struct ublk_dev *)-1) {
		ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
		exit(-1);
	}
	ctx->_evtfd = eventfd(0, 0);
	if (ctx->_evtfd < 0) {
		ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
		exit(-1);
	}

	res = fork();
	if (res == 0) {
		int res2;

		setsid();
		res2 = fork();
		if (res2 == 0) {
			/* prepare for detaching */
			close(STDIN_FILENO);
			close(STDOUT_FILENO);
			close(STDERR_FILENO);
run:
			res = __cmd_dev_add(ctx);
			return res;
		} else {
			/* detached from the foreground task */
			exit(EXIT_SUCCESS);
		}
	} else if (res > 0) {
		uint64_t id;
		int exit_code = EXIT_FAILURE;

		res = read(ctx->_evtfd, &id, sizeof(id));
		close(ctx->_evtfd);
		if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
			ctx->dev_id = id - 1;
			if (__cmd_dev_list(ctx) >= 0)
				exit_code = EXIT_SUCCESS;
		}
		shmdt(ctx->shadow_dev);
		shmctl(ctx->_shmid, IPC_RMID, NULL);
		/* wait for child and detach from it */
		wait(NULL);
		if (exit_code == EXIT_FAILURE)
			ublk_err("%s: command failed\n", __func__);
		exit(exit_code);
	} else {
		exit(EXIT_FAILURE);
	}
}

static int __cmd_dev_del(struct dev_ctx *ctx)
{
	int number = ctx->dev_id;
	struct ublk_dev *dev;
	int ret;

	dev = ublk_ctrl_init();
	if (!dev)
		return -ENODEV;

	dev->dev_info.dev_id = number;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0)
		goto fail;

	ret = ublk_ctrl_stop_dev(dev);
	if (ret < 0)
		ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);

	ret = ublk_stop_io_daemon(dev);
	if (ret < 0)
		ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
				__func__, dev->dev_info.ublksrv_pid, number, ret);
	ublk_ctrl_del_dev(dev);
fail:
	ublk_ctrl_deinit(dev);

	return (ret >= 0) ? 0 : ret;
}

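/* With -a (and no -n), try every possible dev id; per-device failures are ignored */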
static int cmd_dev_del(struct dev_ctx *ctx)
{
	int i;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_del(ctx);

	for (i = 0; i < 255; i++) {
		ctx->dev_id = i;
		__cmd_dev_del(ctx);
	}
	return 0;
}

static int __cmd_dev_list(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	int ret;

	if (!dev)
		return -ENODEV;

	dev->dev_info.dev_id = ctx->dev_id;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0) {
		if (ctx->logging)
			ublk_err("%s: can't get dev info from %d: %d\n",
					__func__, ctx->dev_id, ret);
	} else {
		if (ctx->shadow_dev)
			memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));

		ublk_ctrl_dump(dev);
	}

	ublk_ctrl_deinit(dev);

	return ret;
}

static int cmd_dev_list(struct dev_ctx *ctx)
{
	int i;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_list(ctx);

	ctx->logging = false;
	for (i = 0; i < 255; i++) {
		ctx->dev_id = i;
		__cmd_dev_list(ctx);
	}
	return 0;
}

static int cmd_dev_get_features(void)
{
#define const_ilog2(x) (63 - __builtin_clzll(x))
	static const char *feat_map[] = {
		[const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY",
		[const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK",
		[const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA",
		[const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY",
		[const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE",
		[const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV",
		[const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
		[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
		[const_ilog2(UBLK_F_ZONED)] = "ZONED",
		[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
		[const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE",
		[const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
		[const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE",
		[const_ilog2(UBLK_F_PER_IO_DAEMON)] = "PER_IO_DAEMON",
	};
	struct ublk_dev *dev;
	__u64 features = 0;
	int ret;

	dev = ublk_ctrl_init();
	if (!dev) {
		fprintf(stderr, "ublk_ctrl_init failed\n");
		return -EOPNOTSUPP;
	}

	ret = ublk_ctrl_get_features(dev, &features);
	if (!ret) {
		int i;

		printf("ublk_drv features: 0x%llx\n", features);

		for (i = 0; i < sizeof(features) * 8; i++) {
			const char *feat;

			if (!((1ULL << i) & features))
				continue;
			if (i < sizeof(feat_map) / sizeof(feat_map[0]))
				feat = feat_map[i];
			else
				feat = "unknown";
			printf("\t%-20s: 0x%llx\n", feat, 1ULL << i);
		}
	}

	ublk_ctrl_deinit(dev);

	return ret;
}

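/*
 * UPDATE_SIZE takes the new capacity in 512-byte sectors, hence the >> 9
 * below; the byte size passed via --size must be aligned to the device's
 * logical block size.
 */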
static int cmd_dev_update_size(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	struct ublk_params p;
	int ret = -EINVAL;

	if (!dev)
		return -ENODEV;

	if (ctx->dev_id < 0) {
		fprintf(stderr, "device id isn't provided\n");
		goto out;
	}

	dev->dev_info.dev_id = ctx->dev_id;
	ret = ublk_ctrl_get_params(dev, &p);
	if (ret < 0) {
		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
		goto out;
	}

	if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) {
		ublk_err("size isn't aligned with logical block size\n");
		ret = -EINVAL;
		goto out;
	}

	ret = ublk_ctrl_update_size(dev, ctx->size >> 9);
out:
	ublk_ctrl_deinit(dev);
	return ret;
}

static int cmd_dev_quiesce(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	int ret = -EINVAL;

	if (!dev)
		return -ENODEV;

	if (ctx->dev_id < 0) {
		fprintf(stderr, "device id isn't provided for quiesce\n");
		goto out;
	}
	dev->dev_info.dev_id = ctx->dev_id;
	ret = ublk_ctrl_quiesce_dev(dev, 10000);

out:
	ublk_ctrl_deinit(dev);
	return ret;
}

static void __cmd_create_help(char *exe, bool recovery)
{
	int i;

	printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
			exe, recovery ? "recover" : "add");
	printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1] [-g]\n");
	printf("\t[-e 0|1] [-i 0|1]\n");
	printf("\t[--nthreads threads] [--per_io_tasks]\n");
	printf("\t[target options] [backfile1] [backfile2] ...\n");
	printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
	printf("\tdefault: nthreads=nr_queues\n");

	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
		const struct ublk_tgt_ops *ops = tgt_ops_list[i];

		if (ops->usage)
			ops->usage(ops);
	}
}

static void cmd_add_help(char *exe)
{
	__cmd_create_help(exe, false);
	printf("\n");
}

static void cmd_recover_help(char *exe)
{
	__cmd_create_help(exe, true);
	printf("\tPlease provide the exact command line used to create this device, with the real dev_id\n");
	printf("\n");
}

static int cmd_dev_help(char *exe)
{
	cmd_add_help(exe);
	cmd_recover_help(exe);

	printf("%s del [-n dev_id] -a\n", exe);
	printf("\t -a delete all devices, -n delete specified device\n\n");
	printf("%s list [-n dev_id] -a\n", exe);
	printf("\t -a list all devices, -n list specified device, default -a\n\n");
	printf("%s features\n", exe);
	printf("%s update_size -n dev_id -s|--size size_in_bytes\n", exe);
	printf("%s quiesce -n dev_id\n", exe);
	return 0;
}

int main(int argc, char *argv[])
{
	static const struct option longopts[] = {
		{ "all", 0, NULL, 'a' },
		{ "type", 1, NULL, 't' },
		{ "number", 1, NULL, 'n' },
		{ "queues", 1, NULL, 'q' },
		{ "depth", 1, NULL, 'd' },
		{ "debug_mask", 1, NULL, 0 },
		{ "quiet", 0, NULL, 0 },
		{ "zero_copy", 0, NULL, 'z' },
		{ "foreground", 0, NULL, 0 },
		{ "recovery", 1, NULL, 'r' },
		{ "recovery_fail_io", 1, NULL, 'e' },
		{ "recovery_reissue", 1, NULL, 'i' },
		{ "get_data", 1, NULL, 'g' },
		{ "auto_zc", 0, NULL, 0 },
		{ "auto_zc_fallback", 0, NULL, 0 },
		{ "size", 1, NULL, 's' },
		{ "nthreads", 1, NULL, 0 },
		{ "per_io_tasks", 0, NULL, 0 },
		{ 0, 0, 0, 0 }
	};
	const struct ublk_tgt_ops *ops = NULL;
	int option_idx, opt;
	const char *cmd = argv[1];
	struct dev_ctx ctx = {
		.queue_depth = 128,
		.nr_hw_queues = 2,
		.dev_id = -1,
		.tgt_type = "unknown",
	};
	int ret = -EINVAL, i;
	int tgt_argc = 1;
	char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
	int value;

	if (argc == 1)
		return ret;

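	/*
	 * argv[1] is the subcommand, so option parsing starts at optind 2.
	 * opterr is cleared because unrecognized options are not errors
	 * here: they are collected below (case '?') and handed to the
	 * target's own command line parser.
	 */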
	opterr = 0;
	optind = 2;
	while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz",
				  longopts, &option_idx)) != -1) {
		switch (opt) {
		case 'a':
			ctx.all = 1;
			break;
		case 'n':
			ctx.dev_id = strtol(optarg, NULL, 10);
			break;
		case 't':
			if (strlen(optarg) < sizeof(ctx.tgt_type))
				strcpy(ctx.tgt_type, optarg);
			break;
		case 'q':
			ctx.nr_hw_queues = strtol(optarg, NULL, 10);
			break;
		case 'd':
			ctx.queue_depth = strtol(optarg, NULL, 10);
			break;
		case 'z':
			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
			break;
		case 'r':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY;
			break;
		case 'e':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
			break;
		case 'i':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
			break;
		case 'g':
			ctx.flags |= UBLK_F_NEED_GET_DATA;
			break;
		case 's':
			ctx.size = strtoull(optarg, NULL, 10);
			break;
		case 0:
			if (!strcmp(longopts[option_idx].name, "debug_mask"))
				ublk_dbg_mask = strtol(optarg, NULL, 16);
			if (!strcmp(longopts[option_idx].name, "quiet"))
				ublk_dbg_mask = 0;
			if (!strcmp(longopts[option_idx].name, "foreground"))
				ctx.fg = 1;
			if (!strcmp(longopts[option_idx].name, "auto_zc"))
				ctx.flags |= UBLK_F_AUTO_BUF_REG;
			if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
				ctx.auto_zc_fallback = 1;
			if (!strcmp(longopts[option_idx].name, "nthreads"))
				ctx.nthreads = strtol(optarg, NULL, 10);
			if (!strcmp(longopts[option_idx].name, "per_io_tasks"))
				ctx.per_io_tasks = 1;
			break;
		case '?':
			/*
			 * Every target option is expected to take an argument,
			 * so it must appear here as "--opt value".
			 */
			if (optind >= argc || argv[optind][0] == '-' ||
			    argv[optind - 1][0] != '-') {
				fprintf(stderr, "every target option requires an argument: %s\n",
						argv[optind - 1]);
				exit(EXIT_FAILURE);
			}

			if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
				tgt_argv[tgt_argc++] = argv[optind - 1];
				tgt_argv[tgt_argc++] = argv[optind];
			} else {
				fprintf(stderr, "too many target options\n");
				exit(EXIT_FAILURE);
			}
			optind += 1;
			break;
		}
	}

	/* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
	if (ctx.auto_zc_fallback &&
	    !((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
	      (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
		ublk_err("%s: auto_zc_fallback requires both F_AUTO_BUF_REG "
				"and F_SUPPORT_ZERO_COPY to be enabled\n",
				__func__);
		return -EINVAL;
	}

	i = optind;
	while (i < argc && ctx.nr_files < MAX_BACK_FILES)
		ctx.files[ctx.nr_files++] = argv[i++];

	ops = ublk_find_tgt(ctx.tgt_type);
	if (ops && ops->parse_cmd_line) {
		optind = 0;

		tgt_argv[0] = ctx.tgt_type;
		ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
	}

	if (!strcmp(cmd, "add"))
		ret = cmd_dev_add(&ctx);
	else if (!strcmp(cmd, "recover")) {
		if (ctx.dev_id < 0) {
			fprintf(stderr, "device id isn't provided for recovery\n");
			ret = -EINVAL;
		} else {
			ctx.recovery = 1;
			ret = cmd_dev_add(&ctx);
		}
	} else if (!strcmp(cmd, "del"))
		ret = cmd_dev_del(&ctx);
	else if (!strcmp(cmd, "list")) {
		ctx.all = 1;
		ret = cmd_dev_list(&ctx);
	} else if (!strcmp(cmd, "help"))
		ret = cmd_dev_help(argv[0]);
	else if (!strcmp(cmd, "features"))
		ret = cmd_dev_get_features();
	else if (!strcmp(cmd, "update_size"))
		ret = cmd_dev_update_size(&ctx);
	else if (!strcmp(cmd, "quiesce"))
		ret = cmd_dev_quiesce(&ctx);
	else
		cmd_dev_help(argv[0]);

	return ret;
}