/* SPDX-License-Identifier: MIT */
/*
 * Description: uring_cmd based ublk
 */

#include "kublk.h"

#define MAX_NR_TGT_ARG 64

unsigned int ublk_dbg_mask = UBLK_LOG;
static const struct ublk_tgt_ops *tgt_ops_list[] = {
	&null_tgt_ops,
	&loop_tgt_ops,
	&stripe_tgt_ops,
	&fault_inject_tgt_ops,
};

static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
	int i;

	if (name == NULL)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
		if (strcmp(tgt_ops_list[i]->name, name) == 0)
			return tgt_ops_list[i];
	return NULL;
}

static inline int ublk_setup_ring(struct io_uring *r, int depth,
				  int cq_depth, unsigned flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags | IORING_SETUP_CQSIZE;
	p.cq_entries = cq_depth;

	return io_uring_queue_init_params(depth, r, &p);
}

static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
		struct io_uring_sqe *sqe,
		struct ublk_ctrl_cmd_data *data)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);

	sqe->fd = dev->ctrl_fd;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->ioprio = 0;

	if (data->flags & CTRL_CMD_HAS_BUF) {
		cmd->addr = data->addr;
		cmd->len = data->len;
	}

	if (data->flags & CTRL_CMD_HAS_DATA)
		cmd->data[0] = data->data[0];

	cmd->dev_id = info->dev_id;
	cmd->queue_id = -1;

	ublk_set_sqe_cmd_op(sqe, data->cmd_op);

	io_uring_sqe_set_data(sqe, cmd);
}

static int __ublk_ctrl_cmd(struct ublk_dev *dev,
		struct ublk_ctrl_cmd_data *data)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret = -EINVAL;

	sqe = io_uring_get_sqe(&dev->ring);
	if (!sqe) {
		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
		return ret;
	}

	ublk_ctrl_init_cmd(dev, sqe, data);

	ret = io_uring_submit(&dev->ring);
	if (ret < 0) {
		ublk_err("uring submit ret %d\n", ret);
		return ret;
	}

	ret = io_uring_wait_cqe(&dev->ring, &cqe);
	if (ret < 0) {
		ublk_err("wait cqe: %s\n", strerror(-ret));
		return ret;
	}
	io_uring_cqe_seen(&dev->ring, cqe);

	return cqe->res;
}

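/*
 * Each ublk_ctrl_*() helper below fills a ublk_ctrl_cmd_data and passes it
 * to __ublk_ctrl_cmd(), which encodes it as a single IORING_OP_URING_CMD
 * on the control device ring and synchronously waits for its CQE. The CQE
 * result is returned directly to the caller.
 */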
static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_STOP_DEV,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_start_dev(struct ublk_dev *dev,
		int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_START_DEV,
		.flags = CTRL_CMD_HAS_DATA,
	};

	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
		.flags = CTRL_CMD_HAS_DATA,
	};

	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_ADD_DEV,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) &dev->dev_info,
		.len = sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_del_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_DEL_DEV,
		.flags = 0,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_info(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_DEV_INFO,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) &dev->dev_info,
		.len = sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_set_params(struct ublk_dev *dev,
		struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_SET_PARAMS,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) params,
		.len = sizeof(*params),
	};
	params->len = sizeof(*params);
	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_params(struct ublk_dev *dev,
		struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_PARAMS,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64)params,
		.len = sizeof(*params),
	};

	params->len = sizeof(*params);

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_features(struct ublk_dev *dev,
		__u64 *features)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_FEATURES,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) features,
		.len = sizeof(*features),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_update_size(struct ublk_dev *dev,
		__u64 nr_sects)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_UPDATE_SIZE,
		.flags = CTRL_CMD_HAS_DATA,
	};

	data.data[0] = nr_sects;
	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev,
		unsigned int timeout_ms)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_QUIESCE_DEV,
		.flags = CTRL_CMD_HAS_DATA,
	};

	data.data[0] = timeout_ms;
	return __ublk_ctrl_cmd(dev, &data);
}

static const char *ublk_dev_state_desc(struct ublk_dev *dev)
{
	switch (dev->dev_info.state) {
	case UBLK_S_DEV_DEAD:
		return "DEAD";
	case UBLK_S_DEV_LIVE:
		return "LIVE";
	case UBLK_S_DEV_QUIESCED:
		return "QUIESCED";
	default:
		return "UNKNOWN";
	}
}

static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
{
	unsigned done = 0;
	int i;

	for (i = 0; i < CPU_SETSIZE; i++) {
		if (CPU_ISSET(i, set))
			done += snprintf(&buf[done], len - done, "%d ", i);
	}
}

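/*
 * Queue affinity handling: the kernel reports one cpu_set_t per hardware
 * queue via UBLK_U_CMD_GET_QUEUE_AFFINITY; ublk_adjust_affinity() then trims
 * the set down to its first CPU so each queue pthread is pinned to a single
 * CPU.
 */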
static void ublk_adjust_affinity(cpu_set_t *set)
{
	int j, updated = 0;

	/*
	 * Just keep the 1st CPU now.
	 *
	 * In the future, automatic affinity selection can be tried.
	 */
	for (j = 0; j < CPU_SETSIZE; j++) {
		if (CPU_ISSET(j, set)) {
			if (!updated) {
				updated = 1;
				continue;
			}
			CPU_CLR(j, set);
		}
	}
}

/* Caller must free the allocated buffer */
static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
		.flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
	};
	cpu_set_t *buf;
	int i, ret;

	buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
	if (!buf)
		return -ENOMEM;

	for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
		data.data[0] = i;
		data.len = sizeof(cpu_set_t);
		data.addr = (__u64)&buf[i];

		ret = __ublk_ctrl_cmd(ctrl_dev, &data);
		if (ret < 0) {
			free(buf);
			return ret;
		}
		ublk_adjust_affinity(&buf[i]);
	}

	*ptr_buf = buf;
	return 0;
}

static void ublk_ctrl_dump(struct ublk_dev *dev)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublk_params p;
	cpu_set_t *affinity;
	int ret;

	ret = ublk_ctrl_get_params(dev, &p);
	if (ret < 0) {
		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
		return;
	}

	ret = ublk_ctrl_get_affinity(dev, &affinity);
	if (ret < 0) {
		ublk_err("failed to get affinity %m\n");
		return;
	}

	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
			info->dev_id, info->nr_hw_queues, info->queue_depth,
			1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
			info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
			ublk_dev_state_desc(dev));

	if (affinity) {
		char buf[512];
		int i;

		for (i = 0; i < info->nr_hw_queues; i++) {
			ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
			printf("\tqueue %u: tid %d affinity(%s)\n",
					i, dev->q[i].tid, buf);
		}
		free(affinity);
	}

	fflush(stdout);
}

static void ublk_ctrl_deinit(struct ublk_dev *dev)
{
	close(dev->ctrl_fd);
	free(dev);
}

static struct ublk_dev *ublk_ctrl_init(void)
{
	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	int ret;

	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
	if (dev->ctrl_fd < 0) {
		free(dev);
		return NULL;
	}

	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;

	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
	if (ret < 0) {
		ublk_err("queue_init: %s\n", strerror(-ret));
		free(dev);
		return NULL;
	}
	dev->nr_fds = 1;

	return dev;
}

static int __ublk_queue_cmd_buf_sz(unsigned depth)
{
	int size = depth * sizeof(struct ublksrv_io_desc);
	unsigned int page_sz = getpagesize();

	return round_up(size, page_sz);
}

static int ublk_queue_max_cmd_buf_sz(void)
{
	return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
}

static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
{
	return __ublk_queue_cmd_buf_sz(q->q_depth);
}

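/*
 * Per-queue resources: every queue owns an io_uring, the mmap()ed io_cmd_buf
 * holding its ublksrv_io_desc array, and (unless zero copy or auto buffer
 * registration is used) one page-aligned data buffer per tag.
 * ublk_queue_deinit() releases all of them and is also used on the
 * ublk_queue_init() failure path.
 */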
static void ublk_queue_deinit(struct ublk_queue *q)
{
	int i;
	int nr_ios = q->q_depth;

	io_uring_unregister_buffers(&q->ring);

	io_uring_unregister_ring_fd(&q->ring);

	if (q->ring.ring_fd > 0) {
		io_uring_unregister_files(&q->ring);
		close(q->ring.ring_fd);
		q->ring.ring_fd = -1;
	}

	if (q->io_cmd_buf)
		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));

	for (i = 0; i < nr_ios; i++)
		free(q->ios[i].buf_addr);
}

static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
{
	struct ublk_dev *dev = q->dev;
	int depth = dev->dev_info.queue_depth;
	int i, ret = -1;
	int cmd_buf_size, io_buf_size;
	unsigned long off;
	int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;

	q->tgt_ops = dev->tgt.ops;
	q->state = 0;
	q->q_depth = depth;
	q->cmd_inflight = 0;
	q->tid = gettid();

	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
		q->state |= UBLKSRV_NO_BUF;
		if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
			q->state |= UBLKSRV_ZC;
		if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
			q->state |= UBLKSRV_AUTO_BUF_REG;
	}
	q->state |= extra_flags;

	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
	q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
	if (q->io_cmd_buf == MAP_FAILED) {
		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
				q->dev->dev_info.dev_id, q->q_id);
		goto fail;
	}

	io_buf_size = dev->dev_info.max_io_buf_bytes;
	for (i = 0; i < q->q_depth; i++) {
		q->ios[i].buf_addr = NULL;
		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;

		if (q->state & UBLKSRV_NO_BUF)
			continue;

		if (posix_memalign((void **)&q->ios[i].buf_addr,
					getpagesize(), io_buf_size)) {
			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
					dev->dev_info.dev_id, q->q_id, i);
			goto fail;
		}
	}

	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
			IORING_SETUP_COOP_TASKRUN |
			IORING_SETUP_SINGLE_ISSUER |
			IORING_SETUP_DEFER_TASKRUN);
	if (ret < 0) {
		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
				q->dev->dev_info.dev_id, q->q_id, ret);
		goto fail;
	}

	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
		ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
		if (ret) {
			ublk_err("ublk dev %d queue %d register sparse buffers failed %d\n",
					dev->dev_info.dev_id, q->q_id, ret);
			goto fail;
		}
	}

	io_uring_register_ring_fd(&q->ring);

	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
	if (ret) {
		ublk_err("ublk dev %d queue %d register files failed %d\n",
				q->dev->dev_info.dev_id, q->q_id, ret);
		goto fail;
	}

	return 0;
fail:
	ublk_queue_deinit(q);
	ublk_err("ublk dev %d queue %d failed\n",
			dev->dev_info.dev_id, q->q_id);
	return -ENOMEM;
}

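/*
 * The ublk char device (/dev/ublkc<N>) shows up asynchronously after
 * ADD_DEV, so ublk_dev_prep() polls for it for up to MAX_WAIT_USEC before
 * opening it and running the target's init_tgt() hook.
 */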
#define WAIT_USEC	100000
#define MAX_WAIT_USEC	(3 * 1000000)
static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	int dev_id = dev->dev_info.dev_id;
	unsigned int wait_usec = 0;
	int ret = 0, fd = -1;
	char buf[64];

	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);

	while (wait_usec < MAX_WAIT_USEC) {
		fd = open(buf, O_RDWR);
		if (fd >= 0)
			break;
		usleep(WAIT_USEC);
		wait_usec += WAIT_USEC;
	}
	if (fd < 0) {
		ublk_err("can't open %s %s\n", buf, strerror(errno));
		return -1;
	}

	dev->fds[0] = fd;
	if (dev->tgt.ops->init_tgt)
		ret = dev->tgt.ops->init_tgt(ctx, dev);

	if (ret)
		close(dev->fds[0]);
	return ret;
}

static void ublk_dev_unprep(struct ublk_dev *dev)
{
	if (dev->tgt.ops->deinit_tgt)
		dev->tgt.ops->deinit_tgt(dev);
	close(dev->fds[0]);
}

static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
		struct io_uring_sqe *sqe,
		unsigned short tag)
{
	struct ublk_auto_buf_reg buf = {};

	if (q->tgt_ops->buf_index)
		buf.index = q->tgt_ops->buf_index(q, tag);
	else
		buf.index = tag;

	if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
		buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;

	sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
}

int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
{
	struct ublksrv_io_cmd *cmd;
	struct io_uring_sqe *sqe[1];
	unsigned int cmd_op = 0;
	__u64 user_data;

	/* only freed io can be issued */
	if (!(io->flags & UBLKSRV_IO_FREE))
		return 0;

	/*
	 * We only issue the command when we need to fetch a request,
	 * commit a completion, or get data.
	 */
	if (!(io->flags &
		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
		return 0;

	if (io->flags & UBLKSRV_NEED_GET_DATA)
		cmd_op = UBLK_U_IO_NEED_GET_DATA;
	else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
		cmd_op = UBLK_U_IO_FETCH_REQ;

	if (io_uring_sq_space_left(&q->ring) < 1)
		io_uring_submit(&q->ring);

	ublk_queue_alloc_sqes(q, sqe, 1);
	if (!sqe[0]) {
		ublk_err("%s: run out of sqe %d, tag %d\n",
				__func__, q->q_id, tag);
		return -1;
	}

	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);

	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
		cmd->result = io->result;

	/* These fields should be written once, never change */
	ublk_set_sqe_cmd_op(sqe[0], cmd_op);
	sqe[0]->fd = 0;	/* dev->fds[0] */
	sqe[0]->opcode = IORING_OP_URING_CMD;
	sqe[0]->flags = IOSQE_FIXED_FILE;
	sqe[0]->rw_flags = 0;
	cmd->tag = tag;
	cmd->q_id = q->q_id;
	if (!(q->state & UBLKSRV_NO_BUF))
		cmd->addr = (__u64) (uintptr_t) io->buf_addr;
	else
		cmd->addr = 0;

	if (q->state & UBLKSRV_AUTO_BUF_REG)
		ublk_set_auto_buf_reg(q, sqe[0], tag);

	user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
	io_uring_sqe_set_data64(sqe[0], user_data);

	io->flags = 0;

	q->cmd_inflight += 1;

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
			__func__, q->q_id, tag, cmd_op,
			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
	return 1;
}

static void ublk_submit_fetch_commands(struct ublk_queue *q)
{
	int i = 0;

	for (i = 0; i < q->q_depth; i++)
		ublk_queue_io_cmd(q, &q->ios[i], i);
}

static int ublk_queue_is_idle(struct ublk_queue *q)
{
	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
}

static int ublk_queue_is_done(struct ublk_queue *q)
{
	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
}

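/*
 * CQE dispatch: target I/O completions (marked in user_data) are handed to
 * the target's tgt_io_done() callback; ublk command completions drive the
 * per-io state machine below: UBLK_IO_RES_OK queues the request to the
 * target, UBLK_IO_RES_NEED_GET_DATA re-issues the command with
 * UBLK_U_IO_NEED_GET_DATA, and anything else just marks the io free.
 */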
static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
		struct io_uring_cqe *cqe)
{
	unsigned tag = user_data_to_tag(cqe->user_data);

	if (cqe->res < 0 && cqe->res != -EAGAIN)
		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
			__func__, cqe->res, q->q_id,
			user_data_to_tag(cqe->user_data),
			user_data_to_op(cqe->user_data));

	if (q->tgt_ops->tgt_io_done)
		q->tgt_ops->tgt_io_done(q, tag, cqe);
}

static void ublk_handle_cqe(struct io_uring *r,
		struct io_uring_cqe *cqe, void *data)
{
	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
	unsigned tag = user_data_to_tag(cqe->user_data);
	unsigned cmd_op = user_data_to_op(cqe->user_data);
	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
		!(q->state & UBLKSRV_QUEUE_STOPPING);
	struct ublk_io *io;

	if (cqe->res < 0 && cqe->res != -ENODEV)
		ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
				cqe->res, cqe->user_data, q->state);

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
			__func__, cqe->res, q->q_id, tag, cmd_op,
			is_target_io(cqe->user_data),
			user_data_to_tgt_data(cqe->user_data),
			(q->state & UBLKSRV_QUEUE_STOPPING));

	/* Don't retrieve io in case of target io */
	if (is_target_io(cqe->user_data)) {
		ublksrv_handle_tgt_cqe(q, cqe);
		return;
	}

	io = &q->ios[tag];
	q->cmd_inflight--;

	if (!fetch) {
		q->state |= UBLKSRV_QUEUE_STOPPING;
		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
	}

	if (cqe->res == UBLK_IO_RES_OK) {
		assert(tag < q->q_depth);
		if (q->tgt_ops->queue_io)
			q->tgt_ops->queue_io(q, tag);
	} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
		io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
		ublk_queue_io_cmd(q, io, tag);
	} else {
		/*
		 * COMMIT_REQ will be completed immediately since no fetching
		 * piggyback is required.
		 *
		 * Marking IO_FREE only, then this io won't be issued since
		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
		 */
		io->flags = UBLKSRV_IO_FREE;
	}
}

static int ublk_reap_events_uring(struct io_uring *r)
{
	struct io_uring_cqe *cqe;
	unsigned head;
	int count = 0;

	io_uring_for_each_cqe(r, head, cqe) {
		ublk_handle_cqe(r, cqe, NULL);
		count += 1;
	}
	io_uring_cq_advance(r, count);

	return count;
}

static int ublk_process_io(struct ublk_queue *q)
{
	int ret, reapped;

	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
			q->dev->dev_info.dev_id,
			q->q_id, io_uring_sq_ready(&q->ring),
			q->cmd_inflight,
			(q->state & UBLKSRV_QUEUE_STOPPING));

	if (ublk_queue_is_done(q))
		return -ENODEV;

	ret = io_uring_submit_and_wait(&q->ring, 1);
	reapped = ublk_reap_events_uring(&q->ring);

	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
			(q->state & UBLKSRV_QUEUE_IDLE));

	return reapped;
}

static void ublk_queue_set_sched_affinity(const struct ublk_queue *q,
		cpu_set_t *cpuset)
{
	if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
		ublk_err("ublk dev %u queue %u set affinity failed\n",
				q->dev->dev_info.dev_id, q->q_id);
}

struct ublk_queue_info {
	struct ublk_queue *q;
	sem_t *queue_sem;
	cpu_set_t *affinity;
	unsigned char auto_zc_fallback;
};

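/*
 * Each hardware queue is served by its own pthread. ublk_queue_info is the
 * startup argument: it carries the queue, the semaphore used to signal that
 * queue setup has finished, and the CPU affinity to pin the thread to.
 */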
static void *ublk_io_handler_fn(void *data)
{
	struct ublk_queue_info *info = data;
	struct ublk_queue *q = info->q;
	int dev_id = q->dev->dev_info.dev_id;
	unsigned extra_flags = 0;
	int ret;

	if (info->auto_zc_fallback)
		extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;

	ret = ublk_queue_init(q, extra_flags);
	if (ret) {
		ublk_err("ublk dev %d queue %d init queue failed\n",
				dev_id, q->q_id);
		return NULL;
	}
	/* IO perf is sensitive to queue pthread affinity on NUMA machines */
	ublk_queue_set_sched_affinity(q, info->affinity);
	sem_post(info->queue_sem);

	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
			q->tid, dev_id, q->q_id);

	/* submit all io commands to ublk driver */
	ublk_submit_fetch_commands(q);
	do {
		if (ublk_process_io(q) < 0)
			break;
	} while (1);

	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
	ublk_queue_deinit(q);
	return NULL;
}

static void ublk_set_parameters(struct ublk_dev *dev)
{
	int ret;

	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
	if (ret)
		ublk_err("dev %d set basic parameter failed %d\n",
				dev->dev_info.dev_id, ret);
}

static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
{
	uint64_t id;
	int evtfd = ctx->_evtfd;

	if (evtfd < 0)
		return -EBADF;

	if (dev_id >= 0)
		id = dev_id + 1;
	else
		id = ERROR_EVTFD_DEVID;

	if (dev && ctx->shadow_dev)
		memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));

	if (write(evtfd, &id, sizeof(id)) != sizeof(id))
		return -EINVAL;

	close(evtfd);
	shmdt(ctx->shadow_dev);

	return 0;
}

static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
	struct ublk_queue_info *qinfo;
	cpu_set_t *affinity_buf;
	void *thread_ret;
	sem_t queue_sem;
	int ret, i;

	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);

	qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info),
			dinfo->nr_hw_queues);
	if (!qinfo)
		return -ENOMEM;

	sem_init(&queue_sem, 0, 0);
	ret = ublk_dev_prep(ctx, dev);
	if (ret)
		return ret;

	ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
	if (ret)
		return ret;

	for (i = 0; i < dinfo->nr_hw_queues; i++) {
		dev->q[i].dev = dev;
		dev->q[i].q_id = i;

		qinfo[i].q = &dev->q[i];
		qinfo[i].queue_sem = &queue_sem;
		qinfo[i].affinity = &affinity_buf[i];
		qinfo[i].auto_zc_fallback = ctx->auto_zc_fallback;
		pthread_create(&dev->q[i].thread, NULL,
				ublk_io_handler_fn,
				&qinfo[i]);
	}

	for (i = 0; i < dinfo->nr_hw_queues; i++)
		sem_wait(&queue_sem);
	free(qinfo);
	free(affinity_buf);

	/* everything is fine now, start us */
	if (ctx->recovery)
		ret = ublk_ctrl_end_user_recovery(dev, getpid());
	else {
		ublk_set_parameters(dev);
		ret = ublk_ctrl_start_dev(dev, getpid());
	}
	if (ret < 0) {
		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
		goto fail;
	}

	ublk_ctrl_get_info(dev);
	if (ctx->fg)
		ublk_ctrl_dump(dev);
	else
		ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);

	/* wait until we are terminated */
	for (i = 0; i < dinfo->nr_hw_queues; i++)
		pthread_join(dev->q[i].thread, &thread_ret);
fail:
	ublk_dev_unprep(dev);
	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);

	return ret;
}

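/*
 * wait_ublk_dev() watches /dev with inotify for the given event mask on the
 * named node, bounded by a timeout in seconds. ublk_stop_io_daemon() uses it
 * to wait for the ublkc node to be closed once the daemon shuts down.
 */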
static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
{
#define EV_SIZE (sizeof(struct inotify_event))
#define EV_BUF_LEN (128 * (EV_SIZE + 16))
	struct pollfd pfd;
	int fd, wd;
	int ret = -EINVAL;
	const char *dev_name = basename(path);

	fd = inotify_init();
	if (fd < 0) {
		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
		return fd;
	}

	wd = inotify_add_watch(fd, "/dev", evt_mask);
	if (wd == -1) {
		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
		goto fail;
	}

	pfd.fd = fd;
	pfd.events = POLLIN;
	while (1) {
		int i = 0;
		char buffer[EV_BUF_LEN];
		ret = poll(&pfd, 1, 1000 * timeout);

		if (ret == -1) {
			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
			goto rm_watch;
		} else if (ret == 0) {
			ublk_err("%s: poll inotify timeout\n", __func__);
			ret = -ETIMEDOUT;
			goto rm_watch;
		}

		ret = read(fd, buffer, EV_BUF_LEN);
		if (ret < 0) {
			ublk_err("%s: read inotify fd failed\n", __func__);
			goto rm_watch;
		}

		while (i < ret) {
			struct inotify_event *event = (struct inotify_event *)&buffer[i];

			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
					__func__, event->mask, event->name);
			if (event->mask & evt_mask) {
				if (!strcmp(event->name, dev_name)) {
					ret = 0;
					goto rm_watch;
				}
			}
			i += EV_SIZE + event->len;
		}
	}
rm_watch:
	inotify_rm_watch(fd, wd);
fail:
	close(fd);
	return ret;
}

static int ublk_stop_io_daemon(const struct ublk_dev *dev)
{
	int daemon_pid = dev->dev_info.ublksrv_pid;
	int dev_id = dev->dev_info.dev_id;
	char ublkc[64];
	int ret = 0;

	if (daemon_pid < 0)
		return 0;

	/* daemon may be dead already */
	if (kill(daemon_pid, 0) < 0)
		goto wait;

	snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);

	/* ublk char device may be gone already */
	if (access(ublkc, F_OK) != 0)
		goto wait;

	/* Wait until the ublk char device is closed, which happens when the daemon shuts down */
	ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
	/* double check, since it may have been closed before inotify started watching */
	if (ret == -ETIMEDOUT)
		ret = kill(daemon_pid, 0) < 0;
wait:
	waitpid(daemon_pid, NULL, 0);
	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
			__func__, daemon_pid, dev_id, ret);

	return ret;
}

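/*
 * Device add/recover flow: resolve the target ops, validate queue count and
 * depth, open the control device, require UBLK_F_CMD_IOCTL_ENCODE, then
 * issue ADD_DEV (or START_USER_RECOVERY) and run the io daemon until all
 * queue threads exit.
 */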
static int __cmd_dev_add(const struct dev_ctx *ctx)
{
	unsigned nr_queues = ctx->nr_hw_queues;
	const char *tgt_type = ctx->tgt_type;
	unsigned depth = ctx->queue_depth;
	__u64 features;
	const struct ublk_tgt_ops *ops;
	struct ublksrv_ctrl_dev_info *info;
	struct ublk_dev *dev;
	int dev_id = ctx->dev_id;
	int ret, i;

	ops = ublk_find_tgt(tgt_type);
	if (!ops) {
		ublk_err("%s: no such tgt type, type %s\n",
				__func__, tgt_type);
		return -ENODEV;
	}

	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
				__func__, nr_queues, depth);
		return -EINVAL;
	}

	dev = ublk_ctrl_init();
	if (!dev) {
		ublk_err("%s: can't alloc dev id %d, type %s\n",
				__func__, dev_id, tgt_type);
		return -ENOMEM;
	}

	/* kernel doesn't support get_features */
	ret = ublk_ctrl_get_features(dev, &features);
	if (ret < 0)
		return -EINVAL;

	if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
		return -ENOTSUP;

	info = &dev->dev_info;
	info->dev_id = ctx->dev_id;
	info->nr_hw_queues = nr_queues;
	info->queue_depth = depth;
	info->flags = ctx->flags;
	if ((features & UBLK_F_QUIESCE) &&
	    (info->flags & UBLK_F_USER_RECOVERY))
		info->flags |= UBLK_F_QUIESCE;
	dev->tgt.ops = ops;
	dev->tgt.sq_depth = depth;
	dev->tgt.cq_depth = depth;

	for (i = 0; i < MAX_BACK_FILES; i++) {
		if (ctx->files[i]) {
			strcpy(dev->tgt.backing_file[i], ctx->files[i]);
			dev->tgt.nr_backing_files++;
		}
	}

	if (ctx->recovery)
		ret = ublk_ctrl_start_user_recovery(dev);
	else
		ret = ublk_ctrl_add_dev(dev);
	if (ret < 0) {
		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
				__func__, dev_id, tgt_type, ret);
		goto fail;
	}

	ret = ublk_start_daemon(ctx, dev);
	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
	if (ret < 0)
		ublk_ctrl_del_dev(dev);

fail:
	if (ret < 0)
		ublk_send_dev_event(ctx, dev, -1);
	ublk_ctrl_deinit(dev);
	return ret;
}

static int __cmd_dev_list(struct dev_ctx *ctx);

static int cmd_dev_add(struct dev_ctx *ctx)
{
	int res;

	if (ctx->fg)
		goto run;

	ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
	if (ctx->_shmid < 0) {
		ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
		exit(-1);
	}
	ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
	if (ctx->shadow_dev == (struct ublk_dev *)-1) {
		ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
		exit(-1);
	}
	ctx->_evtfd = eventfd(0, 0);
	if (ctx->_evtfd < 0) {
		ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
		exit(-1);
	}

	res = fork();
	if (res == 0) {
		int res2;

		setsid();
		res2 = fork();
		if (res2 == 0) {
			/* prepare for detaching */
			close(STDIN_FILENO);
			close(STDOUT_FILENO);
			close(STDERR_FILENO);
run:
			res = __cmd_dev_add(ctx);
			return res;
		} else {
			/* detached from the foreground task */
			exit(EXIT_SUCCESS);
		}
	} else if (res > 0) {
		uint64_t id;
		int exit_code = EXIT_FAILURE;

		res = read(ctx->_evtfd, &id, sizeof(id));
		close(ctx->_evtfd);
		if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
			ctx->dev_id = id - 1;
			if (__cmd_dev_list(ctx) >= 0)
				exit_code = EXIT_SUCCESS;
		}
		shmdt(ctx->shadow_dev);
		shmctl(ctx->_shmid, IPC_RMID, NULL);
		/* wait for child and detach from it */
		wait(NULL);
		exit(exit_code);
	} else {
		exit(EXIT_FAILURE);
	}
}

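/*
 * Deleting a device stops it first, waits for its io daemon to go away via
 * ublk_stop_io_daemon(), and finally issues DEL_DEV. With -a and no device
 * id, cmd_dev_del() simply walks device ids 0..254.
 */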
static int __cmd_dev_del(struct dev_ctx *ctx)
{
	int number = ctx->dev_id;
	struct ublk_dev *dev;
	int ret;

	dev = ublk_ctrl_init();
	dev->dev_info.dev_id = number;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0)
		goto fail;

	ret = ublk_ctrl_stop_dev(dev);
	if (ret < 0)
		ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);

	ret = ublk_stop_io_daemon(dev);
	if (ret < 0)
		ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
				__func__, dev->dev_info.ublksrv_pid, number, ret);
	ublk_ctrl_del_dev(dev);
fail:
	ublk_ctrl_deinit(dev);

	return (ret >= 0) ? 0 : ret;
}

static int cmd_dev_del(struct dev_ctx *ctx)
{
	int i;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_del(ctx);

	for (i = 0; i < 255; i++) {
		ctx->dev_id = i;
		__cmd_dev_del(ctx);
	}
	return 0;
}

static int __cmd_dev_list(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	int ret;

	if (!dev)
		return -ENODEV;

	dev->dev_info.dev_id = ctx->dev_id;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0) {
		if (ctx->logging)
			ublk_err("%s: can't get dev info from %d: %d\n",
					__func__, ctx->dev_id, ret);
	} else {
		if (ctx->shadow_dev)
			memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));

		ublk_ctrl_dump(dev);
	}

	ublk_ctrl_deinit(dev);

	return ret;
}

static int cmd_dev_list(struct dev_ctx *ctx)
{
	int i;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_list(ctx);

	ctx->logging = false;
	for (i = 0; i < 255; i++) {
		ctx->dev_id = i;
		__cmd_dev_list(ctx);
	}
	return 0;
}

static int cmd_dev_get_features(void)
{
#define const_ilog2(x) (63 - __builtin_clzll(x))
	static const char *feat_map[] = {
		[const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY",
		[const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK",
		[const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA",
		[const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY",
		[const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE",
		[const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV",
		[const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
		[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
		[const_ilog2(UBLK_F_ZONED)] = "ZONED",
		[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
		[const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE",
		[const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
		[const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE",
	};
	struct ublk_dev *dev;
	__u64 features = 0;
	int ret;

	dev = ublk_ctrl_init();
	if (!dev) {
		fprintf(stderr, "ublk_ctrl_init failed\n");
		return -EOPNOTSUPP;
	}

	ret = ublk_ctrl_get_features(dev, &features);
	if (!ret) {
		int i;

		printf("ublk_drv features: 0x%llx\n", features);

		for (i = 0; i < sizeof(features) * 8; i++) {
			const char *feat;

			if (!((1ULL << i) & features))
				continue;
			if (i < sizeof(feat_map) / sizeof(feat_map[0]))
				feat = feat_map[i];
			else
				feat = "unknown";
			printf("\t%-20s: 0x%llx\n", feat, 1ULL << i);
		}
	}

	return ret;
}

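/*
 * update_size requires the new size (in bytes) to be aligned to the logical
 * block size and hands it to the kernel in 512-byte sectors; quiesce uses a
 * fixed 10s timeout.
 */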
static int cmd_dev_update_size(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	struct ublk_params p;
	int ret = -EINVAL;

	if (!dev)
		return -ENODEV;

	if (ctx->dev_id < 0) {
		fprintf(stderr, "device id isn't provided\n");
		goto out;
	}

	dev->dev_info.dev_id = ctx->dev_id;
	ret = ublk_ctrl_get_params(dev, &p);
	if (ret < 0) {
		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
		goto out;
	}

	if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) {
		ublk_err("size isn't aligned with logical block size\n");
		ret = -EINVAL;
		goto out;
	}

	ret = ublk_ctrl_update_size(dev, ctx->size >> 9);
out:
	ublk_ctrl_deinit(dev);
	return ret;
}

static int cmd_dev_quiesce(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	int ret = -EINVAL;

	if (!dev)
		return -ENODEV;

	if (ctx->dev_id < 0) {
		fprintf(stderr, "device id isn't provided for quiesce\n");
		goto out;
	}
	dev->dev_info.dev_id = ctx->dev_id;
	ret = ublk_ctrl_quiesce_dev(dev, 10000);

out:
	ublk_ctrl_deinit(dev);
	return ret;
}

static void __cmd_create_help(char *exe, bool recovery)
{
	int i;

	printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
			exe, recovery ? "recover" : "add");
	printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
	printf("\t[-e 0|1 ] [-i 0|1]\n");
	printf("\t[target options] [backfile1] [backfile2] ...\n");
	printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");

	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
		const struct ublk_tgt_ops *ops = tgt_ops_list[i];

		if (ops->usage)
			ops->usage(ops);
	}
}

static void cmd_add_help(char *exe)
{
	__cmd_create_help(exe, false);
	printf("\n");
}

static void cmd_recover_help(char *exe)
{
	__cmd_create_help(exe, true);
	printf("\tPlease provide exact command line for creating this device with real dev_id\n");
	printf("\n");
}

static int cmd_dev_help(char *exe)
{
	cmd_add_help(exe);
	cmd_recover_help(exe);

	printf("%s del [-n dev_id] -a \n", exe);
	printf("\t -a delete all devices -n delete specified device\n\n");
	printf("%s list [-n dev_id] -a \n", exe);
	printf("\t -a list all devices, -n list specified device, default -a \n\n");
	printf("%s features\n", exe);
	printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe);
	printf("%s quiesce -n dev_id\n", exe);
	return 0;
}

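/*
 * Command line handling: argv[1] selects the sub-command, so option parsing
 * starts at optind = 2. Unrecognized options ('?') are collected as
 * "option value" pairs and forwarded to the target's parse_cmd_line();
 * remaining positional arguments become backing files.
 */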
int main(int argc, char *argv[])
{
	static const struct option longopts[] = {
		{ "all", 0, NULL, 'a' },
		{ "type", 1, NULL, 't' },
		{ "number", 1, NULL, 'n' },
		{ "queues", 1, NULL, 'q' },
		{ "depth", 1, NULL, 'd' },
		{ "debug_mask", 1, NULL, 0 },
		{ "quiet", 0, NULL, 0 },
		{ "zero_copy", 0, NULL, 'z' },
		{ "foreground", 0, NULL, 0 },
		{ "recovery", 1, NULL, 'r' },
		{ "recovery_fail_io", 1, NULL, 'e'},
		{ "recovery_reissue", 1, NULL, 'i'},
		{ "get_data", 1, NULL, 'g'},
		{ "auto_zc", 0, NULL, 0 },
		{ "auto_zc_fallback", 0, NULL, 0 },
		{ "size", 1, NULL, 's'},
		{ 0, 0, 0, 0 }
	};
	const struct ublk_tgt_ops *ops = NULL;
	int option_idx, opt;
	const char *cmd = argv[1];
	struct dev_ctx ctx = {
		.queue_depth = 128,
		.nr_hw_queues = 2,
		.dev_id = -1,
		.tgt_type = "unknown",
	};
	int ret = -EINVAL, i;
	int tgt_argc = 1;
	char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
	int value;

	if (argc == 1)
		return ret;

	opterr = 0;
	optind = 2;
	while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz",
				  longopts, &option_idx)) != -1) {
		switch (opt) {
		case 'a':
			ctx.all = 1;
			break;
		case 'n':
			ctx.dev_id = strtol(optarg, NULL, 10);
			break;
		case 't':
			if (strlen(optarg) < sizeof(ctx.tgt_type))
				strcpy(ctx.tgt_type, optarg);
			break;
		case 'q':
			ctx.nr_hw_queues = strtol(optarg, NULL, 10);
			break;
		case 'd':
			ctx.queue_depth = strtol(optarg, NULL, 10);
			break;
		case 'z':
			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
			break;
		case 'r':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY;
			break;
		case 'e':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
			break;
		case 'i':
			value = strtol(optarg, NULL, 10);
			if (value)
				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
			break;
		case 'g':
			ctx.flags |= UBLK_F_NEED_GET_DATA;
			break;
		case 's':
			ctx.size = strtoull(optarg, NULL, 10);
			break;
		case 0:
			if (!strcmp(longopts[option_idx].name, "debug_mask"))
				ublk_dbg_mask = strtol(optarg, NULL, 16);
			if (!strcmp(longopts[option_idx].name, "quiet"))
				ublk_dbg_mask = 0;
			if (!strcmp(longopts[option_idx].name, "foreground"))
				ctx.fg = 1;
			if (!strcmp(longopts[option_idx].name, "auto_zc"))
				ctx.flags |= UBLK_F_AUTO_BUF_REG;
			if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
				ctx.auto_zc_fallback = 1;
			break;
		case '?':
			/*
			 * every target option is required to have an argument
			 */
			if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
				fprintf(stderr, "every target option requires argument: %s %s\n",
						argv[optind - 1], argv[optind]);
				exit(EXIT_FAILURE);
			}

			if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
				tgt_argv[tgt_argc++] = argv[optind - 1];
				tgt_argv[tgt_argc++] = argv[optind];
			} else {
				fprintf(stderr, "too many target options\n");
				exit(EXIT_FAILURE);
			}
			optind += 1;
			break;
		}
	}

	/* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
	if (ctx.auto_zc_fallback &&
	    !((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
	      (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
		ublk_err("%s: auto_zc_fallback is set but F_AUTO_BUF_REG and "
				"F_SUPPORT_ZERO_COPY are not both enabled\n",
				__func__);
		return -EINVAL;
	}

	i = optind;
	while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
		ctx.files[ctx.nr_files++] = argv[i++];
	}

	ops = ublk_find_tgt(ctx.tgt_type);
	if (ops && ops->parse_cmd_line) {
		optind = 0;

		tgt_argv[0] = ctx.tgt_type;
		ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
	}

	if (!strcmp(cmd, "add"))
		ret = cmd_dev_add(&ctx);
	else if (!strcmp(cmd, "recover")) {
		if (ctx.dev_id < 0) {
			fprintf(stderr, "device id isn't provided for recovering\n");
			ret = -EINVAL;
		} else {
			ctx.recovery = 1;
			ret = cmd_dev_add(&ctx);
		}
	} else if (!strcmp(cmd, "del"))
		ret = cmd_dev_del(&ctx);
	else if (!strcmp(cmd, "list")) {
		ctx.all = 1;
		ret = cmd_dev_list(&ctx);
	} else if (!strcmp(cmd, "help"))
		ret = cmd_dev_help(argv[0]);
	else if (!strcmp(cmd, "features"))
		ret = cmd_dev_get_features();
	else if (!strcmp(cmd, "update_size"))
		ret = cmd_dev_update_size(&ctx);
	else if (!strcmp(cmd, "quiesce"))
		ret = cmd_dev_quiesce(&ctx);
	else
		cmd_dev_help(argv[0]);

	return ret;
}