/* SPDX-License-Identifier: MIT */
/*
 * Description: uring_cmd based ublk
 */

#include "kublk.h"

unsigned int ublk_dbg_mask = UBLK_LOG;

static const struct ublk_tgt_ops *tgt_ops_list[] = {
	&null_tgt_ops,
	&loop_tgt_ops,
	&stripe_tgt_ops,
};

static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
	const struct ublk_tgt_ops *ops;
	int i;

	if (name == NULL)
		return NULL;

	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(ops); i++)
		if (strcmp(tgt_ops_list[i]->name, name) == 0)
			return tgt_ops_list[i];
	return NULL;
}

static inline int ublk_setup_ring(struct io_uring *r, int depth,
				  int cq_depth, unsigned flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags | IORING_SETUP_CQSIZE;
	p.cq_entries = cq_depth;

	return io_uring_queue_init_params(depth, r, &p);
}

static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
			       struct io_uring_sqe *sqe,
			       struct ublk_ctrl_cmd_data *data)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);

	sqe->fd = dev->ctrl_fd;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->ioprio = 0;

	if (data->flags & CTRL_CMD_HAS_BUF) {
		cmd->addr = data->addr;
		cmd->len = data->len;
	}

	if (data->flags & CTRL_CMD_HAS_DATA)
		cmd->data[0] = data->data[0];

	cmd->dev_id = info->dev_id;
	cmd->queue_id = -1;

	ublk_set_sqe_cmd_op(sqe, data->cmd_op);

	io_uring_sqe_set_data(sqe, cmd);
}

static int __ublk_ctrl_cmd(struct ublk_dev *dev,
			   struct ublk_ctrl_cmd_data *data)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret = -EINVAL;

	sqe = io_uring_get_sqe(&dev->ring);
	if (!sqe) {
		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
		return ret;
	}

	ublk_ctrl_init_cmd(dev, sqe, data);

	ret = io_uring_submit(&dev->ring);
	if (ret < 0) {
		ublk_err("uring submit ret %d\n", ret);
		return ret;
	}

	ret = io_uring_wait_cqe(&dev->ring, &cqe);
	if (ret < 0) {
		ublk_err("wait cqe: %s\n", strerror(-ret));
		return ret;
	}
	io_uring_cqe_seen(&dev->ring, cqe);

	return cqe->res;
}

static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_STOP_DEV,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_start_dev(struct ublk_dev *dev,
		int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_START_DEV,
		.flags = CTRL_CMD_HAS_DATA,
	};

	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_ADD_DEV,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) &dev->dev_info,
		.len = sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_del_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_DEL_DEV,
		.flags = 0,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_info(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_DEV_INFO,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) &dev->dev_info,
		.len = sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

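/*
 * SET_PARAMS/GET_PARAMS carry a struct ublk_params buffer; params->len is
 * filled in with the caller's structure size so the kernel knows how much
 * it may copy.
 */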
static int ublk_ctrl_set_params(struct ublk_dev *dev,
		struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_SET_PARAMS,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) params,
		.len = sizeof(*params),
	};

	params->len = sizeof(*params);

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_params(struct ublk_dev *dev,
		struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_PARAMS,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) params,
		.len = sizeof(*params),
	};

	params->len = sizeof(*params);

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_features(struct ublk_dev *dev,
		__u64 *features)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_GET_FEATURES,
		.flags = CTRL_CMD_HAS_BUF,
		.addr = (__u64) (uintptr_t) features,
		.len = sizeof(*features),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static const char *ublk_dev_state_desc(struct ublk_dev *dev)
{
	switch (dev->dev_info.state) {
	case UBLK_S_DEV_DEAD:
		return "DEAD";
	case UBLK_S_DEV_LIVE:
		return "LIVE";
	case UBLK_S_DEV_QUIESCED:
		return "QUIESCED";
	default:
		return "UNKNOWN";
	}
}

static void ublk_ctrl_dump(struct ublk_dev *dev)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublk_params p;
	int ret;

	ret = ublk_ctrl_get_params(dev, &p);
	if (ret < 0) {
		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
		return;
	}

	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
			info->dev_id, info->nr_hw_queues, info->queue_depth,
			1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
			info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
			ublk_dev_state_desc(dev));
	fflush(stdout);
}

static void ublk_ctrl_deinit(struct ublk_dev *dev)
{
	close(dev->ctrl_fd);
	free(dev);
}

static struct ublk_dev *ublk_ctrl_init(void)
{
	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
	struct ublksrv_ctrl_dev_info *info;
	int ret;

	if (!dev)
		return NULL;

	info = &dev->dev_info;

	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
	if (dev->ctrl_fd < 0) {
		free(dev);
		return NULL;
	}

	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;

	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
	if (ret < 0) {
		ublk_err("queue_init: %s\n", strerror(-ret));
		close(dev->ctrl_fd);
		free(dev);
		return NULL;
	}
	dev->nr_fds = 1;

	return dev;
}

static int __ublk_queue_cmd_buf_sz(unsigned depth)
{
	int size = depth * sizeof(struct ublksrv_io_desc);
	unsigned int page_sz = getpagesize();

	return round_up(size, page_sz);
}

static int ublk_queue_max_cmd_buf_sz(void)
{
	return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
}

static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
{
	return __ublk_queue_cmd_buf_sz(q->q_depth);
}

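/*
 * Per-queue resources: the mmap'ed io descriptor array shared with the
 * driver, one io_uring used for issuing URING_CMDs, and (unless zero-copy
 * is enabled) a page-aligned data buffer per tag.
 */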
static void ublk_queue_deinit(struct ublk_queue *q)
{
	int i;
	int nr_ios = q->q_depth;

	io_uring_unregister_buffers(&q->ring);

	io_uring_unregister_ring_fd(&q->ring);

	if (q->ring.ring_fd > 0) {
		io_uring_unregister_files(&q->ring);
		close(q->ring.ring_fd);
		q->ring.ring_fd = -1;
	}

	if (q->io_cmd_buf)
		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));

	for (i = 0; i < nr_ios; i++)
		free(q->ios[i].buf_addr);
}

static int ublk_queue_init(struct ublk_queue *q)
{
	struct ublk_dev *dev = q->dev;
	int depth = dev->dev_info.queue_depth;
	int i, ret = -1;
	int cmd_buf_size, io_buf_size;
	unsigned long off;
	int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;

	q->tgt_ops = dev->tgt.ops;
	q->state = 0;
	q->q_depth = depth;
	q->cmd_inflight = 0;
	q->tid = gettid();

	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
		q->state |= UBLKSRV_NO_BUF;
		q->state |= UBLKSRV_ZC;
	}

	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
	q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
	if (q->io_cmd_buf == MAP_FAILED) {
		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
				q->dev->dev_info.dev_id, q->q_id);
		goto fail;
	}

	io_buf_size = dev->dev_info.max_io_buf_bytes;
	for (i = 0; i < q->q_depth; i++) {
		q->ios[i].buf_addr = NULL;
		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;

		if (q->state & UBLKSRV_NO_BUF)
			continue;

		if (posix_memalign((void **)&q->ios[i].buf_addr,
					getpagesize(), io_buf_size)) {
			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
					dev->dev_info.dev_id, q->q_id, i);
			goto fail;
		}
	}

	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
			IORING_SETUP_COOP_TASKRUN);
	if (ret < 0) {
		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
				q->dev->dev_info.dev_id, q->q_id, ret);
		goto fail;
	}

	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
		ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
		if (ret) {
			ublk_err("ublk dev %d queue %d register sparse buffers failed %d\n",
					dev->dev_info.dev_id, q->q_id, ret);
			goto fail;
		}
	}

	io_uring_register_ring_fd(&q->ring);

	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
	if (ret) {
		ublk_err("ublk dev %d queue %d register files failed %d\n",
				q->dev->dev_info.dev_id, q->q_id, ret);
		goto fail;
	}

	return 0;
fail:
	ublk_queue_deinit(q);
	ublk_err("ublk dev %d queue %d failed\n",
			dev->dev_info.dev_id, q->q_id);
	return -ENOMEM;
}

#define WAIT_USEC	100000
#define MAX_WAIT_USEC	(3 * 1000000)
static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	int dev_id = dev->dev_info.dev_id;
	unsigned int wait_usec = 0;
	int ret = 0, fd = -1;
	char buf[64];

	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);

	while (wait_usec < MAX_WAIT_USEC) {
		fd = open(buf, O_RDWR);
		if (fd >= 0)
			break;
		usleep(WAIT_USEC);
		wait_usec += WAIT_USEC;
	}
	if (fd < 0) {
		ublk_err("can't open %s %s\n", buf, strerror(errno));
		return -1;
	}

	dev->fds[0] = fd;
	if (dev->tgt.ops->init_tgt)
		ret = dev->tgt.ops->init_tgt(ctx, dev);
	if (ret)
		close(dev->fds[0]);
	return ret;
}

static void ublk_dev_unprep(struct ublk_dev *dev)
{
	if (dev->tgt.ops->deinit_tgt)
		dev->tgt.ops->deinit_tgt(dev);
	close(dev->fds[0]);
}

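/*
 * Queue one FETCH_REQ or COMMIT_AND_FETCH_REQ uring_cmd for the given tag.
 * Returns 1 if an SQE was prepared, 0 if the io is not in a state that
 * needs issuing, and -1 if no SQE could be allocated.
 */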
int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
{
	struct ublksrv_io_cmd *cmd;
	struct io_uring_sqe *sqe[1];
	unsigned int cmd_op = 0;
	__u64 user_data;

	/* only freed io can be issued */
	if (!(io->flags & UBLKSRV_IO_FREE))
		return 0;

	/* we issue because we need either fetching or committing */
	if (!(io->flags &
		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
		return 0;

	if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
		cmd_op = UBLK_U_IO_FETCH_REQ;

	if (io_uring_sq_space_left(&q->ring) < 1)
		io_uring_submit(&q->ring);

	ublk_queue_alloc_sqes(q, sqe, 1);
	if (!sqe[0]) {
		ublk_err("%s: run out of sqe %d, tag %d\n",
				__func__, q->q_id, tag);
		return -1;
	}

	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);

	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
		cmd->result = io->result;

	/* These fields should be written once, never change */
	ublk_set_sqe_cmd_op(sqe[0], cmd_op);
	sqe[0]->fd = 0;	/* dev->fds[0] */
	sqe[0]->opcode = IORING_OP_URING_CMD;
	sqe[0]->flags = IOSQE_FIXED_FILE;
	sqe[0]->rw_flags = 0;
	cmd->tag = tag;
	cmd->q_id = q->q_id;
	if (!(q->state & UBLKSRV_NO_BUF))
		cmd->addr = (__u64) (uintptr_t) io->buf_addr;
	else
		cmd->addr = 0;

	user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
	io_uring_sqe_set_data64(sqe[0], user_data);

	io->flags = 0;

	q->cmd_inflight += 1;

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
			__func__, q->q_id, tag, cmd_op,
			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
	return 1;
}

static void ublk_submit_fetch_commands(struct ublk_queue *q)
{
	int i = 0;

	for (i = 0; i < q->q_depth; i++)
		ublk_queue_io_cmd(q, &q->ios[i], i);
}

static int ublk_queue_is_idle(struct ublk_queue *q)
{
	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
}

static int ublk_queue_is_done(struct ublk_queue *q)
{
	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
}

static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
		struct io_uring_cqe *cqe)
{
	unsigned tag = user_data_to_tag(cqe->user_data);

	if (cqe->res < 0 && cqe->res != -EAGAIN)
		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
			__func__, cqe->res, q->q_id,
			user_data_to_tag(cqe->user_data),
			user_data_to_op(cqe->user_data));

	if (q->tgt_ops->tgt_io_done)
		q->tgt_ops->tgt_io_done(q, tag, cqe);
}

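/*
 * Dispatch a completion: target io completions are forwarded to the
 * target's ->tgt_io_done(), while ublk command completions either hand
 * the request to ->queue_io() (UBLK_IO_RES_OK) or mark the io free.
 */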
static void ublk_handle_cqe(struct io_uring *r,
		struct io_uring_cqe *cqe, void *data)
{
	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
	unsigned tag = user_data_to_tag(cqe->user_data);
	unsigned cmd_op = user_data_to_op(cqe->user_data);
	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
		!(q->state & UBLKSRV_QUEUE_STOPPING);
	struct ublk_io *io;

	if (cqe->res < 0 && cqe->res != -ENODEV)
		ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
				cqe->res, cqe->user_data, q->state);

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
			__func__, cqe->res, q->q_id, tag, cmd_op,
			is_target_io(cqe->user_data),
			user_data_to_tgt_data(cqe->user_data),
			(q->state & UBLKSRV_QUEUE_STOPPING));

	/* Don't retrieve io in case of target io */
	if (is_target_io(cqe->user_data)) {
		ublksrv_handle_tgt_cqe(q, cqe);
		return;
	}

	io = &q->ios[tag];
	q->cmd_inflight--;

	if (!fetch) {
		q->state |= UBLKSRV_QUEUE_STOPPING;
		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
	}

	if (cqe->res == UBLK_IO_RES_OK) {
		assert(tag < q->q_depth);
		if (q->tgt_ops->queue_io)
			q->tgt_ops->queue_io(q, tag);
	} else {
		/*
		 * COMMIT_REQ will be completed immediately since no fetching
		 * piggyback is required.
		 *
		 * Marking IO_FREE only, then this io won't be issued since
		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
		 */
		io->flags = UBLKSRV_IO_FREE;
	}
}

static int ublk_reap_events_uring(struct io_uring *r)
{
	struct io_uring_cqe *cqe;
	unsigned head;
	int count = 0;

	io_uring_for_each_cqe(r, head, cqe) {
		ublk_handle_cqe(r, cqe, NULL);
		count += 1;
	}
	io_uring_cq_advance(r, count);

	return count;
}

static int ublk_process_io(struct ublk_queue *q)
{
	int ret, reapped;

	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
			q->dev->dev_info.dev_id,
			q->q_id, io_uring_sq_ready(&q->ring),
			q->cmd_inflight,
			(q->state & UBLKSRV_QUEUE_STOPPING));

	if (ublk_queue_is_done(q))
		return -ENODEV;

	ret = io_uring_submit_and_wait(&q->ring, 1);
	reapped = ublk_reap_events_uring(&q->ring);

	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
			(q->state & UBLKSRV_QUEUE_IDLE));

	return reapped;
}

static void *ublk_io_handler_fn(void *data)
{
	struct ublk_queue *q = data;
	int dev_id = q->dev->dev_info.dev_id;
	int ret;

	ret = ublk_queue_init(q);
	if (ret) {
		ublk_err("ublk dev %d queue %d init queue failed\n",
				dev_id, q->q_id);
		return NULL;
	}
	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
			q->tid, dev_id, q->q_id);

	/* submit all io commands to ublk driver */
	ublk_submit_fetch_commands(q);
	do {
		if (ublk_process_io(q) < 0)
			break;
	} while (1);

	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
	ublk_queue_deinit(q);
	return NULL;
}

static void ublk_set_parameters(struct ublk_dev *dev)
{
	int ret;

	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
	if (ret)
		ublk_err("dev %d set basic parameter failed %d\n",
				dev->dev_info.dev_id, ret);
}

static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
{
	uint64_t id;
	int evtfd = ctx->_evtfd;

	if (evtfd < 0)
		return -EBADF;

	if (dev_id >= 0)
		id = dev_id + 1;
	else
		id = ERROR_EVTFD_DEVID;

	if (write(evtfd, &id, sizeof(id)) != sizeof(id))
		return -EINVAL;

	return 0;
}

static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	int ret, i;
	void *thread_ret;
	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;

	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);

	ret = ublk_dev_prep(ctx, dev);
	if (ret)
		return ret;

	for (i = 0; i < dinfo->nr_hw_queues; i++) {
		dev->q[i].dev = dev;
		dev->q[i].q_id = i;
		pthread_create(&dev->q[i].thread, NULL,
				ublk_io_handler_fn,
				&dev->q[i]);
	}

	/* everything is fine now, start us */
	ublk_set_parameters(dev);
	ret = ublk_ctrl_start_dev(dev, getpid());
	if (ret < 0) {
		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
		goto fail;
	}

	ublk_ctrl_get_info(dev);
	if (ctx->fg)
		ublk_ctrl_dump(dev);
	else
		ublk_send_dev_event(ctx, dev->dev_info.dev_id);

	/* wait until we are terminated */
	for (i = 0; i < dinfo->nr_hw_queues; i++)
		pthread_join(dev->q[i].thread, &thread_ret);
fail:
	ublk_dev_unprep(dev);
	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);

	return ret;
}

static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
{
#define EV_SIZE (sizeof(struct inotify_event))
#define EV_BUF_LEN (128 * (EV_SIZE + 16))
	struct pollfd pfd;
	int fd, wd;
	int ret = -EINVAL;
	const char *dev_name = basename(path);

	fd = inotify_init();
	if (fd < 0) {
		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
		return fd;
	}

	wd = inotify_add_watch(fd, "/dev", evt_mask);
	if (wd == -1) {
		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
		goto fail;
	}

	pfd.fd = fd;
	pfd.events = POLLIN;
	while (1) {
		int i = 0;
		char buffer[EV_BUF_LEN];

		ret = poll(&pfd, 1, 1000 * timeout);
		if (ret == -1) {
			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
			goto rm_watch;
		} else if (ret == 0) {
			ublk_err("%s: poll inotify timeout\n", __func__);
			ret = -ETIMEDOUT;
			goto rm_watch;
		}

		ret = read(fd, buffer, EV_BUF_LEN);
		if (ret < 0) {
			ublk_err("%s: read inotify fd failed\n", __func__);
			goto rm_watch;
		}

		while (i < ret) {
			struct inotify_event *event = (struct inotify_event *)&buffer[i];

			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
					__func__, event->mask, event->name);
			if (event->mask & evt_mask) {
				if (!strcmp(event->name, dev_name)) {
					ret = 0;
					goto rm_watch;
				}
			}
			i += EV_SIZE + event->len;
		}
	}
rm_watch:
	inotify_rm_watch(fd, wd);
fail:
	close(fd);
	return ret;
}

static int ublk_stop_io_daemon(const struct ublk_dev *dev)
{
	int daemon_pid = dev->dev_info.ublksrv_pid;
	int dev_id = dev->dev_info.dev_id;
	char ublkc[64];
	int ret = 0;

	if (daemon_pid < 0)
		return 0;

	/* daemon may be dead already */
	if (kill(daemon_pid, 0) < 0)
		goto wait;

	snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);

	/* ublk char device may be gone already */
	if (access(ublkc, F_OK) != 0)
		goto wait;

	/* wait until the ublk char device is closed, i.e. the daemon has shut down */
	ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
	/* double check, since it may have been closed before inotify started */
	if (ret == -ETIMEDOUT)
		ret = kill(daemon_pid, 0) < 0;
wait:
	waitpid(daemon_pid, NULL, 0);
	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
			__func__, daemon_pid, dev_id, ret);

	return ret;
}

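/*
 * Add flow: allocate the control device, verify the kernel advertises
 * UBLK_F_CMD_IOCTL_ENCODE, send ADD_DEV, then run the io daemon until
 * the device is stopped.
 */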
static int __cmd_dev_add(const struct dev_ctx *ctx)
{
	unsigned nr_queues = ctx->nr_hw_queues;
	const char *tgt_type = ctx->tgt_type;
	unsigned depth = ctx->queue_depth;
	__u64 features;
	const struct ublk_tgt_ops *ops;
	struct ublksrv_ctrl_dev_info *info;
	struct ublk_dev *dev;
	int dev_id = ctx->dev_id;
	int ret, i;

	ops = ublk_find_tgt(tgt_type);
	if (!ops) {
		ublk_err("%s: no such tgt type, type %s\n",
				__func__, tgt_type);
		return -ENODEV;
	}

	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
				__func__, nr_queues, depth);
		return -EINVAL;
	}

	dev = ublk_ctrl_init();
	if (!dev) {
		ublk_err("%s: can't alloc dev id %d, type %s\n",
				__func__, dev_id, tgt_type);
		return -ENOMEM;
	}

	/* bail out if the kernel doesn't support GET_FEATURES */
	ret = ublk_ctrl_get_features(dev, &features);
	if (ret < 0) {
		ret = -EINVAL;
		goto fail;
	}

	if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) {
		ret = -ENOTSUP;
		goto fail;
	}

	info = &dev->dev_info;
	info->dev_id = ctx->dev_id;
	info->nr_hw_queues = nr_queues;
	info->queue_depth = depth;
	info->flags = ctx->flags;
	dev->tgt.ops = ops;
	dev->tgt.sq_depth = depth;
	dev->tgt.cq_depth = depth;

	for (i = 0; i < MAX_BACK_FILES; i++) {
		if (ctx->files[i]) {
			strcpy(dev->tgt.backing_file[i], ctx->files[i]);
			dev->tgt.nr_backing_files++;
		}
	}

	ret = ublk_ctrl_add_dev(dev);
	if (ret < 0) {
		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
				__func__, dev_id, tgt_type, ret);
		goto fail;
	}

	ret = ublk_start_daemon(ctx, dev);
	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
	if (ret < 0)
		ublk_ctrl_del_dev(dev);

fail:
	if (ret < 0)
		ublk_send_dev_event(ctx, -1);
	ublk_ctrl_deinit(dev);
	return ret;
}

static int __cmd_dev_list(struct dev_ctx *ctx);

static int cmd_dev_add(struct dev_ctx *ctx)
{
	int res;

	if (ctx->fg)
		goto run;

	ctx->_evtfd = eventfd(0, 0);
	if (ctx->_evtfd < 0) {
		ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
		exit(-1);
	}

	setsid();
	res = fork();
	if (res == 0) {
run:
		res = __cmd_dev_add(ctx);
		return res;
	} else if (res > 0) {
		uint64_t id;

		res = read(ctx->_evtfd, &id, sizeof(id));
		close(ctx->_evtfd);
		if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
			ctx->dev_id = id - 1;
			return __cmd_dev_list(ctx);
		}
		exit(EXIT_FAILURE);
	} else {
		return res;
	}
}

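/*
 * Deleting a device stops it first, waits for the io daemon to close the
 * ublk char device, then issues DEL_DEV.
 */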
static int __cmd_dev_del(struct dev_ctx *ctx)
{
	int number = ctx->dev_id;
	struct ublk_dev *dev;
	int ret;

	dev = ublk_ctrl_init();
	if (!dev)
		return -ENOMEM;
	dev->dev_info.dev_id = number;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0)
		goto fail;

	ret = ublk_ctrl_stop_dev(dev);
	if (ret < 0)
		ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);

	ret = ublk_stop_io_daemon(dev);
	if (ret < 0)
		ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
				__func__, dev->dev_info.ublksrv_pid, number, ret);
	ublk_ctrl_del_dev(dev);
fail:
	ublk_ctrl_deinit(dev);

	return (ret >= 0) ? 0 : ret;
}

static int cmd_dev_del(struct dev_ctx *ctx)
{
	int i;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_del(ctx);

	for (i = 0; i < 255; i++) {
		ctx->dev_id = i;
		__cmd_dev_del(ctx);
	}
	return 0;
}

static int __cmd_dev_list(struct dev_ctx *ctx)
{
	struct ublk_dev *dev = ublk_ctrl_init();
	int ret;

	if (!dev)
		return -ENODEV;

	dev->dev_info.dev_id = ctx->dev_id;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0) {
		if (ctx->logging)
			ublk_err("%s: can't get dev info from %d: %d\n",
					__func__, ctx->dev_id, ret);
	} else {
		ublk_ctrl_dump(dev);
	}

	ublk_ctrl_deinit(dev);

	return ret;
}

static int cmd_dev_list(struct dev_ctx *ctx)
{
	int i;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_list(ctx);

	ctx->logging = false;
	for (i = 0; i < 255; i++) {
		ctx->dev_id = i;
		__cmd_dev_list(ctx);
	}
	return 0;
}

static int cmd_dev_get_features(void)
{
#define const_ilog2(x) (63 - __builtin_clzll(x))
	static const char *feat_map[] = {
		[const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY",
		[const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK",
		[const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA",
		[const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY",
		[const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE",
		[const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV",
		[const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
		[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
		[const_ilog2(UBLK_F_ZONED)] = "ZONED",
		[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
	};
	struct ublk_dev *dev;
	__u64 features = 0;
	int ret;

	dev = ublk_ctrl_init();
	if (!dev) {
		fprintf(stderr, "ublk_ctrl_init failed\n");
		return -EOPNOTSUPP;
	}

	ret = ublk_ctrl_get_features(dev, &features);
	if (!ret) {
		int i;

		printf("ublk_drv features: 0x%llx\n", features);

		for (i = 0; i < sizeof(features) * 8; i++) {
			const char *feat;

			if (!((1ULL << i) & features))
				continue;
			if (i < sizeof(feat_map) / sizeof(feat_map[0]))
				feat = feat_map[i];
			else
				feat = "unknown";
			printf("\t%-20s: 0x%llx\n", feat, 1ULL << i);
		}
	}

	ublk_ctrl_deinit(dev);

	return ret;
}

static int cmd_dev_help(char *exe)
{
	printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe);
	printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
	printf("%s del [-n dev_id] -a\n", exe);
	printf("\t -a delete all devices, -n delete specified device\n");
	printf("%s list [-n dev_id] -a\n", exe);
	printf("\t -a list all devices, -n list specified device, default -a\n");
	printf("%s features\n", exe);
	return 0;
}

int main(int argc, char *argv[])
{
	static const struct option longopts[] = {
		{ "all",	0,	NULL, 'a' },
		{ "type",	1,	NULL, 't' },
		{ "number",	1,	NULL, 'n' },
		{ "queues",	1,	NULL, 'q' },
		{ "depth",	1,	NULL, 'd' },
		{ "debug_mask",	1,	NULL,  0  },
		{ "quiet",	0,	NULL,  0  },
		{ "zero_copy",	0,	NULL, 'z' },
		{ "foreground",	0,	NULL,  0  },
		{ "chunk_size",	1,	NULL,  0  },
		{ 0, 0, 0, 0 }
	};
	int option_idx, opt;
	const char *cmd = argv[1];
	struct dev_ctx ctx = {
		.queue_depth = 128,
		.nr_hw_queues = 2,
		.dev_id = -1,
		.tgt_type = "unknown",
		.chunk_size = 65536,	/* def chunk size is 64K */
	};
	int ret = -EINVAL, i;

	if (argc == 1)
		return ret;

	optind = 2;
	while ((opt = getopt_long(argc, argv, "t:n:d:q:az",
				  longopts, &option_idx)) != -1) {
		switch (opt) {
		case 'a':
			ctx.all = 1;
			break;
		case 'n':
			ctx.dev_id = strtol(optarg, NULL, 10);
			break;
		case 't':
			if (strlen(optarg) < sizeof(ctx.tgt_type))
				strcpy(ctx.tgt_type, optarg);
			break;
		case 'q':
			ctx.nr_hw_queues = strtol(optarg, NULL, 10);
			break;
		case 'd':
			ctx.queue_depth = strtol(optarg, NULL, 10);
			break;
		case 'z':
			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
			break;
		case 0:
			if (!strcmp(longopts[option_idx].name, "debug_mask"))
				ublk_dbg_mask = strtol(optarg, NULL, 16);
			if (!strcmp(longopts[option_idx].name, "quiet"))
				ublk_dbg_mask = 0;
			if (!strcmp(longopts[option_idx].name, "foreground"))
				ctx.fg = 1;
			if (!strcmp(longopts[option_idx].name, "chunk_size"))
				ctx.chunk_size = strtol(optarg, NULL, 10);
			break;
		}
	}

	i = optind;
	while (i < argc && ctx.nr_files < MAX_BACK_FILES)
		ctx.files[ctx.nr_files++] = argv[i++];

	if (!strcmp(cmd, "add"))
		ret = cmd_dev_add(&ctx);
	else if (!strcmp(cmd, "del"))
		ret = cmd_dev_del(&ctx);
	else if (!strcmp(cmd, "list")) {
		ctx.all = 1;
		ret = cmd_dev_list(&ctx);
	} else if (!strcmp(cmd, "help"))
		ret = cmd_dev_help(argv[0]);
	else if (!strcmp(cmd, "features"))
		ret = cmd_dev_get_features();
	else
		cmd_dev_help(argv[0]);

	return ret;
}