/* SPDX-License-Identifier: GPL-2.0 */
#ifndef KUBLK_INTERNAL_H
#define KUBLK_INTERNAL_H

#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <limits.h>
#include <poll.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
#include <liburing.h>
#include <semaphore.h>

/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>

#include "utils.h"

#define MAX_BACK_FILES		4

/****************** part 1: libublk ********************/

#define CTRL_DEV		"/dev/ublk-control"
#define UBLKC_DEV		"/dev/ublkc"
#define UBLKB_DEV		"/dev/ublkb"
#define UBLK_CTRL_RING_DEPTH	32
#define ERROR_EVTFD_DEVID	-2

#define UBLK_IO_MAX_BYTES	(1 << 20)
#define UBLK_MAX_QUEUES_SHIFT	5
#define UBLK_MAX_QUEUES		(1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT	5
#define UBLK_MAX_THREADS	(1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH	1024

struct ublk_dev;
struct ublk_queue;
struct ublk_thread;

struct stripe_ctx {
	/* stripe */
	unsigned int chunk_size;
};

struct fault_inject_ctx {
	/* fault_inject */
	unsigned long delay_us;
};

struct dev_ctx {
	char tgt_type[16];
	unsigned long flags;
	unsigned nr_hw_queues;
	unsigned short nthreads;
	unsigned queue_depth;
	int dev_id;
	int nr_files;
	char *files[MAX_BACK_FILES];
	unsigned int	logging:1;
	unsigned int	all:1;
	unsigned int	fg:1;
	unsigned int	recovery:1;
	unsigned int	auto_zc_fallback:1;
	unsigned int	per_io_tasks:1;
	unsigned int	no_ublk_fixed_fd:1;
	unsigned int	safe_stop:1;
	unsigned int	no_auto_part_scan:1;
	__u32		integrity_flags;
	__u8		metadata_size;
	__u8		pi_offset;
	__u8		csum_type;
	__u8		tag_size;

	int _evtfd;
	int _shmid;

	/* built from shmem, only for ublk_dump_dev() */
	struct ublk_dev *shadow_dev;

	/* for 'update_size' command */
	unsigned long long size;

	union {
		struct stripe_ctx stripe;
		struct fault_inject_ctx fault_inject;
	};
};

struct ublk_ctrl_cmd_data {
	__u32 cmd_op;
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2
	__u32 flags;

	__u64 data[2];
	__u64 addr;
	__u32 len;
};

struct ublk_io {
	char *buf_addr;
	void *integrity_buf;

#define UBLKS_IO_NEED_FETCH_RQ		(1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKS_IO_FREE			(1UL << 2)
#define UBLKS_IO_NEED_GET_DATA		(1UL << 3)
#define UBLKS_IO_NEED_REG_BUF		(1UL << 4)
	unsigned short flags;
	unsigned short refs;	/* used by target code only */

	int tag;

	int result;

	unsigned short buf_index;
	unsigned short tgt_ios;
	void *private_data;
};

struct ublk_tgt_ops {
	const char *name;
	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
	void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
			    const struct io_uring_cqe *);

	/*
	 * Target specific command line handling
	 *
	 * each option requires an argument on the target command line
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);

	/* return buffer index for UBLK_F_AUTO_BUF_REG */
	unsigned short (*buf_index)(const struct ublk_thread *t,
				    const struct ublk_queue *, int tag);
};

struct ublk_tgt {
	unsigned long dev_size;
	unsigned int sq_depth;
	unsigned int cq_depth;
	const struct ublk_tgt_ops *ops;
	struct ublk_params params;

	int nr_backing_files;
	unsigned long backing_file_size[MAX_BACK_FILES];
	char backing_file[MAX_BACK_FILES][PATH_MAX];
};

struct ublk_queue {
	int q_id;
	int q_depth;
	struct ublk_dev *dev;
	const struct ublk_tgt_ops *tgt_ops;
	struct ublksrv_io_desc *io_cmd_buf;

/* borrow three bits of ublk uapi flags, which may never be used */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK	(1ULL << 63)
#define UBLKS_Q_NO_UBLK_FIXED_FD	(1ULL << 62)
#define UBLKS_Q_PREPARED		(1ULL << 61)
	__u64 flags;
	int ublk_fd;	/* cached ublk char device fd */
	__u8 metadata_size;
	struct ublk_io ios[UBLK_QUEUE_DEPTH];

	/* used for prepping io commands */
	pthread_spinlock_t lock;
};

/* align with `ublk_elem_header` */
struct ublk_batch_elem {
	__u16 tag;
	__u16 buf_index;
	__s32 result;
	__u64 buf_addr;
};

struct batch_commit_buf {
	unsigned short q_id;
	unsigned short buf_idx;
	void *elem;
	unsigned short done;
	unsigned short count;
};

struct batch_fetch_buf {
	struct io_uring_buf_ring *br;
	void *fetch_buf;
	unsigned int fetch_buf_size;
	unsigned int fetch_buf_off;
};

struct ublk_thread {
	/* Thread-local copy of the queue-to-thread mapping for this thread */
	unsigned char q_map[UBLK_MAX_QUEUES];

	struct ublk_dev *dev;
	unsigned short idx;
	unsigned short nr_queues;

#define UBLKS_T_STOPPING	(1U << 0)
#define UBLKS_T_IDLE		(1U << 1)
#define UBLKS_T_BATCH_IO	(1U << 31)	/* readonly */
	unsigned state;
	unsigned int cmd_inflight;
	unsigned int io_inflight;

	unsigned short nr_bufs;

	/* the following fields are for BATCH_IO */
	unsigned short commit_buf_start;
	unsigned char commit_buf_elem_size;
	/*
	 * We only support a single device, so pre-calculate commit/prep flags
	 */
	unsigned short cmd_flags;
	unsigned int nr_commit_buf;
	unsigned int commit_buf_size;
	void *commit_buf;
#define UBLKS_T_COMMIT_BUF_INV_IDX	((unsigned short)-1)
	struct allocator commit_buf_alloc;
	struct batch_commit_buf *commit;
	/* FETCH_IO_CMDS buffer */
	unsigned short nr_fetch_bufs;
	struct batch_fetch_buf *fetch;

	struct io_uring ring;
};

struct ublk_dev {
	struct ublk_tgt tgt;
	struct ublksrv_ctrl_dev_info dev_info;
	struct ublk_queue q[UBLK_MAX_QUEUES];
	unsigned nthreads;
	unsigned per_io_tasks;

	int fds[MAX_BACK_FILES + 1];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;
	int ctrl_fd;
	struct io_uring ring;

	void *private_data;
};

extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);

static inline int __ublk_use_batch_io(__u64 flags)
{
	return flags & UBLK_F_BATCH_IO;
}

static inline int ublk_queue_batch_io(const struct ublk_queue *q)
{
	return __ublk_use_batch_io(q->flags);
}

static inline int ublk_dev_batch_io(const struct ublk_dev *dev)
{
	return __ublk_use_batch_io(dev->dev_info.flags);
}

/* only works when handling a single device in this pthread context */
static inline int ublk_thread_batch_io(const struct ublk_thread *t)
{
	return t->state & UBLKS_T_BATCH_IO;
}

static inline void ublk_set_integrity_params(const struct dev_ctx *ctx,
					     struct ublk_params *params)
{
	if (!ctx->metadata_size)
		return;

	params->types |= UBLK_PARAM_TYPE_INTEGRITY;
	params->integrity = (struct ublk_param_integrity) {
		.flags = ctx->integrity_flags,
		.interval_exp = params->basic.logical_bs_shift,
		.metadata_size = ctx->metadata_size,
		.pi_offset = ctx->pi_offset,
		.csum_type = ctx->csum_type,
		.tag_size = ctx->tag_size,
	};
}

static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len)
{
	/* All targets currently use interval_exp = logical_bs_shift = 9 */
	return (len >> 9) * q->metadata_size;
}

static inline size_t
ublk_integrity_data_len(const struct ublk_queue *q, size_t integrity_len)
{
	return (integrity_len / q->metadata_size) << 9;
}

static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
	return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
}

static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
{
	return UBLKSRV_IO_BUF_OFFSET +
		((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF);
}

static inline int is_target_io(__u64 user_data)
{
	return (user_data & (1ULL << 63)) != 0;
}

static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
	/* we only have 7 bits to encode q_id */
	_Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "UBLK_MAX_QUEUES_SHIFT must be <= 7");
	ublk_assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));

	return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
		(__u64)q_id << 56 | (__u64)is_target_io << 63;
}

static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return user_data & 0xffff;
}

static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (user_data >> 16) & 0xff;
}

static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
	return (user_data >> 24) & 0xffff;
}

static inline unsigned int user_data_to_q_id(__u64 user_data)
{
	return (user_data >> 56) & 0x7f;
}

static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	return _IOC_NR(op);
}

static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
	return container_of(io, struct ublk_queue, ios[io->tag]);
}

static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
		struct io_uring_sqe *sqes[], int nr_sqes)
{
	struct io_uring *ring = &t->ring;
	unsigned left = io_uring_sq_space_left(ring);
	int i;

	if (left < nr_sqes)
		io_uring_submit(ring);

	for (i = 0; i < nr_sqes; i++) {
		sqes[i] = io_uring_get_sqe(ring);
		if (!sqes[i])
			return i;
	}

	return nr_sqes;
}

static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
{
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
		if (fd_index == 0)
			/* Return the raw ublk FD for index 0 */
			return q->ublk_fd;
		/* Adjust index for backing files (index 1 becomes 0, etc.) */
		return fd_index - 1;
	}
	return fd_index;
}

static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
	int dev_fd = ublk_get_registered_fd(q, 0);

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode = IORING_OP_URING_CMD;
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
		sqe->flags &= ~IOSQE_FIXED_FILE;
	else
		sqe->flags |= IOSQE_FIXED_FILE;

	cmd->tag = tag;
	cmd->addr = index;
	cmd->q_id = q_id;
}

static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
	sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
}

static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
	sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
}

static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return (void *)&sqe->cmd;
}

static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
	q->ios[tag].result = res;
}

static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
	return q->ios[tag].result;
}

static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
	io->result = res;
}

static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
	return &q->io_cmd_buf[tag];
}

static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}

static inline unsigned short ublk_batch_io_buf_idx(
		const struct ublk_thread *t, const struct ublk_queue *q,
		unsigned tag);

static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t,
					     const struct ublk_queue *q,
					     unsigned tag)
{
	if (ublk_queue_batch_io(q))
		return ublk_batch_io_buf_idx(t, q, tag);
	return q->ios[tag].buf_index;
}

static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
	return &q->ios[tag];
}

static inline int ublk_completed_tgt_io(struct ublk_thread *t,
					struct ublk_queue *q, unsigned tag)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	t->io_inflight--;

	return --io->tgt_ios == 0;
}

static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY);
}

static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_AUTO_BUF_REG);
}

static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
{
	return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
}

static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_USER_COPY);
}

static inline int ublk_queue_no_buf(const struct ublk_queue *q)
{
	return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
}

static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb)
{
	return cb->buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX;
}

static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t,
						const struct ublk_queue *q)
{
	unsigned char idx;

	idx = t->q_map[q->q_id];
	ublk_assert(idx != 0);
	return idx - 1;
}

/*
 * Each IO's buffer index has to be calculated by this helper for
 * UBLKS_T_BATCH_IO
 */
static inline unsigned short ublk_batch_io_buf_idx(
		const struct ublk_thread *t, const struct ublk_queue *q,
		unsigned tag)
{
	return ublk_queue_idx_in_thread(t, q) * q->q_depth + tag;
}

/* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
/* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
void ublk_batch_start_fetch(struct ublk_thread *t);
/* Handle completion of batch I/O commands (prep/commit) */
void ublk_batch_compl_cmd(struct ublk_thread *t,
			  const struct io_uring_cqe *cqe);
/* Initialize batch I/O state and calculate buffer parameters */
void ublk_batch_prepare(struct ublk_thread *t);
/* Allocate and register commit buffers for batch operations */
int ublk_batch_alloc_buf(struct ublk_thread *t);
/* Free commit buffers and cleanup batch allocator */
void ublk_batch_free_buf(struct ublk_thread *t);

/* Prepare a new commit buffer for batching completed I/O operations */
void ublk_batch_prep_commit(struct ublk_thread *t);
/* Submit UBLK_U_IO_COMMIT_IO_CMDS with batched completed I/O operations */
void ublk_batch_commit_io_cmds(struct ublk_thread *t);
/* Add a completed I/O operation to the current batch commit buffer */
void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
			    unsigned tag, int res);
void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
			  int nthreads, int queues);

static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
				   unsigned tag, int res)
{
	if (ublk_queue_batch_io(q)) {
		ublk_batch_complete_io(t, q, tag, res);
		return 0;
	} else {
		struct ublk_io *io = &q->ios[tag];

		ublk_mark_io_done(io, res);
		return ublk_queue_io_cmd(t, io);
	}
}

static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
				      unsigned tag, int queued)
{
	if (queued < 0)
		ublk_complete_io(t, q, tag, queued);
	else {
		struct ublk_io *io = ublk_get_io(q, tag);

		t->io_inflight += queued;
		io->tgt_ios = queued;
		io->result = 0;
	}
}

extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;

void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct);

#endif
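
/*
 * Illustrative sketch (not part of the library): how a target's ->queue_io()
 * handler might use ublk_get_iod() and ublk_complete_io() from this header.
 * The handler name "sketch_queue_io" is hypothetical; ublksrv_get_op() and the
 * UBLK_IO_OP_* constants come from <linux/ublk_cmd.h>. A real target would
 * normally submit target SQEs tagged via build_user_data() instead of
 * completing inline, as the null target roughly does:
 *
 *	static int sketch_queue_io(struct ublk_thread *t, struct ublk_queue *q,
 *				   int tag)
 *	{
 *		const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
 *		unsigned op = ublksrv_get_op(iod);
 *		__u32 bytes = iod->nr_sectors << 9;
 *
 *		if (op != UBLK_IO_OP_READ && op != UBLK_IO_OP_WRITE)
 *			return ublk_complete_io(t, q, tag, -ENOTSUP);
 *
 *		// complete immediately with the transferred byte count
 *		return ublk_complete_io(t, q, tag, bytes);
 *	}
 */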