/* SPDX-License-Identifier: GPL-2.0 */
#ifndef KUBLK_INTERNAL_H
#define KUBLK_INTERNAL_H

#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <limits.h>
#include <poll.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
#include <liburing.h>
#include <semaphore.h>

/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>

#define __maybe_unused __attribute__((unused))
#define MAX_BACK_FILES	4
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))

/****************** part 1: libublk ********************/

#define CTRL_DEV		"/dev/ublk-control"
#define UBLKC_DEV		"/dev/ublkc"
#define UBLKB_DEV		"/dev/ublkb"
#define UBLK_CTRL_RING_DEPTH	32
#define ERROR_EVTFD_DEVID	(-2)

/* queue idle timeout */
#define UBLKSRV_IO_IDLE_SECS	20

#define UBLK_IO_MAX_BYTES	(1 << 20)
#define UBLK_MAX_QUEUES_SHIFT	5
#define UBLK_MAX_QUEUES		(1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT	5
#define UBLK_MAX_THREADS	(1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH	1024

#define UBLK_DBG_DEV		(1U << 0)
#define UBLK_DBG_THREAD		(1U << 1)
#define UBLK_DBG_IO_CMD		(1U << 2)
#define UBLK_DBG_IO		(1U << 3)
#define UBLK_DBG_CTRL_CMD	(1U << 4)
#define UBLK_LOG		(1U << 5)

struct ublk_dev;
struct ublk_queue;
struct ublk_thread;

struct stripe_ctx {
	/* stripe */
	unsigned int chunk_size;
};

struct fault_inject_ctx {
	/* fault_inject */
	unsigned long delay_us;
};

struct dev_ctx {
	char tgt_type[16];
	unsigned long flags;
	unsigned nr_hw_queues;
	unsigned short nthreads;
	unsigned queue_depth;
	int dev_id;
	int nr_files;
	char *files[MAX_BACK_FILES];
	unsigned int logging:1;
	unsigned int all:1;
	unsigned int fg:1;
	unsigned int recovery:1;
	unsigned int auto_zc_fallback:1;
	unsigned int per_io_tasks:1;

	int _evtfd;
	int _shmid;

	/* built from shmem, only for ublk_dump_dev() */
	struct ublk_dev *shadow_dev;

	/* for 'update_size' command */
	unsigned long long size;

	union {
		struct stripe_ctx stripe;
		struct fault_inject_ctx fault_inject;
	};
};

struct ublk_ctrl_cmd_data {
	__u32 cmd_op;
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2
	__u32 flags;

	__u64 data[2];
	__u64 addr;
	__u32 len;
};

struct ublk_io {
	char *buf_addr;

#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKSRV_IO_FREE			(1UL << 2)
#define UBLKSRV_NEED_GET_DATA		(1UL << 3)
#define UBLKSRV_NEED_REG_BUF		(1UL << 4)
	unsigned short flags;
	unsigned short refs;	/* used by target code only */

	int tag;

	int result;

	unsigned short buf_index;
	unsigned short tgt_ios;
	void *private_data;
	struct ublk_thread *t;
};

struct ublk_tgt_ops {
	const char *name;
	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	int (*queue_io)(struct ublk_queue *, int tag);
	void (*tgt_io_done)(struct ublk_queue *,
			int tag, const struct io_uring_cqe *);

	/*
	 * Target-specific command line handling
	 *
	 * Each option requires an argument on the target command line.
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);

	/* return buffer index for UBLK_F_AUTO_BUF_REG */
	unsigned short (*buf_index)(const struct ublk_queue *, int tag);
};
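
/*
 * Editor's illustrative sketch (not part of the build): a trivial
 * discard-everything target could wire up the mandatory callbacks as
 * below; "example_init_tgt", "example_queue_io" and "example_tgt_ops"
 * are hypothetical names, in the spirit of the null target:
 *
 *	static int example_init_tgt(const struct dev_ctx *ctx,
 *				    struct ublk_dev *dev)
 *	{
 *		dev->tgt.dev_size = 1UL << 30;
 *		dev->tgt.params.types = UBLK_PARAM_TYPE_BASIC;
 *		dev->tgt.params.basic.logical_bs_shift = 9;
 *		dev->tgt.params.basic.dev_sectors = dev->tgt.dev_size >> 9;
 *		return 0;
 *	}
 *
 *	static int example_queue_io(struct ublk_queue *q, int tag)
 *	{
 *		const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
 *
 *		(pretend the data transfer already happened, then complete
 *		 with the requested byte count)
 *		ublk_complete_io(q, tag, iod->nr_sectors << 9);
 *		return 0;
 *	}
 *
 *	static const struct ublk_tgt_ops example_tgt_ops = {
 *		.name		= "example",
 *		.init_tgt	= example_init_tgt,
 *		.queue_io	= example_queue_io,
 *	};
 */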

struct ublk_tgt {
	unsigned long dev_size;
	unsigned int sq_depth;
	unsigned int cq_depth;
	const struct ublk_tgt_ops *ops;
	struct ublk_params params;

	int nr_backing_files;
	unsigned long backing_file_size[MAX_BACK_FILES];
	char backing_file[MAX_BACK_FILES][PATH_MAX];
};

struct ublk_queue {
	int q_id;
	int q_depth;
	struct ublk_dev *dev;
	const struct ublk_tgt_ops *tgt_ops;
	struct ublksrv_io_desc *io_cmd_buf;

	struct ublk_io ios[UBLK_QUEUE_DEPTH];
#define UBLKSRV_NO_BUF			(1U << 2)
#define UBLKSRV_ZC			(1U << 3)
#define UBLKSRV_AUTO_BUF_REG		(1U << 4)
#define UBLKSRV_AUTO_BUF_REG_FALLBACK	(1U << 5)
	unsigned state;
};

struct ublk_thread {
	struct ublk_dev *dev;
	struct io_uring ring;
	unsigned int cmd_inflight;
	unsigned int io_inflight;

	pthread_t thread;
	unsigned idx;

#define UBLKSRV_THREAD_STOPPING	(1U << 0)
#define UBLKSRV_THREAD_IDLE	(1U << 1)
	unsigned state;
};

struct ublk_dev {
	struct ublk_tgt tgt;
	struct ublksrv_ctrl_dev_info dev_info;
	struct ublk_queue q[UBLK_MAX_QUEUES];
	struct ublk_thread threads[UBLK_MAX_THREADS];
	unsigned nthreads;
	unsigned per_io_tasks;

	int fds[MAX_BACK_FILES + 1];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;
	int ctrl_fd;
	struct io_uring ring;

	void *private_data;
};

#ifndef offsetof
#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
#endif

#ifndef container_of
#define container_of(ptr, type, member) ({				\
	unsigned long __mptr = (unsigned long)(ptr);			\
	((type *)(__mptr - offsetof(type, member))); })
#endif

#define round_up(val, rnd) \
	(((val) + ((rnd) - 1)) & ~((rnd) - 1))


extern unsigned int ublk_dbg_mask;
extern int ublk_queue_io_cmd(struct ublk_io *io);


static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
	return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
}

static inline int is_target_io(__u64 user_data)
{
	return (user_data & (1ULL << 63)) != 0;
}

static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
	/* we only have 7 bits to encode q_id */
	_Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "q_id is encoded in 7 bits");

	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));

	/* widen tgt_data before shifting so bits 32..39 are not truncated */
	return tag | (op << 16) | ((__u64)tgt_data << 24) |
		(__u64)q_id << 56 | (__u64)is_target_io << 63;
}

static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return user_data & 0xffff;
}

static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (user_data >> 16) & 0xff;
}

static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
	return (user_data >> 24) & 0xffff;
}

static inline unsigned int user_data_to_q_id(__u64 user_data)
{
	return (user_data >> 56) & 0x7f;
}
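
/*
 * Editor's illustrative sketch: the helpers above round-trip. For
 * example, a target write issued for tag 7 on queue 2 with no private
 * tgt_data encodes and decodes as:
 *
 *	__u64 data = build_user_data(7, UBLK_IO_OP_WRITE, 0, 2, 1);
 *
 *	assert(user_data_to_tag(data) == 7);
 *	assert(user_data_to_op(data) == UBLK_IO_OP_WRITE);
 *	assert(user_data_to_q_id(data) == 2);
 *	assert(is_target_io(data));
 */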

static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	return _IOC_NR(op);
}

static inline void ublk_err(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}

static inline void ublk_log(const char *fmt, ...)
{
	if (ublk_dbg_mask & UBLK_LOG) {
		va_list ap;

		va_start(ap, fmt);
		vfprintf(stdout, fmt, ap);
		va_end(ap);
	}
}

static inline void ublk_dbg(int level, const char *fmt, ...)
{
	if (level & ublk_dbg_mask) {
		va_list ap;

		va_start(ap, fmt);
		vfprintf(stdout, fmt, ap);
		va_end(ap);
	}
}

static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
	return container_of(io, struct ublk_queue, ios[io->tag]);
}

static inline int ublk_io_alloc_sqes(struct ublk_io *io,
		struct io_uring_sqe *sqes[], int nr_sqes)
{
	struct io_uring *ring = &io->t->ring;
	unsigned left = io_uring_sq_space_left(ring);
	int i;

	/* submit pending SQEs to make room when the SQ ring is nearly full */
	if (left < nr_sqes)
		io_uring_submit(ring);

	for (i = 0; i < nr_sqes; i++) {
		sqes[i] = io_uring_get_sqe(ring);
		if (!sqes[i])
			return i;
	}

	return nr_sqes;
}

static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
		int dev_fd, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode	= IORING_OP_URING_CMD;
	sqe->flags	|= IOSQE_FIXED_FILE;
	sqe->cmd_op	= UBLK_U_IO_REGISTER_IO_BUF;

	cmd->tag	= tag;
	cmd->addr	= index;
	cmd->q_id	= q_id;
}

static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
		int dev_fd, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode	= IORING_OP_URING_CMD;
	sqe->flags	|= IOSQE_FIXED_FILE;
	sqe->cmd_op	= UBLK_U_IO_UNREGISTER_IO_BUF;

	cmd->tag	= tag;
	cmd->addr	= index;
	cmd->q_id	= q_id;
}

static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return (void *)&sqe->cmd;
}

static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
	q->ios[tag].result = res;
}

static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
	return q->ios[tag].result;
}

static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
	io->result = res;
}

static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
	return &q->io_cmd_buf[tag];
}

static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	/* cmd_op overlays the low 32 bits of sqe->off for uring_cmd SQEs */
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}

static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
	return &q->ios[tag];
}

static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res)
{
	struct ublk_io *io = &q->ios[tag];

	ublk_mark_io_done(io, res);

	return ublk_queue_io_cmd(io);
}
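
/*
 * Editor's illustrative sketch of the zero-copy pattern these helpers
 * support: bracket the backing-file transfer with a register/unregister
 * pair on the same ring. Fixed file index 0 is /dev/ublkcN (see
 * ublk_dev->fds[0]); the actual read/write prep on sqe[1] is elided:
 *
 *	struct ublk_io *io = ublk_get_io(q, tag);
 *	struct io_uring_sqe *sqe[3];
 *
 *	ublk_io_alloc_sqes(io, sqe, 3);
 *
 *	io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index);
 *	sqe[0]->flags |= IOSQE_IO_LINK;
 *	sqe[0]->user_data = build_user_data(tag,
 *			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
 *
 *	(prep the linked target read/write on sqe[1] here)
 *
 *	io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, io->buf_index);
 *	sqe[2]->user_data = build_user_data(tag,
 *			ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
 */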

static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued)
{
	if (queued < 0)
		ublk_complete_io(q, tag, queued);
	else {
		struct ublk_io *io = ublk_get_io(q, tag);

		io->t->io_inflight += queued;
		io->tgt_ios = queued;
		io->result = 0;
	}
}

static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	io->t->io_inflight--;

	return --io->tgt_ios == 0;
}

static inline int ublk_queue_use_zc(const struct ublk_queue *q)
{
	return q->state & UBLKSRV_ZC;
}

static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
	return q->state & UBLKSRV_AUTO_BUF_REG;
}

extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;

void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);

static inline unsigned int ilog2(unsigned int x)
{
	if (x == 0)
		return 0;
	return (sizeof(x) * 8 - 1) - __builtin_clz(x);
}
#endif