1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef KUBLK_INTERNAL_H 3 #define KUBLK_INTERNAL_H 4 5 #include <unistd.h> 6 #include <stdlib.h> 7 #include <assert.h> 8 #include <stdio.h> 9 #include <stdarg.h> 10 #include <string.h> 11 #include <pthread.h> 12 #include <getopt.h> 13 #include <limits.h> 14 #include <poll.h> 15 #include <fcntl.h> 16 #include <sys/syscall.h> 17 #include <sys/mman.h> 18 #include <sys/ioctl.h> 19 #include <sys/inotify.h> 20 #include <sys/wait.h> 21 #include <sys/eventfd.h> 22 #include <sys/ipc.h> 23 #include <sys/shm.h> 24 #include <linux/io_uring.h> 25 #include <liburing.h> 26 #include <semaphore.h> 27 28 /* allow ublk_dep.h to override ublk_cmd.h */ 29 #include "ublk_dep.h" 30 #include <linux/ublk_cmd.h> 31 32 #include "utils.h" 33 34 #define MAX_BACK_FILES 4 35 36 /****************** part 1: libublk ********************/ 37 38 #define CTRL_DEV "/dev/ublk-control" 39 #define UBLKC_DEV "/dev/ublkc" 40 #define UBLKB_DEV "/dev/ublkb" 41 #define UBLK_CTRL_RING_DEPTH 32 42 #define ERROR_EVTFD_DEVID -2 43 44 #define UBLK_IO_MAX_BYTES (1 << 20) 45 #define UBLK_MAX_QUEUES_SHIFT 5 46 #define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT) 47 #define UBLK_MAX_THREADS_SHIFT 5 48 #define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT) 49 #define UBLK_QUEUE_DEPTH 1024 50 51 struct ublk_dev; 52 struct ublk_queue; 53 struct ublk_thread; 54 55 struct stripe_ctx { 56 /* stripe */ 57 unsigned int chunk_size; 58 }; 59 60 struct fault_inject_ctx { 61 /* fault_inject */ 62 unsigned long delay_us; 63 }; 64 65 struct dev_ctx { 66 char tgt_type[16]; 67 unsigned long flags; 68 unsigned nr_hw_queues; 69 unsigned short nthreads; 70 unsigned queue_depth; 71 int dev_id; 72 int nr_files; 73 char *files[MAX_BACK_FILES]; 74 unsigned int logging:1; 75 unsigned int all:1; 76 unsigned int fg:1; 77 unsigned int recovery:1; 78 unsigned int auto_zc_fallback:1; 79 unsigned int per_io_tasks:1; 80 unsigned int no_ublk_fixed_fd:1; 81 82 int _evtfd; 83 int _shmid; 84 85 /* built from shmem, only for ublk_dump_dev() */ 86 struct ublk_dev *shadow_dev; 87 88 /* for 'update_size' command */ 89 unsigned long long size; 90 91 union { 92 struct stripe_ctx stripe; 93 struct fault_inject_ctx fault_inject; 94 }; 95 }; 96 97 struct ublk_ctrl_cmd_data { 98 __u32 cmd_op; 99 #define CTRL_CMD_HAS_DATA 1 100 #define CTRL_CMD_HAS_BUF 2 101 __u32 flags; 102 103 __u64 data[2]; 104 __u64 addr; 105 __u32 len; 106 }; 107 108 struct ublk_io { 109 char *buf_addr; 110 111 #define UBLKS_IO_NEED_FETCH_RQ (1UL << 0) 112 #define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1) 113 #define UBLKS_IO_FREE (1UL << 2) 114 #define UBLKS_IO_NEED_GET_DATA (1UL << 3) 115 #define UBLKS_IO_NEED_REG_BUF (1UL << 4) 116 unsigned short flags; 117 unsigned short refs; /* used by target code only */ 118 119 int tag; 120 121 int result; 122 123 unsigned short buf_index; 124 unsigned short tgt_ios; 125 void *private_data; 126 }; 127 128 struct ublk_tgt_ops { 129 const char *name; 130 int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); 131 void (*deinit_tgt)(struct ublk_dev *); 132 133 int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag); 134 void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *, 135 const struct io_uring_cqe *); 136 137 /* 138 * Target specific command line handling 139 * 140 * each option requires argument for target command line 141 */ 142 void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); 143 void (*usage)(const struct ublk_tgt_ops *ops); 144 145 /* return buffer index for UBLK_F_AUTO_BUF_REG */ 146 unsigned short (*buf_index)(const struct ublk_queue *, int tag); 147 }; 148 149 struct ublk_tgt { 150 unsigned long dev_size; 151 unsigned int sq_depth; 152 unsigned int cq_depth; 153 const struct ublk_tgt_ops *ops; 154 struct ublk_params params; 155 156 int nr_backing_files; 157 unsigned long backing_file_size[MAX_BACK_FILES]; 158 char backing_file[MAX_BACK_FILES][PATH_MAX]; 159 }; 160 161 struct ublk_queue { 162 int q_id; 163 int q_depth; 164 struct ublk_dev *dev; 165 const struct ublk_tgt_ops *tgt_ops; 166 struct ublksrv_io_desc *io_cmd_buf; 167 168 /* borrow one bit of ublk uapi flags, which may never be used */ 169 #define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) 170 #define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62) 171 __u64 flags; 172 int ublk_fd; /* cached ublk char device fd */ 173 struct ublk_io ios[UBLK_QUEUE_DEPTH]; 174 }; 175 176 struct ublk_thread { 177 struct ublk_dev *dev; 178 unsigned idx; 179 180 #define UBLKS_T_STOPPING (1U << 0) 181 #define UBLKS_T_IDLE (1U << 1) 182 unsigned state; 183 unsigned int cmd_inflight; 184 unsigned int io_inflight; 185 struct io_uring ring; 186 }; 187 188 struct ublk_dev { 189 struct ublk_tgt tgt; 190 struct ublksrv_ctrl_dev_info dev_info; 191 struct ublk_queue q[UBLK_MAX_QUEUES]; 192 unsigned nthreads; 193 unsigned per_io_tasks; 194 195 int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */ 196 int nr_fds; 197 int ctrl_fd; 198 struct io_uring ring; 199 200 void *private_data; 201 }; 202 203 extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io); 204 205 206 static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) 207 { 208 return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF); 209 } 210 211 static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag) 212 { 213 return UBLKSRV_IO_BUF_OFFSET + 214 ((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF); 215 } 216 217 static inline int is_target_io(__u64 user_data) 218 { 219 return (user_data & (1ULL << 63)) != 0; 220 } 221 222 static inline __u64 build_user_data(unsigned tag, unsigned op, 223 unsigned tgt_data, unsigned q_id, unsigned is_target_io) 224 { 225 /* we only have 7 bits to encode q_id */ 226 _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7); 227 assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7)); 228 229 return tag | (op << 16) | (tgt_data << 24) | 230 (__u64)q_id << 56 | (__u64)is_target_io << 63; 231 } 232 233 static inline unsigned int user_data_to_tag(__u64 user_data) 234 { 235 return user_data & 0xffff; 236 } 237 238 static inline unsigned int user_data_to_op(__u64 user_data) 239 { 240 return (user_data >> 16) & 0xff; 241 } 242 243 static inline unsigned int user_data_to_tgt_data(__u64 user_data) 244 { 245 return (user_data >> 24) & 0xffff; 246 } 247 248 static inline unsigned int user_data_to_q_id(__u64 user_data) 249 { 250 return (user_data >> 56) & 0x7f; 251 } 252 253 static inline unsigned short ublk_cmd_op_nr(unsigned int op) 254 { 255 return _IOC_NR(op); 256 } 257 258 static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io) 259 { 260 return container_of(io, struct ublk_queue, ios[io->tag]); 261 } 262 263 static inline int ublk_io_alloc_sqes(struct ublk_thread *t, 264 struct io_uring_sqe *sqes[], int nr_sqes) 265 { 266 struct io_uring *ring = &t->ring; 267 unsigned left = io_uring_sq_space_left(ring); 268 int i; 269 270 if (left < nr_sqes) 271 io_uring_submit(ring); 272 273 for (i = 0; i < nr_sqes; i++) { 274 sqes[i] = io_uring_get_sqe(ring); 275 if (!sqes[i]) 276 return i; 277 } 278 279 return nr_sqes; 280 } 281 282 static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index) 283 { 284 if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) { 285 if (fd_index == 0) 286 /* Return the raw ublk FD for index 0 */ 287 return q->ublk_fd; 288 /* Adjust index for backing files (index 1 becomes 0, etc.) */ 289 return fd_index - 1; 290 } 291 return fd_index; 292 } 293 294 static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe, 295 struct ublk_queue *q, int tag, int q_id, __u64 index) 296 { 297 struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; 298 int dev_fd = ublk_get_registered_fd(q, 0); 299 300 io_uring_prep_read(sqe, dev_fd, 0, 0, 0); 301 sqe->opcode = IORING_OP_URING_CMD; 302 if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) 303 sqe->flags &= ~IOSQE_FIXED_FILE; 304 else 305 sqe->flags |= IOSQE_FIXED_FILE; 306 307 cmd->tag = tag; 308 cmd->addr = index; 309 cmd->q_id = q_id; 310 } 311 312 static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, 313 struct ublk_queue *q, int tag, int q_id, __u64 index) 314 { 315 __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); 316 sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; 317 } 318 319 static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, 320 struct ublk_queue *q, int tag, int q_id, __u64 index) 321 { 322 __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); 323 sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; 324 } 325 326 static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) 327 { 328 return (void *)&sqe->cmd; 329 } 330 331 static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res) 332 { 333 q->ios[tag].result = res; 334 } 335 336 static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) 337 { 338 return q->ios[tag].result; 339 } 340 341 static inline void ublk_mark_io_done(struct ublk_io *io, int res) 342 { 343 io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE); 344 io->result = res; 345 } 346 347 static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag) 348 { 349 return &q->io_cmd_buf[tag]; 350 } 351 352 static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) 353 { 354 __u32 *addr = (__u32 *)&sqe->off; 355 356 addr[0] = cmd_op; 357 addr[1] = 0; 358 } 359 360 static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) 361 { 362 return &q->ios[tag]; 363 } 364 365 static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q, 366 unsigned tag, int res) 367 { 368 struct ublk_io *io = &q->ios[tag]; 369 370 ublk_mark_io_done(io, res); 371 372 return ublk_queue_io_cmd(t, io); 373 } 374 375 static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q, 376 unsigned tag, int queued) 377 { 378 if (queued < 0) 379 ublk_complete_io(t, q, tag, queued); 380 else { 381 struct ublk_io *io = ublk_get_io(q, tag); 382 383 t->io_inflight += queued; 384 io->tgt_ios = queued; 385 io->result = 0; 386 } 387 } 388 389 static inline int ublk_completed_tgt_io(struct ublk_thread *t, 390 struct ublk_queue *q, unsigned tag) 391 { 392 struct ublk_io *io = ublk_get_io(q, tag); 393 394 t->io_inflight--; 395 396 return --io->tgt_ios == 0; 397 } 398 399 static inline bool ublk_queue_use_zc(const struct ublk_queue *q) 400 { 401 return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY); 402 } 403 404 static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q) 405 { 406 return !!(q->flags & UBLK_F_AUTO_BUF_REG); 407 } 408 409 static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q) 410 { 411 return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK); 412 } 413 414 static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q) 415 { 416 return !!(q->flags & UBLK_F_USER_COPY); 417 } 418 419 static inline int ublk_queue_no_buf(const struct ublk_queue *q) 420 { 421 return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q); 422 } 423 424 extern const struct ublk_tgt_ops null_tgt_ops; 425 extern const struct ublk_tgt_ops loop_tgt_ops; 426 extern const struct ublk_tgt_ops stripe_tgt_ops; 427 extern const struct ublk_tgt_ops fault_inject_tgt_ops; 428 429 void backing_file_tgt_deinit(struct ublk_dev *dev); 430 int backing_file_tgt_init(struct ublk_dev *dev); 431 432 #endif 433