/* SPDX-License-Identifier: GPL-2.0 */
#ifndef KUBLK_INTERNAL_H
#define KUBLK_INTERNAL_H

#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <limits.h>
#include <poll.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
#include <liburing.h>
#include <semaphore.h>

/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>

#include "utils.h"

#define MAX_BACK_FILES			4

/****************** part 1: libublk ********************/

#define CTRL_DEV			"/dev/ublk-control"
#define UBLKC_DEV			"/dev/ublkc"
#define UBLKB_DEV			"/dev/ublkb"
#define UBLK_CTRL_RING_DEPTH		32
#define ERROR_EVTFD_DEVID		-2

#define UBLK_IO_MAX_BYTES		(1 << 20)
#define UBLK_MAX_QUEUES_SHIFT		5
#define UBLK_MAX_QUEUES			(1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT		5
#define UBLK_MAX_THREADS		(1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH		1024

struct ublk_dev;
struct ublk_queue;
struct ublk_thread;

struct stripe_ctx {
	/* stripe */
	unsigned int chunk_size;
};

struct fault_inject_ctx {
	/* fault_inject */
	unsigned long delay_us;
};

struct dev_ctx {
	char tgt_type[16];
	unsigned long flags;
	unsigned nr_hw_queues;
	unsigned short nthreads;
	unsigned queue_depth;
	int dev_id;
	int nr_files;
	char *files[MAX_BACK_FILES];
	unsigned int logging:1;
	unsigned int all:1;
	unsigned int fg:1;
	unsigned int recovery:1;
	unsigned int auto_zc_fallback:1;
	unsigned int per_io_tasks:1;
	unsigned int no_ublk_fixed_fd:1;

	int _evtfd;
	int _shmid;

	/* built from shmem, only for ublk_dump_dev() */
	struct ublk_dev *shadow_dev;

	/* for 'update_size' command */
	unsigned long long size;

	union {
		struct stripe_ctx stripe;
		struct fault_inject_ctx fault_inject;
	};
};

struct ublk_ctrl_cmd_data {
	__u32 cmd_op;
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2
	__u32 flags;

	__u64 data[2];
	__u64 addr;
	__u32 len;
};

struct ublk_io {
	char *buf_addr;

#define UBLKS_IO_NEED_FETCH_RQ		(1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKS_IO_FREE			(1UL << 2)
#define UBLKS_IO_NEED_GET_DATA		(1UL << 3)
#define UBLKS_IO_NEED_REG_BUF		(1UL << 4)
	unsigned short flags;
	unsigned short refs;	/* used by target code only */

	int tag;

	int result;

	unsigned short buf_index;
	unsigned short tgt_ios;
	void *private_data;
};

struct ublk_tgt_ops {
	const char *name;
	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
	void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
			    const struct io_uring_cqe *);

	/*
	 * Target specific command line handling
	 *
	 * Each option requires an argument on the target command line.
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);

	/* return buffer index for UBLK_F_AUTO_BUF_REG */
	unsigned short (*buf_index)(const struct ublk_queue *, int tag);
};
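/*
 * Illustrative sketch (not part of this header): a minimal target fills in
 * struct ublk_tgt_ops roughly as below. The "hello" names are hypothetical;
 * the real implementations are null_tgt_ops, loop_tgt_ops, etc., declared
 * at the bottom of this header.
 *
 *	static int hello_init_tgt(const struct dev_ctx *ctx, struct ublk_dev *dev)
 *	{
 *		dev->tgt.dev_size = 1ULL << 30;
 *		return 0;
 *	}
 *
 *	static int hello_queue_io(struct ublk_thread *t, struct ublk_queue *q,
 *				  int tag)
 *	{
 *		const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
 *
 *		// complete everything immediately, null-target style
 *		ublk_complete_io(t, q, tag, iod->nr_sectors << 9);
 *		return 0;
 *	}
 *
 *	const struct ublk_tgt_ops hello_tgt_ops = {
 *		.name		= "hello",
 *		.init_tgt	= hello_init_tgt,
 *		.queue_io	= hello_queue_io,
 *	};
 */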
struct ublk_tgt {
	unsigned long dev_size;
	unsigned int sq_depth;
	unsigned int cq_depth;
	const struct ublk_tgt_ops *ops;
	struct ublk_params params;

	int nr_backing_files;
	unsigned long backing_file_size[MAX_BACK_FILES];
	char backing_file[MAX_BACK_FILES][PATH_MAX];
};

struct ublk_queue {
	int q_id;
	int q_depth;
	struct ublk_dev *dev;
	const struct ublk_tgt_ops *tgt_ops;
	struct ublksrv_io_desc *io_cmd_buf;

	/* borrow two high bits of the ublk uapi flags, which may never be used */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK	(1ULL << 63)
#define UBLKS_Q_NO_UBLK_FIXED_FD	(1ULL << 62)
	__u64 flags;
	int ublk_fd;	/* cached ublk char device fd */
	struct ublk_io ios[UBLK_QUEUE_DEPTH];
};

struct ublk_thread {
	struct ublk_dev *dev;
	unsigned idx;

#define UBLKS_T_STOPPING	(1U << 0)
#define UBLKS_T_IDLE		(1U << 1)
	unsigned state;
	unsigned int cmd_inflight;
	unsigned int io_inflight;
	struct io_uring ring;
};

struct ublk_dev {
	struct ublk_tgt tgt;
	struct ublksrv_ctrl_dev_info dev_info;
	struct ublk_queue q[UBLK_MAX_QUEUES];
	unsigned nthreads;
	unsigned per_io_tasks;

	int fds[MAX_BACK_FILES + 1];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;
	int ctrl_fd;
	struct io_uring ring;

	void *private_data;
};

extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);

static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
	return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
}

static inline int is_target_io(__u64 user_data)
{
	return (user_data & (1ULL << 63)) != 0;
}

static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
	/* we only have 7 bits to encode q_id */
	_Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "q_id is limited to 7 bits");
	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));

	/* widen tgt_data before shifting: it may occupy bits 24..39 */
	return tag | (op << 16) | ((__u64)tgt_data << 24) |
		(__u64)q_id << 56 | (__u64)is_target_io << 63;
}

static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return user_data & 0xffff;
}

static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (user_data >> 16) & 0xff;
}

static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
	return (user_data >> 24) & 0xffff;
}

static inline unsigned int user_data_to_q_id(__u64 user_data)
{
	return (user_data >> 56) & 0x7f;
}

static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	return _IOC_NR(op);
}

static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
	return container_of(io, struct ublk_queue, ios[io->tag]);
}

static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
		struct io_uring_sqe *sqes[], int nr_sqes)
{
	struct io_uring *ring = &t->ring;
	unsigned left = io_uring_sq_space_left(ring);
	int i;

	if (left < nr_sqes)
		io_uring_submit(ring);

	for (i = 0; i < nr_sqes; i++) {
		sqes[i] = io_uring_get_sqe(ring);
		if (!sqes[i])
			return i;
	}

	return nr_sqes;
}

static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
{
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
		if (fd_index == 0)
			/* return the raw ublk FD for index 0 */
			return q->ublk_fd;
		/* adjust index for backing files (index 1 becomes 0, etc.) */
		return fd_index - 1;
	}
	return fd_index;
}
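/*
 * The 64-bit user_data layout implied by build_user_data() and the
 * user_data_to_*() decoders above:
 *
 *	bits  0..15	tag
 *	bits 16..23	op
 *	bits 24..39	tgt_data
 *	bits 40..55	unused
 *	bits 56..62	q_id
 *	bit  63		is_target_io
 *
 * A hedged usage sketch (t, q, tag, backing_fd, addr and iod are assumed
 * to come from the caller): allocate one SQE, prep the backing read, and
 * tag it so the CQE can be routed back to this queue/tag:
 *
 *	struct io_uring_sqe *sqe[1];
 *
 *	if (ublk_io_alloc_sqes(t, sqe, 1) < 1)
 *		return -ENOMEM;
 *	io_uring_prep_read(sqe[0], backing_fd, addr,
 *			   iod->nr_sectors << 9, iod->start_sector << 9);
 *	sqe[0]->user_data = build_user_data(tag, ublksrv_get_op(iod), 0,
 *					    q->q_id, 1);
 */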
static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
	int dev_fd = ublk_get_registered_fd(q, 0);

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode = IORING_OP_URING_CMD;
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
		sqe->flags &= ~IOSQE_FIXED_FILE;
	else
		sqe->flags |= IOSQE_FIXED_FILE;

	cmd->tag = tag;
	cmd->addr = index;
	cmd->q_id = q_id;
}

static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
	sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
}

static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
	sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
}

static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return (void *)&sqe->cmd;
}

static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
	q->ios[tag].result = res;
}

static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
	return q->ios[tag].result;
}

static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
	io->result = res;
}

static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
	return &q->io_cmd_buf[tag];
}

static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}

static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
	return &q->ios[tag];
}

static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
				   unsigned tag, int res)
{
	struct ublk_io *io = &q->ios[tag];

	ublk_mark_io_done(io, res);

	return ublk_queue_io_cmd(t, io);
}

static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
				      unsigned tag, int queued)
{
	if (queued < 0)
		ublk_complete_io(t, q, tag, queued);
	else {
		struct ublk_io *io = ublk_get_io(q, tag);

		t->io_inflight += queued;
		io->tgt_ios = queued;
		io->result = 0;
	}
}

static inline int ublk_completed_tgt_io(struct ublk_thread *t,
					struct ublk_queue *q, unsigned tag)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	t->io_inflight--;

	return --io->tgt_ios == 0;
}

static inline int ublk_queue_use_zc(const struct ublk_queue *q)
{
	return q->flags & UBLK_F_SUPPORT_ZERO_COPY;
}

static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
	return q->flags & UBLK_F_AUTO_BUF_REG;
}

static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
{
	/*
	 * !! is required here: the flag lives in bit 63 and would be
	 * truncated away by the implicit conversion to int otherwise.
	 */
	return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
}
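/*
 * Hedged sketch of how the zero-copy helpers above are typically chained
 * (assuming a queue with UBLK_F_SUPPORT_ZERO_COPY): register the request
 * buffer, issue the backing I/O against the registered buffer index, then
 * unregister, linking the three SQEs so they execute in order. The names
 * t, q, tag, backing_fd and iod are assumed to come from the caller.
 *
 *	struct io_uring_sqe *sqe[3];
 *
 *	ublk_io_alloc_sqes(t, sqe, 3);
 *
 *	io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, tag);
 *	sqe[0]->flags |= IOSQE_IO_LINK;
 *	sqe[0]->user_data = build_user_data(tag,
 *			ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF), 0, q->q_id, 1);
 *
 *	io_uring_prep_read_fixed(sqe[1], backing_fd, 0,
 *			iod->nr_sectors << 9, iod->start_sector << 9, tag);
 *	sqe[1]->flags |= IOSQE_IO_LINK;
 *	sqe[1]->user_data = build_user_data(tag, ublksrv_get_op(iod), 0,
 *			q->q_id, 1);
 *
 *	io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, tag);
 *	sqe[2]->user_data = build_user_data(tag,
 *			ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF), 0, q->q_id, 1);
 */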
static inline int ublk_queue_no_buf(const struct ublk_queue *q)
{
	return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
}

extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;

void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);

#endif
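/*
 * Hedged usage note (illustrative only, hypothetical "hello" target): a
 * typical tgt_io_done() handler decodes the CQE with the user_data helpers,
 * accounts the completed backing I/O, and completes the ublk request once
 * all of its target I/Os have finished:
 *
 *	static void hello_tgt_io_done(struct ublk_thread *t, struct ublk_queue *q,
 *				      const struct io_uring_cqe *cqe)
 *	{
 *		unsigned tag = user_data_to_tag(cqe->user_data);
 *
 *		if (cqe->res < 0)
 *			ublk_set_io_res(q, tag, cqe->res);
 *		if (ublk_completed_tgt_io(t, q, tag))
 *			ublk_complete_io(t, q, tag, ublk_get_io_res(q, tag));
 *	}
 */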