1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef KUBLK_INTERNAL_H 3 #define KUBLK_INTERNAL_H 4 5 #include <unistd.h> 6 #include <stdlib.h> 7 #include <assert.h> 8 #include <stdio.h> 9 #include <stdarg.h> 10 #include <string.h> 11 #include <pthread.h> 12 #include <getopt.h> 13 #include <limits.h> 14 #include <poll.h> 15 #include <fcntl.h> 16 #include <sys/syscall.h> 17 #include <sys/mman.h> 18 #include <sys/ioctl.h> 19 #include <sys/inotify.h> 20 #include <sys/wait.h> 21 #include <sys/eventfd.h> 22 #include <sys/ipc.h> 23 #include <sys/shm.h> 24 #include <linux/io_uring.h> 25 #include <liburing.h> 26 #include <semaphore.h> 27 28 /* allow ublk_dep.h to override ublk_cmd.h */ 29 #include "ublk_dep.h" 30 #include <linux/ublk_cmd.h> 31 32 #include "utils.h" 33 34 #define MAX_BACK_FILES 4 35 36 /****************** part 1: libublk ********************/ 37 38 #define CTRL_DEV "/dev/ublk-control" 39 #define UBLKC_DEV "/dev/ublkc" 40 #define UBLKB_DEV "/dev/ublkb" 41 #define UBLK_CTRL_RING_DEPTH 32 42 #define ERROR_EVTFD_DEVID -2 43 44 #define UBLK_IO_MAX_BYTES (1 << 20) 45 #define UBLK_MAX_QUEUES_SHIFT 5 46 #define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT) 47 #define UBLK_MAX_THREADS_SHIFT 5 48 #define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT) 49 #define UBLK_QUEUE_DEPTH 1024 50 51 struct ublk_dev; 52 struct ublk_queue; 53 struct ublk_thread; 54 55 struct stripe_ctx { 56 /* stripe */ 57 unsigned int chunk_size; 58 }; 59 60 struct fault_inject_ctx { 61 /* fault_inject */ 62 unsigned long delay_us; 63 }; 64 65 struct dev_ctx { 66 char tgt_type[16]; 67 unsigned long flags; 68 unsigned nr_hw_queues; 69 unsigned short nthreads; 70 unsigned queue_depth; 71 int dev_id; 72 int nr_files; 73 char *files[MAX_BACK_FILES]; 74 unsigned int logging:1; 75 unsigned int all:1; 76 unsigned int fg:1; 77 unsigned int recovery:1; 78 unsigned int auto_zc_fallback:1; 79 unsigned int per_io_tasks:1; 80 unsigned int no_ublk_fixed_fd:1; 81 82 int _evtfd; 83 int _shmid; 84 85 /* built from shmem, only for ublk_dump_dev() */ 86 struct ublk_dev *shadow_dev; 87 88 /* for 'update_size' command */ 89 unsigned long long size; 90 91 union { 92 struct stripe_ctx stripe; 93 struct fault_inject_ctx fault_inject; 94 }; 95 }; 96 97 struct ublk_ctrl_cmd_data { 98 __u32 cmd_op; 99 #define CTRL_CMD_HAS_DATA 1 100 #define CTRL_CMD_HAS_BUF 2 101 __u32 flags; 102 103 __u64 data[2]; 104 __u64 addr; 105 __u32 len; 106 }; 107 108 struct ublk_io { 109 char *buf_addr; 110 111 #define UBLKS_IO_NEED_FETCH_RQ (1UL << 0) 112 #define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1) 113 #define UBLKS_IO_FREE (1UL << 2) 114 #define UBLKS_IO_NEED_GET_DATA (1UL << 3) 115 #define UBLKS_IO_NEED_REG_BUF (1UL << 4) 116 unsigned short flags; 117 unsigned short refs; /* used by target code only */ 118 119 int tag; 120 121 int result; 122 123 unsigned short buf_index; 124 unsigned short tgt_ios; 125 void *private_data; 126 }; 127 128 struct ublk_tgt_ops { 129 const char *name; 130 int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); 131 void (*deinit_tgt)(struct ublk_dev *); 132 133 int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag); 134 void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *, 135 const struct io_uring_cqe *); 136 137 /* 138 * Target specific command line handling 139 * 140 * each option requires argument for target command line 141 */ 142 void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); 143 void (*usage)(const struct ublk_tgt_ops *ops); 144 145 /* return buffer index for UBLK_F_AUTO_BUF_REG */ 146 unsigned short (*buf_index)(const struct ublk_queue *, int tag); 147 }; 148 149 struct ublk_tgt { 150 unsigned long dev_size; 151 unsigned int sq_depth; 152 unsigned int cq_depth; 153 const struct ublk_tgt_ops *ops; 154 struct ublk_params params; 155 156 int nr_backing_files; 157 unsigned long backing_file_size[MAX_BACK_FILES]; 158 char backing_file[MAX_BACK_FILES][PATH_MAX]; 159 }; 160 161 struct ublk_queue { 162 int q_id; 163 int q_depth; 164 struct ublk_dev *dev; 165 const struct ublk_tgt_ops *tgt_ops; 166 struct ublksrv_io_desc *io_cmd_buf; 167 168 /* borrow one bit of ublk uapi flags, which may never be used */ 169 #define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) 170 #define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62) 171 __u64 flags; 172 int ublk_fd; /* cached ublk char device fd */ 173 struct ublk_io ios[UBLK_QUEUE_DEPTH]; 174 }; 175 176 struct ublk_thread { 177 struct ublk_dev *dev; 178 struct io_uring ring; 179 unsigned int cmd_inflight; 180 unsigned int io_inflight; 181 182 pthread_t thread; 183 unsigned idx; 184 185 #define UBLKS_T_STOPPING (1U << 0) 186 #define UBLKS_T_IDLE (1U << 1) 187 unsigned state; 188 }; 189 190 struct ublk_dev { 191 struct ublk_tgt tgt; 192 struct ublksrv_ctrl_dev_info dev_info; 193 struct ublk_queue q[UBLK_MAX_QUEUES]; 194 struct ublk_thread threads[UBLK_MAX_THREADS]; 195 unsigned nthreads; 196 unsigned per_io_tasks; 197 198 int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */ 199 int nr_fds; 200 int ctrl_fd; 201 struct io_uring ring; 202 203 void *private_data; 204 }; 205 206 extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io); 207 208 209 static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) 210 { 211 return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF); 212 } 213 214 static inline int is_target_io(__u64 user_data) 215 { 216 return (user_data & (1ULL << 63)) != 0; 217 } 218 219 static inline __u64 build_user_data(unsigned tag, unsigned op, 220 unsigned tgt_data, unsigned q_id, unsigned is_target_io) 221 { 222 /* we only have 7 bits to encode q_id */ 223 _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7); 224 assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7)); 225 226 return tag | (op << 16) | (tgt_data << 24) | 227 (__u64)q_id << 56 | (__u64)is_target_io << 63; 228 } 229 230 static inline unsigned int user_data_to_tag(__u64 user_data) 231 { 232 return user_data & 0xffff; 233 } 234 235 static inline unsigned int user_data_to_op(__u64 user_data) 236 { 237 return (user_data >> 16) & 0xff; 238 } 239 240 static inline unsigned int user_data_to_tgt_data(__u64 user_data) 241 { 242 return (user_data >> 24) & 0xffff; 243 } 244 245 static inline unsigned int user_data_to_q_id(__u64 user_data) 246 { 247 return (user_data >> 56) & 0x7f; 248 } 249 250 static inline unsigned short ublk_cmd_op_nr(unsigned int op) 251 { 252 return _IOC_NR(op); 253 } 254 255 static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io) 256 { 257 return container_of(io, struct ublk_queue, ios[io->tag]); 258 } 259 260 static inline int ublk_io_alloc_sqes(struct ublk_thread *t, 261 struct io_uring_sqe *sqes[], int nr_sqes) 262 { 263 struct io_uring *ring = &t->ring; 264 unsigned left = io_uring_sq_space_left(ring); 265 int i; 266 267 if (left < nr_sqes) 268 io_uring_submit(ring); 269 270 for (i = 0; i < nr_sqes; i++) { 271 sqes[i] = io_uring_get_sqe(ring); 272 if (!sqes[i]) 273 return i; 274 } 275 276 return nr_sqes; 277 } 278 279 static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index) 280 { 281 if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) { 282 if (fd_index == 0) 283 /* Return the raw ublk FD for index 0 */ 284 return q->ublk_fd; 285 /* Adjust index for backing files (index 1 becomes 0, etc.) */ 286 return fd_index - 1; 287 } 288 return fd_index; 289 } 290 291 static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe, 292 struct ublk_queue *q, int tag, int q_id, __u64 index) 293 { 294 struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; 295 int dev_fd = ublk_get_registered_fd(q, 0); 296 297 io_uring_prep_read(sqe, dev_fd, 0, 0, 0); 298 sqe->opcode = IORING_OP_URING_CMD; 299 if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) 300 sqe->flags &= ~IOSQE_FIXED_FILE; 301 else 302 sqe->flags |= IOSQE_FIXED_FILE; 303 304 cmd->tag = tag; 305 cmd->addr = index; 306 cmd->q_id = q_id; 307 } 308 309 static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, 310 struct ublk_queue *q, int tag, int q_id, __u64 index) 311 { 312 __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); 313 sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; 314 } 315 316 static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, 317 struct ublk_queue *q, int tag, int q_id, __u64 index) 318 { 319 __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); 320 sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; 321 } 322 323 static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) 324 { 325 return (void *)&sqe->cmd; 326 } 327 328 static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res) 329 { 330 q->ios[tag].result = res; 331 } 332 333 static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) 334 { 335 return q->ios[tag].result; 336 } 337 338 static inline void ublk_mark_io_done(struct ublk_io *io, int res) 339 { 340 io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE); 341 io->result = res; 342 } 343 344 static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag) 345 { 346 return &q->io_cmd_buf[tag]; 347 } 348 349 static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) 350 { 351 __u32 *addr = (__u32 *)&sqe->off; 352 353 addr[0] = cmd_op; 354 addr[1] = 0; 355 } 356 357 static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) 358 { 359 return &q->ios[tag]; 360 } 361 362 static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q, 363 unsigned tag, int res) 364 { 365 struct ublk_io *io = &q->ios[tag]; 366 367 ublk_mark_io_done(io, res); 368 369 return ublk_queue_io_cmd(t, io); 370 } 371 372 static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q, 373 unsigned tag, int queued) 374 { 375 if (queued < 0) 376 ublk_complete_io(t, q, tag, queued); 377 else { 378 struct ublk_io *io = ublk_get_io(q, tag); 379 380 t->io_inflight += queued; 381 io->tgt_ios = queued; 382 io->result = 0; 383 } 384 } 385 386 static inline int ublk_completed_tgt_io(struct ublk_thread *t, 387 struct ublk_queue *q, unsigned tag) 388 { 389 struct ublk_io *io = ublk_get_io(q, tag); 390 391 t->io_inflight--; 392 393 return --io->tgt_ios == 0; 394 } 395 396 static inline int ublk_queue_use_zc(const struct ublk_queue *q) 397 { 398 return q->flags & UBLK_F_SUPPORT_ZERO_COPY; 399 } 400 401 static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) 402 { 403 return q->flags & UBLK_F_AUTO_BUF_REG; 404 } 405 406 static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q) 407 { 408 return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK; 409 } 410 411 static inline int ublk_queue_no_buf(const struct ublk_queue *q) 412 { 413 return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q); 414 } 415 416 extern const struct ublk_tgt_ops null_tgt_ops; 417 extern const struct ublk_tgt_ops loop_tgt_ops; 418 extern const struct ublk_tgt_ops stripe_tgt_ops; 419 extern const struct ublk_tgt_ops fault_inject_tgt_ops; 420 421 void backing_file_tgt_deinit(struct ublk_dev *dev); 422 int backing_file_tgt_init(struct ublk_dev *dev); 423 424 #endif 425