/* SPDX-License-Identifier: GPL-2.0 */
#ifndef KUBLK_INTERNAL_H
#define KUBLK_INTERNAL_H

#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <limits.h>
#include <poll.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
#include <liburing.h>
#include <semaphore.h>

/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>

#include "utils.h"

#define MAX_BACK_FILES		4

/****************** part 1: libublk ********************/

#define CTRL_DEV		"/dev/ublk-control"
#define UBLKC_DEV		"/dev/ublkc"
#define UBLKB_DEV		"/dev/ublkb"
#define UBLK_CTRL_RING_DEPTH	32
#define ERROR_EVTFD_DEVID	-2

#define UBLK_IO_MAX_BYTES	(1 << 20)
#define UBLK_MAX_QUEUES_SHIFT	5
#define UBLK_MAX_QUEUES		(1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT	5
#define UBLK_MAX_THREADS	(1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH	1024

struct ublk_dev;
struct ublk_queue;
struct ublk_thread;

struct stripe_ctx {
	/* stripe */
	unsigned int chunk_size;
};

struct fault_inject_ctx {
	/* fault_inject */
	unsigned long delay_us;
};

struct dev_ctx {
	char tgt_type[16];
	unsigned long flags;
	unsigned nr_hw_queues;
	unsigned short nthreads;
	unsigned queue_depth;
	int dev_id;
	int nr_files;
	char *files[MAX_BACK_FILES];
	unsigned int logging:1;
	unsigned int all:1;
	unsigned int fg:1;
	unsigned int recovery:1;
	unsigned int auto_zc_fallback:1;
	unsigned int per_io_tasks:1;

	int _evtfd;
	int _shmid;

	/* built from shmem, only for ublk_dump_dev() */
	struct ublk_dev *shadow_dev;

	/* for 'update_size' command */
	unsigned long long size;

	union {
		struct stripe_ctx stripe;
		struct fault_inject_ctx fault_inject;
	};
};

struct ublk_ctrl_cmd_data {
	__u32 cmd_op;
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2
	__u32 flags;

	__u64 data[2];
	__u64 addr;
	__u32 len;
};

struct ublk_io {
	char *buf_addr;

#define UBLKS_IO_NEED_FETCH_RQ		(1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKS_IO_FREE			(1UL << 2)
#define UBLKS_IO_NEED_GET_DATA		(1UL << 3)
#define UBLKS_IO_NEED_REG_BUF		(1UL << 4)
	unsigned short flags;
	unsigned short refs;	/* used by target code only */

	int tag;

	int result;

	unsigned short buf_index;
	unsigned short tgt_ios;
	void *private_data;
};

struct ublk_tgt_ops {
	const char *name;
	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
	void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
			    const struct io_uring_cqe *);

	/*
	 * Target-specific command line handling.
	 *
	 * Each option requires an argument on the target command line.
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);

	/* return buffer index for UBLK_F_AUTO_BUF_REG */
	unsigned short (*buf_index)(const struct ublk_queue *, int tag);
};
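
/*
 * Illustrative sketch only (not taken from this file): a target typically
 * defines a static ops table wiring up the callbacks above, e.g. with
 * hypothetical demo_init_tgt()/demo_queue_io() helpers:
 *
 *	static const struct ublk_tgt_ops demo_tgt_ops = {
 *		.name		= "demo",
 *		.init_tgt	= demo_init_tgt,
 *		.queue_io	= demo_queue_io,
 *	};
 *
 * The real tables (null_tgt_ops, loop_tgt_ops, stripe_tgt_ops,
 * fault_inject_tgt_ops) are declared near the end of this header and
 * implemented in the per-target sources.
 */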

struct ublk_tgt {
	unsigned long dev_size;
	unsigned int sq_depth;
	unsigned int cq_depth;
	const struct ublk_tgt_ops *ops;
	struct ublk_params params;

	int nr_backing_files;
	unsigned long backing_file_size[MAX_BACK_FILES];
	char backing_file[MAX_BACK_FILES][PATH_MAX];
};

struct ublk_queue {
	int q_id;
	int q_depth;
	struct ublk_dev *dev;
	const struct ublk_tgt_ops *tgt_ops;
	struct ublksrv_io_desc *io_cmd_buf;

	/* borrow one bit of ublk uapi flags, which may never be used */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK	(1ULL << 63)
	__u64 flags;
	struct ublk_io ios[UBLK_QUEUE_DEPTH];
};

struct ublk_thread {
	struct ublk_dev *dev;
	struct io_uring ring;
	unsigned int cmd_inflight;
	unsigned int io_inflight;

	pthread_t thread;
	unsigned idx;

#define UBLKS_T_STOPPING	(1U << 0)
#define UBLKS_T_IDLE		(1U << 1)
	unsigned state;
};

struct ublk_dev {
	struct ublk_tgt tgt;
	struct ublksrv_ctrl_dev_info dev_info;
	struct ublk_queue q[UBLK_MAX_QUEUES];
	struct ublk_thread threads[UBLK_MAX_THREADS];
	unsigned nthreads;
	unsigned per_io_tasks;

	int fds[MAX_BACK_FILES + 1];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;
	int ctrl_fd;
	struct io_uring ring;

	void *private_data;
};

extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);

static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
	return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
}

static inline int is_target_io(__u64 user_data)
{
	return (user_data & (1ULL << 63)) != 0;
}

static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
	/* we only have 7 bits to encode q_id */
	_Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7);
	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));

	return tag | (op << 16) | (tgt_data << 24) |
		(__u64)q_id << 56 | (__u64)is_target_io << 63;
}

static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return user_data & 0xffff;
}

static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (user_data >> 16) & 0xff;
}

static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
	return (user_data >> 24) & 0xffff;
}

static inline unsigned int user_data_to_q_id(__u64 user_data)
{
	return (user_data >> 56) & 0x7f;
}
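
/*
 * Summary of the user_data encoding (derived from build_user_data() and the
 * user_data_to_*() helpers above):
 *
 *	bits  0..15	tag
 *	bits 16..23	op
 *	bits 24..39	tgt_data
 *	bits 40..55	unused
 *	bits 56..62	q_id
 *	bit  63		is_target_io
 *
 * so a round trip such as the following sketch holds:
 *
 *	__u64 data = build_user_data(tag, op, 0, q_id, 1);
 *	assert(user_data_to_tag(data) == tag && is_target_io(data));
 */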

static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	return _IOC_NR(op);
}

static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
	return container_of(io, struct ublk_queue, ios[io->tag]);
}

static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
		struct io_uring_sqe *sqes[], int nr_sqes)
{
	struct io_uring *ring = &t->ring;
	unsigned left = io_uring_sq_space_left(ring);
	int i;

	if (left < nr_sqes)
		io_uring_submit(ring);

	for (i = 0; i < nr_sqes; i++) {
		sqes[i] = io_uring_get_sqe(ring);
		if (!sqes[i])
			return i;
	}

	return nr_sqes;
}

static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
		int dev_fd, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->flags |= IOSQE_FIXED_FILE;
	sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;

	cmd->tag = tag;
	cmd->addr = index;
	cmd->q_id = q_id;
}

static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
		int dev_fd, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->flags |= IOSQE_FIXED_FILE;
	sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;

	cmd->tag = tag;
	cmd->addr = index;
	cmd->q_id = q_id;
}

static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return (void *)&sqe->cmd;
}

static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
	q->ios[tag].result = res;
}

static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
	return q->ios[tag].result;
}

static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
	io->result = res;
}

static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
	return &q->io_cmd_buf[tag];
}

static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}

static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
	return &q->ios[tag];
}

static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
				   unsigned tag, int res)
{
	struct ublk_io *io = &q->ios[tag];

	ublk_mark_io_done(io, res);

	return ublk_queue_io_cmd(t, io);
}

static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
				      unsigned tag, int queued)
{
	if (queued < 0)
		ublk_complete_io(t, q, tag, queued);
	else {
		struct ublk_io *io = ublk_get_io(q, tag);

		t->io_inflight += queued;
		io->tgt_ios = queued;
		io->result = 0;
	}
}

static inline int ublk_completed_tgt_io(struct ublk_thread *t,
					struct ublk_queue *q, unsigned tag)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	t->io_inflight--;

	return --io->tgt_ios == 0;
}

static inline int ublk_queue_use_zc(const struct ublk_queue *q)
{
	return q->flags & UBLK_F_SUPPORT_ZERO_COPY;
}

static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
	return q->flags & UBLK_F_AUTO_BUF_REG;
}

static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
{
	/* the flag lives in bit 63, so avoid truncation to int */
	return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
}

static inline int ublk_queue_no_buf(const struct ublk_queue *q)
{
	return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
}

extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;

void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);

#endif