1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef KUBLK_INTERNAL_H 3 #define KUBLK_INTERNAL_H 4 5 #include <unistd.h> 6 #include <stdlib.h> 7 #include <assert.h> 8 #include <stdio.h> 9 #include <stdarg.h> 10 #include <string.h> 11 #include <pthread.h> 12 #include <getopt.h> 13 #include <limits.h> 14 #include <poll.h> 15 #include <fcntl.h> 16 #include <sys/syscall.h> 17 #include <sys/mman.h> 18 #include <sys/ioctl.h> 19 #include <sys/inotify.h> 20 #include <sys/wait.h> 21 #include <sys/eventfd.h> 22 #include <sys/uio.h> 23 #include <sys/ipc.h> 24 #include <sys/shm.h> 25 #include <linux/io_uring.h> 26 #include <liburing.h> 27 #include <semaphore.h> 28 29 /* allow ublk_dep.h to override ublk_cmd.h */ 30 #include "ublk_dep.h" 31 #include <linux/ublk_cmd.h> 32 33 #define __maybe_unused __attribute__((unused)) 34 #define MAX_BACK_FILES 4 35 #ifndef min 36 #define min(a, b) ((a) < (b) ? (a) : (b)) 37 #endif 38 39 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) 40 41 /****************** part 1: libublk ********************/ 42 43 #define CTRL_DEV "/dev/ublk-control" 44 #define UBLKC_DEV "/dev/ublkc" 45 #define UBLKB_DEV "/dev/ublkb" 46 #define UBLK_CTRL_RING_DEPTH 32 47 #define ERROR_EVTFD_DEVID -2 48 49 /* queue idle timeout */ 50 #define UBLKSRV_IO_IDLE_SECS 20 51 52 #define UBLK_IO_MAX_BYTES (1 << 20) 53 #define UBLK_MAX_QUEUES 32 54 #define UBLK_QUEUE_DEPTH 1024 55 56 #define UBLK_DBG_DEV (1U << 0) 57 #define UBLK_DBG_QUEUE (1U << 1) 58 #define UBLK_DBG_IO_CMD (1U << 2) 59 #define UBLK_DBG_IO (1U << 3) 60 #define UBLK_DBG_CTRL_CMD (1U << 4) 61 #define UBLK_LOG (1U << 5) 62 63 struct ublk_dev; 64 struct ublk_queue; 65 66 struct stripe_ctx { 67 /* stripe */ 68 unsigned int chunk_size; 69 }; 70 71 struct fault_inject_ctx { 72 /* fault_inject */ 73 unsigned long delay_us; 74 }; 75 76 struct dev_ctx { 77 char tgt_type[16]; 78 unsigned long flags; 79 unsigned nr_hw_queues; 80 unsigned queue_depth; 81 int dev_id; 82 int nr_files; 83 char *files[MAX_BACK_FILES]; 84 unsigned int logging:1; 85 unsigned int all:1; 86 unsigned int fg:1; 87 unsigned int recovery:1; 88 89 /* fault_inject */ 90 long long delay_us; 91 92 int _evtfd; 93 int _shmid; 94 95 /* built from shmem, only for ublk_dump_dev() */ 96 struct ublk_dev *shadow_dev; 97 98 union { 99 struct stripe_ctx stripe; 100 struct fault_inject_ctx fault_inject; 101 }; 102 }; 103 104 struct ublk_ctrl_cmd_data { 105 __u32 cmd_op; 106 #define CTRL_CMD_HAS_DATA 1 107 #define CTRL_CMD_HAS_BUF 2 108 __u32 flags; 109 110 __u64 data[2]; 111 __u64 addr; 112 __u32 len; 113 }; 114 115 struct ublk_io { 116 char *buf_addr; 117 118 #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) 119 #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) 120 #define UBLKSRV_IO_FREE (1UL << 2) 121 unsigned short flags; 122 unsigned short refs; /* used by target code only */ 123 124 int result; 125 126 unsigned short tgt_ios; 127 void *private_data; 128 }; 129 130 struct ublk_tgt_ops { 131 const char *name; 132 int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); 133 void (*deinit_tgt)(struct ublk_dev *); 134 135 int (*queue_io)(struct ublk_queue *, int tag); 136 void (*tgt_io_done)(struct ublk_queue *, 137 int tag, const struct io_uring_cqe *); 138 139 /* 140 * Target specific command line handling 141 * 142 * each option requires argument for target command line 143 */ 144 void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); 145 void (*usage)(const struct ublk_tgt_ops *ops); 146 }; 147 148 struct ublk_tgt { 149 unsigned long dev_size; 150 unsigned int sq_depth; 151 unsigned int cq_depth; 152 const struct ublk_tgt_ops *ops; 153 struct ublk_params params; 154 155 int nr_backing_files; 156 unsigned long backing_file_size[MAX_BACK_FILES]; 157 char backing_file[MAX_BACK_FILES][PATH_MAX]; 158 }; 159 160 struct ublk_queue { 161 int q_id; 162 int q_depth; 163 unsigned int cmd_inflight; 164 unsigned int io_inflight; 165 struct ublk_dev *dev; 166 const struct ublk_tgt_ops *tgt_ops; 167 struct ublksrv_io_desc *io_cmd_buf; 168 struct io_uring ring; 169 struct ublk_io ios[UBLK_QUEUE_DEPTH]; 170 #define UBLKSRV_QUEUE_STOPPING (1U << 0) 171 #define UBLKSRV_QUEUE_IDLE (1U << 1) 172 #define UBLKSRV_NO_BUF (1U << 2) 173 #define UBLKSRV_ZC (1U << 3) 174 unsigned state; 175 pid_t tid; 176 pthread_t thread; 177 }; 178 179 struct ublk_dev { 180 struct ublk_tgt tgt; 181 struct ublksrv_ctrl_dev_info dev_info; 182 struct ublk_queue q[UBLK_MAX_QUEUES]; 183 184 int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */ 185 int nr_fds; 186 int ctrl_fd; 187 struct io_uring ring; 188 189 void *private_data; 190 }; 191 192 #ifndef offsetof 193 #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) 194 #endif 195 196 #ifndef container_of 197 #define container_of(ptr, type, member) ({ \ 198 unsigned long __mptr = (unsigned long)(ptr); \ 199 ((type *)(__mptr - offsetof(type, member))); }) 200 #endif 201 202 #define round_up(val, rnd) \ 203 (((val) + ((rnd) - 1)) & ~((rnd) - 1)) 204 205 206 extern unsigned int ublk_dbg_mask; 207 extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag); 208 209 static inline int is_target_io(__u64 user_data) 210 { 211 return (user_data & (1ULL << 63)) != 0; 212 } 213 214 static inline __u64 build_user_data(unsigned tag, unsigned op, 215 unsigned tgt_data, unsigned is_target_io) 216 { 217 assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16)); 218 219 return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63; 220 } 221 222 static inline unsigned int user_data_to_tag(__u64 user_data) 223 { 224 return user_data & 0xffff; 225 } 226 227 static inline unsigned int user_data_to_op(__u64 user_data) 228 { 229 return (user_data >> 16) & 0xff; 230 } 231 232 static inline unsigned int user_data_to_tgt_data(__u64 user_data) 233 { 234 return (user_data >> 24) & 0xffff; 235 } 236 237 static inline unsigned short ublk_cmd_op_nr(unsigned int op) 238 { 239 return _IOC_NR(op); 240 } 241 242 static inline void ublk_err(const char *fmt, ...) 243 { 244 va_list ap; 245 246 va_start(ap, fmt); 247 vfprintf(stderr, fmt, ap); 248 } 249 250 static inline void ublk_log(const char *fmt, ...) 251 { 252 if (ublk_dbg_mask & UBLK_LOG) { 253 va_list ap; 254 255 va_start(ap, fmt); 256 vfprintf(stdout, fmt, ap); 257 } 258 } 259 260 static inline void ublk_dbg(int level, const char *fmt, ...) 261 { 262 if (level & ublk_dbg_mask) { 263 va_list ap; 264 265 va_start(ap, fmt); 266 vfprintf(stdout, fmt, ap); 267 } 268 } 269 270 static inline int ublk_queue_alloc_sqes(struct ublk_queue *q, 271 struct io_uring_sqe *sqes[], int nr_sqes) 272 { 273 unsigned left = io_uring_sq_space_left(&q->ring); 274 int i; 275 276 if (left < nr_sqes) 277 io_uring_submit(&q->ring); 278 279 for (i = 0; i < nr_sqes; i++) { 280 sqes[i] = io_uring_get_sqe(&q->ring); 281 if (!sqes[i]) 282 return i; 283 } 284 285 return nr_sqes; 286 } 287 288 static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, 289 int dev_fd, int tag, int q_id, __u64 index) 290 { 291 struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; 292 293 io_uring_prep_read(sqe, dev_fd, 0, 0, 0); 294 sqe->opcode = IORING_OP_URING_CMD; 295 sqe->flags |= IOSQE_FIXED_FILE; 296 sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; 297 298 cmd->tag = tag; 299 cmd->addr = index; 300 cmd->q_id = q_id; 301 } 302 303 static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, 304 int dev_fd, int tag, int q_id, __u64 index) 305 { 306 struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; 307 308 io_uring_prep_read(sqe, dev_fd, 0, 0, 0); 309 sqe->opcode = IORING_OP_URING_CMD; 310 sqe->flags |= IOSQE_FIXED_FILE; 311 sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; 312 313 cmd->tag = tag; 314 cmd->addr = index; 315 cmd->q_id = q_id; 316 } 317 318 static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) 319 { 320 return (void *)&sqe->cmd; 321 } 322 323 static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res) 324 { 325 q->ios[tag].result = res; 326 } 327 328 static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) 329 { 330 return q->ios[tag].result; 331 } 332 333 static inline void ublk_mark_io_done(struct ublk_io *io, int res) 334 { 335 io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); 336 io->result = res; 337 } 338 339 static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag) 340 { 341 return &q->io_cmd_buf[tag]; 342 } 343 344 static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) 345 { 346 __u32 *addr = (__u32 *)&sqe->off; 347 348 addr[0] = cmd_op; 349 addr[1] = 0; 350 } 351 352 static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) 353 { 354 return &q->ios[tag]; 355 } 356 357 static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res) 358 { 359 struct ublk_io *io = &q->ios[tag]; 360 361 ublk_mark_io_done(io, res); 362 363 return ublk_queue_io_cmd(q, io, tag); 364 } 365 366 static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued) 367 { 368 if (queued < 0) 369 ublk_complete_io(q, tag, queued); 370 else { 371 struct ublk_io *io = ublk_get_io(q, tag); 372 373 q->io_inflight += queued; 374 io->tgt_ios = queued; 375 io->result = 0; 376 } 377 } 378 379 static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag) 380 { 381 struct ublk_io *io = ublk_get_io(q, tag); 382 383 q->io_inflight--; 384 385 return --io->tgt_ios == 0; 386 } 387 388 static inline int ublk_queue_use_zc(const struct ublk_queue *q) 389 { 390 return q->state & UBLKSRV_ZC; 391 } 392 393 extern const struct ublk_tgt_ops null_tgt_ops; 394 extern const struct ublk_tgt_ops loop_tgt_ops; 395 extern const struct ublk_tgt_ops stripe_tgt_ops; 396 extern const struct ublk_tgt_ops fault_inject_tgt_ops; 397 398 void backing_file_tgt_deinit(struct ublk_dev *dev); 399 int backing_file_tgt_init(struct ublk_dev *dev); 400 401 static inline unsigned int ilog2(unsigned int x) 402 { 403 if (x == 0) 404 return 0; 405 return (sizeof(x) * 8 - 1) - __builtin_clz(x); 406 } 407 #endif 408