1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef KUBLK_INTERNAL_H 3 #define KUBLK_INTERNAL_H 4 5 #include <unistd.h> 6 #include <stdlib.h> 7 #include <assert.h> 8 #include <stdio.h> 9 #include <stdarg.h> 10 #include <string.h> 11 #include <pthread.h> 12 #include <getopt.h> 13 #include <limits.h> 14 #include <poll.h> 15 #include <fcntl.h> 16 #include <sys/syscall.h> 17 #include <sys/mman.h> 18 #include <sys/ioctl.h> 19 #include <sys/inotify.h> 20 #include <sys/wait.h> 21 #include <sys/eventfd.h> 22 #include <sys/ipc.h> 23 #include <sys/shm.h> 24 #include <linux/io_uring.h> 25 #include <liburing.h> 26 #include <semaphore.h> 27 28 /* allow ublk_dep.h to override ublk_cmd.h */ 29 #include "ublk_dep.h" 30 #include <linux/ublk_cmd.h> 31 32 #define __maybe_unused __attribute__((unused)) 33 #define MAX_BACK_FILES 4 34 #ifndef min 35 #define min(a, b) ((a) < (b) ? (a) : (b)) 36 #endif 37 38 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) 39 40 /****************** part 1: libublk ********************/ 41 42 #define CTRL_DEV "/dev/ublk-control" 43 #define UBLKC_DEV "/dev/ublkc" 44 #define UBLKB_DEV "/dev/ublkb" 45 #define UBLK_CTRL_RING_DEPTH 32 46 #define ERROR_EVTFD_DEVID -2 47 48 /* queue idle timeout */ 49 #define UBLKSRV_IO_IDLE_SECS 20 50 51 #define UBLK_IO_MAX_BYTES (1 << 20) 52 #define UBLK_MAX_QUEUES 32 53 #define UBLK_QUEUE_DEPTH 1024 54 55 #define UBLK_DBG_DEV (1U << 0) 56 #define UBLK_DBG_QUEUE (1U << 1) 57 #define UBLK_DBG_IO_CMD (1U << 2) 58 #define UBLK_DBG_IO (1U << 3) 59 #define UBLK_DBG_CTRL_CMD (1U << 4) 60 #define UBLK_LOG (1U << 5) 61 62 struct ublk_dev; 63 struct ublk_queue; 64 65 struct stripe_ctx { 66 /* stripe */ 67 unsigned int chunk_size; 68 }; 69 70 struct fault_inject_ctx { 71 /* fault_inject */ 72 unsigned long delay_us; 73 }; 74 75 struct dev_ctx { 76 char tgt_type[16]; 77 unsigned long flags; 78 unsigned nr_hw_queues; 79 unsigned queue_depth; 80 int dev_id; 81 int nr_files; 82 char *files[MAX_BACK_FILES]; 83 unsigned int logging:1; 84 unsigned int all:1; 85 unsigned int fg:1; 86 unsigned int recovery:1; 87 unsigned int auto_zc_fallback:1; 88 89 int _evtfd; 90 int _shmid; 91 92 /* built from shmem, only for ublk_dump_dev() */ 93 struct ublk_dev *shadow_dev; 94 95 /* for 'update_size' command */ 96 unsigned long long size; 97 98 union { 99 struct stripe_ctx stripe; 100 struct fault_inject_ctx fault_inject; 101 }; 102 }; 103 104 struct ublk_ctrl_cmd_data { 105 __u32 cmd_op; 106 #define CTRL_CMD_HAS_DATA 1 107 #define CTRL_CMD_HAS_BUF 2 108 __u32 flags; 109 110 __u64 data[2]; 111 __u64 addr; 112 __u32 len; 113 }; 114 115 struct ublk_io { 116 char *buf_addr; 117 118 #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) 119 #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) 120 #define UBLKSRV_IO_FREE (1UL << 2) 121 #define UBLKSRV_NEED_GET_DATA (1UL << 3) 122 #define UBLKSRV_NEED_REG_BUF (1UL << 4) 123 unsigned short flags; 124 unsigned short refs; /* used by target code only */ 125 126 int result; 127 128 unsigned short tgt_ios; 129 void *private_data; 130 }; 131 132 struct ublk_tgt_ops { 133 const char *name; 134 int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); 135 void (*deinit_tgt)(struct ublk_dev *); 136 137 int (*queue_io)(struct ublk_queue *, int tag); 138 void (*tgt_io_done)(struct ublk_queue *, 139 int tag, const struct io_uring_cqe *); 140 141 /* 142 * Target specific command line handling 143 * 144 * each option requires argument for target command line 145 */ 146 void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); 147 void (*usage)(const struct ublk_tgt_ops *ops); 148 149 /* return buffer index for UBLK_F_AUTO_BUF_REG */ 150 unsigned short (*buf_index)(const struct ublk_queue *, int tag); 151 }; 152 153 struct ublk_tgt { 154 unsigned long dev_size; 155 unsigned int sq_depth; 156 unsigned int cq_depth; 157 const struct ublk_tgt_ops *ops; 158 struct ublk_params params; 159 160 int nr_backing_files; 161 unsigned long backing_file_size[MAX_BACK_FILES]; 162 char backing_file[MAX_BACK_FILES][PATH_MAX]; 163 }; 164 165 struct ublk_queue { 166 int q_id; 167 int q_depth; 168 unsigned int cmd_inflight; 169 unsigned int io_inflight; 170 struct ublk_dev *dev; 171 const struct ublk_tgt_ops *tgt_ops; 172 struct ublksrv_io_desc *io_cmd_buf; 173 struct io_uring ring; 174 struct ublk_io ios[UBLK_QUEUE_DEPTH]; 175 #define UBLKSRV_QUEUE_STOPPING (1U << 0) 176 #define UBLKSRV_QUEUE_IDLE (1U << 1) 177 #define UBLKSRV_NO_BUF (1U << 2) 178 #define UBLKSRV_ZC (1U << 3) 179 #define UBLKSRV_AUTO_BUF_REG (1U << 4) 180 #define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5) 181 unsigned state; 182 pid_t tid; 183 pthread_t thread; 184 }; 185 186 struct ublk_dev { 187 struct ublk_tgt tgt; 188 struct ublksrv_ctrl_dev_info dev_info; 189 struct ublk_queue q[UBLK_MAX_QUEUES]; 190 191 int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */ 192 int nr_fds; 193 int ctrl_fd; 194 struct io_uring ring; 195 196 void *private_data; 197 }; 198 199 #ifndef offsetof 200 #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) 201 #endif 202 203 #ifndef container_of 204 #define container_of(ptr, type, member) ({ \ 205 unsigned long __mptr = (unsigned long)(ptr); \ 206 ((type *)(__mptr - offsetof(type, member))); }) 207 #endif 208 209 #define round_up(val, rnd) \ 210 (((val) + ((rnd) - 1)) & ~((rnd) - 1)) 211 212 213 extern unsigned int ublk_dbg_mask; 214 extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag); 215 216 217 static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) 218 { 219 return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF); 220 } 221 222 static inline int is_target_io(__u64 user_data) 223 { 224 return (user_data & (1ULL << 63)) != 0; 225 } 226 227 static inline __u64 build_user_data(unsigned tag, unsigned op, 228 unsigned tgt_data, unsigned is_target_io) 229 { 230 assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16)); 231 232 return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63; 233 } 234 235 static inline unsigned int user_data_to_tag(__u64 user_data) 236 { 237 return user_data & 0xffff; 238 } 239 240 static inline unsigned int user_data_to_op(__u64 user_data) 241 { 242 return (user_data >> 16) & 0xff; 243 } 244 245 static inline unsigned int user_data_to_tgt_data(__u64 user_data) 246 { 247 return (user_data >> 24) & 0xffff; 248 } 249 250 static inline unsigned short ublk_cmd_op_nr(unsigned int op) 251 { 252 return _IOC_NR(op); 253 } 254 255 static inline void ublk_err(const char *fmt, ...) 256 { 257 va_list ap; 258 259 va_start(ap, fmt); 260 vfprintf(stderr, fmt, ap); 261 } 262 263 static inline void ublk_log(const char *fmt, ...) 264 { 265 if (ublk_dbg_mask & UBLK_LOG) { 266 va_list ap; 267 268 va_start(ap, fmt); 269 vfprintf(stdout, fmt, ap); 270 } 271 } 272 273 static inline void ublk_dbg(int level, const char *fmt, ...) 274 { 275 if (level & ublk_dbg_mask) { 276 va_list ap; 277 278 va_start(ap, fmt); 279 vfprintf(stdout, fmt, ap); 280 } 281 } 282 283 static inline int ublk_queue_alloc_sqes(struct ublk_queue *q, 284 struct io_uring_sqe *sqes[], int nr_sqes) 285 { 286 unsigned left = io_uring_sq_space_left(&q->ring); 287 int i; 288 289 if (left < nr_sqes) 290 io_uring_submit(&q->ring); 291 292 for (i = 0; i < nr_sqes; i++) { 293 sqes[i] = io_uring_get_sqe(&q->ring); 294 if (!sqes[i]) 295 return i; 296 } 297 298 return nr_sqes; 299 } 300 301 static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, 302 int dev_fd, int tag, int q_id, __u64 index) 303 { 304 struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; 305 306 io_uring_prep_read(sqe, dev_fd, 0, 0, 0); 307 sqe->opcode = IORING_OP_URING_CMD; 308 sqe->flags |= IOSQE_FIXED_FILE; 309 sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; 310 311 cmd->tag = tag; 312 cmd->addr = index; 313 cmd->q_id = q_id; 314 } 315 316 static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, 317 int dev_fd, int tag, int q_id, __u64 index) 318 { 319 struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; 320 321 io_uring_prep_read(sqe, dev_fd, 0, 0, 0); 322 sqe->opcode = IORING_OP_URING_CMD; 323 sqe->flags |= IOSQE_FIXED_FILE; 324 sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; 325 326 cmd->tag = tag; 327 cmd->addr = index; 328 cmd->q_id = q_id; 329 } 330 331 static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) 332 { 333 return (void *)&sqe->cmd; 334 } 335 336 static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res) 337 { 338 q->ios[tag].result = res; 339 } 340 341 static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) 342 { 343 return q->ios[tag].result; 344 } 345 346 static inline void ublk_mark_io_done(struct ublk_io *io, int res) 347 { 348 io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); 349 io->result = res; 350 } 351 352 static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag) 353 { 354 return &q->io_cmd_buf[tag]; 355 } 356 357 static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) 358 { 359 __u32 *addr = (__u32 *)&sqe->off; 360 361 addr[0] = cmd_op; 362 addr[1] = 0; 363 } 364 365 static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) 366 { 367 return &q->ios[tag]; 368 } 369 370 static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res) 371 { 372 struct ublk_io *io = &q->ios[tag]; 373 374 ublk_mark_io_done(io, res); 375 376 return ublk_queue_io_cmd(q, io, tag); 377 } 378 379 static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued) 380 { 381 if (queued < 0) 382 ublk_complete_io(q, tag, queued); 383 else { 384 struct ublk_io *io = ublk_get_io(q, tag); 385 386 q->io_inflight += queued; 387 io->tgt_ios = queued; 388 io->result = 0; 389 } 390 } 391 392 static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag) 393 { 394 struct ublk_io *io = ublk_get_io(q, tag); 395 396 q->io_inflight--; 397 398 return --io->tgt_ios == 0; 399 } 400 401 static inline int ublk_queue_use_zc(const struct ublk_queue *q) 402 { 403 return q->state & UBLKSRV_ZC; 404 } 405 406 static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) 407 { 408 return q->state & UBLKSRV_AUTO_BUF_REG; 409 } 410 411 extern const struct ublk_tgt_ops null_tgt_ops; 412 extern const struct ublk_tgt_ops loop_tgt_ops; 413 extern const struct ublk_tgt_ops stripe_tgt_ops; 414 extern const struct ublk_tgt_ops fault_inject_tgt_ops; 415 416 void backing_file_tgt_deinit(struct ublk_dev *dev); 417 int backing_file_tgt_init(struct ublk_dev *dev); 418 419 static inline unsigned int ilog2(unsigned int x) 420 { 421 if (x == 0) 422 return 0; 423 return (sizeof(x) * 8 - 1) - __builtin_clz(x); 424 } 425 #endif 426