1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef KUBLK_INTERNAL_H
3 #define KUBLK_INTERNAL_H
4
5 #include <unistd.h>
6 #include <stdlib.h>
7 #include <assert.h>
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <string.h>
11 #include <pthread.h>
12 #include <getopt.h>
13 #include <limits.h>
14 #include <poll.h>
15 #include <fcntl.h>
16 #include <sys/syscall.h>
17 #include <sys/mman.h>
18 #include <sys/ioctl.h>
19 #include <sys/inotify.h>
20 #include <sys/wait.h>
21 #include <sys/eventfd.h>
22 #include <sys/ipc.h>
23 #include <sys/shm.h>
24 #include <linux/io_uring.h>
25 #include <liburing.h>
26 #include <semaphore.h>
27
28 /* allow ublk_dep.h to override ublk_cmd.h */
29 #include "ublk_dep.h"
30 #include <linux/ublk_cmd.h>
31
/* silence -Wunused warnings on symbols kept for ABI/debug reasons */
#define __maybe_unused __attribute__((unused))
/* maximum number of backing files a target may open */
#define MAX_BACK_FILES 4
#ifndef min
/* NOTE: evaluates both arguments twice -- no side effects in a/b */
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

/* element count of a true array (invalid on pointers/parameters) */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
39
40 /****************** part 1: libublk ********************/
41
#define CTRL_DEV		"/dev/ublk-control"	/* ublk control device */
#define UBLKC_DEV		"/dev/ublkc"		/* per-device char dev prefix */
#define UBLKB_DEV		"/dev/ublkb"		/* per-device block dev prefix */
#define UBLK_CTRL_RING_DEPTH            32
#define ERROR_EVTFD_DEVID 	-2	/* sentinel dev_id used on the eventfd path */

/* queue idle timeout */
#define UBLKSRV_IO_IDLE_SECS		20

#define UBLK_IO_MAX_BYTES               (1 << 20)
#define UBLK_MAX_QUEUES_SHIFT		5
#define UBLK_MAX_QUEUES                 (1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT		5
#define UBLK_MAX_THREADS		(1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH                1024

/* debug mask bits for ublk_dbg(); UBLK_LOG gates ublk_log() */
#define UBLK_DBG_DEV            (1U << 0)
#define UBLK_DBG_THREAD         (1U << 1)
#define UBLK_DBG_IO_CMD         (1U << 2)
#define UBLK_DBG_IO             (1U << 3)
#define UBLK_DBG_CTRL_CMD       (1U << 4)
#define UBLK_LOG                (1U << 5)
64
65 struct ublk_dev;
66 struct ublk_queue;
67 struct ublk_thread;
68
/* parameters for the "stripe" target */
struct stripe_ctx {
	/* stripe */
	unsigned int    chunk_size;
};
73
/* parameters for the "fault_inject" target */
struct fault_inject_ctx {
	/* fault_inject */
	unsigned long   delay_us;
};
78
/*
 * Per-invocation device configuration, assembled from the command line
 * and handed to the target's init_tgt() callback.
 */
struct dev_ctx {
	char tgt_type[16];	/* target type name ("null", "loop", ...) */
	unsigned long flags;	/* UBLK_F_* feature flags */
	unsigned nr_hw_queues;
	unsigned short nthreads;	/* number of server threads */
	unsigned queue_depth;
	int dev_id;
	int nr_files;
	char *files[MAX_BACK_FILES];	/* backing file paths */
	unsigned int	logging:1;
	unsigned int	all:1;
	unsigned int	fg:1;
	unsigned int	recovery:1;
	unsigned int	auto_zc_fallback:1;
	unsigned int	per_io_tasks:1;

	/* leading '_': internal plumbing (eventfd + SysV shm id) */
	int _evtfd;
	int _shmid;

	/* built from shmem, only for ublk_dump_dev() */
	struct ublk_dev *shadow_dev;

	/* for 'update_size' command */
	unsigned long long size;

	/* target-specific parameters, selected by tgt_type */
	union {
		struct stripe_ctx stripe;
		struct fault_inject_ctx fault_inject;
	};
};
109
/* arguments for one uring_cmd issued to the ublk control device */
struct ublk_ctrl_cmd_data {
	__u32 cmd_op;
/* flags: which of the fields below carry payload for this cmd_op */
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2
	__u32 flags;

	__u64 data[2];		/* inline data, valid with CTRL_CMD_HAS_DATA */
	__u64 addr;		/* buffer address, valid with CTRL_CMD_HAS_BUF */
	__u32 len;		/* buffer length, valid with CTRL_CMD_HAS_BUF */
};
120
/* per-tag I/O state tracked by the server */
struct ublk_io {
	char *buf_addr;		/* data buffer for this tag */

/* 'flags' state bits */
#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKSRV_IO_FREE			(1UL << 2)
#define UBLKSRV_NEED_GET_DATA		(1UL << 3)
#define UBLKSRV_NEED_REG_BUF		(1UL << 4)
	unsigned short flags;
	unsigned short refs;		/* used by target code only */

	int tag;		/* tag of this io within its queue */

	int result;		/* completion status (see ublk_mark_io_done) */

	unsigned short buf_index;
	unsigned short tgt_ios;	/* outstanding target sub-I/Os for this tag */
	void *private_data;
	struct ublk_thread *t;	/* thread currently handling this io */
};
141
/* callbacks each target type (null/loop/stripe/fault_inject) implements */
struct ublk_tgt_ops {
	const char *name;
	/* set up target state from ctx; called during device setup */
	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	/* queue target I/O for the given tag */
	int (*queue_io)(struct ublk_queue *, int tag);
	/* completion hook for a target-side cqe belonging to tag */
	void (*tgt_io_done)(struct ublk_queue *,
			int tag, const struct io_uring_cqe *);

	/*
	 * Target specific command line handling
	 *
	 * each option requires argument for target command line
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);

	/* return buffer index for UBLK_F_AUTO_BUF_REG */
	unsigned short (*buf_index)(const struct ublk_queue *, int tag);
};
162
/* target-side device description, filled in by init_tgt() */
struct ublk_tgt {
	unsigned long dev_size;
	unsigned int  sq_depth;		/* io_uring SQ depth for server rings */
	unsigned int  cq_depth;		/* io_uring CQ depth for server rings */
	const struct ublk_tgt_ops *ops;
	struct ublk_params params;

	int nr_backing_files;
	unsigned long backing_file_size[MAX_BACK_FILES];
	char backing_file[MAX_BACK_FILES][PATH_MAX];
};
174
/* one hardware queue's server-side state */
struct ublk_queue {
	int q_id;
	int q_depth;		/* at most UBLK_QUEUE_DEPTH (size of ios[]) */
	struct ublk_dev *dev;
	const struct ublk_tgt_ops *tgt_ops;
	/* mapped ublk descriptor array, indexed by tag (see ublk_get_iod) */
	struct ublksrv_io_desc *io_cmd_buf;

	struct ublk_io ios[UBLK_QUEUE_DEPTH];
/* 'state' mode bits */
#define UBLKSRV_NO_BUF		(1U << 2)
#define UBLKSRV_ZC		(1U << 3)
#define UBLKSRV_AUTO_BUF_REG 	(1U << 4)
#define UBLKSRV_AUTO_BUF_REG_FALLBACK 	(1U << 5)
	unsigned state;
};
189
/* one server thread: owns an io_uring and tracks its inflight work */
struct ublk_thread {
	struct ublk_dev *dev;
	struct io_uring ring;
	unsigned int cmd_inflight;	/* outstanding ublk uring_cmds */
	unsigned int io_inflight;	/* outstanding target I/Os */

	pthread_t thread;
	unsigned idx;		/* thread index */

/* 'state' bits */
#define UBLKSRV_THREAD_STOPPING	(1U << 0)
#define UBLKSRV_THREAD_IDLE	(1U << 1)
	unsigned state;
};
203
/* whole-device state: target info, queues, threads and control handles */
struct ublk_dev {
	struct ublk_tgt tgt;
	struct ublksrv_ctrl_dev_info  dev_info;
	struct ublk_queue q[UBLK_MAX_QUEUES];
	struct ublk_thread threads[UBLK_MAX_THREADS];
	unsigned nthreads;	/* number of entries used in threads[] */
	unsigned per_io_tasks;

	int fds[MAX_BACK_FILES + 1];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;
	int ctrl_fd;		/* open fd on the control device */
	struct io_uring ring;	/* separate from the per-thread rings */

	void *private_data;	/* target-private state */
};
219
#ifndef offsetof
/* byte offset of MEMBER within TYPE (fallback when not provided elsewhere) */
#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
#endif

#ifndef container_of
/* map a pointer to a member back to its containing structure */
#define container_of(ptr, type, member) ({				\
	unsigned long __mptr = (unsigned long)(ptr);			\
	((type *)(__mptr - offsetof(type, member))); })
#endif

/* round val up to the next multiple of rnd (rnd must be a power of two) */
#define round_up(val, rnd) \
	(((val) + ((rnd) - 1)) & ~((rnd) - 1))
232
233
234 extern unsigned int ublk_dbg_mask;
235 extern int ublk_queue_io_cmd(struct ublk_io *io);
236
237
ublk_io_auto_zc_fallback(const struct ublksrv_io_desc * iod)238 static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
239 {
240 return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
241 }
242
/* bit 63 of user_data marks target-side I/O (see build_user_data()) */
static inline int is_target_io(__u64 user_data)
{
	return (int)(user_data >> 63);
}
247
build_user_data(unsigned tag,unsigned op,unsigned tgt_data,unsigned q_id,unsigned is_target_io)248 static inline __u64 build_user_data(unsigned tag, unsigned op,
249 unsigned tgt_data, unsigned q_id, unsigned is_target_io)
250 {
251 /* we only have 7 bits to encode q_id */
252 _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7);
253 assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));
254
255 return tag | (op << 16) | (tgt_data << 24) |
256 (__u64)q_id << 56 | (__u64)is_target_io << 63;
257 }
258
/* extract the tag field (bits 0..15) from a packed user_data */
static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return (unsigned int)(user_data & 0xffffULL);
}
263
/* extract the op field (bits 16..23) from a packed user_data */
static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (unsigned int)((user_data >> 16) & 0xffULL);
}
268
/* extract the tgt_data field (bits 24..39) from a packed user_data */
static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
	return (unsigned int)((user_data >> 24) & 0xffffULL);
}
273
/* extract the queue id (bits 56..62) from a packed user_data */
static inline unsigned int user_data_to_q_id(__u64 user_data)
{
	return (unsigned int)((user_data >> 56) & 0x7fULL);
}
278
/* command number field of an ioctl-encoded ublk opcode */
static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	return (unsigned short)_IOC_NR(op);
}
283
/* unconditional error logging to stderr, printf-style */
static inline void ublk_err(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);	/* every va_start needs a matching va_end (C11 7.16.1) */
}
291
ublk_log(const char * fmt,...)292 static inline void ublk_log(const char *fmt, ...)
293 {
294 if (ublk_dbg_mask & UBLK_LOG) {
295 va_list ap;
296
297 va_start(ap, fmt);
298 vfprintf(stdout, fmt, ap);
299 }
300 }
301
ublk_dbg(int level,const char * fmt,...)302 static inline void ublk_dbg(int level, const char *fmt, ...)
303 {
304 if (level & ublk_dbg_mask) {
305 va_list ap;
306
307 va_start(ap, fmt);
308 vfprintf(stdout, fmt, ap);
309 }
310 }
311
ublk_io_to_queue(const struct ublk_io * io)312 static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
313 {
314 return container_of(io, struct ublk_queue, ios[io->tag]);
315 }
316
ublk_io_alloc_sqes(struct ublk_io * io,struct io_uring_sqe * sqes[],int nr_sqes)317 static inline int ublk_io_alloc_sqes(struct ublk_io *io,
318 struct io_uring_sqe *sqes[], int nr_sqes)
319 {
320 struct io_uring *ring = &io->t->ring;
321 unsigned left = io_uring_sq_space_left(ring);
322 int i;
323
324 if (left < nr_sqes)
325 io_uring_submit(ring);
326
327 for (i = 0; i < nr_sqes; i++) {
328 sqes[i] = io_uring_get_sqe(ring);
329 if (!sqes[i])
330 return i;
331 }
332
333 return nr_sqes;
334 }
335
/* prepare a UBLK_U_IO_REGISTER_IO_BUF uring_cmd for (q_id, tag) -> index */
static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
		int dev_fd, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *io_cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	/* base uring_cmd setup against the fixed ublk char-dev file */
	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->flags |= IOSQE_FIXED_FILE;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;

	io_cmd->q_id = q_id;
	io_cmd->tag = tag;
	io_cmd->addr = index;	/* buffer index carried in 'addr' */
}
350
/* prepare a UBLK_U_IO_UNREGISTER_IO_BUF uring_cmd for (q_id, tag) -> index */
static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
		int dev_fd, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *io_cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	/* base uring_cmd setup against the fixed ublk char-dev file */
	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->flags |= IOSQE_FIXED_FILE;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;

	io_cmd->q_id = q_id;
	io_cmd->tag = tag;
	io_cmd->addr = index;	/* buffer index carried in 'addr' */
}
365
ublk_get_sqe_cmd(const struct io_uring_sqe * sqe)366 static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
367 {
368 return (void *)&sqe->cmd;
369 }
370
ublk_set_io_res(struct ublk_queue * q,int tag,int res)371 static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
372 {
373 q->ios[tag].result = res;
374 }
375
ublk_get_io_res(const struct ublk_queue * q,unsigned tag)376 static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
377 {
378 return q->ios[tag].result;
379 }
380
ublk_mark_io_done(struct ublk_io * io,int res)381 static inline void ublk_mark_io_done(struct ublk_io *io, int res)
382 {
383 io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
384 io->result = res;
385 }
386
ublk_get_iod(const struct ublk_queue * q,int tag)387 static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
388 {
389 return &q->io_cmd_buf[tag];
390 }
391
/*
 * Encode cmd_op into the sqe: the 32-bit op is written into the first
 * 4 bytes of the 64-bit 'off' union field and the remaining 4 bytes
 * are zeroed.  The store order/aliasing here is deliberate.
 */
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;	/* cmd_op occupies the first 4 bytes */
	addr[1] = 0;		/* clear the rest of 'off' */
}
399
ublk_get_io(struct ublk_queue * q,unsigned tag)400 static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
401 {
402 return &q->ios[tag];
403 }
404
/* finish the io for 'tag' with status 'res' and push the commit command */
static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	ublk_mark_io_done(io, res);
	return ublk_queue_io_cmd(io);
}
413
ublk_queued_tgt_io(struct ublk_queue * q,unsigned tag,int queued)414 static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued)
415 {
416 if (queued < 0)
417 ublk_complete_io(q, tag, queued);
418 else {
419 struct ublk_io *io = ublk_get_io(q, tag);
420
421 io->t->io_inflight += queued;
422 io->tgt_ios = queued;
423 io->result = 0;
424 }
425 }
426
ublk_completed_tgt_io(struct ublk_queue * q,unsigned tag)427 static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag)
428 {
429 struct ublk_io *io = ublk_get_io(q, tag);
430
431 io->t->io_inflight--;
432
433 return --io->tgt_ios == 0;
434 }
435
ublk_queue_use_zc(const struct ublk_queue * q)436 static inline int ublk_queue_use_zc(const struct ublk_queue *q)
437 {
438 return q->state & UBLKSRV_ZC;
439 }
440
ublk_queue_use_auto_zc(const struct ublk_queue * q)441 static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
442 {
443 return q->state & UBLKSRV_AUTO_BUF_REG;
444 }
445
446 extern const struct ublk_tgt_ops null_tgt_ops;
447 extern const struct ublk_tgt_ops loop_tgt_ops;
448 extern const struct ublk_tgt_ops stripe_tgt_ops;
449 extern const struct ublk_tgt_ops fault_inject_tgt_ops;
450
451 void backing_file_tgt_deinit(struct ublk_dev *dev);
452 int backing_file_tgt_init(struct ublk_dev *dev);
453
/* floor(log2(x)); defined as 0 for x == 0 to keep callers simple */
static inline unsigned int ilog2(unsigned int x)
{
	if (!x)
		return 0;

	/* index of the highest set bit */
	return 31 - (unsigned int)__builtin_clz(x);
}
460 #endif
461