xref: /linux/tools/testing/selftests/ublk/kublk.h (revision 0c00ed308d0559fc216be0442a3df124e9e13533)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef KUBLK_INTERNAL_H
#define KUBLK_INTERNAL_H

#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <limits.h>
#include <poll.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
#include <liburing.h>
#include <semaphore.h>

/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>

#include "utils.h"

#define MAX_BACK_FILES   4

/****************** part 1: libublk ********************/

#define CTRL_DEV		"/dev/ublk-control"
#define UBLKC_DEV		"/dev/ublkc"
#define UBLKB_DEV		"/dev/ublkb"
#define UBLK_CTRL_RING_DEPTH            32
#define ERROR_EVTFD_DEVID	-2

#define UBLK_IO_MAX_BYTES               (1 << 20)
#define UBLK_MAX_QUEUES_SHIFT		5
#define UBLK_MAX_QUEUES                 (1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT		5
#define UBLK_MAX_THREADS		(1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH                1024

struct ublk_dev;
struct ublk_queue;
struct ublk_thread;

struct stripe_ctx {
	/* stripe */
	unsigned int    chunk_size;
};

struct fault_inject_ctx {
	/* fault_inject */
	unsigned long   delay_us;
};

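/* Device setup context built from the command line; handed to target init and the control path */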
struct dev_ctx {
	char tgt_type[16];
	unsigned long flags;
	unsigned nr_hw_queues;
	unsigned short nthreads;
	unsigned queue_depth;
	int dev_id;
	int nr_files;
	char *files[MAX_BACK_FILES];
	unsigned int	logging:1;
	unsigned int	all:1;
	unsigned int	fg:1;
	unsigned int	recovery:1;
	unsigned int	auto_zc_fallback:1;
	unsigned int	per_io_tasks:1;
	unsigned int	no_ublk_fixed_fd:1;
	unsigned int	safe_stop:1;
	unsigned int	no_auto_part_scan:1;
	__u32 integrity_flags;
	__u8 metadata_size;
	__u8 pi_offset;
	__u8 csum_type;
	__u8 tag_size;

	int _evtfd;
	int _shmid;

	/* built from shmem, only for ublk_dump_dev() */
	struct ublk_dev *shadow_dev;

	/* for 'update_size' command */
	unsigned long long size;

	union {
		struct stripe_ctx	stripe;
		struct fault_inject_ctx fault_inject;
	};
};

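/* One control command for /dev/ublk-control: opcode plus optional inline data and user buffer */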
struct ublk_ctrl_cmd_data {
	__u32 cmd_op;
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2
	__u32 flags;

	__u64 data[2];
	__u64 addr;
	__u32 len;
};

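/* Per-tag I/O state: data/integrity buffers, per-io command flags and target bookkeeping */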
struct ublk_io {
	char *buf_addr;
	void *integrity_buf;

#define UBLKS_IO_NEED_FETCH_RQ		(1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKS_IO_FREE			(1UL << 2)
#define UBLKS_IO_NEED_GET_DATA		(1UL << 3)
#define UBLKS_IO_NEED_REG_BUF		(1UL << 4)
	unsigned short flags;
	unsigned short refs;		/* used by target code only */

	int tag;

	int result;

	unsigned short buf_index;
	unsigned short tgt_ios;
	void *private_data;
};

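/* Per-target callbacks (null/loop/stripe/fault_inject) invoked by the generic per-thread I/O loop */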
struct ublk_tgt_ops {
	const char *name;
	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
	void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
			    const struct io_uring_cqe *);

	/*
	 * Target-specific command line handling
	 *
	 * Each target option is expected to take an argument.
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);

	/* return buffer index for UBLK_F_AUTO_BUF_REG */
	unsigned short (*buf_index)(const struct ublk_thread *t,
			const struct ublk_queue *, int tag);
};

struct ublk_tgt {
	unsigned long dev_size;
	unsigned int  sq_depth;
	unsigned int  cq_depth;
	const struct ublk_tgt_ops *ops;
	struct ublk_params params;

	int nr_backing_files;
	unsigned long backing_file_size[MAX_BACK_FILES];
	char backing_file[MAX_BACK_FILES][PATH_MAX];
};

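/* Per-queue state: descriptor buffer mapped from the char device, per-tag I/O slots, flags and cached fd */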
struct ublk_queue {
	int q_id;
	int q_depth;
	struct ublk_dev *dev;
	const struct ublk_tgt_ops *tgt_ops;
	struct ublksrv_io_desc *io_cmd_buf;

/* borrow three bits of the ublk uapi flags, which may never be used */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK	(1ULL << 63)
#define UBLKS_Q_NO_UBLK_FIXED_FD	(1ULL << 62)
#define UBLKS_Q_PREPARED	(1ULL << 61)
	__u64 flags;
	int ublk_fd;	/* cached ublk char device fd */
	__u8 metadata_size;
	struct ublk_io ios[UBLK_QUEUE_DEPTH];

	/* used for prepping io commands */
	pthread_spinlock_t lock;
};

/* align with `ublk_elem_header` */
struct ublk_batch_elem {
	__u16 tag;
	__u16 buf_index;
	__s32 result;
	__u64 buf_addr;
};

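/* Commit buffer currently being filled with completed I/Os before it is sent via UBLK_U_IO_COMMIT_IO_CMDS */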
struct batch_commit_buf {
	unsigned short q_id;
	unsigned short buf_idx;
	void *elem;
	unsigned short done;
	unsigned short count;
};

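/* Provided-buffer ring that multishot UBLK_U_IO_FETCH_IO_CMDS uses to deliver incoming I/O commands */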
struct batch_fetch_buf {
	struct io_uring_buf_ring *br;
	void *fetch_buf;
	unsigned int fetch_buf_size;
	unsigned int fetch_buf_off;
};

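/* Per-pthread I/O context: owns its io_uring ring and, in BATCH_IO mode, the commit/fetch buffers */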
struct ublk_thread {
	/* Thread-local copy of queue-to-thread mapping for this thread */
	unsigned char q_map[UBLK_MAX_QUEUES];

	struct ublk_dev *dev;
	unsigned short idx;
	unsigned short nr_queues;

#define UBLKS_T_STOPPING	(1U << 0)
#define UBLKS_T_IDLE	(1U << 1)
#define UBLKS_T_BATCH_IO	(1U << 31)	/* readonly */
	unsigned state;
	unsigned int cmd_inflight;
	unsigned int io_inflight;

	unsigned short nr_bufs;

	/* the following fields are for BATCH_IO */
	unsigned short commit_buf_start;
	unsigned char  commit_buf_elem_size;
	/*
	 * We only support a single device, so pre-calculate commit/prep flags
	 */
	unsigned short cmd_flags;
	unsigned int   nr_commit_buf;
	unsigned int   commit_buf_size;
	void *commit_buf;
#define UBLKS_T_COMMIT_BUF_INV_IDX  ((unsigned short)-1)
	struct allocator commit_buf_alloc;
	struct batch_commit_buf *commit;
	/* FETCH_IO_CMDS buffer */
	unsigned short nr_fetch_bufs;
	struct batch_fetch_buf *fetch;

	struct io_uring ring;
};

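/* Top-level per-device state: target, control-plane info, queues, backing file fds and the control ring */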
struct ublk_dev {
	struct ublk_tgt tgt;
	struct ublksrv_ctrl_dev_info  dev_info;
	struct ublk_queue q[UBLK_MAX_QUEUES];
	unsigned nthreads;
	unsigned per_io_tasks;

	int fds[MAX_BACK_FILES + 1];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;
	int ctrl_fd;
	struct io_uring ring;

	void *private_data;
};

extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);

static inline int __ublk_use_batch_io(__u64 flags)
{
	return flags & UBLK_F_BATCH_IO;
}

static inline int ublk_queue_batch_io(const struct ublk_queue *q)
{
	return __ublk_use_batch_io(q->flags);
}

static inline int ublk_dev_batch_io(const struct ublk_dev *dev)
{
	return __ublk_use_batch_io(dev->dev_info.flags);
}

/* only works when this pthread context handles a single device */
static inline int ublk_thread_batch_io(const struct ublk_thread *t)
{
	return t->state & UBLKS_T_BATCH_IO;
}

static inline void ublk_set_integrity_params(const struct dev_ctx *ctx,
					     struct ublk_params *params)
{
	if (!ctx->metadata_size)
		return;

	params->types |= UBLK_PARAM_TYPE_INTEGRITY;
	params->integrity = (struct ublk_param_integrity) {
		.flags = ctx->integrity_flags,
		.interval_exp = params->basic.logical_bs_shift,
		.metadata_size = ctx->metadata_size,
		.pi_offset = ctx->pi_offset,
		.csum_type = ctx->csum_type,
		.tag_size = ctx->tag_size,
	};
}

static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len)
{
	/* All targets currently use interval_exp = logical_bs_shift = 9 */
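	/* e.g. an 8-sector (4KiB) request with 8-byte metadata maps to 8 * 8 = 64 integrity bytes */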
	return (len >> 9) * q->metadata_size;
}

static inline size_t
ublk_integrity_data_len(const struct ublk_queue *q, size_t integrity_len)
{
	return (integrity_len / q->metadata_size) << 9;
}

static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
	return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
}

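/* pread/pwrite offset into /dev/ublkcN for UBLK_F_USER_COPY; encodes queue id and tag per the ublk uapi */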
static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
{
	return UBLKSRV_IO_BUF_OFFSET +
	       ((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF);
}

static inline int is_target_io(__u64 user_data)
{
	return (user_data & (1ULL << 63)) != 0;
}

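/*
 * user_data layout: bits 0-15 tag, 16-23 op, 24-39 tgt_data,
 * 56-62 q_id, bit 63 marks target I/O (see the decode helpers below).
 */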
static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
	/* we only have 7 bits to encode q_id */
	_Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "UBLK_MAX_QUEUES_SHIFT must be <= 7");
	ublk_assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));

	return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
		(__u64)q_id << 56 | (__u64)is_target_io << 63;
}

static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return user_data & 0xffff;
}

static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (user_data >> 16) & 0xff;
}

static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
	return (user_data >> 24) & 0xffff;
}

static inline unsigned int user_data_to_q_id(__u64 user_data)
{
	return (user_data >> 56) & 0x7f;
}

static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	return _IOC_NR(op);
}

static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
	return container_of(io, struct ublk_queue, ios[io->tag]);
}

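/*
 * Grab nr_sqes SQEs from the thread's ring, submitting pending SQEs first
 * if the SQ is short on space; returns how many SQEs were actually obtained.
 */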
static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
		struct io_uring_sqe *sqes[], int nr_sqes)
{
	struct io_uring *ring = &t->ring;
	unsigned left = io_uring_sq_space_left(ring);
	int i;

	if (left < nr_sqes)
		io_uring_submit(ring);

	for (i = 0; i < nr_sqes; i++) {
		sqes[i] = io_uring_get_sqe(ring);
		if (!sqes[i])
			return i;
	}

	return nr_sqes;
}

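/*
 * Map a logical fd index to what the SQE should use: without a registered
 * ublk char device fd, index 0 returns the raw fd and the backing files
 * shift down by one slot in the registered file table.
 */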
static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
{
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
		if (fd_index == 0)
			/* Return the raw ublk FD for index 0 */
			return q->ublk_fd;
		/* Adjust index for backing files (index 1 becomes 0, etc.) */
		return fd_index - 1;
	}
	return fd_index;
}

static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
	int dev_fd = ublk_get_registered_fd(q, 0);

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode		= IORING_OP_URING_CMD;
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
		sqe->flags	&= ~IOSQE_FIXED_FILE;
	else
		sqe->flags	|= IOSQE_FIXED_FILE;

	cmd->tag		= tag;
	cmd->addr		= index;
	cmd->q_id		= q_id;
}

static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
	sqe->cmd_op		= UBLK_U_IO_REGISTER_IO_BUF;
}

static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
	sqe->cmd_op		= UBLK_U_IO_UNREGISTER_IO_BUF;
}

static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return (void *)&sqe->cmd;
}

static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
	q->ios[tag].result = res;
}

static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
	return q->ios[tag].result;
}

static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
	io->result = res;
}

static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
	return &q->io_cmd_buf[tag];
}

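/* cmd_op shares storage with sqe->off in the uring_cmd SQE layout; write the 32-bit op and clear the pad */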
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}

static inline unsigned short ublk_batch_io_buf_idx(
		const struct ublk_thread *t, const struct ublk_queue *q,
		unsigned tag);

static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t,
					     const struct ublk_queue *q,
					     unsigned tag)
{
	if (ublk_queue_batch_io(q))
		return ublk_batch_io_buf_idx(t, q, tag);
	return q->ios[tag].buf_index;
}

static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
	return &q->ios[tag];
}

static inline int ublk_completed_tgt_io(struct ublk_thread *t,
					struct ublk_queue *q, unsigned tag)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	t->io_inflight--;

	return --io->tgt_ios == 0;
}

static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY);
}

static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_AUTO_BUF_REG);
}

static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
{
	return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
}

static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_USER_COPY);
}

static inline int ublk_queue_no_buf(const struct ublk_queue *q)
{
	return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
}

static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb)
{
	return cb->buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX;
}

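/* q_map[] stores 1-based slots; 0 means the queue is not handled by this thread */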
static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t,
						const struct ublk_queue *q)
{
	unsigned char idx;

	idx = t->q_map[q->q_id];
	ublk_assert(idx != 0);
	return idx - 1;
}

/*
 * Each IO's buffer index has to be calculated by this helper for
 * UBLKS_T_BATCH_IO
 */
static inline unsigned short ublk_batch_io_buf_idx(
		const struct ublk_thread *t, const struct ublk_queue *q,
		unsigned tag)
{
	return ublk_queue_idx_in_thread(t, q) * q->q_depth + tag;
}

/* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
/* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
void ublk_batch_start_fetch(struct ublk_thread *t);
/* Handle completion of batch I/O commands (prep/commit) */
void ublk_batch_compl_cmd(struct ublk_thread *t,
			  const struct io_uring_cqe *cqe);
/* Initialize batch I/O state and calculate buffer parameters */
void ublk_batch_prepare(struct ublk_thread *t);
/* Allocate and register commit buffers for batch operations */
int ublk_batch_alloc_buf(struct ublk_thread *t);
/* Free commit buffers and cleanup batch allocator */
void ublk_batch_free_buf(struct ublk_thread *t);

/* Prepare a new commit buffer for batching completed I/O operations */
void ublk_batch_prep_commit(struct ublk_thread *t);
/* Submit UBLK_U_IO_COMMIT_IO_CMDS with batched completed I/O operations */
void ublk_batch_commit_io_cmds(struct ublk_thread *t);
/* Add a completed I/O operation to the current batch commit buffer */
void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
			    unsigned tag, int res);
void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
			   int nthreads, int queues);

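/*
 * Complete one I/O back to the driver: in BATCH_IO mode add it to the
 * current commit buffer, otherwise mark it done and queue its per-io
 * command to commit the result.
 */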
static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
				   unsigned tag, int res)
{
	if (ublk_queue_batch_io(q)) {
		ublk_batch_complete_io(t, q, tag, res);
		return 0;
	} else {
		struct ublk_io *io = &q->ios[tag];

		ublk_mark_io_done(io, res);
		return ublk_queue_io_cmd(t, io);
	}
}

static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
				      unsigned tag, int queued)
{
	if (queued < 0)
		ublk_complete_io(t, q, tag, queued);
	else {
		struct ublk_io *io = ublk_get_io(q, tag);

		t->io_inflight += queued;
		io->tgt_ios = queued;
		io->result = 0;
	}
}

extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;

void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct);

#endif