xref: /linux/tools/testing/selftests/ublk/kublk.c (revision 91928e0d3cc29789f4483bffee5f36218f23942b)
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Description: uring_cmd based ublk
4  */
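/*
 * Example usage (see cmd_dev_help() below for the authoritative syntax;
 * the binary name "kublk" and the backing-file names are illustrative):
 *
 *   kublk add -t null -q 2 -d 128      create a null target device
 *   kublk add -t loop disk.img         create a loop device backed by disk.img
 *   kublk list -a                      dump info for all devices
 *   kublk del -n 0                     stop and delete device 0
 *   kublk features                     print ublk_drv feature flags
 *
 * tgt_ops_list below also registers a "stripe" target even though the help
 * text only mentions null and loop.
 */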
5 
6 #include "kublk.h"
7 
8 unsigned int ublk_dbg_mask = UBLK_LOG;
9 static const struct ublk_tgt_ops *tgt_ops_list[] = {
10 	&null_tgt_ops,
11 	&loop_tgt_ops,
12 	&stripe_tgt_ops,
13 };
14 
15 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
16 {
17 	const struct ublk_tgt_ops *ops;
18 	int i;
19 
20 	if (name == NULL)
21 		return NULL;
22 
23 	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(ops); i++)
24 		if (strcmp(tgt_ops_list[i]->name, name) == 0)
25 			return tgt_ops_list[i];
26 	return NULL;
27 }
28 
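/*
 * Create an io_uring with an explicitly sized completion queue: the SQ and
 * CQ depths are passed separately and IORING_SETUP_CQSIZE is OR'ed into the
 * flags so that p.cq_entries takes effect.
 */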
29 static inline int ublk_setup_ring(struct io_uring *r, int depth,
30 		int cq_depth, unsigned flags)
31 {
32 	struct io_uring_params p;
33 
34 	memset(&p, 0, sizeof(p));
35 	p.flags = flags | IORING_SETUP_CQSIZE;
36 	p.cq_entries = cq_depth;
37 
38 	return io_uring_queue_init_params(depth, r, &p);
39 }
40 
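/*
 * Fill one control-path SQE: the ublksrv_ctrl_cmd payload lives in the
 * uring_cmd area of the 128-byte SQE (the control ring is created with
 * IORING_SETUP_SQE128) and is issued as IORING_OP_URING_CMD against the
 * CTRL_DEV file descriptor.
 */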
41 static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
42 		struct io_uring_sqe *sqe,
43 		struct ublk_ctrl_cmd_data *data)
44 {
45 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
46 	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
47 
48 	sqe->fd = dev->ctrl_fd;
49 	sqe->opcode = IORING_OP_URING_CMD;
50 	sqe->ioprio = 0;
51 
52 	if (data->flags & CTRL_CMD_HAS_BUF) {
53 		cmd->addr = data->addr;
54 		cmd->len = data->len;
55 	}
56 
57 	if (data->flags & CTRL_CMD_HAS_DATA)
58 		cmd->data[0] = data->data[0];
59 
60 	cmd->dev_id = info->dev_id;
61 	cmd->queue_id = -1;
62 
63 	ublk_set_sqe_cmd_op(sqe, data->cmd_op);
64 
65 	io_uring_sqe_set_data(sqe, cmd);
66 }
67 
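/*
 * Issue a single control command synchronously: grab an SQE from the control
 * ring, submit it, block until its CQE arrives and return the CQE result.
 */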
68 static int __ublk_ctrl_cmd(struct ublk_dev *dev,
69 		struct ublk_ctrl_cmd_data *data)
70 {
71 	struct io_uring_sqe *sqe;
72 	struct io_uring_cqe *cqe;
73 	int ret = -EINVAL;
74 
75 	sqe = io_uring_get_sqe(&dev->ring);
76 	if (!sqe) {
77 		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
78 		return ret;
79 	}
80 
81 	ublk_ctrl_init_cmd(dev, sqe, data);
82 
83 	ret = io_uring_submit(&dev->ring);
84 	if (ret < 0) {
85 		ublk_err("uring submit ret %d\n", ret);
86 		return ret;
87 	}
88 
89 	ret = io_uring_wait_cqe(&dev->ring, &cqe);
90 	if (ret < 0) {
91 		ublk_err("wait cqe: %s\n", strerror(-ret));
92 		return ret;
93 	}
94 	io_uring_cqe_seen(&dev->ring, cqe);
95 
96 	return cqe->res;
97 }
98 
99 static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
100 {
101 	struct ublk_ctrl_cmd_data data = {
102 		.cmd_op	= UBLK_CMD_STOP_DEV,
103 	};
104 
105 	return __ublk_ctrl_cmd(dev, &data);
106 }
107 
108 static int ublk_ctrl_start_dev(struct ublk_dev *dev,
109 		int daemon_pid)
110 {
111 	struct ublk_ctrl_cmd_data data = {
112 		.cmd_op	= UBLK_U_CMD_START_DEV,
113 		.flags	= CTRL_CMD_HAS_DATA,
114 	};
115 
116 	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
117 
118 	return __ublk_ctrl_cmd(dev, &data);
119 }
120 
121 static int ublk_ctrl_add_dev(struct ublk_dev *dev)
122 {
123 	struct ublk_ctrl_cmd_data data = {
124 		.cmd_op	= UBLK_U_CMD_ADD_DEV,
125 		.flags	= CTRL_CMD_HAS_BUF,
126 		.addr = (__u64) (uintptr_t) &dev->dev_info,
127 		.len = sizeof(struct ublksrv_ctrl_dev_info),
128 	};
129 
130 	return __ublk_ctrl_cmd(dev, &data);
131 }
132 
133 static int ublk_ctrl_del_dev(struct ublk_dev *dev)
134 {
135 	struct ublk_ctrl_cmd_data data = {
136 		.cmd_op = UBLK_U_CMD_DEL_DEV,
137 		.flags = 0,
138 	};
139 
140 	return __ublk_ctrl_cmd(dev, &data);
141 }
142 
143 static int ublk_ctrl_get_info(struct ublk_dev *dev)
144 {
145 	struct ublk_ctrl_cmd_data data = {
146 		.cmd_op	= UBLK_U_CMD_GET_DEV_INFO,
147 		.flags	= CTRL_CMD_HAS_BUF,
148 		.addr = (__u64) (uintptr_t) &dev->dev_info,
149 		.len = sizeof(struct ublksrv_ctrl_dev_info),
150 	};
151 
152 	return __ublk_ctrl_cmd(dev, &data);
153 }
154 
155 static int ublk_ctrl_set_params(struct ublk_dev *dev,
156 		struct ublk_params *params)
157 {
158 	struct ublk_ctrl_cmd_data data = {
159 		.cmd_op	= UBLK_U_CMD_SET_PARAMS,
160 		.flags	= CTRL_CMD_HAS_BUF,
161 		.addr = (__u64) (uintptr_t) params,
162 		.len = sizeof(*params),
163 	};
164 	params->len = sizeof(*params);
165 	return __ublk_ctrl_cmd(dev, &data);
166 }
167 
168 static int ublk_ctrl_get_params(struct ublk_dev *dev,
169 		struct ublk_params *params)
170 {
171 	struct ublk_ctrl_cmd_data data = {
172 		.cmd_op	= UBLK_CMD_GET_PARAMS,
173 		.flags	= CTRL_CMD_HAS_BUF,
174 		.addr = (__u64)params,
175 		.len = sizeof(*params),
176 	};
177 
178 	params->len = sizeof(*params);
179 
180 	return __ublk_ctrl_cmd(dev, &data);
181 }
182 
183 static int ublk_ctrl_get_features(struct ublk_dev *dev,
184 		__u64 *features)
185 {
186 	struct ublk_ctrl_cmd_data data = {
187 		.cmd_op	= UBLK_U_CMD_GET_FEATURES,
188 		.flags	= CTRL_CMD_HAS_BUF,
189 		.addr = (__u64) (uintptr_t) features,
190 		.len = sizeof(*features),
191 	};
192 
193 	return __ublk_ctrl_cmd(dev, &data);
194 }
195 
196 static const char *ublk_dev_state_desc(struct ublk_dev *dev)
197 {
198 	switch (dev->dev_info.state) {
199 	case UBLK_S_DEV_DEAD:
200 		return "DEAD";
201 	case UBLK_S_DEV_LIVE:
202 		return "LIVE";
203 	case UBLK_S_DEV_QUIESCED:
204 		return "QUIESCED";
205 	default:
206 		return "UNKNOWN";
207 	};
208 }
209 
210 static void ublk_ctrl_dump(struct ublk_dev *dev)
211 {
212 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
213 	struct ublk_params p;
214 	int ret;
215 
216 	ret = ublk_ctrl_get_params(dev, &p);
217 	if (ret < 0) {
218 		ublk_err("failed to get params %m\n");
219 		return;
220 	}
221 
222 	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
223 			info->dev_id, info->nr_hw_queues, info->queue_depth,
224 			1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
225 	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
226 			info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
227 			ublk_dev_state_desc(dev));
228 	fflush(stdout);
229 }
230 
231 static void ublk_ctrl_deinit(struct ublk_dev *dev)
232 {
233 	close(dev->ctrl_fd);
234 	free(dev);
235 }
236 
237 static struct ublk_dev *ublk_ctrl_init(void)
238 {
239 	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
240 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
241 	int ret;
242 
243 	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
244 	if (dev->ctrl_fd < 0) {
245 		free(dev);
246 		return NULL;
247 	}
248 
249 	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
250 
251 	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
252 			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
253 	if (ret < 0) {
254 		ublk_err("queue_init: %s\n", strerror(-ret));
255 		free(dev);
256 		return NULL;
257 	}
258 	dev->nr_fds = 1;
259 
260 	return dev;
261 }
262 
263 static int __ublk_queue_cmd_buf_sz(unsigned depth)
264 {
265 	int size =  depth * sizeof(struct ublksrv_io_desc);
266 	unsigned int page_sz = getpagesize();
267 
268 	return round_up(size, page_sz);
269 }
270 
271 static int ublk_queue_max_cmd_buf_sz(void)
272 {
273 	return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
274 }
275 
276 static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
277 {
278 	return __ublk_queue_cmd_buf_sz(q->q_depth);
279 }
280 
281 static void ublk_queue_deinit(struct ublk_queue *q)
282 {
283 	int i;
284 	int nr_ios = q->q_depth;
285 
286 	io_uring_unregister_buffers(&q->ring);
287 
288 	io_uring_unregister_ring_fd(&q->ring);
289 
290 	if (q->ring.ring_fd > 0) {
291 		io_uring_unregister_files(&q->ring);
292 		close(q->ring.ring_fd);
293 		q->ring.ring_fd = -1;
294 	}
295 
296 	if (q->io_cmd_buf)
297 		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
298 
299 	for (i = 0; i < nr_ios; i++)
300 		free(q->ios[i].buf_addr);
301 }
302 
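/*
 * Per-queue setup, run from each queue's io handler thread: mmap the
 * read-only ublksrv_io_desc array for this queue from the char device,
 * allocate one page-aligned io buffer per tag (skipped for zero-copy),
 * and create the queue's io_uring with the device fds registered as
 * fixed files (plus sparse buffers when zero-copy is enabled).
 */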
303 static int ublk_queue_init(struct ublk_queue *q)
304 {
305 	struct ublk_dev *dev = q->dev;
306 	int depth = dev->dev_info.queue_depth;
307 	int i, ret = -1;
308 	int cmd_buf_size, io_buf_size;
309 	unsigned long off;
310 	int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
311 
312 	q->tgt_ops = dev->tgt.ops;
313 	q->state = 0;
314 	q->q_depth = depth;
315 	q->cmd_inflight = 0;
316 	q->tid = gettid();
317 
318 	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
319 		q->state |= UBLKSRV_NO_BUF;
320 		q->state |= UBLKSRV_ZC;
321 	}
322 
323 	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
324 	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
325 	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
326 			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
327 	if (q->io_cmd_buf == MAP_FAILED) {
328 		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
329 				q->dev->dev_info.dev_id, q->q_id);
330 		goto fail;
331 	}
332 
333 	io_buf_size = dev->dev_info.max_io_buf_bytes;
334 	for (i = 0; i < q->q_depth; i++) {
335 		q->ios[i].buf_addr = NULL;
336 		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
337 
338 		if (q->state & UBLKSRV_NO_BUF)
339 			continue;
340 
341 		if (posix_memalign((void **)&q->ios[i].buf_addr,
342 					getpagesize(), io_buf_size)) {
343 			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
344 					dev->dev_info.dev_id, q->q_id, i);
345 			goto fail;
346 		}
347 	}
348 
349 	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
350 			IORING_SETUP_COOP_TASKRUN);
351 	if (ret < 0) {
352 		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
353 				q->dev->dev_info.dev_id, q->q_id, ret);
354 		goto fail;
355 	}
356 
357 	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
358 		ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
359 		if (ret) {
360 			ublk_err("ublk dev %d queue %d register sparse buffers failed %d\n",
361 					dev->dev_info.dev_id, q->q_id, ret);
362 			goto fail;
363 		}
364 	}
365 
366 	io_uring_register_ring_fd(&q->ring);
367 
368 	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
369 	if (ret) {
370 		ublk_err("ublk dev %d queue %d register files failed %d\n",
371 				q->dev->dev_info.dev_id, q->q_id, ret);
372 		goto fail;
373 	}
374 
375 	return 0;
376  fail:
377 	ublk_queue_deinit(q);
378 	ublk_err("ublk dev %d queue %d failed\n",
379 			dev->dev_info.dev_id, q->q_id);
380 	return -ENOMEM;
381 }
382 
383 #define WAIT_USEC 	100000
384 #define MAX_WAIT_USEC 	(3 * 1000000)
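/*
 * Wait up to MAX_WAIT_USEC for the UBLKC_DEV char device node created by
 * ADD_DEV to appear, open it as dev->fds[0], then let the target initialize
 * itself through its init_tgt() callback.
 */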
385 static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
386 {
387 	int dev_id = dev->dev_info.dev_id;
388 	unsigned int wait_usec = 0;
389 	int ret = 0, fd = -1;
390 	char buf[64];
391 
392 	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
393 
394 	while (wait_usec < MAX_WAIT_USEC) {
395 		fd = open(buf, O_RDWR);
396 		if (fd >= 0)
397 			break;
398 		usleep(WAIT_USEC);
399 		wait_usec += WAIT_USEC;
400 	}
401 	if (fd < 0) {
402 		ublk_err("can't open %s %s\n", buf, strerror(errno));
403 		return -1;
404 	}
405 
406 	dev->fds[0] = fd;
407 	if (dev->tgt.ops->init_tgt)
408 		ret = dev->tgt.ops->init_tgt(ctx, dev);
409 	if (ret)
410 		close(dev->fds[0]);
411 	return ret;
412 }
413 
414 static void ublk_dev_unprep(struct ublk_dev *dev)
415 {
416 	if (dev->tgt.ops->deinit_tgt)
417 		dev->tgt.ops->deinit_tgt(dev);
418 	close(dev->fds[0]);
419 }
420 
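/*
 * Queue one FETCH_REQ or COMMIT_AND_FETCH_REQ uring_cmd for the given tag.
 * Only an io that is marked UBLKSRV_IO_FREE and additionally needs a fetch
 * or a commit is issued; io->flags is cleared once the command is in flight.
 */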
421 int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
422 {
423 	struct ublksrv_io_cmd *cmd;
424 	struct io_uring_sqe *sqe[1];
425 	unsigned int cmd_op = 0;
426 	__u64 user_data;
427 
428 	/* only freed io can be issued */
429 	if (!(io->flags & UBLKSRV_IO_FREE))
430 		return 0;
431 
432 	/* issue only when this io needs either a fetch or a commit */
433 	if (!(io->flags &
434 		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
435 		return 0;
436 
437 	if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
438 		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
439 	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
440 		cmd_op = UBLK_U_IO_FETCH_REQ;
441 
442 	if (io_uring_sq_space_left(&q->ring) < 1)
443 		io_uring_submit(&q->ring);
444 
445 	ublk_queue_alloc_sqes(q, sqe, 1);
446 	if (!sqe[0]) {
447 		ublk_err("%s: run out of sqe %d, tag %d\n",
448 				__func__, q->q_id, tag);
449 		return -1;
450 	}
451 
452 	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);
453 
454 	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
455 		cmd->result = io->result;
456 
457 	/* These fields should be written once, never change */
458 	ublk_set_sqe_cmd_op(sqe[0], cmd_op);
459 	sqe[0]->fd		= 0;	/* dev->fds[0] */
460 	sqe[0]->opcode	= IORING_OP_URING_CMD;
461 	sqe[0]->flags	= IOSQE_FIXED_FILE;
462 	sqe[0]->rw_flags	= 0;
463 	cmd->tag	= tag;
464 	cmd->q_id	= q->q_id;
465 	if (!(q->state & UBLKSRV_NO_BUF))
466 		cmd->addr	= (__u64) (uintptr_t) io->buf_addr;
467 	else
468 		cmd->addr	= 0;
469 
470 	user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
471 	io_uring_sqe_set_data64(sqe[0], user_data);
472 
473 	io->flags = 0;
474 
475 	q->cmd_inflight += 1;
476 
477 	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
478 			__func__, q->q_id, tag, cmd_op,
479 			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
480 	return 1;
481 }
482 
483 static void ublk_submit_fetch_commands(struct ublk_queue *q)
484 {
485 	int i = 0;
486 
487 	for (i = 0; i < q->q_depth; i++)
488 		ublk_queue_io_cmd(q, &q->ios[i], i);
489 }
490 
491 static int ublk_queue_is_idle(struct ublk_queue *q)
492 {
493 	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
494 }
495 
496 static int ublk_queue_is_done(struct ublk_queue *q)
497 {
498 	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
499 }
500 
501 static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
502 		struct io_uring_cqe *cqe)
503 {
504 	unsigned tag = user_data_to_tag(cqe->user_data);
505 
506 	if (cqe->res < 0 && cqe->res != -EAGAIN)
507 		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
508 			__func__, cqe->res, q->q_id,
509 			user_data_to_tag(cqe->user_data),
510 			user_data_to_op(cqe->user_data));
511 
512 	if (q->tgt_ops->tgt_io_done)
513 		q->tgt_ops->tgt_io_done(q, tag, cqe);
514 }
515 
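/*
 * Completion dispatch: CQEs for target io are forwarded to the target's
 * tgt_io_done() handler, while CQEs for ublk commands either hand a new
 * request to the target's queue_io() (UBLK_IO_RES_OK) or mark the io free
 * and move the queue towards the stopping state.
 */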
516 static void ublk_handle_cqe(struct io_uring *r,
517 		struct io_uring_cqe *cqe, void *data)
518 {
519 	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
520 	unsigned tag = user_data_to_tag(cqe->user_data);
521 	unsigned cmd_op = user_data_to_op(cqe->user_data);
522 	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
523 		!(q->state & UBLKSRV_QUEUE_STOPPING);
524 	struct ublk_io *io;
525 
526 	if (cqe->res < 0 && cqe->res != -ENODEV)
527 		ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
528 				cqe->res, cqe->user_data, q->state);
529 
530 	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
531 			__func__, cqe->res, q->q_id, tag, cmd_op,
532 			is_target_io(cqe->user_data),
533 			user_data_to_tgt_data(cqe->user_data),
534 			(q->state & UBLKSRV_QUEUE_STOPPING));
535 
536 	/* Don't retrieve io in case of target io */
537 	if (is_target_io(cqe->user_data)) {
538 		ublksrv_handle_tgt_cqe(q, cqe);
539 		return;
540 	}
541 
542 	io = &q->ios[tag];
543 	q->cmd_inflight--;
544 
545 	if (!fetch) {
546 		q->state |= UBLKSRV_QUEUE_STOPPING;
547 		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
548 	}
549 
550 	if (cqe->res == UBLK_IO_RES_OK) {
551 		assert(tag < q->q_depth);
552 		if (q->tgt_ops->queue_io)
553 			q->tgt_ops->queue_io(q, tag);
554 	} else {
555 		/*
556 		 * COMMIT_REQ is completed immediately since no fetch needs to
557 		 * be piggybacked on it.
558 		 *
559 		 * Mark the io as IO_FREE only; it won't be issued again since
560 		 * we only issue ios flagged with
561 		 * (UBLKSRV_IO_FREE | UBLKSRV_NEED_*).
562 		 */
563 		io->flags = UBLKSRV_IO_FREE;
564 	}
565 }
566 
567 static int ublk_reap_events_uring(struct io_uring *r)
568 {
569 	struct io_uring_cqe *cqe;
570 	unsigned head;
571 	int count = 0;
572 
573 	io_uring_for_each_cqe(r, head, cqe) {
574 		ublk_handle_cqe(r, cqe, NULL);
575 		count += 1;
576 	}
577 	io_uring_cq_advance(r, count);
578 
579 	return count;
580 }
581 
582 static int ublk_process_io(struct ublk_queue *q)
583 {
584 	int ret, reapped;
585 
586 	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
587 				q->dev->dev_info.dev_id,
588 				q->q_id, io_uring_sq_ready(&q->ring),
589 				q->cmd_inflight,
590 				(q->state & UBLKSRV_QUEUE_STOPPING));
591 
592 	if (ublk_queue_is_done(q))
593 		return -ENODEV;
594 
595 	ret = io_uring_submit_and_wait(&q->ring, 1);
596 	reapped = ublk_reap_events_uring(&q->ring);
597 
598 	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
599 			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
600 			(q->state & UBLKSRV_QUEUE_IDLE));
601 
602 	return reapped;
603 }
604 
605 static void *ublk_io_handler_fn(void *data)
606 {
607 	struct ublk_queue *q = data;
608 	int dev_id = q->dev->dev_info.dev_id;
609 	int ret;
610 
611 	ret = ublk_queue_init(q);
612 	if (ret) {
613 		ublk_err("ublk dev %d queue %d init queue failed\n",
614 				dev_id, q->q_id);
615 		return NULL;
616 	}
617 	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
618 			q->tid, dev_id, q->q_id);
619 
620 	/* submit all io commands to ublk driver */
621 	ublk_submit_fetch_commands(q);
622 	do {
623 		if (ublk_process_io(q) < 0)
624 			break;
625 	} while (1);
626 
627 	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
628 	ublk_queue_deinit(q);
629 	return NULL;
630 }
631 
632 static void ublk_set_parameters(struct ublk_dev *dev)
633 {
634 	int ret;
635 
636 	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
637 	if (ret)
638 		ublk_err("dev %d set basic parameter failed %d\n",
639 				dev->dev_info.dev_id, ret);
640 }
641 
642 static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
643 {
644 	uint64_t id;
645 	int evtfd = ctx->_evtfd;
646 
647 	if (evtfd < 0)
648 		return -EBADF;
649 
650 	if (dev_id >= 0)
651 		id = dev_id + 1;
652 	else
653 		id = ERROR_EVTFD_DEVID;
654 
655 	if (write(evtfd, &id, sizeof(id)) != sizeof(id))
656 		return -EINVAL;
657 
658 	return 0;
659 }
660 
661 
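/*
 * Per-device main loop: open the char device, start one pthread per hardware
 * queue running ublk_io_handler_fn(), push the target parameters, issue
 * START_DEV with our pid and then wait for all queue threads to exit.
 */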
662 static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
663 {
664 	int ret, i;
665 	void *thread_ret;
666 	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
667 
668 	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
669 
670 	ret = ublk_dev_prep(ctx, dev);
671 	if (ret)
672 		return ret;
673 
674 	for (i = 0; i < dinfo->nr_hw_queues; i++) {
675 		dev->q[i].dev = dev;
676 		dev->q[i].q_id = i;
677 		pthread_create(&dev->q[i].thread, NULL,
678 				ublk_io_handler_fn,
679 				&dev->q[i]);
680 	}
681 
682 	/* everything is fine now, start us */
683 	ublk_set_parameters(dev);
684 	ret = ublk_ctrl_start_dev(dev, getpid());
685 	if (ret < 0) {
686 		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
687 		goto fail;
688 	}
689 
690 	ublk_ctrl_get_info(dev);
691 	if (ctx->fg)
692 		ublk_ctrl_dump(dev);
693 	else
694 		ublk_send_dev_event(ctx, dev->dev_info.dev_id);
695 
696 	/* wait until we are terminated */
697 	for (i = 0; i < dinfo->nr_hw_queues; i++)
698 		pthread_join(dev->q[i].thread, &thread_ret);
699  fail:
700 	ublk_dev_unprep(dev);
701 	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
702 
703 	return ret;
704 }
705 
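/*
 * Wait, via inotify on /dev, for an event matching evt_mask on the given
 * device node, or fail with -ETIMEDOUT after "timeout" seconds.
 */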
706 static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
707 {
708 #define EV_SIZE (sizeof(struct inotify_event))
709 #define EV_BUF_LEN (128 * (EV_SIZE + 16))
710 	struct pollfd pfd;
711 	int fd, wd;
712 	int ret = -EINVAL;
713 	const char *dev_name = basename(path);
714 
715 	fd = inotify_init();
716 	if (fd < 0) {
717 		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
718 		return fd;
719 	}
720 
721 	wd = inotify_add_watch(fd, "/dev", evt_mask);
722 	if (wd == -1) {
723 		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
724 		goto fail;
725 	}
726 
727 	pfd.fd = fd;
728 	pfd.events = POLLIN;
729 	while (1) {
730 		int i = 0;
731 		char buffer[EV_BUF_LEN];
732 		ret = poll(&pfd, 1, 1000 * timeout);
733 
734 		if (ret == -1) {
735 			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
736 			goto rm_watch;
737 		} else if (ret == 0) {
738 			ublk_err("%s: poll inotify timeout\n", __func__);
739 			ret = -ETIMEDOUT;
740 			goto rm_watch;
741 		}
742 
743 		ret = read(fd, buffer, EV_BUF_LEN);
744 		if (ret < 0) {
745 			ublk_err("%s: read inotify fd failed\n", __func__);
746 			goto rm_watch;
747 		}
748 
749 		while (i < ret) {
750 			struct inotify_event *event = (struct inotify_event *)&buffer[i];
751 
752 			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
753 					__func__, event->mask, event->name);
754 			if (event->mask & evt_mask) {
755 				if (!strcmp(event->name, dev_name)) {
756 					ret = 0;
757 					goto rm_watch;
758 				}
759 			}
760 			i += EV_SIZE + event->len;
761 		}
762 	}
763 rm_watch:
764 	inotify_rm_watch(fd, wd);
765 fail:
766 	close(fd);
767 	return ret;
768 }
769 
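/*
 * Wait for a device's io daemon to go away: if the daemon is still alive and
 * the ublk char device still exists, wait (via inotify) for the char device
 * to be closed, then reap the daemon with waitpid().
 */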
770 static int ublk_stop_io_daemon(const struct ublk_dev *dev)
771 {
772 	int daemon_pid = dev->dev_info.ublksrv_pid;
773 	int dev_id = dev->dev_info.dev_id;
774 	char ublkc[64];
775 	int ret = 0;
776 
777 	if (daemon_pid < 0)
778 		return 0;
779 
780 	/* daemon may be dead already */
781 	if (kill(daemon_pid, 0) < 0)
782 		goto wait;
783 
784 	snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);
785 
786 	/* ublk char device may be gone already */
787 	if (access(ublkc, F_OK) != 0)
788 		goto wait;
789 
790 	/* Wait until the ublk char device is closed, which happens when the daemon shuts down */
791 	ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
792 	/* double check, since it may have been closed before inotify started */
793 	if (ret == -ETIMEDOUT)
794 		ret = kill(daemon_pid, 0) < 0;
795 wait:
796 	waitpid(daemon_pid, NULL, 0);
797 	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
798 			__func__, daemon_pid, dev_id, ret);
799 
800 	return ret;
801 }
802 
803 static int __cmd_dev_add(const struct dev_ctx *ctx)
804 {
805 	unsigned nr_queues = ctx->nr_hw_queues;
806 	const char *tgt_type = ctx->tgt_type;
807 	unsigned depth = ctx->queue_depth;
808 	__u64 features;
809 	const struct ublk_tgt_ops *ops;
810 	struct ublksrv_ctrl_dev_info *info;
811 	struct ublk_dev *dev;
812 	int dev_id = ctx->dev_id;
813 	int ret, i;
814 
815 	ops = ublk_find_tgt(tgt_type);
816 	if (!ops) {
817 		ublk_err("%s: no such tgt type, type %s\n",
818 				__func__, tgt_type);
819 		return -ENODEV;
820 	}
821 
822 	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
823 		ublk_err("%s: invalid nr_queues or depth: queues %u depth %u\n",
824 				__func__, nr_queues, depth);
825 		return -EINVAL;
826 	}
827 
828 	dev = ublk_ctrl_init();
829 	if (!dev) {
830 		ublk_err("%s: can't alloc dev id %d, type %s\n",
831 				__func__, dev_id, tgt_type);
832 		return -ENOMEM;
833 	}
834 
835 	/* bail out if the kernel doesn't support GET_FEATURES */
836 	ret = ublk_ctrl_get_features(dev, &features);
837 	if (ret < 0)
838 		return -EINVAL;
839 
840 	if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
841 		return -ENOTSUP;
842 
843 	info = &dev->dev_info;
844 	info->dev_id = ctx->dev_id;
845 	info->nr_hw_queues = nr_queues;
846 	info->queue_depth = depth;
847 	info->flags = ctx->flags;
848 	dev->tgt.ops = ops;
849 	dev->tgt.sq_depth = depth;
850 	dev->tgt.cq_depth = depth;
851 
852 	for (i = 0; i < MAX_BACK_FILES; i++) {
853 		if (ctx->files[i]) {
854 			strcpy(dev->tgt.backing_file[i], ctx->files[i]);
855 			dev->tgt.nr_backing_files++;
856 		}
857 	}
858 
859 	ret = ublk_ctrl_add_dev(dev);
860 	if (ret < 0) {
861 		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
862 				__func__, dev_id, tgt_type, ret);
863 		goto fail;
864 	}
865 
866 	ret = ublk_start_daemon(ctx, dev);
867 	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
868 	if (ret < 0)
869 		ublk_ctrl_del_dev(dev);
870 
871 fail:
872 	if (ret < 0)
873 		ublk_send_dev_event(ctx, -1);
874 	ublk_ctrl_deinit(dev);
875 	return ret;
876 }
877 
878 static int __cmd_dev_list(struct dev_ctx *ctx);
879 
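/*
 * In the default (background) mode the process forks after creating an
 * eventfd: the child runs __cmd_dev_add() and serves io, while the parent
 * waits on the eventfd for the allocated device id (or ERROR_EVTFD_DEVID on
 * failure) and then prints the device info via __cmd_dev_list().
 */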
880 static int cmd_dev_add(struct dev_ctx *ctx)
881 {
882 	int res;
883 
884 	if (ctx->fg)
885 		goto run;
886 
887 	ctx->_evtfd = eventfd(0, 0);
888 	if (ctx->_evtfd < 0) {
889 		ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
890 		exit(-1);
891 	}
892 
893 	setsid();
894 	res = fork();
895 	if (res == 0) {
896 run:
897 		res = __cmd_dev_add(ctx);
898 		return res;
899 	} else if (res > 0) {
900 		uint64_t id;
901 
902 		res = read(ctx->_evtfd, &id, sizeof(id));
903 		close(ctx->_evtfd);
904 		if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
905 			ctx->dev_id = id - 1;
906 			return __cmd_dev_list(ctx);
907 		}
908 		exit(EXIT_FAILURE);
909 	} else {
910 		return res;
911 	}
912 }
913 
914 static int __cmd_dev_del(struct dev_ctx *ctx)
915 {
916 	int number = ctx->dev_id;
917 	struct ublk_dev *dev;
918 	int ret;
919 
920 	dev = ublk_ctrl_init();
921 	dev->dev_info.dev_id = number;
922 
923 	ret = ublk_ctrl_get_info(dev);
924 	if (ret < 0)
925 		goto fail;
926 
927 	ret = ublk_ctrl_stop_dev(dev);
928 	if (ret < 0)
929 		ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);
930 
931 	ret = ublk_stop_io_daemon(dev);
932 	if (ret < 0)
933 		ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
934 				__func__, dev->dev_info.ublksrv_pid, number, ret);
935 	ublk_ctrl_del_dev(dev);
936 fail:
937 	ublk_ctrl_deinit(dev);
938 
939 	return (ret >= 0) ? 0 : ret;
940 }
941 
942 static int cmd_dev_del(struct dev_ctx *ctx)
943 {
944 	int i;
945 
946 	if (ctx->dev_id >= 0 || !ctx->all)
947 		return __cmd_dev_del(ctx);
948 
949 	for (i = 0; i < 255; i++) {
950 		ctx->dev_id = i;
951 		__cmd_dev_del(ctx);
952 	}
953 	return 0;
954 }
955 
956 static int __cmd_dev_list(struct dev_ctx *ctx)
957 {
958 	struct ublk_dev *dev = ublk_ctrl_init();
959 	int ret;
960 
961 	if (!dev)
962 		return -ENODEV;
963 
964 	dev->dev_info.dev_id = ctx->dev_id;
965 
966 	ret = ublk_ctrl_get_info(dev);
967 	if (ret < 0) {
968 		if (ctx->logging)
969 			ublk_err("%s: can't get dev info from %d: %d\n",
970 					__func__, ctx->dev_id, ret);
971 	} else {
972 		ublk_ctrl_dump(dev);
973 	}
974 
975 	ublk_ctrl_deinit(dev);
976 
977 	return ret;
978 }
979 
980 static int cmd_dev_list(struct dev_ctx *ctx)
981 {
982 	int i;
983 
984 	if (ctx->dev_id >= 0 || !ctx->all)
985 		return __cmd_dev_list(ctx);
986 
987 	ctx->logging = false;
988 	for (i = 0; i < 255; i++) {
989 		ctx->dev_id = i;
990 		__cmd_dev_list(ctx);
991 	}
992 	return 0;
993 }
994 
995 static int cmd_dev_get_features(void)
996 {
997 #define const_ilog2(x) (63 - __builtin_clzll(x))
998 	static const char *feat_map[] = {
999 		[const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY",
1000 		[const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK",
1001 		[const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA",
1002 		[const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY",
1003 		[const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE",
1004 		[const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV",
1005 		[const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
1006 		[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
1007 		[const_ilog2(UBLK_F_ZONED)] = "ZONED",
1008 		[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
1009 	};
1010 	struct ublk_dev *dev;
1011 	__u64 features = 0;
1012 	int ret;
1013 
1014 	dev = ublk_ctrl_init();
1015 	if (!dev) {
1016 		fprintf(stderr, "ublk_ctrl_init failed\n");
1017 		return -EOPNOTSUPP;
1018 	}
1019 
1020 	ret = ublk_ctrl_get_features(dev, &features);
1021 	if (!ret) {
1022 		int i;
1023 
1024 		printf("ublk_drv features: 0x%llx\n", features);
1025 
1026 		for (i = 0; i < sizeof(features) * 8; i++) {
1027 			const char *feat;
1028 
1029 			if (!((1ULL << i)  & features))
1030 				continue;
1031 			if (i < sizeof(feat_map) / sizeof(feat_map[0]))
1032 				feat = feat_map[i];
1033 			else
1034 				feat = "unknown";
1035 			printf("\t%-20s: 0x%llx\n", feat, 1ULL << i);
1036 		}
1037 	}
1038 
1039 	return ret;
1040 }
1041 
1042 static int cmd_dev_help(char *exe)
1043 {
1044 	printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe);
1045 	printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
1046 	printf("%s del [-n dev_id] -a \n", exe);
1047 	printf("\t -a delete all devices -n delete specified device\n");
1048 	printf("%s list [-n dev_id] -a \n", exe);
1049 	printf("\t -a list all devices, -n list specified device, default -a \n");
1050 	printf("%s features\n", exe);
1051 	return 0;
1052 }
1053 
1054 int main(int argc, char *argv[])
1055 {
1056 	static const struct option longopts[] = {
1057 		{ "all",		0,	NULL, 'a' },
1058 		{ "type",		1,	NULL, 't' },
1059 		{ "number",		1,	NULL, 'n' },
1060 		{ "queues",		1,	NULL, 'q' },
1061 		{ "depth",		1,	NULL, 'd' },
1062 		{ "debug_mask",		1,	NULL,  0  },
1063 		{ "quiet",		0,	NULL,  0  },
1064 		{ "zero_copy",          0,      NULL, 'z' },
1065 		{ "foreground",		0,	NULL,  0  },
1066 		{ "chunk_size", 	1,	NULL,  0  },
1067 		{ 0, 0, 0, 0 }
1068 	};
1069 	int option_idx, opt;
1070 	const char *cmd = argv[1];
1071 	struct dev_ctx ctx = {
1072 		.queue_depth	=	128,
1073 		.nr_hw_queues	=	2,
1074 		.dev_id		=	-1,
1075 		.tgt_type	=	"unknown",
1076 		.chunk_size 	= 	65536, 	/* def chunk size is 64K */
1077 	};
1078 	int ret = -EINVAL, i;
1079 
1080 	if (argc == 1)
1081 		return ret;
1082 
1083 	optind = 2;
1084 	while ((opt = getopt_long(argc, argv, "t:n:d:q:az",
1085 				  longopts, &option_idx)) != -1) {
1086 		switch (opt) {
1087 		case 'a':
1088 			ctx.all = 1;
1089 			break;
1090 		case 'n':
1091 			ctx.dev_id = strtol(optarg, NULL, 10);
1092 			break;
1093 		case 't':
1094 			if (strlen(optarg) < sizeof(ctx.tgt_type))
1095 				strcpy(ctx.tgt_type, optarg);
1096 			break;
1097 		case 'q':
1098 			ctx.nr_hw_queues = strtol(optarg, NULL, 10);
1099 			break;
1100 		case 'd':
1101 			ctx.queue_depth = strtol(optarg, NULL, 10);
1102 			break;
1103 		case 'z':
1104 			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
1105 			break;
1106 		case 0:
1107 			if (!strcmp(longopts[option_idx].name, "debug_mask"))
1108 				ublk_dbg_mask = strtol(optarg, NULL, 16);
1109 			if (!strcmp(longopts[option_idx].name, "quiet"))
1110 				ublk_dbg_mask = 0;
1111 			if (!strcmp(longopts[option_idx].name, "foreground"))
1112 				ctx.fg = 1;
1113 			if (!strcmp(longopts[option_idx].name, "chunk_size"))
1114 				ctx.chunk_size = strtol(optarg, NULL, 10);
1115 		}
1116 	}
1117 
1118 	i = optind;
1119 	while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
1120 		ctx.files[ctx.nr_files++] = argv[i++];
1121 	}
1122 
1123 	if (!strcmp(cmd, "add"))
1124 		ret = cmd_dev_add(&ctx);
1125 	else if (!strcmp(cmd, "del"))
1126 		ret = cmd_dev_del(&ctx);
1127 	else if (!strcmp(cmd, "list")) {
1128 		ctx.all = 1;
1129 		ret = cmd_dev_list(&ctx);
1130 	} else if (!strcmp(cmd, "help"))
1131 		ret = cmd_dev_help(argv[0]);
1132 	else if (!strcmp(cmd, "features"))
1133 		ret = cmd_dev_get_features();
1134 	else
1135 		cmd_dev_help(argv[0]);
1136 
1137 	return ret;
1138 }
1139