xref: /linux/tools/testing/selftests/ublk/kublk.c (revision 6d8854216ebb60959ddb6f4ea4123bd449ba6cf6)
16aecda00SMing Lei /* SPDX-License-Identifier: MIT */
26aecda00SMing Lei /*
36aecda00SMing Lei  * Description: uring_cmd based ublk
46aecda00SMing Lei  */
56aecda00SMing Lei 
66aecda00SMing Lei #include "kublk.h"
76aecda00SMing Lei 
8810b88f3SMing Lei #define MAX_NR_TGT_ARG 	64
9810b88f3SMing Lei 
106aecda00SMing Lei unsigned int ublk_dbg_mask = UBLK_LOG;
116aecda00SMing Lei static const struct ublk_tgt_ops *tgt_ops_list[] = {
126aecda00SMing Lei 	&null_tgt_ops,
135d95bfb5SMing Lei 	&loop_tgt_ops,
140f3ebf2dSMing Lei 	&stripe_tgt_ops,
1581586652SUday Shankar 	&fault_inject_tgt_ops,
166aecda00SMing Lei };
176aecda00SMing Lei 
ublk_find_tgt(const char * name)186aecda00SMing Lei static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
196aecda00SMing Lei {
206aecda00SMing Lei 	int i;
216aecda00SMing Lei 
226aecda00SMing Lei 	if (name == NULL)
236aecda00SMing Lei 		return NULL;
246aecda00SMing Lei 
25ec120093SMing Lei 	for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
266aecda00SMing Lei 		if (strcmp(tgt_ops_list[i]->name, name) == 0)
276aecda00SMing Lei 			return tgt_ops_list[i];
286aecda00SMing Lei 	return NULL;
296aecda00SMing Lei }
306aecda00SMing Lei 
ublk_setup_ring(struct io_uring * r,int depth,int cq_depth,unsigned flags)316aecda00SMing Lei static inline int ublk_setup_ring(struct io_uring *r, int depth,
326aecda00SMing Lei 		int cq_depth, unsigned flags)
336aecda00SMing Lei {
346aecda00SMing Lei 	struct io_uring_params p;
356aecda00SMing Lei 
366aecda00SMing Lei 	memset(&p, 0, sizeof(p));
376aecda00SMing Lei 	p.flags = flags | IORING_SETUP_CQSIZE;
386aecda00SMing Lei 	p.cq_entries = cq_depth;
396aecda00SMing Lei 
406aecda00SMing Lei 	return io_uring_queue_init_params(depth, r, &p);
416aecda00SMing Lei }
426aecda00SMing Lei 
ublk_ctrl_init_cmd(struct ublk_dev * dev,struct io_uring_sqe * sqe,struct ublk_ctrl_cmd_data * data)436aecda00SMing Lei static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
446aecda00SMing Lei 		struct io_uring_sqe *sqe,
456aecda00SMing Lei 		struct ublk_ctrl_cmd_data *data)
466aecda00SMing Lei {
476aecda00SMing Lei 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
486aecda00SMing Lei 	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
496aecda00SMing Lei 
506aecda00SMing Lei 	sqe->fd = dev->ctrl_fd;
516aecda00SMing Lei 	sqe->opcode = IORING_OP_URING_CMD;
526aecda00SMing Lei 	sqe->ioprio = 0;
536aecda00SMing Lei 
546aecda00SMing Lei 	if (data->flags & CTRL_CMD_HAS_BUF) {
556aecda00SMing Lei 		cmd->addr = data->addr;
566aecda00SMing Lei 		cmd->len = data->len;
576aecda00SMing Lei 	}
586aecda00SMing Lei 
596aecda00SMing Lei 	if (data->flags & CTRL_CMD_HAS_DATA)
606aecda00SMing Lei 		cmd->data[0] = data->data[0];
616aecda00SMing Lei 
626aecda00SMing Lei 	cmd->dev_id = info->dev_id;
636aecda00SMing Lei 	cmd->queue_id = -1;
646aecda00SMing Lei 
656aecda00SMing Lei 	ublk_set_sqe_cmd_op(sqe, data->cmd_op);
666aecda00SMing Lei 
676aecda00SMing Lei 	io_uring_sqe_set_data(sqe, cmd);
686aecda00SMing Lei }
696aecda00SMing Lei 
__ublk_ctrl_cmd(struct ublk_dev * dev,struct ublk_ctrl_cmd_data * data)706aecda00SMing Lei static int __ublk_ctrl_cmd(struct ublk_dev *dev,
716aecda00SMing Lei 		struct ublk_ctrl_cmd_data *data)
726aecda00SMing Lei {
736aecda00SMing Lei 	struct io_uring_sqe *sqe;
746aecda00SMing Lei 	struct io_uring_cqe *cqe;
756aecda00SMing Lei 	int ret = -EINVAL;
766aecda00SMing Lei 
776aecda00SMing Lei 	sqe = io_uring_get_sqe(&dev->ring);
786aecda00SMing Lei 	if (!sqe) {
796aecda00SMing Lei 		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
806aecda00SMing Lei 		return ret;
816aecda00SMing Lei 	}
826aecda00SMing Lei 
836aecda00SMing Lei 	ublk_ctrl_init_cmd(dev, sqe, data);
846aecda00SMing Lei 
856aecda00SMing Lei 	ret = io_uring_submit(&dev->ring);
866aecda00SMing Lei 	if (ret < 0) {
876aecda00SMing Lei 		ublk_err("uring submit ret %d\n", ret);
886aecda00SMing Lei 		return ret;
896aecda00SMing Lei 	}
906aecda00SMing Lei 
916aecda00SMing Lei 	ret = io_uring_wait_cqe(&dev->ring, &cqe);
926aecda00SMing Lei 	if (ret < 0) {
936aecda00SMing Lei 		ublk_err("wait cqe: %s\n", strerror(-ret));
946aecda00SMing Lei 		return ret;
956aecda00SMing Lei 	}
966aecda00SMing Lei 	io_uring_cqe_seen(&dev->ring, cqe);
976aecda00SMing Lei 
986aecda00SMing Lei 	return cqe->res;
996aecda00SMing Lei }
1006aecda00SMing Lei 
ublk_ctrl_stop_dev(struct ublk_dev * dev)1016aecda00SMing Lei static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
1026aecda00SMing Lei {
1036aecda00SMing Lei 	struct ublk_ctrl_cmd_data data = {
10453c95929SUday Shankar 		.cmd_op	= UBLK_U_CMD_STOP_DEV,
1056aecda00SMing Lei 	};
1066aecda00SMing Lei 
1076aecda00SMing Lei 	return __ublk_ctrl_cmd(dev, &data);
1086aecda00SMing Lei }
1096aecda00SMing Lei 
ublk_ctrl_start_dev(struct ublk_dev * dev,int daemon_pid)1106aecda00SMing Lei static int ublk_ctrl_start_dev(struct ublk_dev *dev,
1116aecda00SMing Lei 		int daemon_pid)
1126aecda00SMing Lei {
1136aecda00SMing Lei 	struct ublk_ctrl_cmd_data data = {
1146aecda00SMing Lei 		.cmd_op	= UBLK_U_CMD_START_DEV,
1156aecda00SMing Lei 		.flags	= CTRL_CMD_HAS_DATA,
1166aecda00SMing Lei 	};
1176aecda00SMing Lei 
1186aecda00SMing Lei 	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
1196aecda00SMing Lei 
1206aecda00SMing Lei 	return __ublk_ctrl_cmd(dev, &data);
1216aecda00SMing Lei }
1226aecda00SMing Lei 
ublk_ctrl_start_user_recovery(struct ublk_dev * dev)12357e13a2eSMing Lei static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
12457e13a2eSMing Lei {
12557e13a2eSMing Lei 	struct ublk_ctrl_cmd_data data = {
12657e13a2eSMing Lei 		.cmd_op	= UBLK_U_CMD_START_USER_RECOVERY,
12757e13a2eSMing Lei 	};
12857e13a2eSMing Lei 
12957e13a2eSMing Lei 	return __ublk_ctrl_cmd(dev, &data);
13057e13a2eSMing Lei }
13157e13a2eSMing Lei 
ublk_ctrl_end_user_recovery(struct ublk_dev * dev,int daemon_pid)13257e13a2eSMing Lei static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
13357e13a2eSMing Lei {
13457e13a2eSMing Lei 	struct ublk_ctrl_cmd_data data = {
13557e13a2eSMing Lei 		.cmd_op	= UBLK_U_CMD_END_USER_RECOVERY,
13657e13a2eSMing Lei 		.flags	= CTRL_CMD_HAS_DATA,
13757e13a2eSMing Lei 	};
13857e13a2eSMing Lei 
13957e13a2eSMing Lei 	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
14057e13a2eSMing Lei 
14157e13a2eSMing Lei 	return __ublk_ctrl_cmd(dev, &data);
14257e13a2eSMing Lei }
14357e13a2eSMing Lei 
ublk_ctrl_add_dev(struct ublk_dev * dev)1446aecda00SMing Lei static int ublk_ctrl_add_dev(struct ublk_dev *dev)
1456aecda00SMing Lei {
1466aecda00SMing Lei 	struct ublk_ctrl_cmd_data data = {
1476aecda00SMing Lei 		.cmd_op	= UBLK_U_CMD_ADD_DEV,
1486aecda00SMing Lei 		.flags	= CTRL_CMD_HAS_BUF,
1496aecda00SMing Lei 		.addr = (__u64) (uintptr_t) &dev->dev_info,
1506aecda00SMing Lei 		.len = sizeof(struct ublksrv_ctrl_dev_info),
1516aecda00SMing Lei 	};
1526aecda00SMing Lei 
1536aecda00SMing Lei 	return __ublk_ctrl_cmd(dev, &data);
1546aecda00SMing Lei }
1556aecda00SMing Lei 
ublk_ctrl_del_dev(struct ublk_dev * dev)1566aecda00SMing Lei static int ublk_ctrl_del_dev(struct ublk_dev *dev)
1576aecda00SMing Lei {
1586aecda00SMing Lei 	struct ublk_ctrl_cmd_data data = {
1596aecda00SMing Lei 		.cmd_op = UBLK_U_CMD_DEL_DEV,
1606aecda00SMing Lei 		.flags = 0,
1616aecda00SMing Lei 	};
1626aecda00SMing Lei 
1636aecda00SMing Lei 	return __ublk_ctrl_cmd(dev, &data);
1646aecda00SMing Lei }
1656aecda00SMing Lei 
ublk_ctrl_get_info(struct ublk_dev * dev)1666aecda00SMing Lei static int ublk_ctrl_get_info(struct ublk_dev *dev)
1676aecda00SMing Lei {
1686aecda00SMing Lei 	struct ublk_ctrl_cmd_data data = {
1696aecda00SMing Lei 		.cmd_op	= UBLK_U_CMD_GET_DEV_INFO,
1706aecda00SMing Lei 		.flags	= CTRL_CMD_HAS_BUF,
1716aecda00SMing Lei 		.addr = (__u64) (uintptr_t) &dev->dev_info,
1726aecda00SMing Lei 		.len = sizeof(struct ublksrv_ctrl_dev_info),
1736aecda00SMing Lei 	};
1746aecda00SMing Lei 
1756aecda00SMing Lei 	return __ublk_ctrl_cmd(dev, &data);
1766aecda00SMing Lei }
1776aecda00SMing Lei 
ublk_ctrl_set_params(struct ublk_dev * dev,struct ublk_params * params)1786aecda00SMing Lei static int ublk_ctrl_set_params(struct ublk_dev *dev,
1796aecda00SMing Lei 		struct ublk_params *params)
1806aecda00SMing Lei {
1816aecda00SMing Lei 	struct ublk_ctrl_cmd_data data = {
1826aecda00SMing Lei 		.cmd_op	= UBLK_U_CMD_SET_PARAMS,
1836aecda00SMing Lei 		.flags	= CTRL_CMD_HAS_BUF,
1846aecda00SMing Lei 		.addr = (__u64) (uintptr_t) params,
1856aecda00SMing Lei 		.len = sizeof(*params),
1866aecda00SMing Lei 	};
1876aecda00SMing Lei 	params->len = sizeof(*params);
1886aecda00SMing Lei 	return __ublk_ctrl_cmd(dev, &data);
1896aecda00SMing Lei }
1906aecda00SMing Lei 
ublk_ctrl_get_params(struct ublk_dev * dev,struct ublk_params * params)1916aecda00SMing Lei static int ublk_ctrl_get_params(struct ublk_dev *dev,
1926aecda00SMing Lei 		struct ublk_params *params)
1936aecda00SMing Lei {
1946aecda00SMing Lei 	struct ublk_ctrl_cmd_data data = {
19553c95929SUday Shankar 		.cmd_op	= UBLK_U_CMD_GET_PARAMS,
1966aecda00SMing Lei 		.flags	= CTRL_CMD_HAS_BUF,
1976aecda00SMing Lei 		.addr = (__u64)params,
1986aecda00SMing Lei 		.len = sizeof(*params),
1996aecda00SMing Lei 	};
2006aecda00SMing Lei 
2016aecda00SMing Lei 	params->len = sizeof(*params);
2026aecda00SMing Lei 
2036aecda00SMing Lei 	return __ublk_ctrl_cmd(dev, &data);
2046aecda00SMing Lei }
2056aecda00SMing Lei 
/*
 * Issue UBLK_U_CMD_GET_FEATURES with @features as the command buffer.
 * Returns the __ublk_ctrl_cmd() result.
 */
static int ublk_ctrl_get_features(struct ublk_dev *dev,
		__u64 *features)
{
	struct ublk_ctrl_cmd_data get = { 0 };

	get.cmd_op = UBLK_U_CMD_GET_FEATURES;
	get.flags = CTRL_CMD_HAS_BUF;
	get.addr = (__u64) (uintptr_t) features;
	get.len = sizeof(*features);

	return __ublk_ctrl_cmd(dev, &get);
}
2186aecda00SMing Lei 
/*
 * Issue UBLK_U_CMD_UPDATE_SIZE for @dev, passing @nr_sects as data[0].
 * Returns the __ublk_ctrl_cmd() result.
 */
static int ublk_ctrl_update_size(struct ublk_dev *dev,
		__u64 nr_sects)
{
	struct ublk_ctrl_cmd_data update = { 0 };

	update.cmd_op = UBLK_U_CMD_UPDATE_SIZE;
	update.flags = CTRL_CMD_HAS_DATA;
	update.data[0] = nr_sects;

	return __ublk_ctrl_cmd(dev, &update);
}
230f40b1f26SMing Lei 
ublk_ctrl_quiesce_dev(struct ublk_dev * dev,unsigned int timeout_ms)231533c87e2SMing Lei static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev,
232533c87e2SMing Lei 				 unsigned int timeout_ms)
233533c87e2SMing Lei {
234533c87e2SMing Lei 	struct ublk_ctrl_cmd_data data = {
235533c87e2SMing Lei 		.cmd_op	= UBLK_U_CMD_QUIESCE_DEV,
236533c87e2SMing Lei 		.flags	= CTRL_CMD_HAS_DATA,
237533c87e2SMing Lei 	};
238533c87e2SMing Lei 
239533c87e2SMing Lei 	data.data[0] = timeout_ms;
240533c87e2SMing Lei 	return __ublk_ctrl_cmd(dev, &data);
241533c87e2SMing Lei }
242533c87e2SMing Lei 
ublk_dev_state_desc(struct ublk_dev * dev)2436aecda00SMing Lei static const char *ublk_dev_state_desc(struct ublk_dev *dev)
2446aecda00SMing Lei {
2456aecda00SMing Lei 	switch (dev->dev_info.state) {
2466aecda00SMing Lei 	case UBLK_S_DEV_DEAD:
2476aecda00SMing Lei 		return "DEAD";
2486aecda00SMing Lei 	case UBLK_S_DEV_LIVE:
2496aecda00SMing Lei 		return "LIVE";
2506aecda00SMing Lei 	case UBLK_S_DEV_QUIESCED:
2516aecda00SMing Lei 		return "QUIESCED";
2526aecda00SMing Lei 	default:
2536aecda00SMing Lei 		return "UNKNOWN";
2546aecda00SMing Lei 	};
2556aecda00SMing Lei }
2566aecda00SMing Lei 
/*
 * Format the CPUs contained in @set into @buf as "<cpu> <cpu> ... ".
 *
 * @buf always ends up NUL-terminated when @len is non-zero, including
 * when @set is empty. Output stops at the last CPU number that fits
 * completely; a partially written number is dropped rather than left
 * truncated, and nothing is ever written past @buf + @len.
 */
static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
{
	unsigned done = 0;
	int i;

	if (!len)
		return;
	buf[0] = '\0';

	for (i = 0; i < CPU_SETSIZE; i++) {
		int n;

		if (!CPU_ISSET(i, set))
			continue;

		n = snprintf(&buf[done], len - done, "%d ", i);
		if (n < 0 || (unsigned)n >= len - done) {
			/* does not fit: discard the partial entry and stop */
			buf[done] = '\0';
			break;
		}
		done += n;
	}
}
2672f0a692aSMing Lei 
/*
 * Reduce @set to its lowest-numbered CPU.
 *
 * Only the first CPU found in @set is kept and every later one is
 * cleared; an empty set is left empty. Auto affinity selection may be
 * tried in the future.
 */
static void ublk_adjust_affinity(cpu_set_t *set)
{
	int cpu = 0;

	/* locate the first CPU that is present; it is the one we keep */
	while (cpu < CPU_SETSIZE && !CPU_ISSET(cpu, set))
		cpu++;

	/* drop every CPU after it */
	for (cpu++; cpu < CPU_SETSIZE; cpu++) {
		if (CPU_ISSET(cpu, set))
			CPU_CLR(cpu, set);
	}
}
2872f0a692aSMing Lei 
2882f0a692aSMing Lei /* Caller must free the allocated buffer */
ublk_ctrl_get_affinity(struct ublk_dev * ctrl_dev,cpu_set_t ** ptr_buf)2892f0a692aSMing Lei static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
2902f0a692aSMing Lei {
2912f0a692aSMing Lei 	struct ublk_ctrl_cmd_data data = {
2922f0a692aSMing Lei 		.cmd_op	= UBLK_U_CMD_GET_QUEUE_AFFINITY,
2932f0a692aSMing Lei 		.flags	= CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
2942f0a692aSMing Lei 	};
2952f0a692aSMing Lei 	cpu_set_t *buf;
2962f0a692aSMing Lei 	int i, ret;
2972f0a692aSMing Lei 
2982f0a692aSMing Lei 	buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
2992f0a692aSMing Lei 	if (!buf)
3002f0a692aSMing Lei 		return -ENOMEM;
3012f0a692aSMing Lei 
3022f0a692aSMing Lei 	for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
3032f0a692aSMing Lei 		data.data[0] = i;
3042f0a692aSMing Lei 		data.len = sizeof(cpu_set_t);
3052f0a692aSMing Lei 		data.addr = (__u64)&buf[i];
3062f0a692aSMing Lei 
3072f0a692aSMing Lei 		ret = __ublk_ctrl_cmd(ctrl_dev, &data);
3082f0a692aSMing Lei 		if (ret < 0) {
3092f0a692aSMing Lei 			free(buf);
3102f0a692aSMing Lei 			return ret;
3112f0a692aSMing Lei 		}
3122f0a692aSMing Lei 		ublk_adjust_affinity(&buf[i]);
3132f0a692aSMing Lei 	}
3142f0a692aSMing Lei 
3152f0a692aSMing Lei 	*ptr_buf = buf;
3162f0a692aSMing Lei 	return 0;
3172f0a692aSMing Lei }
3182f0a692aSMing Lei 
ublk_ctrl_dump(struct ublk_dev * dev)3196aecda00SMing Lei static void ublk_ctrl_dump(struct ublk_dev *dev)
3206aecda00SMing Lei {
3216aecda00SMing Lei 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
3226aecda00SMing Lei 	struct ublk_params p;
3232f0a692aSMing Lei 	cpu_set_t *affinity;
3246aecda00SMing Lei 	int ret;
3256aecda00SMing Lei 
3266aecda00SMing Lei 	ret = ublk_ctrl_get_params(dev, &p);
3276aecda00SMing Lei 	if (ret < 0) {
328f8554f51SUday Shankar 		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
3296aecda00SMing Lei 		return;
3306aecda00SMing Lei 	}
3316aecda00SMing Lei 
3322f0a692aSMing Lei 	ret = ublk_ctrl_get_affinity(dev, &affinity);
3332f0a692aSMing Lei 	if (ret < 0) {
3342f0a692aSMing Lei 		ublk_err("failed to get affinity %m\n");
3352f0a692aSMing Lei 		return;
3362f0a692aSMing Lei 	}
3372f0a692aSMing Lei 
3386aecda00SMing Lei 	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
3396aecda00SMing Lei 			info->dev_id, info->nr_hw_queues, info->queue_depth,
3406aecda00SMing Lei 			1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
3416aecda00SMing Lei 	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
3426aecda00SMing Lei 			info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
3436aecda00SMing Lei 			ublk_dev_state_desc(dev));
3442f0a692aSMing Lei 
3452f0a692aSMing Lei 	if (affinity) {
3462f0a692aSMing Lei 		char buf[512];
3472f0a692aSMing Lei 		int i;
3482f0a692aSMing Lei 
3492f0a692aSMing Lei 		for (i = 0; i < info->nr_hw_queues; i++) {
3502f0a692aSMing Lei 			ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
351b9848ca7SUday Shankar 			printf("\tqueue %u: affinity(%s)\n",
352b9848ca7SUday Shankar 					i, buf);
3532f0a692aSMing Lei 		}
3542f0a692aSMing Lei 		free(affinity);
3552f0a692aSMing Lei 	}
3562f0a692aSMing Lei 
3576aecda00SMing Lei 	fflush(stdout);
3586aecda00SMing Lei }
3596aecda00SMing Lei 
ublk_ctrl_deinit(struct ublk_dev * dev)3606aecda00SMing Lei static void ublk_ctrl_deinit(struct ublk_dev *dev)
3616aecda00SMing Lei {
3626aecda00SMing Lei 	close(dev->ctrl_fd);
3636aecda00SMing Lei 	free(dev);
3646aecda00SMing Lei }
3656aecda00SMing Lei 
ublk_ctrl_init(void)3666aecda00SMing Lei static struct ublk_dev *ublk_ctrl_init(void)
3676aecda00SMing Lei {
3686aecda00SMing Lei 	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
3696aecda00SMing Lei 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
3706aecda00SMing Lei 	int ret;
3716aecda00SMing Lei 
3726aecda00SMing Lei 	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
3736aecda00SMing Lei 	if (dev->ctrl_fd < 0) {
3746aecda00SMing Lei 		free(dev);
3756aecda00SMing Lei 		return NULL;
3766aecda00SMing Lei 	}
3776aecda00SMing Lei 
3786aecda00SMing Lei 	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
3796aecda00SMing Lei 
3806aecda00SMing Lei 	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
3816aecda00SMing Lei 			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
3826aecda00SMing Lei 	if (ret < 0) {
3836aecda00SMing Lei 		ublk_err("queue_init: %s\n", strerror(-ret));
3846aecda00SMing Lei 		free(dev);
3856aecda00SMing Lei 		return NULL;
3866aecda00SMing Lei 	}
3876aecda00SMing Lei 	dev->nr_fds = 1;
3886aecda00SMing Lei 
3896aecda00SMing Lei 	return dev;
3906aecda00SMing Lei }
3916aecda00SMing Lei 
__ublk_queue_cmd_buf_sz(unsigned depth)3926aecda00SMing Lei static int __ublk_queue_cmd_buf_sz(unsigned depth)
3936aecda00SMing Lei {
3946aecda00SMing Lei 	int size =  depth * sizeof(struct ublksrv_io_desc);
3956aecda00SMing Lei 	unsigned int page_sz = getpagesize();
3966aecda00SMing Lei 
3976aecda00SMing Lei 	return round_up(size, page_sz);
3986aecda00SMing Lei }
3996aecda00SMing Lei 
ublk_queue_max_cmd_buf_sz(void)4006aecda00SMing Lei static int ublk_queue_max_cmd_buf_sz(void)
4016aecda00SMing Lei {
4026aecda00SMing Lei 	return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
4036aecda00SMing Lei }
4046aecda00SMing Lei 
ublk_queue_cmd_buf_sz(struct ublk_queue * q)4056aecda00SMing Lei static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
4066aecda00SMing Lei {
4076aecda00SMing Lei 	return __ublk_queue_cmd_buf_sz(q->q_depth);
4086aecda00SMing Lei }
4096aecda00SMing Lei 
ublk_queue_deinit(struct ublk_queue * q)4106aecda00SMing Lei static void ublk_queue_deinit(struct ublk_queue *q)
4116aecda00SMing Lei {
4126aecda00SMing Lei 	int i;
4136aecda00SMing Lei 	int nr_ios = q->q_depth;
4146aecda00SMing Lei 
4158f75ba28SUday Shankar 	if (q->io_cmd_buf)
4168f75ba28SUday Shankar 		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
4178f75ba28SUday Shankar 
4188f75ba28SUday Shankar 	for (i = 0; i < nr_ios; i++)
4198f75ba28SUday Shankar 		free(q->ios[i].buf_addr);
4208f75ba28SUday Shankar }
4218f75ba28SUday Shankar 
ublk_thread_deinit(struct ublk_thread * t)422b9848ca7SUday Shankar static void ublk_thread_deinit(struct ublk_thread *t)
4238f75ba28SUday Shankar {
424b9848ca7SUday Shankar 	io_uring_unregister_buffers(&t->ring);
4258f75ba28SUday Shankar 
426b9848ca7SUday Shankar 	io_uring_unregister_ring_fd(&t->ring);
427bedc9cbcSMing Lei 
428b9848ca7SUday Shankar 	if (t->ring.ring_fd > 0) {
429b9848ca7SUday Shankar 		io_uring_unregister_files(&t->ring);
430b9848ca7SUday Shankar 		close(t->ring.ring_fd);
431b9848ca7SUday Shankar 		t->ring.ring_fd = -1;
4326aecda00SMing Lei 	}
4336aecda00SMing Lei }
4346aecda00SMing Lei 
ublk_queue_init(struct ublk_queue * q,unsigned extra_flags)4356f1a182aSMing Lei static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
4366aecda00SMing Lei {
4376aecda00SMing Lei 	struct ublk_dev *dev = q->dev;
4386aecda00SMing Lei 	int depth = dev->dev_info.queue_depth;
4398f75ba28SUday Shankar 	int i;
4406aecda00SMing Lei 	int cmd_buf_size, io_buf_size;
4416aecda00SMing Lei 	unsigned long off;
4426aecda00SMing Lei 
4436aecda00SMing Lei 	q->tgt_ops = dev->tgt.ops;
4446aecda00SMing Lei 	q->state = 0;
4456aecda00SMing Lei 	q->q_depth = depth;
4466aecda00SMing Lei 
4478ccebc19SMing Lei 	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
448bedc9cbcSMing Lei 		q->state |= UBLKSRV_NO_BUF;
4498ccebc19SMing Lei 		if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
450bedc9cbcSMing Lei 			q->state |= UBLKSRV_ZC;
4518ccebc19SMing Lei 		if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
4528ccebc19SMing Lei 			q->state |= UBLKSRV_AUTO_BUF_REG;
453bedc9cbcSMing Lei 	}
4546f1a182aSMing Lei 	q->state |= extra_flags;
455bedc9cbcSMing Lei 
4566aecda00SMing Lei 	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
4576aecda00SMing Lei 	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
45825aaa813SCaleb Sander Mateos 	q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
4596aecda00SMing Lei 			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
4606aecda00SMing Lei 	if (q->io_cmd_buf == MAP_FAILED) {
4616aecda00SMing Lei 		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
4626aecda00SMing Lei 				q->dev->dev_info.dev_id, q->q_id);
4636aecda00SMing Lei 		goto fail;
4646aecda00SMing Lei 	}
4656aecda00SMing Lei 
4666aecda00SMing Lei 	io_buf_size = dev->dev_info.max_io_buf_bytes;
4676aecda00SMing Lei 	for (i = 0; i < q->q_depth; i++) {
4686aecda00SMing Lei 		q->ios[i].buf_addr = NULL;
4696aecda00SMing Lei 		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
470b9848ca7SUday Shankar 		q->ios[i].tag = i;
4716aecda00SMing Lei 
4726aecda00SMing Lei 		if (q->state & UBLKSRV_NO_BUF)
4736aecda00SMing Lei 			continue;
4746aecda00SMing Lei 
4756aecda00SMing Lei 		if (posix_memalign((void **)&q->ios[i].buf_addr,
4766aecda00SMing Lei 					getpagesize(), io_buf_size)) {
4776aecda00SMing Lei 			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
4786aecda00SMing Lei 					dev->dev_info.dev_id, q->q_id, i);
4796aecda00SMing Lei 			goto fail;
4806aecda00SMing Lei 		}
4816aecda00SMing Lei 	}
4826aecda00SMing Lei 
4838f75ba28SUday Shankar 	return 0;
4848f75ba28SUday Shankar  fail:
4858f75ba28SUday Shankar 	ublk_queue_deinit(q);
4868f75ba28SUday Shankar 	ublk_err("ublk dev %d queue %d failed\n",
4878f75ba28SUday Shankar 			dev->dev_info.dev_id, q->q_id);
4888f75ba28SUday Shankar 	return -ENOMEM;
4898f75ba28SUday Shankar }
4908f75ba28SUday Shankar 
ublk_thread_init(struct ublk_thread * t)491b9848ca7SUday Shankar static int ublk_thread_init(struct ublk_thread *t)
4928f75ba28SUday Shankar {
493b9848ca7SUday Shankar 	struct ublk_dev *dev = t->dev;
4948f75ba28SUday Shankar 	int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
4958f75ba28SUday Shankar 	int ret;
4968f75ba28SUday Shankar 
497b9848ca7SUday Shankar 	ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth,
49862867a04SMing Lei 			IORING_SETUP_COOP_TASKRUN |
49962867a04SMing Lei 			IORING_SETUP_SINGLE_ISSUER |
50062867a04SMing Lei 			IORING_SETUP_DEFER_TASKRUN);
5016aecda00SMing Lei 	if (ret < 0) {
502b9848ca7SUday Shankar 		ublk_err("ublk dev %d thread %d setup io_uring failed %d\n",
503b9848ca7SUday Shankar 				dev->dev_info.dev_id, t->idx, ret);
5046aecda00SMing Lei 		goto fail;
5056aecda00SMing Lei 	}
5066aecda00SMing Lei 
5078ccebc19SMing Lei 	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
508abe54c16SUday Shankar 		unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues;
509abe54c16SUday Shankar 		unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads;
510abe54c16SUday Shankar 		max_nr_ios_per_thread += !!(nr_ios % dev->nthreads);
511b9848ca7SUday Shankar 		ret = io_uring_register_buffers_sparse(
512abe54c16SUday Shankar 			&t->ring, max_nr_ios_per_thread);
513bedc9cbcSMing Lei 		if (ret) {
514b9848ca7SUday Shankar 			ublk_err("ublk dev %d thread %d register spare buffers failed %d",
515b9848ca7SUday Shankar 					dev->dev_info.dev_id, t->idx, ret);
516bedc9cbcSMing Lei 			goto fail;
517bedc9cbcSMing Lei 		}
518bedc9cbcSMing Lei 	}
519bedc9cbcSMing Lei 
520b9848ca7SUday Shankar 	io_uring_register_ring_fd(&t->ring);
5216aecda00SMing Lei 
522b9848ca7SUday Shankar 	ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
5236aecda00SMing Lei 	if (ret) {
524b9848ca7SUday Shankar 		ublk_err("ublk dev %d thread %d register files failed %d\n",
525b9848ca7SUday Shankar 				t->dev->dev_info.dev_id, t->idx, ret);
5266aecda00SMing Lei 		goto fail;
5276aecda00SMing Lei 	}
5286aecda00SMing Lei 
5296aecda00SMing Lei 	return 0;
5306aecda00SMing Lei fail:
531b9848ca7SUday Shankar 	ublk_thread_deinit(t);
532b9848ca7SUday Shankar 	ublk_err("ublk dev %d thread %d init failed\n",
533b9848ca7SUday Shankar 			dev->dev_info.dev_id, t->idx);
5346aecda00SMing Lei 	return -ENOMEM;
5356aecda00SMing Lei }
5366aecda00SMing Lei 
537ffde32a4SMing Lei #define WAIT_USEC 	100000
538ffde32a4SMing Lei #define MAX_WAIT_USEC 	(3 * 1000000)
ublk_dev_prep(const struct dev_ctx * ctx,struct ublk_dev * dev)5398842b72aSMing Lei static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
5406aecda00SMing Lei {
5416aecda00SMing Lei 	int dev_id = dev->dev_info.dev_id;
542ffde32a4SMing Lei 	unsigned int wait_usec = 0;
543ffde32a4SMing Lei 	int ret = 0, fd = -1;
5446aecda00SMing Lei 	char buf[64];
5456aecda00SMing Lei 
5466aecda00SMing Lei 	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
547ffde32a4SMing Lei 
548ffde32a4SMing Lei 	while (wait_usec < MAX_WAIT_USEC) {
549ffde32a4SMing Lei 		fd = open(buf, O_RDWR);
550ffde32a4SMing Lei 		if (fd >= 0)
551ffde32a4SMing Lei 			break;
552ffde32a4SMing Lei 		usleep(WAIT_USEC);
553ffde32a4SMing Lei 		wait_usec += WAIT_USEC;
554ffde32a4SMing Lei 	}
555ffde32a4SMing Lei 	if (fd < 0) {
556ffde32a4SMing Lei 		ublk_err("can't open %s %s\n", buf, strerror(errno));
557ffde32a4SMing Lei 		return -1;
5586aecda00SMing Lei 	}
5596aecda00SMing Lei 
560ffde32a4SMing Lei 	dev->fds[0] = fd;
5616aecda00SMing Lei 	if (dev->tgt.ops->init_tgt)
5628842b72aSMing Lei 		ret = dev->tgt.ops->init_tgt(ctx, dev);
563ffde32a4SMing Lei 	if (ret)
5646aecda00SMing Lei 		close(dev->fds[0]);
5656aecda00SMing Lei 	return ret;
5666aecda00SMing Lei }
5676aecda00SMing Lei 
ublk_dev_unprep(struct ublk_dev * dev)5686aecda00SMing Lei static void ublk_dev_unprep(struct ublk_dev *dev)
5696aecda00SMing Lei {
5706aecda00SMing Lei 	if (dev->tgt.ops->deinit_tgt)
5716aecda00SMing Lei 		dev->tgt.ops->deinit_tgt(dev);
5726aecda00SMing Lei 	close(dev->fds[0]);
5736aecda00SMing Lei }
5746aecda00SMing Lei 
ublk_set_auto_buf_reg(const struct ublk_queue * q,struct io_uring_sqe * sqe,unsigned short tag)5756f1a182aSMing Lei static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
5766f1a182aSMing Lei 				  struct io_uring_sqe *sqe,
5776f1a182aSMing Lei 				  unsigned short tag)
5788ccebc19SMing Lei {
5796f1a182aSMing Lei 	struct ublk_auto_buf_reg buf = {};
5806f1a182aSMing Lei 
5816f1a182aSMing Lei 	if (q->tgt_ops->buf_index)
5826f1a182aSMing Lei 		buf.index = q->tgt_ops->buf_index(q, tag);
5836f1a182aSMing Lei 	else
584abe54c16SUday Shankar 		buf.index = q->ios[tag].buf_index;
5856f1a182aSMing Lei 
5866f1a182aSMing Lei 	if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
5876f1a182aSMing Lei 		buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
5888ccebc19SMing Lei 
5898ccebc19SMing Lei 	sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
5908ccebc19SMing Lei }
5918ccebc19SMing Lei 
ublk_queue_io_cmd(struct ublk_io * io)592b9848ca7SUday Shankar int ublk_queue_io_cmd(struct ublk_io *io)
5936aecda00SMing Lei {
594b9848ca7SUday Shankar 	struct ublk_thread *t = io->t;
595b9848ca7SUday Shankar 	struct ublk_queue *q = ublk_io_to_queue(io);
5966aecda00SMing Lei 	struct ublksrv_io_cmd *cmd;
597f2639ed1SMing Lei 	struct io_uring_sqe *sqe[1];
5986aecda00SMing Lei 	unsigned int cmd_op = 0;
5996aecda00SMing Lei 	__u64 user_data;
6006aecda00SMing Lei 
6016aecda00SMing Lei 	/* only freed io can be issued */
6026aecda00SMing Lei 	if (!(io->flags & UBLKSRV_IO_FREE))
6036aecda00SMing Lei 		return 0;
6046aecda00SMing Lei 
605730d8379SMing Lei 	/*
606730d8379SMing Lei 	 * we issue because we need either fetching or committing or
607730d8379SMing Lei 	 * getting data
608730d8379SMing Lei 	 */
6096aecda00SMing Lei 	if (!(io->flags &
610730d8379SMing Lei 		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
6116aecda00SMing Lei 		return 0;
6126aecda00SMing Lei 
613730d8379SMing Lei 	if (io->flags & UBLKSRV_NEED_GET_DATA)
614730d8379SMing Lei 		cmd_op = UBLK_U_IO_NEED_GET_DATA;
615730d8379SMing Lei 	else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
6166aecda00SMing Lei 		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
6176aecda00SMing Lei 	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
6186aecda00SMing Lei 		cmd_op = UBLK_U_IO_FETCH_REQ;
6196aecda00SMing Lei 
620b9848ca7SUday Shankar 	if (io_uring_sq_space_left(&t->ring) < 1)
621b9848ca7SUday Shankar 		io_uring_submit(&t->ring);
6226aecda00SMing Lei 
623b9848ca7SUday Shankar 	ublk_io_alloc_sqes(io, sqe, 1);
624f2639ed1SMing Lei 	if (!sqe[0]) {
625b9848ca7SUday Shankar 		ublk_err("%s: run out of sqe. thread %u, tag %d\n",
626b9848ca7SUday Shankar 				__func__, t->idx, io->tag);
6276aecda00SMing Lei 		return -1;
6286aecda00SMing Lei 	}
6296aecda00SMing Lei 
630f2639ed1SMing Lei 	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);
6316aecda00SMing Lei 
6326aecda00SMing Lei 	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
6336aecda00SMing Lei 		cmd->result = io->result;
6346aecda00SMing Lei 
6356aecda00SMing Lei 	/* These fields should be written once, never change */
636f2639ed1SMing Lei 	ublk_set_sqe_cmd_op(sqe[0], cmd_op);
637f2639ed1SMing Lei 	sqe[0]->fd		= 0;	/* dev->fds[0] */
638f2639ed1SMing Lei 	sqe[0]->opcode	= IORING_OP_URING_CMD;
639f2639ed1SMing Lei 	sqe[0]->flags	= IOSQE_FIXED_FILE;
640f2639ed1SMing Lei 	sqe[0]->rw_flags	= 0;
641b9848ca7SUday Shankar 	cmd->tag	= io->tag;
6426aecda00SMing Lei 	cmd->q_id	= q->q_id;
6436aecda00SMing Lei 	if (!(q->state & UBLKSRV_NO_BUF))
6446aecda00SMing Lei 		cmd->addr	= (__u64) (uintptr_t) io->buf_addr;
6456aecda00SMing Lei 	else
6466aecda00SMing Lei 		cmd->addr	= 0;
6476aecda00SMing Lei 
6488ccebc19SMing Lei 	if (q->state & UBLKSRV_AUTO_BUF_REG)
649b9848ca7SUday Shankar 		ublk_set_auto_buf_reg(q, sqe[0], io->tag);
6508ccebc19SMing Lei 
651b9848ca7SUday Shankar 	user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0);
652f2639ed1SMing Lei 	io_uring_sqe_set_data64(sqe[0], user_data);
6536aecda00SMing Lei 
6546aecda00SMing Lei 	io->flags = 0;
6556aecda00SMing Lei 
656b9848ca7SUday Shankar 	t->cmd_inflight += 1;
6576aecda00SMing Lei 
658b9848ca7SUday Shankar 	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n",
659b9848ca7SUday Shankar 			__func__, t->idx, q->q_id, io->tag, cmd_op,
660b9848ca7SUday Shankar 			io->flags, !!(t->state & UBLKSRV_THREAD_STOPPING));
6616aecda00SMing Lei 	return 1;
6626aecda00SMing Lei }
6636aecda00SMing Lei 
ublk_submit_fetch_commands(struct ublk_thread * t)664b9848ca7SUday Shankar static void ublk_submit_fetch_commands(struct ublk_thread *t)
6656aecda00SMing Lei {
666abe54c16SUday Shankar 	struct ublk_queue *q;
667abe54c16SUday Shankar 	struct ublk_io *io;
668abe54c16SUday Shankar 	int i = 0, j = 0;
669abe54c16SUday Shankar 
670abe54c16SUday Shankar 	if (t->dev->per_io_tasks) {
671b9848ca7SUday Shankar 		/*
672abe54c16SUday Shankar 		 * Lexicographically order all the (qid,tag) pairs, with
673abe54c16SUday Shankar 		 * qid taking priority (so (1,0) > (0,1)). Then make
674abe54c16SUday Shankar 		 * this thread the daemon for every Nth entry in this
675abe54c16SUday Shankar 		 * list (N is the number of threads), starting at this
676abe54c16SUday Shankar 		 * thread's index. This ensures that each queue is
677abe54c16SUday Shankar 		 * handled by as many ublk server threads as possible,
678abe54c16SUday Shankar 		 * so that load that is concentrated on one or a few
679abe54c16SUday Shankar 		 * queues can make use of all ublk server threads.
680abe54c16SUday Shankar 		 */
681abe54c16SUday Shankar 		const struct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info;
682abe54c16SUday Shankar 		int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth;
683abe54c16SUday Shankar 		for (i = t->idx; i < nr_ios; i += t->dev->nthreads) {
684abe54c16SUday Shankar 			int q_id = i / dinfo->queue_depth;
685abe54c16SUday Shankar 			int tag = i % dinfo->queue_depth;
686abe54c16SUday Shankar 			q = &t->dev->q[q_id];
687abe54c16SUday Shankar 			io = &q->ios[tag];
688abe54c16SUday Shankar 			io->t = t;
689abe54c16SUday Shankar 			io->buf_index = j++;
690abe54c16SUday Shankar 			ublk_queue_io_cmd(io);
691abe54c16SUday Shankar 		}
692abe54c16SUday Shankar 	} else {
693abe54c16SUday Shankar 		/*
694abe54c16SUday Shankar 		 * Service exclusively the queue whose q_id matches our
695abe54c16SUday Shankar 		 * thread index.
696b9848ca7SUday Shankar 		 */
697b9848ca7SUday Shankar 		struct ublk_queue *q = &t->dev->q[t->idx];
698b9848ca7SUday Shankar 		for (i = 0; i < q->q_depth; i++) {
699b9848ca7SUday Shankar 			io = &q->ios[i];
700b9848ca7SUday Shankar 			io->t = t;
701abe54c16SUday Shankar 			io->buf_index = i;
702b9848ca7SUday Shankar 			ublk_queue_io_cmd(io);
703b9848ca7SUday Shankar 		}
7046aecda00SMing Lei 	}
705abe54c16SUday Shankar }
7066aecda00SMing Lei 
ublk_thread_is_idle(struct ublk_thread * t)707b9848ca7SUday Shankar static int ublk_thread_is_idle(struct ublk_thread *t)
7086aecda00SMing Lei {
709b9848ca7SUday Shankar 	return !io_uring_sq_ready(&t->ring) && !t->io_inflight;
7106aecda00SMing Lei }
7116aecda00SMing Lei 
ublk_thread_is_done(struct ublk_thread * t)712b9848ca7SUday Shankar static int ublk_thread_is_done(struct ublk_thread *t)
7136aecda00SMing Lei {
714b9848ca7SUday Shankar 	return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t);
7156aecda00SMing Lei }
7166aecda00SMing Lei 
ublksrv_handle_tgt_cqe(struct ublk_queue * q,struct io_uring_cqe * cqe)7176aecda00SMing Lei static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
7186aecda00SMing Lei 		struct io_uring_cqe *cqe)
7196aecda00SMing Lei {
7206aecda00SMing Lei 	unsigned tag = user_data_to_tag(cqe->user_data);
7216aecda00SMing Lei 
7226aecda00SMing Lei 	if (cqe->res < 0 && cqe->res != -EAGAIN)
7236aecda00SMing Lei 		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
7246aecda00SMing Lei 			__func__, cqe->res, q->q_id,
7256aecda00SMing Lei 			user_data_to_tag(cqe->user_data),
7266aecda00SMing Lei 			user_data_to_op(cqe->user_data));
7276aecda00SMing Lei 
7286aecda00SMing Lei 	if (q->tgt_ops->tgt_io_done)
7296aecda00SMing Lei 		q->tgt_ops->tgt_io_done(q, tag, cqe);
7306aecda00SMing Lei }
7316aecda00SMing Lei 
ublk_handle_cqe(struct ublk_thread * t,struct io_uring_cqe * cqe,void * data)732b9848ca7SUday Shankar static void ublk_handle_cqe(struct ublk_thread *t,
7336aecda00SMing Lei 		struct io_uring_cqe *cqe, void *data)
7346aecda00SMing Lei {
735b9848ca7SUday Shankar 	struct ublk_dev *dev = t->dev;
736bf098d72SUday Shankar 	unsigned q_id = user_data_to_q_id(cqe->user_data);
737bf098d72SUday Shankar 	struct ublk_queue *q = &dev->q[q_id];
7386aecda00SMing Lei 	unsigned tag = user_data_to_tag(cqe->user_data);
7396aecda00SMing Lei 	unsigned cmd_op = user_data_to_op(cqe->user_data);
7406aecda00SMing Lei 	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
741b9848ca7SUday Shankar 		!(t->state & UBLKSRV_THREAD_STOPPING);
7426aecda00SMing Lei 	struct ublk_io *io;
7436aecda00SMing Lei 
7446aecda00SMing Lei 	if (cqe->res < 0 && cqe->res != -ENODEV)
7456aecda00SMing Lei 		ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
7466aecda00SMing Lei 				cqe->res, cqe->user_data, q->state);
7476aecda00SMing Lei 
748bedc9cbcSMing Lei 	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
7496aecda00SMing Lei 			__func__, cqe->res, q->q_id, tag, cmd_op,
7506aecda00SMing Lei 			is_target_io(cqe->user_data),
751bedc9cbcSMing Lei 			user_data_to_tgt_data(cqe->user_data),
752b9848ca7SUday Shankar 			(t->state & UBLKSRV_THREAD_STOPPING));
7536aecda00SMing Lei 
7546aecda00SMing Lei 	/* Don't retrieve io in case of target io */
7556aecda00SMing Lei 	if (is_target_io(cqe->user_data)) {
7566aecda00SMing Lei 		ublksrv_handle_tgt_cqe(q, cqe);
7576aecda00SMing Lei 		return;
7586aecda00SMing Lei 	}
7596aecda00SMing Lei 
7606aecda00SMing Lei 	io = &q->ios[tag];
761b9848ca7SUday Shankar 	t->cmd_inflight--;
7626aecda00SMing Lei 
7636aecda00SMing Lei 	if (!fetch) {
764b9848ca7SUday Shankar 		t->state |= UBLKSRV_THREAD_STOPPING;
7656aecda00SMing Lei 		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
7666aecda00SMing Lei 	}
7676aecda00SMing Lei 
7686aecda00SMing Lei 	if (cqe->res == UBLK_IO_RES_OK) {
7696aecda00SMing Lei 		assert(tag < q->q_depth);
7706aecda00SMing Lei 		if (q->tgt_ops->queue_io)
7716aecda00SMing Lei 			q->tgt_ops->queue_io(q, tag);
772730d8379SMing Lei 	} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
773730d8379SMing Lei 		io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
774b9848ca7SUday Shankar 		ublk_queue_io_cmd(io);
7756aecda00SMing Lei 	} else {
7766aecda00SMing Lei 		/*
7776aecda00SMing Lei 		 * COMMIT_REQ will be completed immediately since no fetching
7786aecda00SMing Lei 		 * piggyback is required.
7796aecda00SMing Lei 		 *
7806aecda00SMing Lei 		 * Marking IO_FREE only, then this io won't be issued since
7816aecda00SMing Lei 		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
7826aecda00SMing Lei 		 *
7836aecda00SMing Lei 		 * */
7846aecda00SMing Lei 		io->flags = UBLKSRV_IO_FREE;
7856aecda00SMing Lei 	}
7866aecda00SMing Lei }
7876aecda00SMing Lei 
ublk_reap_events_uring(struct ublk_thread * t)788b9848ca7SUday Shankar static int ublk_reap_events_uring(struct ublk_thread *t)
7896aecda00SMing Lei {
7906aecda00SMing Lei 	struct io_uring_cqe *cqe;
7916aecda00SMing Lei 	unsigned head;
7926aecda00SMing Lei 	int count = 0;
7936aecda00SMing Lei 
794b9848ca7SUday Shankar 	io_uring_for_each_cqe(&t->ring, head, cqe) {
795b9848ca7SUday Shankar 		ublk_handle_cqe(t, cqe, NULL);
7966aecda00SMing Lei 		count += 1;
7976aecda00SMing Lei 	}
798b9848ca7SUday Shankar 	io_uring_cq_advance(&t->ring, count);
7996aecda00SMing Lei 
8006aecda00SMing Lei 	return count;
8016aecda00SMing Lei }
8026aecda00SMing Lei 
ublk_process_io(struct ublk_thread * t)803b9848ca7SUday Shankar static int ublk_process_io(struct ublk_thread *t)
8046aecda00SMing Lei {
8056aecda00SMing Lei 	int ret, reapped;
8066aecda00SMing Lei 
807b9848ca7SUday Shankar 	ublk_dbg(UBLK_DBG_THREAD, "dev%d-t%u: to_submit %d inflight cmd %u stopping %d\n",
808b9848ca7SUday Shankar 				t->dev->dev_info.dev_id,
809b9848ca7SUday Shankar 				t->idx, io_uring_sq_ready(&t->ring),
810b9848ca7SUday Shankar 				t->cmd_inflight,
811b9848ca7SUday Shankar 				(t->state & UBLKSRV_THREAD_STOPPING));
8126aecda00SMing Lei 
813b9848ca7SUday Shankar 	if (ublk_thread_is_done(t))
8146aecda00SMing Lei 		return -ENODEV;
8156aecda00SMing Lei 
816b9848ca7SUday Shankar 	ret = io_uring_submit_and_wait(&t->ring, 1);
817b9848ca7SUday Shankar 	reapped = ublk_reap_events_uring(t);
8186aecda00SMing Lei 
819b9848ca7SUday Shankar 	ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n",
820b9848ca7SUday Shankar 			ret, reapped, (t->state & UBLKSRV_THREAD_STOPPING),
821b9848ca7SUday Shankar 			(t->state & UBLKSRV_THREAD_IDLE));
8226aecda00SMing Lei 
8236aecda00SMing Lei 	return reapped;
8246aecda00SMing Lei }
8256aecda00SMing Lei 
ublk_thread_set_sched_affinity(const struct ublk_thread * t,cpu_set_t * cpuset)826b9848ca7SUday Shankar static void ublk_thread_set_sched_affinity(const struct ublk_thread *t,
8272f0a692aSMing Lei 		cpu_set_t *cpuset)
8282f0a692aSMing Lei {
8292f0a692aSMing Lei         if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
830b9848ca7SUday Shankar 		ublk_err("ublk dev %u thread %u set affinity failed",
831b9848ca7SUday Shankar 				t->dev->dev_info.dev_id, t->idx);
8322f0a692aSMing Lei }
8332f0a692aSMing Lei 
/*
 * Start-up arguments handed to ublk_io_handler_fn() for one ublk
 * server thread.
 */
struct ublk_thread_info {
	/* device the thread belongs to */
	struct ublk_dev *dev;
	/* index of the thread's slot in dev->threads[] */
	unsigned idx;
	/* posted by the thread once its initialization has succeeded */
	sem_t *ready;
	/* CPU set to pin the thread to, or NULL to leave affinity alone */
	cpu_set_t *affinity;
};
8402f0a692aSMing Lei 
ublk_io_handler_fn(void * data)8416aecda00SMing Lei static void *ublk_io_handler_fn(void *data)
8426aecda00SMing Lei {
843b9848ca7SUday Shankar 	struct ublk_thread_info *info = data;
844b9848ca7SUday Shankar 	struct ublk_thread *t = &info->dev->threads[info->idx];
845b9848ca7SUday Shankar 	int dev_id = info->dev->dev_info.dev_id;
8466aecda00SMing Lei 	int ret;
8476aecda00SMing Lei 
848b9848ca7SUday Shankar 	t->dev = info->dev;
849b9848ca7SUday Shankar 	t->idx = info->idx;
850b9848ca7SUday Shankar 
851b9848ca7SUday Shankar 	ret = ublk_thread_init(t);
8526aecda00SMing Lei 	if (ret) {
853b9848ca7SUday Shankar 		ublk_err("ublk dev %d thread %u init failed\n",
854b9848ca7SUday Shankar 				dev_id, t->idx);
8556aecda00SMing Lei 		return NULL;
8566aecda00SMing Lei 	}
8572f0a692aSMing Lei 	/* IO perf is sensitive with queue pthread affinity on NUMA machine*/
858abe54c16SUday Shankar 	if (info->affinity)
859b9848ca7SUday Shankar 		ublk_thread_set_sched_affinity(t, info->affinity);
860b9848ca7SUday Shankar 	sem_post(info->ready);
8612f0a692aSMing Lei 
862b9848ca7SUday Shankar 	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
863b9848ca7SUday Shankar 			gettid(), dev_id, t->idx);
8646aecda00SMing Lei 
8656aecda00SMing Lei 	/* submit all io commands to ublk driver */
866b9848ca7SUday Shankar 	ublk_submit_fetch_commands(t);
8676aecda00SMing Lei 	do {
868b9848ca7SUday Shankar 		if (ublk_process_io(t) < 0)
8696aecda00SMing Lei 			break;
8706aecda00SMing Lei 	} while (1);
8716aecda00SMing Lei 
872b9848ca7SUday Shankar 	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n",
873b9848ca7SUday Shankar 		 gettid(), dev_id, t->idx);
874b9848ca7SUday Shankar 	ublk_thread_deinit(t);
8756aecda00SMing Lei 	return NULL;
8766aecda00SMing Lei }
8776aecda00SMing Lei 
ublk_set_parameters(struct ublk_dev * dev)8786aecda00SMing Lei static void ublk_set_parameters(struct ublk_dev *dev)
8796aecda00SMing Lei {
8806aecda00SMing Lei 	int ret;
8816aecda00SMing Lei 
8826aecda00SMing Lei 	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
8836aecda00SMing Lei 	if (ret)
8846aecda00SMing Lei 		ublk_err("dev %d set basic parameter failed %d\n",
8856aecda00SMing Lei 				dev->dev_info.dev_id, ret);
8866aecda00SMing Lei }
8876aecda00SMing Lei 
ublk_send_dev_event(const struct dev_ctx * ctx,struct ublk_dev * dev,int dev_id)8882f0a692aSMing Lei static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
8896aecda00SMing Lei {
8906aecda00SMing Lei 	uint64_t id;
8916aecda00SMing Lei 	int evtfd = ctx->_evtfd;
8926aecda00SMing Lei 
8936aecda00SMing Lei 	if (evtfd < 0)
8946aecda00SMing Lei 		return -EBADF;
8956aecda00SMing Lei 
8966aecda00SMing Lei 	if (dev_id >= 0)
8976aecda00SMing Lei 		id = dev_id + 1;
8986aecda00SMing Lei 	else
8996aecda00SMing Lei 		id = ERROR_EVTFD_DEVID;
9006aecda00SMing Lei 
9012f0a692aSMing Lei 	if (dev && ctx->shadow_dev)
9022f0a692aSMing Lei 		memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));
9032f0a692aSMing Lei 
9046aecda00SMing Lei 	if (write(evtfd, &id, sizeof(id)) != sizeof(id))
9056aecda00SMing Lei 		return -EINVAL;
9066aecda00SMing Lei 
907573840abSMing Lei 	close(evtfd);
9082f0a692aSMing Lei 	shmdt(ctx->shadow_dev);
909573840abSMing Lei 
9106aecda00SMing Lei 	return 0;
9116aecda00SMing Lei }
9126aecda00SMing Lei 
9136aecda00SMing Lei 
ublk_start_daemon(const struct dev_ctx * ctx,struct ublk_dev * dev)9146aecda00SMing Lei static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
9156aecda00SMing Lei {
9166aecda00SMing Lei 	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
917b9848ca7SUday Shankar 	struct ublk_thread_info *tinfo;
9188f75ba28SUday Shankar 	unsigned extra_flags = 0;
9192f0a692aSMing Lei 	cpu_set_t *affinity_buf;
9202f0a692aSMing Lei 	void *thread_ret;
921b9848ca7SUday Shankar 	sem_t ready;
9222f0a692aSMing Lei 	int ret, i;
9236aecda00SMing Lei 
9246aecda00SMing Lei 	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
9256aecda00SMing Lei 
926abe54c16SUday Shankar 	tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads);
927b9848ca7SUday Shankar 	if (!tinfo)
9282f0a692aSMing Lei 		return -ENOMEM;
9292f0a692aSMing Lei 
930b9848ca7SUday Shankar 	sem_init(&ready, 0, 0);
9318842b72aSMing Lei 	ret = ublk_dev_prep(ctx, dev);
9326aecda00SMing Lei 	if (ret)
9336aecda00SMing Lei 		return ret;
9346aecda00SMing Lei 
9352f0a692aSMing Lei 	ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
9362f0a692aSMing Lei 	if (ret)
9372f0a692aSMing Lei 		return ret;
9382f0a692aSMing Lei 
9398f75ba28SUday Shankar 	if (ctx->auto_zc_fallback)
9408f75ba28SUday Shankar 		extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;
9418f75ba28SUday Shankar 
9426aecda00SMing Lei 	for (i = 0; i < dinfo->nr_hw_queues; i++) {
9436aecda00SMing Lei 		dev->q[i].dev = dev;
9446aecda00SMing Lei 		dev->q[i].q_id = i;
9452f0a692aSMing Lei 
9468f75ba28SUday Shankar 		ret = ublk_queue_init(&dev->q[i], extra_flags);
9478f75ba28SUday Shankar 		if (ret) {
9488f75ba28SUday Shankar 			ublk_err("ublk dev %d queue %d init queue failed\n",
9498f75ba28SUday Shankar 				 dinfo->dev_id, i);
9508f75ba28SUday Shankar 			goto fail;
9518f75ba28SUday Shankar 		}
952abe54c16SUday Shankar 	}
9538f75ba28SUday Shankar 
954abe54c16SUday Shankar 	for (i = 0; i < dev->nthreads; i++) {
955b9848ca7SUday Shankar 		tinfo[i].dev = dev;
956b9848ca7SUday Shankar 		tinfo[i].idx = i;
957b9848ca7SUday Shankar 		tinfo[i].ready = &ready;
958abe54c16SUday Shankar 
959abe54c16SUday Shankar 		/*
960abe54c16SUday Shankar 		 * If threads are not tied 1:1 to queues, setting thread
961abe54c16SUday Shankar 		 * affinity based on queue affinity makes little sense.
962abe54c16SUday Shankar 		 * However, thread CPU affinity has significant impact
963abe54c16SUday Shankar 		 * on performance, so to compare fairly, we'll still set
964abe54c16SUday Shankar 		 * thread CPU affinity based on queue affinity where
965abe54c16SUday Shankar 		 * possible.
966abe54c16SUday Shankar 		 */
967abe54c16SUday Shankar 		if (dev->nthreads == dinfo->nr_hw_queues)
968b9848ca7SUday Shankar 			tinfo[i].affinity = &affinity_buf[i];
969b9848ca7SUday Shankar 		pthread_create(&dev->threads[i].thread, NULL,
9706aecda00SMing Lei 				ublk_io_handler_fn,
971b9848ca7SUday Shankar 				&tinfo[i]);
9726aecda00SMing Lei 	}
9736aecda00SMing Lei 
974abe54c16SUday Shankar 	for (i = 0; i < dev->nthreads; i++)
975b9848ca7SUday Shankar 		sem_wait(&ready);
976b9848ca7SUday Shankar 	free(tinfo);
9772f0a692aSMing Lei 	free(affinity_buf);
9782f0a692aSMing Lei 
9796aecda00SMing Lei 	/* everything is fine now, start us */
98057e13a2eSMing Lei 	if (ctx->recovery)
98157e13a2eSMing Lei 		ret = ublk_ctrl_end_user_recovery(dev, getpid());
98257e13a2eSMing Lei 	else {
9836aecda00SMing Lei 		ublk_set_parameters(dev);
9846aecda00SMing Lei 		ret = ublk_ctrl_start_dev(dev, getpid());
98557e13a2eSMing Lei 	}
9866aecda00SMing Lei 	if (ret < 0) {
9876aecda00SMing Lei 		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
9886aecda00SMing Lei 		goto fail;
9896aecda00SMing Lei 	}
9906aecda00SMing Lei 
9916aecda00SMing Lei 	ublk_ctrl_get_info(dev);
9922ecdcdfeSMing Lei 	if (ctx->fg)
9932ecdcdfeSMing Lei 		ublk_ctrl_dump(dev);
9942ecdcdfeSMing Lei 	else
9952f0a692aSMing Lei 		ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
9966aecda00SMing Lei 
9976aecda00SMing Lei 	/* wait until we are terminated */
998abe54c16SUday Shankar 	for (i = 0; i < dev->nthreads; i++)
999b9848ca7SUday Shankar 		pthread_join(dev->threads[i].thread, &thread_ret);
10006aecda00SMing Lei  fail:
10018f75ba28SUday Shankar 	for (i = 0; i < dinfo->nr_hw_queues; i++)
10028f75ba28SUday Shankar 		ublk_queue_deinit(&dev->q[i]);
10036aecda00SMing Lei 	ublk_dev_unprep(dev);
10046aecda00SMing Lei 	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
10056aecda00SMing Lei 
10066aecda00SMing Lei 	return ret;
10076aecda00SMing Lei }
10086aecda00SMing Lei 
wait_ublk_dev(const char * path,int evt_mask,unsigned timeout)10099894e0eaSMing Lei static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
10106aecda00SMing Lei {
10116aecda00SMing Lei #define EV_SIZE (sizeof(struct inotify_event))
10126aecda00SMing Lei #define EV_BUF_LEN (128 * (EV_SIZE + 16))
10136aecda00SMing Lei 	struct pollfd pfd;
10146aecda00SMing Lei 	int fd, wd;
10156aecda00SMing Lei 	int ret = -EINVAL;
10169894e0eaSMing Lei 	const char *dev_name = basename(path);
10176aecda00SMing Lei 
10186aecda00SMing Lei 	fd = inotify_init();
10196aecda00SMing Lei 	if (fd < 0) {
10206aecda00SMing Lei 		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
10216aecda00SMing Lei 		return fd;
10226aecda00SMing Lei 	}
10236aecda00SMing Lei 
10246aecda00SMing Lei 	wd = inotify_add_watch(fd, "/dev", evt_mask);
10256aecda00SMing Lei 	if (wd == -1) {
10266aecda00SMing Lei 		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
10276aecda00SMing Lei 		goto fail;
10286aecda00SMing Lei 	}
10296aecda00SMing Lei 
10306aecda00SMing Lei 	pfd.fd = fd;
10316aecda00SMing Lei 	pfd.events = POLL_IN;
10326aecda00SMing Lei 	while (1) {
10336aecda00SMing Lei 		int i = 0;
10346aecda00SMing Lei 		char buffer[EV_BUF_LEN];
10356aecda00SMing Lei 		ret = poll(&pfd, 1, 1000 * timeout);
10366aecda00SMing Lei 
10376aecda00SMing Lei 		if (ret == -1) {
10386aecda00SMing Lei 			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
10396aecda00SMing Lei 			goto rm_watch;
10406aecda00SMing Lei 		} else if (ret == 0) {
10416aecda00SMing Lei 			ublk_err("%s: poll inotify timeout\n", __func__);
10426aecda00SMing Lei 			ret = -ETIMEDOUT;
10436aecda00SMing Lei 			goto rm_watch;
10446aecda00SMing Lei 		}
10456aecda00SMing Lei 
10466aecda00SMing Lei 		ret = read(fd, buffer, EV_BUF_LEN);
10476aecda00SMing Lei 		if (ret < 0) {
10486aecda00SMing Lei 			ublk_err("%s: read inotify fd failed\n", __func__);
10496aecda00SMing Lei 			goto rm_watch;
10506aecda00SMing Lei 		}
10516aecda00SMing Lei 
10526aecda00SMing Lei 		while (i < ret) {
10536aecda00SMing Lei 			struct inotify_event *event = (struct inotify_event *)&buffer[i];
10546aecda00SMing Lei 
10556aecda00SMing Lei 			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
10566aecda00SMing Lei 					__func__, event->mask, event->name);
10576aecda00SMing Lei 			if (event->mask & evt_mask) {
10586aecda00SMing Lei 				if (!strcmp(event->name, dev_name)) {
10596aecda00SMing Lei 					ret = 0;
10606aecda00SMing Lei 					goto rm_watch;
10616aecda00SMing Lei 				}
10626aecda00SMing Lei 			}
10636aecda00SMing Lei 			i += EV_SIZE + event->len;
10646aecda00SMing Lei 		}
10656aecda00SMing Lei 	}
10666aecda00SMing Lei rm_watch:
10676aecda00SMing Lei 	inotify_rm_watch(fd, wd);
10686aecda00SMing Lei fail:
10696aecda00SMing Lei 	close(fd);
10706aecda00SMing Lei 	return ret;
10716aecda00SMing Lei }
10726aecda00SMing Lei 
ublk_stop_io_daemon(const struct ublk_dev * dev)10736aecda00SMing Lei static int ublk_stop_io_daemon(const struct ublk_dev *dev)
10746aecda00SMing Lei {
10756aecda00SMing Lei 	int daemon_pid = dev->dev_info.ublksrv_pid;
10766aecda00SMing Lei 	int dev_id = dev->dev_info.dev_id;
10776aecda00SMing Lei 	char ublkc[64];
10786aecda00SMing Lei 	int ret = 0;
10796aecda00SMing Lei 
10809894e0eaSMing Lei 	if (daemon_pid < 0)
10819894e0eaSMing Lei 		return 0;
10829894e0eaSMing Lei 
10836aecda00SMing Lei 	/* daemon may be dead already */
10846aecda00SMing Lei 	if (kill(daemon_pid, 0) < 0)
10856aecda00SMing Lei 		goto wait;
10866aecda00SMing Lei 
10879894e0eaSMing Lei 	snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);
10889894e0eaSMing Lei 
10899894e0eaSMing Lei 	/* ublk char device may be gone already */
10909894e0eaSMing Lei 	if (access(ublkc, F_OK) != 0)
10919894e0eaSMing Lei 		goto wait;
10929894e0eaSMing Lei 
10939894e0eaSMing Lei 	/* Wait until ublk char device is closed, when the daemon is shutdown */
10949894e0eaSMing Lei 	ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
10959894e0eaSMing Lei 	/* double check and since it may be closed before starting inotify */
10966aecda00SMing Lei 	if (ret == -ETIMEDOUT)
10976aecda00SMing Lei 		ret = kill(daemon_pid, 0) < 0;
10986aecda00SMing Lei wait:
10996aecda00SMing Lei 	waitpid(daemon_pid, NULL, 0);
11006aecda00SMing Lei 	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
11016aecda00SMing Lei 			__func__, daemon_pid, dev_id, ret);
11026aecda00SMing Lei 
11036aecda00SMing Lei 	return ret;
11046aecda00SMing Lei }
11056aecda00SMing Lei 
__cmd_dev_add(const struct dev_ctx * ctx)11066aecda00SMing Lei static int __cmd_dev_add(const struct dev_ctx *ctx)
11076aecda00SMing Lei {
1108abe54c16SUday Shankar 	unsigned nthreads = ctx->nthreads;
11096aecda00SMing Lei 	unsigned nr_queues = ctx->nr_hw_queues;
11106aecda00SMing Lei 	const char *tgt_type = ctx->tgt_type;
11116aecda00SMing Lei 	unsigned depth = ctx->queue_depth;
11126aecda00SMing Lei 	__u64 features;
11136aecda00SMing Lei 	const struct ublk_tgt_ops *ops;
11146aecda00SMing Lei 	struct ublksrv_ctrl_dev_info *info;
1115*a2f4c1aeSUday Shankar 	struct ublk_dev *dev = NULL;
11166aecda00SMing Lei 	int dev_id = ctx->dev_id;
11175d95bfb5SMing Lei 	int ret, i;
11186aecda00SMing Lei 
11196aecda00SMing Lei 	ops = ublk_find_tgt(tgt_type);
11206aecda00SMing Lei 	if (!ops) {
11216aecda00SMing Lei 		ublk_err("%s: no such tgt type, type %s\n",
11226aecda00SMing Lei 				__func__, tgt_type);
1123*a2f4c1aeSUday Shankar 		ret = -ENODEV;
1124*a2f4c1aeSUday Shankar 		goto fail;
11256aecda00SMing Lei 	}
11266aecda00SMing Lei 
11276aecda00SMing Lei 	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
11286aecda00SMing Lei 		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
11296aecda00SMing Lei 				__func__, nr_queues, depth);
1130*a2f4c1aeSUday Shankar 		ret = -EINVAL;
1131*a2f4c1aeSUday Shankar 		goto fail;
11326aecda00SMing Lei 	}
11336aecda00SMing Lei 
1134abe54c16SUday Shankar 	/* default to 1:1 threads:queues if nthreads is unspecified */
1135abe54c16SUday Shankar 	if (!nthreads)
1136abe54c16SUday Shankar 		nthreads = nr_queues;
1137abe54c16SUday Shankar 
1138abe54c16SUday Shankar 	if (nthreads > UBLK_MAX_THREADS) {
1139abe54c16SUday Shankar 		ublk_err("%s: %u is too many threads (max %u)\n",
1140abe54c16SUday Shankar 				__func__, nthreads, UBLK_MAX_THREADS);
1141*a2f4c1aeSUday Shankar 		ret = -EINVAL;
1142*a2f4c1aeSUday Shankar 		goto fail;
1143abe54c16SUday Shankar 	}
1144abe54c16SUday Shankar 
1145abe54c16SUday Shankar 	if (nthreads != nr_queues && !ctx->per_io_tasks) {
1146abe54c16SUday Shankar 		ublk_err("%s: threads %u must be same as queues %u if "
1147abe54c16SUday Shankar 			"not using per_io_tasks\n",
1148abe54c16SUday Shankar 			__func__, nthreads, nr_queues);
1149*a2f4c1aeSUday Shankar 		ret = -EINVAL;
1150*a2f4c1aeSUday Shankar 		goto fail;
1151abe54c16SUday Shankar 	}
1152abe54c16SUday Shankar 
11536aecda00SMing Lei 	dev = ublk_ctrl_init();
11546aecda00SMing Lei 	if (!dev) {
11556aecda00SMing Lei 		ublk_err("%s: can't alloc dev id %d, type %s\n",
11566aecda00SMing Lei 				__func__, dev_id, tgt_type);
1157*a2f4c1aeSUday Shankar 		ret = -ENOMEM;
1158*a2f4c1aeSUday Shankar 		goto fail;
11596aecda00SMing Lei 	}
11606aecda00SMing Lei 
11616aecda00SMing Lei 	/* kernel doesn't support get_features */
11626aecda00SMing Lei 	ret = ublk_ctrl_get_features(dev, &features);
1163*a2f4c1aeSUday Shankar 	if (ret < 0) {
1164*a2f4c1aeSUday Shankar 		ret = -EINVAL;
1165*a2f4c1aeSUday Shankar 		goto fail;
1166*a2f4c1aeSUday Shankar 	}
11676aecda00SMing Lei 
1168*a2f4c1aeSUday Shankar 	if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) {
1169*a2f4c1aeSUday Shankar 		ret = -ENOTSUP;
1170*a2f4c1aeSUday Shankar 		goto fail;
1171*a2f4c1aeSUday Shankar 	}
11726aecda00SMing Lei 
11736aecda00SMing Lei 	info = &dev->dev_info;
11746aecda00SMing Lei 	info->dev_id = ctx->dev_id;
11756aecda00SMing Lei 	info->nr_hw_queues = nr_queues;
11766aecda00SMing Lei 	info->queue_depth = depth;
11776aecda00SMing Lei 	info->flags = ctx->flags;
1178533c87e2SMing Lei 	if ((features & UBLK_F_QUIESCE) &&
1179533c87e2SMing Lei 			(info->flags & UBLK_F_USER_RECOVERY))
1180533c87e2SMing Lei 		info->flags |= UBLK_F_QUIESCE;
1181abe54c16SUday Shankar 	dev->nthreads = nthreads;
1182abe54c16SUday Shankar 	dev->per_io_tasks = ctx->per_io_tasks;
11836aecda00SMing Lei 	dev->tgt.ops = ops;
11846aecda00SMing Lei 	dev->tgt.sq_depth = depth;
11856aecda00SMing Lei 	dev->tgt.cq_depth = depth;
11866aecda00SMing Lei 
11875d95bfb5SMing Lei 	for (i = 0; i < MAX_BACK_FILES; i++) {
11885d95bfb5SMing Lei 		if (ctx->files[i]) {
11895d95bfb5SMing Lei 			strcpy(dev->tgt.backing_file[i], ctx->files[i]);
11905d95bfb5SMing Lei 			dev->tgt.nr_backing_files++;
11915d95bfb5SMing Lei 		}
11925d95bfb5SMing Lei 	}
11935d95bfb5SMing Lei 
119457e13a2eSMing Lei 	if (ctx->recovery)
119557e13a2eSMing Lei 		ret = ublk_ctrl_start_user_recovery(dev);
119657e13a2eSMing Lei 	else
11976aecda00SMing Lei 		ret = ublk_ctrl_add_dev(dev);
11986aecda00SMing Lei 	if (ret < 0) {
11996aecda00SMing Lei 		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
12006aecda00SMing Lei 				__func__, dev_id, tgt_type, ret);
12016aecda00SMing Lei 		goto fail;
12026aecda00SMing Lei 	}
12036aecda00SMing Lei 
12046aecda00SMing Lei 	ret = ublk_start_daemon(ctx, dev);
12056aecda00SMing Lei 	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\b", ret);
1206ffde32a4SMing Lei 	if (ret < 0)
1207ffde32a4SMing Lei 		ublk_ctrl_del_dev(dev);
12086aecda00SMing Lei 
12096aecda00SMing Lei fail:
12106aecda00SMing Lei 	if (ret < 0)
12112f0a692aSMing Lei 		ublk_send_dev_event(ctx, dev, -1);
1212*a2f4c1aeSUday Shankar 	if (dev)
12136aecda00SMing Lei 		ublk_ctrl_deinit(dev);
12146aecda00SMing Lei 	return ret;
12156aecda00SMing Lei }
12166aecda00SMing Lei 
12176aecda00SMing Lei static int __cmd_dev_list(struct dev_ctx *ctx);
12186aecda00SMing Lei 
cmd_dev_add(struct dev_ctx * ctx)12196aecda00SMing Lei static int cmd_dev_add(struct dev_ctx *ctx)
12206aecda00SMing Lei {
12216aecda00SMing Lei 	int res;
12226aecda00SMing Lei 
12232ecdcdfeSMing Lei 	if (ctx->fg)
12242ecdcdfeSMing Lei 		goto run;
12252ecdcdfeSMing Lei 
12262f0a692aSMing Lei 	ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
12272f0a692aSMing Lei 	if (ctx->_shmid < 0) {
12282f0a692aSMing Lei 		ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
12292f0a692aSMing Lei 		exit(-1);
12302f0a692aSMing Lei 	}
12312f0a692aSMing Lei 	ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
12322f0a692aSMing Lei 	if (ctx->shadow_dev == (struct ublk_dev *)-1) {
12332f0a692aSMing Lei 		ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
12342f0a692aSMing Lei 		exit(-1);
12352f0a692aSMing Lei 	}
12366aecda00SMing Lei 	ctx->_evtfd = eventfd(0, 0);
12376aecda00SMing Lei 	if (ctx->_evtfd < 0) {
12386aecda00SMing Lei 		ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
12396aecda00SMing Lei 		exit(-1);
12406aecda00SMing Lei 	}
12416aecda00SMing Lei 
12426aecda00SMing Lei 	res = fork();
12436aecda00SMing Lei 	if (res == 0) {
1244573840abSMing Lei 		int res2;
1245573840abSMing Lei 
1246573840abSMing Lei 		setsid();
1247573840abSMing Lei 		res2 = fork();
1248573840abSMing Lei 		if (res2 == 0) {
1249573840abSMing Lei 			/* prepare for detaching */
1250573840abSMing Lei 			close(STDIN_FILENO);
1251573840abSMing Lei 			close(STDOUT_FILENO);
1252573840abSMing Lei 			close(STDERR_FILENO);
12532ecdcdfeSMing Lei run:
12542ecdcdfeSMing Lei 			res = __cmd_dev_add(ctx);
12552ecdcdfeSMing Lei 			return res;
1256573840abSMing Lei 		} else {
1257573840abSMing Lei 			/* detached from the foreground task */
1258573840abSMing Lei 			exit(EXIT_SUCCESS);
1259573840abSMing Lei 		}
12606aecda00SMing Lei 	} else if (res > 0) {
12616aecda00SMing Lei 		uint64_t id;
1262573840abSMing Lei 		int exit_code = EXIT_FAILURE;
12636aecda00SMing Lei 
12646aecda00SMing Lei 		res = read(ctx->_evtfd, &id, sizeof(id));
12656aecda00SMing Lei 		close(ctx->_evtfd);
12666aecda00SMing Lei 		if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
12676aecda00SMing Lei 			ctx->dev_id = id - 1;
1268573840abSMing Lei 			if (__cmd_dev_list(ctx) >= 0)
1269573840abSMing Lei 				exit_code = EXIT_SUCCESS;
12706aecda00SMing Lei 		}
12712f0a692aSMing Lei 		shmdt(ctx->shadow_dev);
12722f0a692aSMing Lei 		shmctl(ctx->_shmid, IPC_RMID, NULL);
1273573840abSMing Lei 		/* wait for child and detach from it */
1274573840abSMing Lei 		wait(NULL);
1275*a2f4c1aeSUday Shankar 		if (exit_code == EXIT_FAILURE)
1276*a2f4c1aeSUday Shankar 			ublk_err("%s: command failed\n", __func__);
1277573840abSMing Lei 		exit(exit_code);
12786aecda00SMing Lei 	} else {
1279573840abSMing Lei 		exit(EXIT_FAILURE);
12806aecda00SMing Lei 	}
12816aecda00SMing Lei }
12826aecda00SMing Lei 
__cmd_dev_del(struct dev_ctx * ctx)12836aecda00SMing Lei static int __cmd_dev_del(struct dev_ctx *ctx)
12846aecda00SMing Lei {
12856aecda00SMing Lei 	int number = ctx->dev_id;
12866aecda00SMing Lei 	struct ublk_dev *dev;
12876aecda00SMing Lei 	int ret;
12886aecda00SMing Lei 
12896aecda00SMing Lei 	dev = ublk_ctrl_init();
12906aecda00SMing Lei 	dev->dev_info.dev_id = number;
12916aecda00SMing Lei 
12926aecda00SMing Lei 	ret = ublk_ctrl_get_info(dev);
12936aecda00SMing Lei 	if (ret < 0)
12946aecda00SMing Lei 		goto fail;
12956aecda00SMing Lei 
12966aecda00SMing Lei 	ret = ublk_ctrl_stop_dev(dev);
12976aecda00SMing Lei 	if (ret < 0)
12986aecda00SMing Lei 		ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);
12996aecda00SMing Lei 
13006aecda00SMing Lei 	ret = ublk_stop_io_daemon(dev);
13016aecda00SMing Lei 	if (ret < 0)
13026aecda00SMing Lei 		ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
13036aecda00SMing Lei 				__func__, dev->dev_info.ublksrv_pid, number, ret);
13046aecda00SMing Lei 	ublk_ctrl_del_dev(dev);
13056aecda00SMing Lei fail:
13066aecda00SMing Lei 	ublk_ctrl_deinit(dev);
13076aecda00SMing Lei 
13086aecda00SMing Lei 	return (ret >= 0) ? 0 : ret;
13096aecda00SMing Lei }
13106aecda00SMing Lei 
cmd_dev_del(struct dev_ctx * ctx)13116aecda00SMing Lei static int cmd_dev_del(struct dev_ctx *ctx)
13126aecda00SMing Lei {
13136aecda00SMing Lei 	int i;
13146aecda00SMing Lei 
13156aecda00SMing Lei 	if (ctx->dev_id >= 0 || !ctx->all)
13166aecda00SMing Lei 		return __cmd_dev_del(ctx);
13176aecda00SMing Lei 
13186aecda00SMing Lei 	for (i = 0; i < 255; i++) {
13196aecda00SMing Lei 		ctx->dev_id = i;
13206aecda00SMing Lei 		__cmd_dev_del(ctx);
13216aecda00SMing Lei 	}
13226aecda00SMing Lei 	return 0;
13236aecda00SMing Lei }
13246aecda00SMing Lei 
__cmd_dev_list(struct dev_ctx * ctx)13256aecda00SMing Lei static int __cmd_dev_list(struct dev_ctx *ctx)
13266aecda00SMing Lei {
13276aecda00SMing Lei 	struct ublk_dev *dev = ublk_ctrl_init();
13286aecda00SMing Lei 	int ret;
13296aecda00SMing Lei 
13306aecda00SMing Lei 	if (!dev)
13316aecda00SMing Lei 		return -ENODEV;
13326aecda00SMing Lei 
13336aecda00SMing Lei 	dev->dev_info.dev_id = ctx->dev_id;
13346aecda00SMing Lei 
13356aecda00SMing Lei 	ret = ublk_ctrl_get_info(dev);
13366aecda00SMing Lei 	if (ret < 0) {
13376aecda00SMing Lei 		if (ctx->logging)
13386aecda00SMing Lei 			ublk_err("%s: can't get dev info from %d: %d\n",
13396aecda00SMing Lei 					__func__, ctx->dev_id, ret);
13406aecda00SMing Lei 	} else {
13412f0a692aSMing Lei 		if (ctx->shadow_dev)
13422f0a692aSMing Lei 			memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
13432f0a692aSMing Lei 
13446aecda00SMing Lei 		ublk_ctrl_dump(dev);
13456aecda00SMing Lei 	}
13466aecda00SMing Lei 
13476aecda00SMing Lei 	ublk_ctrl_deinit(dev);
13486aecda00SMing Lei 
13496aecda00SMing Lei 	return ret;
13506aecda00SMing Lei }
13516aecda00SMing Lei 
cmd_dev_list(struct dev_ctx * ctx)13526aecda00SMing Lei static int cmd_dev_list(struct dev_ctx *ctx)
13536aecda00SMing Lei {
13546aecda00SMing Lei 	int i;
13556aecda00SMing Lei 
13566aecda00SMing Lei 	if (ctx->dev_id >= 0 || !ctx->all)
13576aecda00SMing Lei 		return __cmd_dev_list(ctx);
13586aecda00SMing Lei 
13596aecda00SMing Lei 	ctx->logging = false;
13606aecda00SMing Lei 	for (i = 0; i < 255; i++) {
13616aecda00SMing Lei 		ctx->dev_id = i;
13626aecda00SMing Lei 		__cmd_dev_list(ctx);
13636aecda00SMing Lei 	}
13646aecda00SMing Lei 	return 0;
13656aecda00SMing Lei }
13666aecda00SMing Lei 
cmd_dev_get_features(void)13676aecda00SMing Lei static int cmd_dev_get_features(void)
13686aecda00SMing Lei {
13696aecda00SMing Lei #define const_ilog2(x) (63 - __builtin_clzll(x))
13706aecda00SMing Lei 	static const char *feat_map[] = {
13716aecda00SMing Lei 		[const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY",
13726aecda00SMing Lei 		[const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK",
13736aecda00SMing Lei 		[const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA",
13746aecda00SMing Lei 		[const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY",
13756aecda00SMing Lei 		[const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE",
13766aecda00SMing Lei 		[const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV",
13776aecda00SMing Lei 		[const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
13786aecda00SMing Lei 		[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
13796aecda00SMing Lei 		[const_ilog2(UBLK_F_ZONED)] = "ZONED",
13806aecda00SMing Lei 		[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
1381f40b1f26SMing Lei 		[const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE",
13828ccebc19SMing Lei 		[const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
1383533c87e2SMing Lei 		[const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE",
1384abe54c16SUday Shankar 		[const_ilog2(UBLK_F_PER_IO_DAEMON)] = "PER_IO_DAEMON",
13856aecda00SMing Lei 	};
13866aecda00SMing Lei 	struct ublk_dev *dev;
13876aecda00SMing Lei 	__u64 features = 0;
13886aecda00SMing Lei 	int ret;
13896aecda00SMing Lei 
13906aecda00SMing Lei 	dev = ublk_ctrl_init();
13916aecda00SMing Lei 	if (!dev) {
13926aecda00SMing Lei 		fprintf(stderr, "ublksrv_ctrl_init failed id\n");
13936aecda00SMing Lei 		return -EOPNOTSUPP;
13946aecda00SMing Lei 	}
13956aecda00SMing Lei 
13966aecda00SMing Lei 	ret = ublk_ctrl_get_features(dev, &features);
13976aecda00SMing Lei 	if (!ret) {
13986aecda00SMing Lei 		int i;
13996aecda00SMing Lei 
14006aecda00SMing Lei 		printf("ublk_drv features: 0x%llx\n", features);
14016aecda00SMing Lei 
14026aecda00SMing Lei 		for (i = 0; i < sizeof(features) * 8; i++) {
14036aecda00SMing Lei 			const char *feat;
14046aecda00SMing Lei 
14056aecda00SMing Lei 			if (!((1ULL << i)  & features))
14066aecda00SMing Lei 				continue;
14076aecda00SMing Lei 			if (i < sizeof(feat_map) / sizeof(feat_map[0]))
14086aecda00SMing Lei 				feat = feat_map[i];
14096aecda00SMing Lei 			else
14106aecda00SMing Lei 				feat = "unknown";
14116aecda00SMing Lei 			printf("\t%-20s: 0x%llx\n", feat, 1ULL << i);
14126aecda00SMing Lei 		}
14136aecda00SMing Lei 	}
14146aecda00SMing Lei 
14156aecda00SMing Lei 	return ret;
14166aecda00SMing Lei }
14176aecda00SMing Lei 
cmd_dev_update_size(struct dev_ctx * ctx)1418f40b1f26SMing Lei static int cmd_dev_update_size(struct dev_ctx *ctx)
1419f40b1f26SMing Lei {
1420f40b1f26SMing Lei 	struct ublk_dev *dev = ublk_ctrl_init();
1421f40b1f26SMing Lei 	struct ublk_params p;
1422f40b1f26SMing Lei 	int ret = -EINVAL;
1423f40b1f26SMing Lei 
1424f40b1f26SMing Lei 	if (!dev)
1425f40b1f26SMing Lei 		return -ENODEV;
1426f40b1f26SMing Lei 
1427f40b1f26SMing Lei 	if (ctx->dev_id < 0) {
1428f40b1f26SMing Lei 		fprintf(stderr, "device id isn't provided\n");
1429f40b1f26SMing Lei 		goto out;
1430f40b1f26SMing Lei 	}
1431f40b1f26SMing Lei 
1432f40b1f26SMing Lei 	dev->dev_info.dev_id = ctx->dev_id;
1433f40b1f26SMing Lei 	ret = ublk_ctrl_get_params(dev, &p);
1434f40b1f26SMing Lei 	if (ret < 0) {
1435f40b1f26SMing Lei 		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
1436f40b1f26SMing Lei 		goto out;
1437f40b1f26SMing Lei 	}
1438f40b1f26SMing Lei 
1439f40b1f26SMing Lei 	if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) {
1440f40b1f26SMing Lei 		ublk_err("size isn't aligned with logical block size\n");
1441f40b1f26SMing Lei 		ret = -EINVAL;
1442f40b1f26SMing Lei 		goto out;
1443f40b1f26SMing Lei 	}
1444f40b1f26SMing Lei 
1445f40b1f26SMing Lei 	ret = ublk_ctrl_update_size(dev, ctx->size >> 9);
1446f40b1f26SMing Lei out:
1447f40b1f26SMing Lei 	ublk_ctrl_deinit(dev);
1448f40b1f26SMing Lei 	return ret;
1449f40b1f26SMing Lei }
1450f40b1f26SMing Lei 
cmd_dev_quiesce(struct dev_ctx * ctx)1451533c87e2SMing Lei static int cmd_dev_quiesce(struct dev_ctx *ctx)
1452533c87e2SMing Lei {
1453533c87e2SMing Lei 	struct ublk_dev *dev = ublk_ctrl_init();
1454533c87e2SMing Lei 	int ret = -EINVAL;
1455533c87e2SMing Lei 
1456533c87e2SMing Lei 	if (!dev)
1457533c87e2SMing Lei 		return -ENODEV;
1458533c87e2SMing Lei 
1459533c87e2SMing Lei 	if (ctx->dev_id < 0) {
1460533c87e2SMing Lei 		fprintf(stderr, "device id isn't provided for quiesce\n");
1461533c87e2SMing Lei 		goto out;
1462533c87e2SMing Lei 	}
1463533c87e2SMing Lei 	dev->dev_info.dev_id = ctx->dev_id;
1464533c87e2SMing Lei 	ret = ublk_ctrl_quiesce_dev(dev, 10000);
1465533c87e2SMing Lei 
1466533c87e2SMing Lei out:
1467533c87e2SMing Lei 	ublk_ctrl_deinit(dev);
1468533c87e2SMing Lei 	return ret;
1469533c87e2SMing Lei }
1470533c87e2SMing Lei 
__cmd_create_help(char * exe,bool recovery)147157e13a2eSMing Lei static void __cmd_create_help(char *exe, bool recovery)
14726aecda00SMing Lei {
1473810b88f3SMing Lei 	int i;
1474810b88f3SMing Lei 
147581586652SUday Shankar 	printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
147657e13a2eSMing Lei 			exe, recovery ? "recover" : "add");
14776f1a182aSMing Lei 	printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
147857e13a2eSMing Lei 	printf("\t[-e 0|1 ] [-i 0|1]\n");
1479abe54c16SUday Shankar 	printf("\t[--nthreads threads] [--per_io_tasks]\n");
1480810b88f3SMing Lei 	printf("\t[target options] [backfile1] [backfile2] ...\n");
14816c62fd04SMing Lei 	printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
1482abe54c16SUday Shankar 	printf("\tdefault: nthreads=nr_queues");
1483810b88f3SMing Lei 
1484810b88f3SMing Lei 	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
1485810b88f3SMing Lei 		const struct ublk_tgt_ops *ops = tgt_ops_list[i];
1486810b88f3SMing Lei 
1487810b88f3SMing Lei 		if (ops->usage)
1488810b88f3SMing Lei 			ops->usage(ops);
1489810b88f3SMing Lei 	}
149057e13a2eSMing Lei }
149157e13a2eSMing Lei 
/* Print usage for the "add" sub-command, followed by a blank line. */
static void cmd_add_help(char *exe)
{
	const bool recovery = false;

	__cmd_create_help(exe, recovery);
	printf("\n");
}
149757e13a2eSMing Lei 
/*
 * Print usage for the "recover" sub-command: the common create usage in
 * its "recover" form, then a reminder about the required command line.
 */
static void cmd_recover_help(char *exe)
{
	const bool recovery = true;

	__cmd_create_help(exe, recovery);
	printf("\tPlease provide exact command line for creating this device with real dev_id\n");
	printf("\n");
}
150457e13a2eSMing Lei 
/*
 * "help" sub-command: print usage for every sub-command.
 *
 * @exe: program name used as the prefix of each usage line
 *
 * Always returns 0.
 */
static int cmd_dev_help(char *exe)
{
	/* device creation: add / recover */
	cmd_add_help(exe);
	cmd_recover_help(exe);

	/* device removal */
	printf("%s del [-n dev_id] -a \n", exe);
	printf("\t -a delete all devices -n delete specified device\n\n");

	/* device listing */
	printf("%s list [-n dev_id] -a \n", exe);
	printf("\t -a list all devices, -n list specified device, default -a \n\n");

	/* remaining single-line commands */
	printf("%s features\n", exe);
	printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe);
	printf("%s quiesce -n dev_id\n", exe);

	return 0;
}
15196aecda00SMing Lei 
main(int argc,char * argv[])15206aecda00SMing Lei int main(int argc, char *argv[])
15216aecda00SMing Lei {
15226aecda00SMing Lei 	static const struct option longopts[] = {
15236aecda00SMing Lei 		{ "all",		0,	NULL, 'a' },
15246aecda00SMing Lei 		{ "type",		1,	NULL, 't' },
15256aecda00SMing Lei 		{ "number",		1,	NULL, 'n' },
15266aecda00SMing Lei 		{ "queues",		1,	NULL, 'q' },
15276aecda00SMing Lei 		{ "depth",		1,	NULL, 'd' },
15286aecda00SMing Lei 		{ "debug_mask",		1,	NULL,  0  },
15296aecda00SMing Lei 		{ "quiet",		0,	NULL,  0  },
15300f3ebf2dSMing Lei 		{ "zero_copy",          0,      NULL, 'z' },
15312ecdcdfeSMing Lei 		{ "foreground",		0,	NULL,  0  },
153257e13a2eSMing Lei 		{ "recovery", 		1,      NULL, 'r' },
153357e13a2eSMing Lei 		{ "recovery_fail_io",	1,	NULL, 'e'},
153457e13a2eSMing Lei 		{ "recovery_reissue",	1,	NULL, 'i'},
153557e13a2eSMing Lei 		{ "get_data",		1,	NULL, 'g'},
15368ccebc19SMing Lei 		{ "auto_zc",		0,	NULL,  0 },
15376f1a182aSMing Lei 		{ "auto_zc_fallback", 	0,	NULL,  0 },
1538f40b1f26SMing Lei 		{ "size",		1,	NULL, 's'},
1539abe54c16SUday Shankar 		{ "nthreads",		1,	NULL,  0 },
1540abe54c16SUday Shankar 		{ "per_io_tasks",	0,	NULL,  0 },
15416aecda00SMing Lei 		{ 0, 0, 0, 0 }
15426aecda00SMing Lei 	};
1543810b88f3SMing Lei 	const struct ublk_tgt_ops *ops = NULL;
15446aecda00SMing Lei 	int option_idx, opt;
15456aecda00SMing Lei 	const char *cmd = argv[1];
15466aecda00SMing Lei 	struct dev_ctx ctx = {
15476aecda00SMing Lei 		.queue_depth	=	128,
15486aecda00SMing Lei 		.nr_hw_queues	=	2,
15496aecda00SMing Lei 		.dev_id		=	-1,
15506aecda00SMing Lei 		.tgt_type	=	"unknown",
15516aecda00SMing Lei 	};
15526aecda00SMing Lei 	int ret = -EINVAL, i;
1553810b88f3SMing Lei 	int tgt_argc = 1;
1554810b88f3SMing Lei 	char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
155557e13a2eSMing Lei 	int value;
15566aecda00SMing Lei 
15576aecda00SMing Lei 	if (argc == 1)
15586aecda00SMing Lei 		return ret;
15596aecda00SMing Lei 
1560810b88f3SMing Lei 	opterr = 0;
15616aecda00SMing Lei 	optind = 2;
1562f40b1f26SMing Lei 	while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz",
15636aecda00SMing Lei 				  longopts, &option_idx)) != -1) {
15646aecda00SMing Lei 		switch (opt) {
15656aecda00SMing Lei 		case 'a':
15666aecda00SMing Lei 			ctx.all = 1;
15676aecda00SMing Lei 			break;
15686aecda00SMing Lei 		case 'n':
15696aecda00SMing Lei 			ctx.dev_id = strtol(optarg, NULL, 10);
15706aecda00SMing Lei 			break;
15716aecda00SMing Lei 		case 't':
15726aecda00SMing Lei 			if (strlen(optarg) < sizeof(ctx.tgt_type))
15736aecda00SMing Lei 				strcpy(ctx.tgt_type, optarg);
15746aecda00SMing Lei 			break;
15756aecda00SMing Lei 		case 'q':
15766aecda00SMing Lei 			ctx.nr_hw_queues = strtol(optarg, NULL, 10);
15776aecda00SMing Lei 			break;
15786aecda00SMing Lei 		case 'd':
15796aecda00SMing Lei 			ctx.queue_depth = strtol(optarg, NULL, 10);
15806aecda00SMing Lei 			break;
1581bedc9cbcSMing Lei 		case 'z':
1582bedc9cbcSMing Lei 			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
1583bedc9cbcSMing Lei 			break;
158457e13a2eSMing Lei 		case 'r':
158557e13a2eSMing Lei 			value = strtol(optarg, NULL, 10);
158657e13a2eSMing Lei 			if (value)
158757e13a2eSMing Lei 				ctx.flags |= UBLK_F_USER_RECOVERY;
158857e13a2eSMing Lei 			break;
158957e13a2eSMing Lei 		case 'e':
159057e13a2eSMing Lei 			value = strtol(optarg, NULL, 10);
159157e13a2eSMing Lei 			if (value)
159257e13a2eSMing Lei 				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
159357e13a2eSMing Lei 			break;
159457e13a2eSMing Lei 		case 'i':
159557e13a2eSMing Lei 			value = strtol(optarg, NULL, 10);
159657e13a2eSMing Lei 			if (value)
159757e13a2eSMing Lei 				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
159857e13a2eSMing Lei 			break;
159957e13a2eSMing Lei 		case 'g':
160057e13a2eSMing Lei 			ctx.flags |= UBLK_F_NEED_GET_DATA;
16015533bc70SMing Lei 			break;
1602f40b1f26SMing Lei 		case 's':
1603f40b1f26SMing Lei 			ctx.size = strtoull(optarg, NULL, 10);
1604f40b1f26SMing Lei 			break;
16056aecda00SMing Lei 		case 0:
16066aecda00SMing Lei 			if (!strcmp(longopts[option_idx].name, "debug_mask"))
16076aecda00SMing Lei 				ublk_dbg_mask = strtol(optarg, NULL, 16);
16086aecda00SMing Lei 			if (!strcmp(longopts[option_idx].name, "quiet"))
16096aecda00SMing Lei 				ublk_dbg_mask = 0;
16102ecdcdfeSMing Lei 			if (!strcmp(longopts[option_idx].name, "foreground"))
16112ecdcdfeSMing Lei 				ctx.fg = 1;
16128ccebc19SMing Lei 			if (!strcmp(longopts[option_idx].name, "auto_zc"))
16138ccebc19SMing Lei 				ctx.flags |= UBLK_F_AUTO_BUF_REG;
16146f1a182aSMing Lei 			if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
16156f1a182aSMing Lei 				ctx.auto_zc_fallback = 1;
1616abe54c16SUday Shankar 			if (!strcmp(longopts[option_idx].name, "nthreads"))
1617abe54c16SUday Shankar 				ctx.nthreads = strtol(optarg, NULL, 10);
1618abe54c16SUday Shankar 			if (!strcmp(longopts[option_idx].name, "per_io_tasks"))
1619abe54c16SUday Shankar 				ctx.per_io_tasks = 1;
1620810b88f3SMing Lei 			break;
1621810b88f3SMing Lei 		case '?':
1622810b88f3SMing Lei 			/*
1623810b88f3SMing Lei 			 * target requires every option must have argument
1624810b88f3SMing Lei 			 */
1625810b88f3SMing Lei 			if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
1626810b88f3SMing Lei 				fprintf(stderr, "every target option requires argument: %s %s\n",
1627810b88f3SMing Lei 						argv[optind - 1], argv[optind]);
1628810b88f3SMing Lei 				exit(EXIT_FAILURE);
1629810b88f3SMing Lei 			}
1630810b88f3SMing Lei 
1631810b88f3SMing Lei 			if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
1632810b88f3SMing Lei 				tgt_argv[tgt_argc++] = argv[optind - 1];
1633810b88f3SMing Lei 				tgt_argv[tgt_argc++] = argv[optind];
1634810b88f3SMing Lei 			} else {
1635810b88f3SMing Lei 				fprintf(stderr, "too many target options\n");
1636810b88f3SMing Lei 				exit(EXIT_FAILURE);
1637810b88f3SMing Lei 			}
1638810b88f3SMing Lei 			optind += 1;
1639810b88f3SMing Lei 			break;
16406aecda00SMing Lei 		}
16416aecda00SMing Lei 	}
16426aecda00SMing Lei 
16436f1a182aSMing Lei 	/* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
16446f1a182aSMing Lei 	if (ctx.auto_zc_fallback &&
16456f1a182aSMing Lei 	    !((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
16466f1a182aSMing Lei 		    (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
16476f1a182aSMing Lei 		ublk_err("%s: auto_zc_fallback is set but neither "
16486f1a182aSMing Lei 				"F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n",
16496f1a182aSMing Lei 					__func__);
16506f1a182aSMing Lei 		return -EINVAL;
16516f1a182aSMing Lei 	}
16526f1a182aSMing Lei 
16536aecda00SMing Lei 	i = optind;
16546aecda00SMing Lei 	while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
16556aecda00SMing Lei 		ctx.files[ctx.nr_files++] = argv[i++];
16566aecda00SMing Lei 	}
16576aecda00SMing Lei 
1658810b88f3SMing Lei 	ops = ublk_find_tgt(ctx.tgt_type);
1659810b88f3SMing Lei 	if (ops && ops->parse_cmd_line) {
1660810b88f3SMing Lei 		optind = 0;
1661810b88f3SMing Lei 
1662810b88f3SMing Lei 		tgt_argv[0] = ctx.tgt_type;
1663810b88f3SMing Lei 		ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
1664810b88f3SMing Lei 	}
1665810b88f3SMing Lei 
16666aecda00SMing Lei 	if (!strcmp(cmd, "add"))
16676aecda00SMing Lei 		ret = cmd_dev_add(&ctx);
166857e13a2eSMing Lei 	else if (!strcmp(cmd, "recover")) {
166957e13a2eSMing Lei 		if (ctx.dev_id < 0) {
167057e13a2eSMing Lei 			fprintf(stderr, "device id isn't provided for recovering\n");
167157e13a2eSMing Lei 			ret = -EINVAL;
167257e13a2eSMing Lei 		} else {
167357e13a2eSMing Lei 			ctx.recovery = 1;
167457e13a2eSMing Lei 			ret = cmd_dev_add(&ctx);
167557e13a2eSMing Lei 		}
167657e13a2eSMing Lei 	} else if (!strcmp(cmd, "del"))
16776aecda00SMing Lei 		ret = cmd_dev_del(&ctx);
16786aecda00SMing Lei 	else if (!strcmp(cmd, "list")) {
16796aecda00SMing Lei 		ctx.all = 1;
16806aecda00SMing Lei 		ret = cmd_dev_list(&ctx);
16816aecda00SMing Lei 	} else if (!strcmp(cmd, "help"))
16826aecda00SMing Lei 		ret = cmd_dev_help(argv[0]);
16836aecda00SMing Lei 	else if (!strcmp(cmd, "features"))
16846aecda00SMing Lei 		ret = cmd_dev_get_features();
1685f40b1f26SMing Lei 	else if (!strcmp(cmd, "update_size"))
1686f40b1f26SMing Lei 		ret = cmd_dev_update_size(&ctx);
1687533c87e2SMing Lei 	else if (!strcmp(cmd, "quiesce"))
1688533c87e2SMing Lei 		ret = cmd_dev_quiesce(&ctx);
16896aecda00SMing Lei 	else
16906aecda00SMing Lei 		cmd_dev_help(argv[0]);
16916aecda00SMing Lei 
16926aecda00SMing Lei 	return ret;
16936aecda00SMing Lei }
1694