xref: /linux/tools/testing/selftests/ublk/file_backed.c (revision ec20aa44ac2629943c9b2b5524bcb55d778f746c)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "kublk.h"
4 
5 static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc)
6 {
7 	unsigned ublk_op = ublksrv_get_op(iod);
8 
9 	if (ublk_op == UBLK_IO_OP_READ)
10 		return zc ? IORING_OP_READ_FIXED : IORING_OP_READ;
11 	else if (ublk_op == UBLK_IO_OP_WRITE)
12 		return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE;
13 	ublk_assert(0);
14 }
15 
16 static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q,
17 			       const struct ublksrv_io_desc *iod, int tag)
18 {
19 	unsigned ublk_op = ublksrv_get_op(iod);
20 	struct io_uring_sqe *sqe[1];
21 
22 	ublk_io_alloc_sqes(t, sqe, 1);
23 	io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC);
24 	io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
25 	/* bit63 marks us as tgt io */
26 	sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
27 	return 1;
28 }
29 
30 /*
31  * Shared memory zero-copy I/O: when UBLK_IO_F_SHMEM_ZC is set, the
32  * request's data lives in a registered shared memory buffer. Decode
33  * index + offset from iod->addr and use the server's mmap of that
34  * buffer as the I/O buffer for the backing file.
35  */
36 static int loop_queue_shmem_zc_io(struct ublk_thread *t, struct ublk_queue *q,
37 				  const struct ublksrv_io_desc *iod, int tag)
38 {
39 	unsigned ublk_op = ublksrv_get_op(iod);
40 	enum io_uring_op op = ublk_to_uring_op(iod, 0);
41 	__u64 file_offset = iod->start_sector << 9;
42 	__u32 len = iod->nr_sectors << 9;
43 	__u32 shmem_idx = ublk_shmem_zc_index(iod->addr);
44 	__u32 shmem_off = ublk_shmem_zc_offset(iod->addr);
45 	struct io_uring_sqe *sqe[1];
46 	void *addr;
47 
48 	if (shmem_idx >= UBLK_BUF_MAX || !shmem_table[shmem_idx].mmap_base)
49 		return -EINVAL;
50 
51 	addr = shmem_table[shmem_idx].mmap_base + shmem_off;
52 
53 	ublk_io_alloc_sqes(t, sqe, 1);
54 	if (!sqe[0])
55 		return -ENOMEM;
56 
57 	io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1),
58 			 addr, len, file_offset);
59 	io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
60 	sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
61 	return 1;
62 }
63 
/*
 * Queue the io_uring SQE(s) implementing one ublk READ/WRITE request
 * against the backing data file (and, when integrity is enabled, the
 * integrity file).  Returns the number of CQEs the completion handler
 * should expect, or a negative errno.
 */
static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
				const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);
	unsigned zc = ublk_queue_use_zc(q);
	unsigned auto_zc = ublk_queue_use_auto_zc(q);
	enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc);
	struct ublk_io *io = ublk_get_io(q, tag);
	/* ublk sectors are 512 bytes, hence the << 9 conversions */
	__u64 offset = iod->start_sector << 9;
	__u32 len = iod->nr_sectors << 9;
	struct io_uring_sqe *sqe[3];
	void *addr = io->buf_addr;
	unsigned short buf_index = ublk_io_buf_idx(t, q, tag);

	/* shared memory zero-copy path */
	if (iod->op_flags & UBLK_IO_F_SHMEM_ZC)
		return loop_queue_shmem_zc_io(t, q, iod, tag);

	if (iod->op_flags & UBLK_IO_F_INTEGRITY) {
		/* extra SQE carrying the PI metadata for this request */
		ublk_io_alloc_sqes(t, sqe, 1);
		/* Use second backing file for integrity data */
		io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 2),
				 io->integrity_buf,
				 ublk_integrity_len(q, len),
				 ublk_integrity_len(q, offset));
		sqe[0]->flags = IOSQE_FIXED_FILE;
		/* tgt_data = 1 indicates integrity I/O */
		sqe[0]->user_data = build_user_data(tag, ublk_op, 1, q->q_id, 1);
	}

	/* non-zero-copy (or auto-zc) path: a single data SQE */
	if (!zc || auto_zc) {
		ublk_io_alloc_sqes(t, sqe, 1);
		if (!sqe[0])
			return -ENOMEM;

		io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/,
				addr,
				len,
				offset);
		if (auto_zc)
			sqe[0]->buf_index = buf_index;
		io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
		/* one data CQE, plus one integrity CQE if queued above */
		return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 1;
	}

	/* explicit zero-copy: register buffer -> data I/O -> unregister */
	ublk_io_alloc_sqes(t, sqe, 3);

	io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_index);
	/* register CQE is skipped on success, so it is not counted below */
	sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
	sqe[0]->user_data = build_user_data(tag,
			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);

	io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0,
			len,
			offset);
	sqe[1]->buf_index = buf_index;
	sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
	sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);

	io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, buf_index);
	sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);

	/* data + unregister CQEs, plus one integrity CQE if queued above */
	return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 2;
}
130 
131 static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag)
132 {
133 	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
134 	unsigned ublk_op = ublksrv_get_op(iod);
135 	int ret;
136 
137 	switch (ublk_op) {
138 	case UBLK_IO_OP_FLUSH:
139 		ret = loop_queue_flush_io(t, q, iod, tag);
140 		break;
141 	case UBLK_IO_OP_WRITE_ZEROES:
142 	case UBLK_IO_OP_DISCARD:
143 		ret = -ENOTSUP;
144 		break;
145 	case UBLK_IO_OP_READ:
146 	case UBLK_IO_OP_WRITE:
147 		ret = loop_queue_tgt_rw_io(t, q, iod, tag);
148 		break;
149 	default:
150 		ret = -EINVAL;
151 		break;
152 	}
153 
154 	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
155 			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
156 	return ret;
157 }
158 
/* queue_io hook: queue target SQEs and record how many were issued */
static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q,
			      int tag)
{
	ublk_queued_tgt_io(t, q, tag, loop_queue_tgt_io(t, q, tag));
	return 0;
}
167 
/*
 * Completion handler for target CQEs.  Accumulates the request's final
 * result across its sub-I/Os and completes the ublk request once all
 * expected target I/Os have finished.
 */
static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q,
		const struct io_uring_cqe *cqe)
{
	unsigned tag = user_data_to_tag(cqe->user_data);
	unsigned op = user_data_to_op(cqe->user_data);
	struct ublk_io *io = ublk_get_io(q, tag);

	if (cqe->res < 0) {
		/* any failed sub-I/O fails the whole request */
		io->result = cqe->res;
		ublk_err("%s: io failed op %x user_data %lx\n",
				__func__, op, cqe->user_data);
	} else if (op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
		/*
		 * tgt_data != 0 marks an integrity CQE (see
		 * loop_queue_tgt_rw_io); translate its byte count into the
		 * equivalent data length so it compares with data CQEs.
		 */
		__s32 data_len = user_data_to_tgt_data(cqe->user_data)
			? ublk_integrity_data_len(q, cqe->res)
			: cqe->res;

		/* report the shortest successful transfer */
		if (!io->result || data_len < io->result)
			io->result = data_len;
	}

	/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
	if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
		io->tgt_ios += 1;

	if (ublk_completed_tgt_io(t, q, tag))
		ublk_complete_io(t, q, tag, io->result);
}
195 
/*
 * Fill the first @len bytes of @fd with @byte using pwrite(2) in
 * 4 KiB chunks, starting at offset 0.
 *
 * Returns 0 on success, -errno on a write error, or -EIO if pwrite()
 * reports no progress (returns 0).  Writes interrupted by a signal
 * (EINTR) are retried instead of failing the whole operation.
 */
static int ublk_loop_memset_file(int fd, __u8 byte, size_t len)
{
	off_t offset = 0;
	__u8 buf[4096];

	memset(buf, byte, sizeof(buf));
	while (len) {
		size_t chunk = len < sizeof(buf) ? len : sizeof(buf);
		/* ssize_t: pwrite's return type, avoids narrowing */
		ssize_t ret = pwrite(fd, buf, chunk, offset);

		if (ret < 0) {
			/* retry writes interrupted by a signal */
			if (errno == EINTR)
				continue;
			return -errno;
		}
		if (!ret)
			return -EIO;

		len -= ret;
		offset += ret;
	}
	return 0;
}
215 
/*
 * Target init: open the backing file(s), size the ublk device from
 * them, and fill in the device parameters.
 */
static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	unsigned long long bytes;
	unsigned long blocks;
	int ret;
	struct ublk_params p = {
		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN,
		.basic = {
			.attrs = UBLK_ATTR_VOLATILE_CACHE,
			/* 512-byte logical / 4K physical block size */
			.logical_bs_shift	= 9,
			.physical_bs_shift	= 12,
			.io_opt_shift	= 12,
			.io_min_shift	= 9,
			.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
		},
		.dma = {
			/* presumably a 512-byte alignment mask — confirm */
			.alignment = 511,
		},
	};

	ublk_set_integrity_params(ctx, &p);
	if (ctx->auto_zc_fallback) {
		ublk_err("%s: not support auto_zc_fallback\n", __func__);
		return -EINVAL;
	}

	/* Use O_DIRECT only for data file */
	ret = backing_file_tgt_init(dev, 1);
	if (ret)
		return ret;

	/* Expect a second file for integrity data */
	if (dev->tgt.nr_backing_files != 1 + !!ctx->metadata_size)
		return -EINVAL;

	blocks = dev->tgt.backing_file_size[0] >> p.basic.logical_bs_shift;
	if (ctx->metadata_size) {
		unsigned long metadata_blocks =
			dev->tgt.backing_file_size[1] / ctx->metadata_size;
		unsigned long integrity_len;

		/* Ensure both data and integrity data fit in backing files */
		blocks = min(blocks, metadata_blocks);
		integrity_len = blocks * ctx->metadata_size;
		/*
		 * Initialize PI app tag and ref tag to 0xFF
		 * to disable bio-integrity-auto checks
		 */
		ret = ublk_loop_memset_file(dev->fds[2], 0xFF, integrity_len);
		if (ret)
			return ret;
	}
	/* device capacity derived from the (possibly clamped) block count */
	bytes = blocks << p.basic.logical_bs_shift;
	dev->tgt.dev_size = bytes;
	p.basic.dev_sectors = bytes >> 9;
	dev->tgt.params = p;

	return 0;
}
275 
/* file-backed ("loop") target operations registered with kublk */
const struct ublk_tgt_ops loop_tgt_ops = {
	.name = "loop",
	.init_tgt = ublk_loop_tgt_init,
	.deinit_tgt = backing_file_tgt_deinit,
	.queue_io = ublk_loop_queue_io,
	.tgt_io_done = ublk_loop_io_done,
};
283