// SPDX-License-Identifier: GPL-2.0

#include "kublk.h"

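/* Map a ublk READ/WRITE op onto the matching io_uring opcode; zero-copy uses the fixed-buffer variants */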
static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (ublk_op == UBLK_IO_OP_READ)
		return zc ? IORING_OP_READ_FIXED : IORING_OP_READ;
	else if (ublk_op == UBLK_IO_OP_WRITE)
		return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE;
	ublk_assert(0);
}

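/* Handle FLUSH by queueing a single datasync fsync SQE against the backing data file */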
static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q,
			       const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);
	struct io_uring_sqe *sqe[1];

	ublk_io_alloc_sqes(t, sqe, 1);
	io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC);
	io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
	/* bit63 marks us as tgt io */
	sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
	return 1;
}

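/*
 * Queue the SQEs for a READ/WRITE request and return how many completions
 * the caller should expect: an optional integrity SQE, plus either a single
 * buffered I/O SQE or a zero-copy register -> I/O -> unregister chain
 * (whose register CQE is skipped on success).
 */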
static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
				const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);
	unsigned zc = ublk_queue_use_zc(q);
	unsigned auto_zc = ublk_queue_use_auto_zc(q);
	enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc);
	struct ublk_io *io = ublk_get_io(q, tag);
	__u64 offset = iod->start_sector << 9;
	__u32 len = iod->nr_sectors << 9;
	struct io_uring_sqe *sqe[3];
	void *addr = io->buf_addr;
	unsigned short buf_index = ublk_io_buf_idx(t, q, tag);

	if (iod->op_flags & UBLK_IO_F_INTEGRITY) {
		ublk_io_alloc_sqes(t, sqe, 1);
		/* Use second backing file for integrity data */
		io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 2),
				 io->integrity_buf,
				 ublk_integrity_len(q, len),
				 ublk_integrity_len(q, offset));
		sqe[0]->flags = IOSQE_FIXED_FILE;
		/* tgt_data = 1 indicates integrity I/O */
		sqe[0]->user_data = build_user_data(tag, ublk_op, 1, q->q_id, 1);
	}

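	/*
	 * Buffered path (no zero-copy), or auto buffer registration: issue a
	 * single read/write against the per-tag buffer.
	 */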
	if (!zc || auto_zc) {
		ublk_io_alloc_sqes(t, sqe, 1);
		if (!sqe[0])
			return -ENOMEM;

		io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/,
				 addr,
				 len,
				 offset);
		if (auto_zc)
			sqe[0]->buf_index = buf_index;
		io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
		return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 1;
	}

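	/*
	 * Zero-copy: hard-link buffer register -> data I/O -> buffer unregister
	 * so the ublk request buffer stays registered for the whole transfer;
	 * the register CQE is skipped on success.
	 */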
	ublk_io_alloc_sqes(t, sqe, 3);

	io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_index);
	sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
	sqe[0]->user_data = build_user_data(tag,
					    ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);

	io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0,
			 len,
			 offset);
	sqe[1]->buf_index = buf_index;
	sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
	sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);

	io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, buf_index);
	sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);

	return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 2;
}

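/* Dispatch one ublk I/O descriptor to the handler for its opcode */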
static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
	unsigned ublk_op = ublksrv_get_op(iod);
	int ret;

	switch (ublk_op) {
	case UBLK_IO_OP_FLUSH:
		ret = loop_queue_flush_io(t, q, iod, tag);
		break;
	case UBLK_IO_OP_WRITE_ZEROES:
	case UBLK_IO_OP_DISCARD:
		ret = -ENOTSUP;
		break;
	case UBLK_IO_OP_READ:
	case UBLK_IO_OP_WRITE:
		ret = loop_queue_tgt_rw_io(t, q, iod, tag);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
		 iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
	return ret;
}

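/* ->queue_io() callback: queue the target I/O and record how many SQEs were issued for this tag */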
static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q,
			      int tag)
{
	int queued = loop_queue_tgt_io(t, q, tag);

	ublk_queued_tgt_io(t, q, tag, queued);
	return 0;
}

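/*
 * ->tgt_io_done() callback: fold one backing-file CQE into the ublk
 * request's result, and complete the request once all of its target
 * I/Os have finished.
 */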
static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q,
			      const struct io_uring_cqe *cqe)
{
	unsigned tag = user_data_to_tag(cqe->user_data);
	unsigned op = user_data_to_op(cqe->user_data);
	struct ublk_io *io = ublk_get_io(q, tag);

	if (cqe->res < 0) {
		io->result = cqe->res;
		ublk_err("%s: io failed op %x user_data %lx\n",
			 __func__, op, cqe->user_data);
	} else if (op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
		__s32 data_len = user_data_to_tgt_data(cqe->user_data)
			? ublk_integrity_data_len(q, cqe->res)
			: cqe->res;

		if (!io->result || data_len < io->result)
			io->result = data_len;
	}

	/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
	if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
		io->tgt_ios += 1;

	if (ublk_completed_tgt_io(t, q, tag))
		ublk_complete_io(t, q, tag, io->result);
}

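/* Fill the first @len bytes of @fd with @byte, writing 4KB at a time */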
static int ublk_loop_memset_file(int fd, __u8 byte, size_t len)
{
	off_t offset = 0;
	__u8 buf[4096];

	memset(buf, byte, sizeof(buf));
	while (len) {
		int ret = pwrite(fd, buf, min(len, sizeof(buf)), offset);

		if (ret < 0)
			return -errno;
		if (!ret)
			return -EIO;

		len -= ret;
		offset += ret;
	}
	return 0;
}

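/*
 * ->init_tgt() callback: set up basic/DMA (and optional integrity) parameters,
 * open the backing file(s), size the device from the data file (capped by the
 * integrity file when metadata is enabled), and pre-fill the integrity file.
 */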
static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	unsigned long long bytes;
	unsigned long blocks;
	int ret;
	struct ublk_params p = {
		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN,
		.basic = {
			.attrs = UBLK_ATTR_VOLATILE_CACHE,
			.logical_bs_shift = 9,
			.physical_bs_shift = 12,
			.io_opt_shift = 12,
			.io_min_shift = 9,
			.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
		},
		.dma = {
			.alignment = 511,
		},
	};

	ublk_set_integrity_params(ctx, &p);
	if (ctx->auto_zc_fallback) {
		ublk_err("%s: auto_zc_fallback is not supported\n", __func__);
		return -EINVAL;
	}

	/* Use O_DIRECT only for the data file */
	ret = backing_file_tgt_init(dev, 1);
	if (ret)
		return ret;

	/* Expect a second backing file only when integrity metadata is enabled */
	if (dev->tgt.nr_backing_files != 1 + !!ctx->metadata_size)
		return -EINVAL;

	blocks = dev->tgt.backing_file_size[0] >> p.basic.logical_bs_shift;
	if (ctx->metadata_size) {
		unsigned long metadata_blocks =
			dev->tgt.backing_file_size[1] / ctx->metadata_size;
		unsigned long integrity_len;

		/* Ensure both data and integrity data fit in the backing files */
		blocks = min(blocks, metadata_blocks);
		integrity_len = blocks * ctx->metadata_size;
		/*
		 * Initialize PI app tag and ref tag to 0xFF
		 * to disable bio-integrity-auto checks
		 */
		ret = ublk_loop_memset_file(dev->fds[2], 0xFF, integrity_len);
		if (ret)
			return ret;
	}
	bytes = blocks << p.basic.logical_bs_shift;
	dev->tgt.dev_size = bytes;
	p.basic.dev_sectors = bytes >> 9;
	dev->tgt.params = p;

	return 0;
}

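/* Target ops for the file-backed loop target */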
const struct ublk_tgt_ops loop_tgt_ops = {
	.name = "loop",
	.init_tgt = ublk_loop_tgt_init,
	.deinit_tgt = backing_file_tgt_deinit,
	.queue_io = ublk_loop_queue_io,
	.tgt_io_done = ublk_loop_io_done,
};