1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "kublk.h"
4
5 #define NR_STRIPE MAX_BACK_FILES
6
/* Per-device stripe geometry, stored in dev->private_data. */
struct stripe_conf {
	unsigned nr_files;	/* number of backing files striped across */
	unsigned shift;		/* log2(chunk size in bytes) */
};
11
/* The slice of one ublk I/O that lands on a single backing file. */
struct stripe {
	loff_t start;		/* start sector within the backing file */
	unsigned nr_sects;	/* total sectors routed to this file */
	int seq;		/* chunk index of this file inside the stripe unit */

	struct iovec *vec;	/* iovec segments, one per chunk */
	unsigned nr_vec;	/* segments filled so far */
	unsigned cap;		/* capacity of vec[] */
};
21
/*
 * Scatter plan for one ublk I/O: one struct stripe per backing file,
 * with all iovec storage allocated inline after the array.
 */
struct stripe_array {
	struct stripe s[NR_STRIPE];
	unsigned nr;		/* number of stripes actually populated */
	struct iovec _vec[];	/* flexible storage backing s[i].vec */
};
27
get_chunk_shift(const struct ublk_queue * q)28 static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q)
29 {
30 return (struct stripe_conf *)q->dev->private_data;
31 }
32
calculate_nr_vec(const struct stripe_conf * conf,const struct ublksrv_io_desc * iod)33 static inline unsigned calculate_nr_vec(const struct stripe_conf *conf,
34 const struct ublksrv_io_desc *iod)
35 {
36 const unsigned shift = conf->shift - 9;
37 const unsigned unit_sects = conf->nr_files << shift;
38 loff_t start = iod->start_sector;
39 loff_t end = start + iod->nr_sectors;
40
41 return (end / unit_sects) - (start / unit_sects) + 1;
42 }
43
alloc_stripe_array(const struct stripe_conf * conf,const struct ublksrv_io_desc * iod)44 static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf,
45 const struct ublksrv_io_desc *iod)
46 {
47 unsigned nr_vecs = calculate_nr_vec(conf, iod);
48 unsigned total = nr_vecs * conf->nr_files;
49 struct stripe_array *s;
50 int i;
51
52 s = malloc(sizeof(*s) + total * sizeof(struct iovec));
53
54 s->nr = 0;
55 for (i = 0; i < conf->nr_files; i++) {
56 struct stripe *t = &s->s[i];
57
58 t->nr_vec = 0;
59 t->vec = &s->_vec[i * nr_vecs];
60 t->nr_sects = 0;
61 t->cap = nr_vecs;
62 }
63
64 return s;
65 }
66
/* Release a stripe_array created by alloc_stripe_array(). */
static void free_stripe_array(struct stripe_array *s)
{
	free(s);
}
71
/*
 * Split one ublk I/O into per-backing-file stripes.  Walks the request in
 * chunk-sized steps, appending an iovec segment (pointing into 'base') to
 * the stripe of the file owning each chunk; successive chunks that hit the
 * same file extend its stripe, which must stay contiguous in that file.
 */
static void calculate_stripe_array(const struct stripe_conf *conf,
		const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base)
{
	const unsigned shift = conf->shift - 9;		/* chunk size in sectors (log2) */
	const unsigned chunk_sects = 1 << shift;
	const unsigned unit_sects = conf->nr_files << shift;	/* full stripe unit */
	off64_t start = iod->start_sector;
	off64_t end = start + iod->nr_sectors;
	unsigned long done = 0;				/* bytes of 'base' consumed */
	unsigned idx = 0;				/* round-robin stripe slot */

	while (start < end) {
		/* sectors remaining in the current chunk */
		unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1));
		/* first sector of the stripe unit containing 'start' */
		loff_t unit_off = (start / unit_sects) * unit_sects;
		unsigned seq = (start - unit_off) >> shift;
		struct stripe *this = &s->s[idx];
		/* byte-equivalent offset within the owning backing file */
		loff_t stripe_off = (unit_off / conf->nr_files) +
			(start & (chunk_sects - 1));

		if (nr_sects > end - start)
			nr_sects = end - start;
		if (this->nr_sects == 0) {
			/* first chunk routed to this file */
			this->nr_sects = nr_sects;
			this->start = stripe_off;
			this->seq = seq;
			s->nr += 1;
		} else {
			/* later chunks must map to the same file, contiguously */
			ublk_assert(seq == this->seq);
			ublk_assert(this->start + this->nr_sects == stripe_off);
			this->nr_sects += nr_sects;
		}

		ublk_assert(this->nr_vec < this->cap);
		this->vec[this->nr_vec].iov_base = (void *)(base + done);
		this->vec[this->nr_vec++].iov_len = nr_sects << 9;

		start += nr_sects;
		done += nr_sects << 9;
		idx = (idx + 1) % conf->nr_files;
	}
}
113
stripe_to_uring_op(const struct ublksrv_io_desc * iod,int zc)114 static inline enum io_uring_op stripe_to_uring_op(
115 const struct ublksrv_io_desc *iod, int zc)
116 {
117 unsigned ublk_op = ublksrv_get_op(iod);
118
119 if (ublk_op == UBLK_IO_OP_READ)
120 return zc ? IORING_OP_READV_FIXED : IORING_OP_READV;
121 else if (ublk_op == UBLK_IO_OP_WRITE)
122 return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV;
123 ublk_assert(0);
124 }
125
stripe_queue_tgt_rw_io(struct ublk_thread * t,struct ublk_queue * q,const struct ublksrv_io_desc * iod,int tag)126 static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
127 const struct ublksrv_io_desc *iod, int tag)
128 {
129 const struct stripe_conf *conf = get_chunk_shift(q);
130 unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0);
131 unsigned zc = (ublk_queue_use_zc(q) != 0);
132 enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc);
133 struct io_uring_sqe *sqe[NR_STRIPE];
134 struct stripe_array *s = alloc_stripe_array(conf, iod);
135 struct ublk_io *io = ublk_get_io(q, tag);
136 int i, extra = zc ? 2 : 0;
137 void *base = io->buf_addr;
138 unsigned short buf_idx = ublk_io_buf_idx(t, q, tag);
139
140 io->private_data = s;
141 calculate_stripe_array(conf, iod, s, base);
142
143 ublk_io_alloc_sqes(t, sqe, s->nr + extra);
144
145 if (zc) {
146 io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_idx);
147 sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
148 sqe[0]->user_data = build_user_data(tag,
149 ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
150 }
151
152 for (i = zc; i < s->nr + extra - zc; i++) {
153 struct stripe *t = &s->s[i - zc];
154
155 io_uring_prep_rw(op, sqe[i],
156 t->seq + 1,
157 (void *)t->vec,
158 t->nr_vec,
159 t->start << 9);
160 io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
161 if (auto_zc || zc) {
162 sqe[i]->buf_index = buf_idx;
163 if (zc)
164 sqe[i]->flags |= IOSQE_IO_HARDLINK;
165 }
166 /* bit63 marks us as tgt io */
167 sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, q->q_id, 1);
168 }
169 if (zc) {
170 struct io_uring_sqe *unreg = sqe[s->nr + 1];
171
172 io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, buf_idx);
173 unreg->user_data = build_user_data(
174 tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1);
175 }
176
177 /* register buffer is skip_success */
178 return s->nr + zc;
179 }
180
handle_flush(struct ublk_thread * t,struct ublk_queue * q,const struct ublksrv_io_desc * iod,int tag)181 static int handle_flush(struct ublk_thread *t, struct ublk_queue *q,
182 const struct ublksrv_io_desc *iod, int tag)
183 {
184 const struct stripe_conf *conf = get_chunk_shift(q);
185 struct io_uring_sqe *sqe[NR_STRIPE];
186 int i;
187
188 ublk_io_alloc_sqes(t, sqe, conf->nr_files);
189 for (i = 0; i < conf->nr_files; i++) {
190 io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
191 io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
192 sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, q->q_id, 1);
193 }
194 return conf->nr_files;
195 }
196
stripe_queue_tgt_io(struct ublk_thread * t,struct ublk_queue * q,int tag)197 static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
198 int tag)
199 {
200 const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
201 unsigned ublk_op = ublksrv_get_op(iod);
202 int ret = 0;
203
204 switch (ublk_op) {
205 case UBLK_IO_OP_FLUSH:
206 ret = handle_flush(t, q, iod, tag);
207 break;
208 case UBLK_IO_OP_WRITE_ZEROES:
209 case UBLK_IO_OP_DISCARD:
210 ret = -ENOTSUP;
211 break;
212 case UBLK_IO_OP_READ:
213 case UBLK_IO_OP_WRITE:
214 ret = stripe_queue_tgt_rw_io(t, q, iod, tag);
215 break;
216 default:
217 ret = -EINVAL;
218 break;
219 }
220 ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag,
221 iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret);
222 return ret;
223 }
224
/* queue_io hook: run the striping logic and record the queued count. */
static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q,
		int tag)
{
	ublk_queued_tgt_io(t, q, tag, stripe_queue_tgt_io(t, q, tag));
	return 0;
}
233
ublk_stripe_io_done(struct ublk_thread * t,struct ublk_queue * q,const struct io_uring_cqe * cqe)234 static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q,
235 const struct io_uring_cqe *cqe)
236 {
237 unsigned tag = user_data_to_tag(cqe->user_data);
238 const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
239 unsigned op = user_data_to_op(cqe->user_data);
240 struct ublk_io *io = ublk_get_io(q, tag);
241 int res = cqe->res;
242
243 if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
244 if (!io->result)
245 io->result = res;
246 if (res < 0)
247 ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
248 }
249
250 /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
251 if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
252 io->tgt_ios += 1;
253
254 /* fail short READ/WRITE simply */
255 if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
256 unsigned seq = user_data_to_tgt_data(cqe->user_data);
257 struct stripe_array *s = io->private_data;
258
259 if (res < s->s[seq].nr_sects << 9) {
260 io->result = -EIO;
261 ublk_err("%s: short rw op %u res %d exp %u tag %u\n",
262 __func__, op, res, s->s[seq].vec->iov_len, tag);
263 }
264 }
265
266 if (ublk_completed_tgt_io(t, q, tag)) {
267 int res = io->result;
268
269 if (!res)
270 res = iod->nr_sectors << 9;
271
272 ublk_complete_io(t, q, tag, res);
273
274 free_stripe_array(io->private_data);
275 io->private_data = NULL;
276 }
277 }
278
ublk_stripe_tgt_init(const struct dev_ctx * ctx,struct ublk_dev * dev)279 static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
280 {
281 struct ublk_params p = {
282 .types = UBLK_PARAM_TYPE_BASIC,
283 .basic = {
284 .attrs = UBLK_ATTR_VOLATILE_CACHE,
285 .logical_bs_shift = 9,
286 .physical_bs_shift = 12,
287 .io_opt_shift = 12,
288 .io_min_shift = 9,
289 .max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
290 },
291 };
292 unsigned chunk_size = ctx->stripe.chunk_size;
293 struct stripe_conf *conf;
294 unsigned chunk_shift;
295 loff_t bytes = 0;
296 int ret, i, mul = 1;
297
298 if (ctx->auto_zc_fallback) {
299 ublk_err("%s: not support auto_zc_fallback\n", __func__);
300 return -EINVAL;
301 }
302 if (ctx->metadata_size) {
303 ublk_err("%s: integrity not supported\n", __func__);
304 return -EINVAL;
305 }
306
307 if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
308 ublk_err("invalid chunk size %u\n", chunk_size);
309 return -EINVAL;
310 }
311
312 if (chunk_size < 4096 || chunk_size > 512 * 1024) {
313 ublk_err("invalid chunk size %u\n", chunk_size);
314 return -EINVAL;
315 }
316
317 chunk_shift = ilog2(chunk_size);
318
319 ret = backing_file_tgt_init(dev, dev->tgt.nr_backing_files);
320 if (ret)
321 return ret;
322
323 if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE)
324 return -EINVAL;
325
326 ublk_assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);
327
328 for (i = 0; i < dev->tgt.nr_backing_files; i++)
329 dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1);
330
331 for (i = 0; i < dev->tgt.nr_backing_files; i++) {
332 unsigned long size = dev->tgt.backing_file_size[i];
333
334 if (size != dev->tgt.backing_file_size[0])
335 return -EINVAL;
336 bytes += size;
337 }
338
339 conf = malloc(sizeof(*conf));
340 conf->shift = chunk_shift;
341 conf->nr_files = dev->tgt.nr_backing_files;
342
343 dev->private_data = conf;
344 dev->tgt.dev_size = bytes;
345 p.basic.dev_sectors = bytes >> 9;
346 dev->tgt.params = p;
347
348 if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
349 mul = 2;
350 dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
351 dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
352
353 printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);
354
355 return 0;
356 }
357
ublk_stripe_tgt_deinit(struct ublk_dev * dev)358 static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
359 {
360 free(dev->private_data);
361 backing_file_tgt_deinit(dev);
362 }
363
ublk_stripe_cmd_line(struct dev_ctx * ctx,int argc,char * argv[])364 static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
365 {
366 static const struct option longopts[] = {
367 { "chunk_size", 1, NULL, 0 },
368 { 0, 0, 0, 0 }
369 };
370 int option_idx, opt;
371
372 ctx->stripe.chunk_size = 65536;
373 while ((opt = getopt_long(argc, argv, "",
374 longopts, &option_idx)) != -1) {
375 switch (opt) {
376 case 0:
377 if (!strcmp(longopts[option_idx].name, "chunk_size"))
378 ctx->stripe.chunk_size = strtol(optarg, NULL, 10);
379 }
380 }
381 }
382
/* usage hook: print the stripe target's command-line help. */
static void ublk_stripe_usage(const struct ublk_tgt_ops *ops)
{
	printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n");
}
387
/* Target-ops vtable registering the stripe target with the kublk core. */
const struct ublk_tgt_ops stripe_tgt_ops = {
	.name = "stripe",
	.init_tgt = ublk_stripe_tgt_init,
	.deinit_tgt = ublk_stripe_tgt_deinit,
	.queue_io = ublk_stripe_queue_io,
	.tgt_io_done = ublk_stripe_io_done,
	.parse_cmd_line = ublk_stripe_cmd_line,
	.usage = ublk_stripe_usage,
};
397