1*0f3ebf2dSMing Lei // SPDX-License-Identifier: GPL-2.0
2*0f3ebf2dSMing Lei
3*0f3ebf2dSMing Lei #include "kublk.h"
4*0f3ebf2dSMing Lei
/* Upper bound on stripe members: one slot per possible backing file. */
#define NR_STRIPE MAX_BACK_FILES
6*0f3ebf2dSMing Lei
/* Per-device stripe geometry, stored in ublk_dev->private_data. */
struct stripe_conf {
	/* number of backing files the data is striped across */
	unsigned int nr_files;
	/* log2 of the chunk size in bytes (subtract 9 to get it in sectors) */
	unsigned int shift;
};
11*0f3ebf2dSMing Lei
12*0f3ebf2dSMing Lei struct stripe {
13*0f3ebf2dSMing Lei loff_t start;
14*0f3ebf2dSMing Lei unsigned nr_sects;
15*0f3ebf2dSMing Lei int seq;
16*0f3ebf2dSMing Lei
17*0f3ebf2dSMing Lei struct iovec *vec;
18*0f3ebf2dSMing Lei unsigned nr_vec;
19*0f3ebf2dSMing Lei unsigned cap;
20*0f3ebf2dSMing Lei };
21*0f3ebf2dSMing Lei
22*0f3ebf2dSMing Lei struct stripe_array {
23*0f3ebf2dSMing Lei struct stripe s[NR_STRIPE];
24*0f3ebf2dSMing Lei unsigned nr;
25*0f3ebf2dSMing Lei struct iovec _vec[];
26*0f3ebf2dSMing Lei };
27*0f3ebf2dSMing Lei
get_chunk_shift(const struct ublk_queue * q)28*0f3ebf2dSMing Lei static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q)
29*0f3ebf2dSMing Lei {
30*0f3ebf2dSMing Lei return (struct stripe_conf *)q->dev->private_data;
31*0f3ebf2dSMing Lei }
32*0f3ebf2dSMing Lei
calculate_nr_vec(const struct stripe_conf * conf,const struct ublksrv_io_desc * iod)33*0f3ebf2dSMing Lei static inline unsigned calculate_nr_vec(const struct stripe_conf *conf,
34*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod)
35*0f3ebf2dSMing Lei {
36*0f3ebf2dSMing Lei const unsigned shift = conf->shift - 9;
37*0f3ebf2dSMing Lei const unsigned unit_sects = conf->nr_files << shift;
38*0f3ebf2dSMing Lei loff_t start = iod->start_sector;
39*0f3ebf2dSMing Lei loff_t end = start + iod->nr_sectors;
40*0f3ebf2dSMing Lei
41*0f3ebf2dSMing Lei return (end / unit_sects) - (start / unit_sects) + 1;
42*0f3ebf2dSMing Lei }
43*0f3ebf2dSMing Lei
alloc_stripe_array(const struct stripe_conf * conf,const struct ublksrv_io_desc * iod)44*0f3ebf2dSMing Lei static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf,
45*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod)
46*0f3ebf2dSMing Lei {
47*0f3ebf2dSMing Lei unsigned nr_vecs = calculate_nr_vec(conf, iod);
48*0f3ebf2dSMing Lei unsigned total = nr_vecs * conf->nr_files;
49*0f3ebf2dSMing Lei struct stripe_array *s;
50*0f3ebf2dSMing Lei int i;
51*0f3ebf2dSMing Lei
52*0f3ebf2dSMing Lei s = malloc(sizeof(*s) + total * sizeof(struct iovec));
53*0f3ebf2dSMing Lei
54*0f3ebf2dSMing Lei s->nr = 0;
55*0f3ebf2dSMing Lei for (i = 0; i < conf->nr_files; i++) {
56*0f3ebf2dSMing Lei struct stripe *t = &s->s[i];
57*0f3ebf2dSMing Lei
58*0f3ebf2dSMing Lei t->nr_vec = 0;
59*0f3ebf2dSMing Lei t->vec = &s->_vec[i * nr_vecs];
60*0f3ebf2dSMing Lei t->nr_sects = 0;
61*0f3ebf2dSMing Lei t->cap = nr_vecs;
62*0f3ebf2dSMing Lei }
63*0f3ebf2dSMing Lei
64*0f3ebf2dSMing Lei return s;
65*0f3ebf2dSMing Lei }
66*0f3ebf2dSMing Lei
/*
 * Release a stripe array obtained from alloc_stripe_array().
 *
 * The struct and its iovec storage are a single allocation, so one free()
 * is enough.  Passing NULL is harmless.
 */
static void free_stripe_array(struct stripe_array *s)
{
	void *mem = s;

	free(mem);
}
71*0f3ebf2dSMing Lei
calculate_stripe_array(const struct stripe_conf * conf,const struct ublksrv_io_desc * iod,struct stripe_array * s)72*0f3ebf2dSMing Lei static void calculate_stripe_array(const struct stripe_conf *conf,
73*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod, struct stripe_array *s)
74*0f3ebf2dSMing Lei {
75*0f3ebf2dSMing Lei const unsigned shift = conf->shift - 9;
76*0f3ebf2dSMing Lei const unsigned chunk_sects = 1 << shift;
77*0f3ebf2dSMing Lei const unsigned unit_sects = conf->nr_files << shift;
78*0f3ebf2dSMing Lei off64_t start = iod->start_sector;
79*0f3ebf2dSMing Lei off64_t end = start + iod->nr_sectors;
80*0f3ebf2dSMing Lei unsigned long done = 0;
81*0f3ebf2dSMing Lei unsigned idx = 0;
82*0f3ebf2dSMing Lei
83*0f3ebf2dSMing Lei while (start < end) {
84*0f3ebf2dSMing Lei unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1));
85*0f3ebf2dSMing Lei loff_t unit_off = (start / unit_sects) * unit_sects;
86*0f3ebf2dSMing Lei unsigned seq = (start - unit_off) >> shift;
87*0f3ebf2dSMing Lei struct stripe *this = &s->s[idx];
88*0f3ebf2dSMing Lei loff_t stripe_off = (unit_off / conf->nr_files) +
89*0f3ebf2dSMing Lei (start & (chunk_sects - 1));
90*0f3ebf2dSMing Lei
91*0f3ebf2dSMing Lei if (nr_sects > end - start)
92*0f3ebf2dSMing Lei nr_sects = end - start;
93*0f3ebf2dSMing Lei if (this->nr_sects == 0) {
94*0f3ebf2dSMing Lei this->nr_sects = nr_sects;
95*0f3ebf2dSMing Lei this->start = stripe_off;
96*0f3ebf2dSMing Lei this->seq = seq;
97*0f3ebf2dSMing Lei s->nr += 1;
98*0f3ebf2dSMing Lei } else {
99*0f3ebf2dSMing Lei assert(seq == this->seq);
100*0f3ebf2dSMing Lei assert(this->start + this->nr_sects == stripe_off);
101*0f3ebf2dSMing Lei this->nr_sects += nr_sects;
102*0f3ebf2dSMing Lei }
103*0f3ebf2dSMing Lei
104*0f3ebf2dSMing Lei assert(this->nr_vec < this->cap);
105*0f3ebf2dSMing Lei this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done);
106*0f3ebf2dSMing Lei this->vec[this->nr_vec++].iov_len = nr_sects << 9;
107*0f3ebf2dSMing Lei
108*0f3ebf2dSMing Lei start += nr_sects;
109*0f3ebf2dSMing Lei done += nr_sects << 9;
110*0f3ebf2dSMing Lei idx = (idx + 1) % conf->nr_files;
111*0f3ebf2dSMing Lei }
112*0f3ebf2dSMing Lei }
113*0f3ebf2dSMing Lei
stripe_to_uring_op(const struct ublksrv_io_desc * iod)114*0f3ebf2dSMing Lei static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod)
115*0f3ebf2dSMing Lei {
116*0f3ebf2dSMing Lei unsigned ublk_op = ublksrv_get_op(iod);
117*0f3ebf2dSMing Lei
118*0f3ebf2dSMing Lei if (ublk_op == UBLK_IO_OP_READ)
119*0f3ebf2dSMing Lei return IORING_OP_READV;
120*0f3ebf2dSMing Lei else if (ublk_op == UBLK_IO_OP_WRITE)
121*0f3ebf2dSMing Lei return IORING_OP_WRITEV;
122*0f3ebf2dSMing Lei assert(0);
123*0f3ebf2dSMing Lei }
124*0f3ebf2dSMing Lei
stripe_queue_tgt_rw_io(struct ublk_queue * q,const struct ublksrv_io_desc * iod,int tag)125*0f3ebf2dSMing Lei static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
126*0f3ebf2dSMing Lei {
127*0f3ebf2dSMing Lei const struct stripe_conf *conf = get_chunk_shift(q);
128*0f3ebf2dSMing Lei enum io_uring_op op = stripe_to_uring_op(iod);
129*0f3ebf2dSMing Lei struct io_uring_sqe *sqe[NR_STRIPE];
130*0f3ebf2dSMing Lei struct stripe_array *s = alloc_stripe_array(conf, iod);
131*0f3ebf2dSMing Lei struct ublk_io *io = ublk_get_io(q, tag);
132*0f3ebf2dSMing Lei int i;
133*0f3ebf2dSMing Lei
134*0f3ebf2dSMing Lei io->private_data = s;
135*0f3ebf2dSMing Lei calculate_stripe_array(conf, iod, s);
136*0f3ebf2dSMing Lei
137*0f3ebf2dSMing Lei ublk_queue_alloc_sqes(q, sqe, s->nr);
138*0f3ebf2dSMing Lei for (i = 0; i < s->nr; i++) {
139*0f3ebf2dSMing Lei struct stripe *t = &s->s[i];
140*0f3ebf2dSMing Lei
141*0f3ebf2dSMing Lei io_uring_prep_rw(op, sqe[i],
142*0f3ebf2dSMing Lei t->seq + 1,
143*0f3ebf2dSMing Lei (void *)t->vec,
144*0f3ebf2dSMing Lei t->nr_vec,
145*0f3ebf2dSMing Lei t->start << 9);
146*0f3ebf2dSMing Lei io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
147*0f3ebf2dSMing Lei /* bit63 marks us as tgt io */
148*0f3ebf2dSMing Lei sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1);
149*0f3ebf2dSMing Lei }
150*0f3ebf2dSMing Lei return s->nr;
151*0f3ebf2dSMing Lei }
152*0f3ebf2dSMing Lei
handle_flush(struct ublk_queue * q,const struct ublksrv_io_desc * iod,int tag)153*0f3ebf2dSMing Lei static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
154*0f3ebf2dSMing Lei {
155*0f3ebf2dSMing Lei const struct stripe_conf *conf = get_chunk_shift(q);
156*0f3ebf2dSMing Lei struct io_uring_sqe *sqe[NR_STRIPE];
157*0f3ebf2dSMing Lei int i;
158*0f3ebf2dSMing Lei
159*0f3ebf2dSMing Lei ublk_queue_alloc_sqes(q, sqe, conf->nr_files);
160*0f3ebf2dSMing Lei for (i = 0; i < conf->nr_files; i++) {
161*0f3ebf2dSMing Lei io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
162*0f3ebf2dSMing Lei io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
163*0f3ebf2dSMing Lei sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, 1);
164*0f3ebf2dSMing Lei }
165*0f3ebf2dSMing Lei return conf->nr_files;
166*0f3ebf2dSMing Lei }
167*0f3ebf2dSMing Lei
stripe_queue_tgt_io(struct ublk_queue * q,int tag)168*0f3ebf2dSMing Lei static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
169*0f3ebf2dSMing Lei {
170*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
171*0f3ebf2dSMing Lei unsigned ublk_op = ublksrv_get_op(iod);
172*0f3ebf2dSMing Lei int ret = 0;
173*0f3ebf2dSMing Lei
174*0f3ebf2dSMing Lei switch (ublk_op) {
175*0f3ebf2dSMing Lei case UBLK_IO_OP_FLUSH:
176*0f3ebf2dSMing Lei ret = handle_flush(q, iod, tag);
177*0f3ebf2dSMing Lei break;
178*0f3ebf2dSMing Lei case UBLK_IO_OP_WRITE_ZEROES:
179*0f3ebf2dSMing Lei case UBLK_IO_OP_DISCARD:
180*0f3ebf2dSMing Lei ret = -ENOTSUP;
181*0f3ebf2dSMing Lei break;
182*0f3ebf2dSMing Lei case UBLK_IO_OP_READ:
183*0f3ebf2dSMing Lei case UBLK_IO_OP_WRITE:
184*0f3ebf2dSMing Lei ret = stripe_queue_tgt_rw_io(q, iod, tag);
185*0f3ebf2dSMing Lei break;
186*0f3ebf2dSMing Lei default:
187*0f3ebf2dSMing Lei ret = -EINVAL;
188*0f3ebf2dSMing Lei break;
189*0f3ebf2dSMing Lei }
190*0f3ebf2dSMing Lei ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag,
191*0f3ebf2dSMing Lei iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret);
192*0f3ebf2dSMing Lei return ret;
193*0f3ebf2dSMing Lei }
194*0f3ebf2dSMing Lei
/*
 * .queue_io hook: queue the target I/O for @tag and report the outcome
 * (SQE count or negative errno) through ublk_queued_tgt_io().
 *
 * Always returns 0; failures are conveyed via ublk_queued_tgt_io().
 */
static int ublk_stripe_queue_io(struct ublk_queue *q, int tag)
{
	ublk_queued_tgt_io(q, tag, stripe_queue_tgt_io(q, tag));

	return 0;
}
202*0f3ebf2dSMing Lei
ublk_stripe_io_done(struct ublk_queue * q,int tag,const struct io_uring_cqe * cqe)203*0f3ebf2dSMing Lei static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
204*0f3ebf2dSMing Lei const struct io_uring_cqe *cqe)
205*0f3ebf2dSMing Lei {
206*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
207*0f3ebf2dSMing Lei unsigned op = user_data_to_op(cqe->user_data);
208*0f3ebf2dSMing Lei struct ublk_io *io = ublk_get_io(q, tag);
209*0f3ebf2dSMing Lei int res = cqe->res;
210*0f3ebf2dSMing Lei
211*0f3ebf2dSMing Lei if (res < 0) {
212*0f3ebf2dSMing Lei if (!io->result)
213*0f3ebf2dSMing Lei io->result = res;
214*0f3ebf2dSMing Lei ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
215*0f3ebf2dSMing Lei }
216*0f3ebf2dSMing Lei
217*0f3ebf2dSMing Lei /* fail short READ/WRITE simply */
218*0f3ebf2dSMing Lei if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
219*0f3ebf2dSMing Lei unsigned seq = user_data_to_tgt_data(cqe->user_data);
220*0f3ebf2dSMing Lei struct stripe_array *s = io->private_data;
221*0f3ebf2dSMing Lei
222*0f3ebf2dSMing Lei if (res < s->s[seq].vec->iov_len)
223*0f3ebf2dSMing Lei io->result = -EIO;
224*0f3ebf2dSMing Lei }
225*0f3ebf2dSMing Lei
226*0f3ebf2dSMing Lei if (ublk_completed_tgt_io(q, tag)) {
227*0f3ebf2dSMing Lei int res = io->result;
228*0f3ebf2dSMing Lei
229*0f3ebf2dSMing Lei if (!res)
230*0f3ebf2dSMing Lei res = iod->nr_sectors << 9;
231*0f3ebf2dSMing Lei
232*0f3ebf2dSMing Lei ublk_complete_io(q, tag, res);
233*0f3ebf2dSMing Lei
234*0f3ebf2dSMing Lei free_stripe_array(io->private_data);
235*0f3ebf2dSMing Lei io->private_data = NULL;
236*0f3ebf2dSMing Lei }
237*0f3ebf2dSMing Lei }
238*0f3ebf2dSMing Lei
ublk_stripe_tgt_init(const struct dev_ctx * ctx,struct ublk_dev * dev)239*0f3ebf2dSMing Lei static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
240*0f3ebf2dSMing Lei {
241*0f3ebf2dSMing Lei struct ublk_params p = {
242*0f3ebf2dSMing Lei .types = UBLK_PARAM_TYPE_BASIC,
243*0f3ebf2dSMing Lei .basic = {
244*0f3ebf2dSMing Lei .attrs = UBLK_ATTR_VOLATILE_CACHE,
245*0f3ebf2dSMing Lei .logical_bs_shift = 9,
246*0f3ebf2dSMing Lei .physical_bs_shift = 12,
247*0f3ebf2dSMing Lei .io_opt_shift = 12,
248*0f3ebf2dSMing Lei .io_min_shift = 9,
249*0f3ebf2dSMing Lei .max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
250*0f3ebf2dSMing Lei },
251*0f3ebf2dSMing Lei };
252*0f3ebf2dSMing Lei unsigned chunk_size = ctx->chunk_size;
253*0f3ebf2dSMing Lei struct stripe_conf *conf;
254*0f3ebf2dSMing Lei unsigned chunk_shift;
255*0f3ebf2dSMing Lei loff_t bytes = 0;
256*0f3ebf2dSMing Lei int ret, i;
257*0f3ebf2dSMing Lei
258*0f3ebf2dSMing Lei if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
259*0f3ebf2dSMing Lei ublk_err("invalid chunk size %u\n", chunk_size);
260*0f3ebf2dSMing Lei return -EINVAL;
261*0f3ebf2dSMing Lei }
262*0f3ebf2dSMing Lei
263*0f3ebf2dSMing Lei if (chunk_size < 4096 || chunk_size > 512 * 1024) {
264*0f3ebf2dSMing Lei ublk_err("invalid chunk size %u\n", chunk_size);
265*0f3ebf2dSMing Lei return -EINVAL;
266*0f3ebf2dSMing Lei }
267*0f3ebf2dSMing Lei
268*0f3ebf2dSMing Lei chunk_shift = ilog2(chunk_size);
269*0f3ebf2dSMing Lei
270*0f3ebf2dSMing Lei ret = backing_file_tgt_init(dev);
271*0f3ebf2dSMing Lei if (ret)
272*0f3ebf2dSMing Lei return ret;
273*0f3ebf2dSMing Lei
274*0f3ebf2dSMing Lei if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE)
275*0f3ebf2dSMing Lei return -EINVAL;
276*0f3ebf2dSMing Lei
277*0f3ebf2dSMing Lei assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);
278*0f3ebf2dSMing Lei
279*0f3ebf2dSMing Lei for (i = 0; i < dev->tgt.nr_backing_files; i++)
280*0f3ebf2dSMing Lei dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1);
281*0f3ebf2dSMing Lei
282*0f3ebf2dSMing Lei for (i = 0; i < dev->tgt.nr_backing_files; i++) {
283*0f3ebf2dSMing Lei unsigned long size = dev->tgt.backing_file_size[i];
284*0f3ebf2dSMing Lei
285*0f3ebf2dSMing Lei if (size != dev->tgt.backing_file_size[0])
286*0f3ebf2dSMing Lei return -EINVAL;
287*0f3ebf2dSMing Lei bytes += size;
288*0f3ebf2dSMing Lei }
289*0f3ebf2dSMing Lei
290*0f3ebf2dSMing Lei conf = malloc(sizeof(*conf));
291*0f3ebf2dSMing Lei conf->shift = chunk_shift;
292*0f3ebf2dSMing Lei conf->nr_files = dev->tgt.nr_backing_files;
293*0f3ebf2dSMing Lei
294*0f3ebf2dSMing Lei dev->private_data = conf;
295*0f3ebf2dSMing Lei dev->tgt.dev_size = bytes;
296*0f3ebf2dSMing Lei p.basic.dev_sectors = bytes >> 9;
297*0f3ebf2dSMing Lei dev->tgt.params = p;
298*0f3ebf2dSMing Lei dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files;
299*0f3ebf2dSMing Lei dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files;
300*0f3ebf2dSMing Lei
301*0f3ebf2dSMing Lei printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);
302*0f3ebf2dSMing Lei
303*0f3ebf2dSMing Lei return 0;
304*0f3ebf2dSMing Lei }
305*0f3ebf2dSMing Lei
ublk_stripe_tgt_deinit(struct ublk_dev * dev)306*0f3ebf2dSMing Lei static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
307*0f3ebf2dSMing Lei {
308*0f3ebf2dSMing Lei free(dev->private_data);
309*0f3ebf2dSMing Lei backing_file_tgt_deinit(dev);
310*0f3ebf2dSMing Lei }
311*0f3ebf2dSMing Lei
312*0f3ebf2dSMing Lei const struct ublk_tgt_ops stripe_tgt_ops = {
313*0f3ebf2dSMing Lei .name = "stripe",
314*0f3ebf2dSMing Lei .init_tgt = ublk_stripe_tgt_init,
315*0f3ebf2dSMing Lei .deinit_tgt = ublk_stripe_tgt_deinit,
316*0f3ebf2dSMing Lei .queue_io = ublk_stripe_queue_io,
317*0f3ebf2dSMing Lei .tgt_io_done = ublk_stripe_io_done,
318*0f3ebf2dSMing Lei };
319