1*0f3ebf2dSMing Lei // SPDX-License-Identifier: GPL-2.0 2*0f3ebf2dSMing Lei 3*0f3ebf2dSMing Lei #include "kublk.h" 4*0f3ebf2dSMing Lei 5*0f3ebf2dSMing Lei #define NR_STRIPE MAX_BACK_FILES 6*0f3ebf2dSMing Lei 7*0f3ebf2dSMing Lei struct stripe_conf { 8*0f3ebf2dSMing Lei unsigned nr_files; 9*0f3ebf2dSMing Lei unsigned shift; 10*0f3ebf2dSMing Lei }; 11*0f3ebf2dSMing Lei 12*0f3ebf2dSMing Lei struct stripe { 13*0f3ebf2dSMing Lei loff_t start; 14*0f3ebf2dSMing Lei unsigned nr_sects; 15*0f3ebf2dSMing Lei int seq; 16*0f3ebf2dSMing Lei 17*0f3ebf2dSMing Lei struct iovec *vec; 18*0f3ebf2dSMing Lei unsigned nr_vec; 19*0f3ebf2dSMing Lei unsigned cap; 20*0f3ebf2dSMing Lei }; 21*0f3ebf2dSMing Lei 22*0f3ebf2dSMing Lei struct stripe_array { 23*0f3ebf2dSMing Lei struct stripe s[NR_STRIPE]; 24*0f3ebf2dSMing Lei unsigned nr; 25*0f3ebf2dSMing Lei struct iovec _vec[]; 26*0f3ebf2dSMing Lei }; 27*0f3ebf2dSMing Lei 28*0f3ebf2dSMing Lei static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q) 29*0f3ebf2dSMing Lei { 30*0f3ebf2dSMing Lei return (struct stripe_conf *)q->dev->private_data; 31*0f3ebf2dSMing Lei } 32*0f3ebf2dSMing Lei 33*0f3ebf2dSMing Lei static inline unsigned calculate_nr_vec(const struct stripe_conf *conf, 34*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod) 35*0f3ebf2dSMing Lei { 36*0f3ebf2dSMing Lei const unsigned shift = conf->shift - 9; 37*0f3ebf2dSMing Lei const unsigned unit_sects = conf->nr_files << shift; 38*0f3ebf2dSMing Lei loff_t start = iod->start_sector; 39*0f3ebf2dSMing Lei loff_t end = start + iod->nr_sectors; 40*0f3ebf2dSMing Lei 41*0f3ebf2dSMing Lei return (end / unit_sects) - (start / unit_sects) + 1; 42*0f3ebf2dSMing Lei } 43*0f3ebf2dSMing Lei 44*0f3ebf2dSMing Lei static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf, 45*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod) 46*0f3ebf2dSMing Lei { 47*0f3ebf2dSMing Lei unsigned nr_vecs = calculate_nr_vec(conf, iod); 48*0f3ebf2dSMing Lei unsigned total = nr_vecs * conf->nr_files; 49*0f3ebf2dSMing Lei struct stripe_array *s; 50*0f3ebf2dSMing Lei int i; 51*0f3ebf2dSMing Lei 52*0f3ebf2dSMing Lei s = malloc(sizeof(*s) + total * sizeof(struct iovec)); 53*0f3ebf2dSMing Lei 54*0f3ebf2dSMing Lei s->nr = 0; 55*0f3ebf2dSMing Lei for (i = 0; i < conf->nr_files; i++) { 56*0f3ebf2dSMing Lei struct stripe *t = &s->s[i]; 57*0f3ebf2dSMing Lei 58*0f3ebf2dSMing Lei t->nr_vec = 0; 59*0f3ebf2dSMing Lei t->vec = &s->_vec[i * nr_vecs]; 60*0f3ebf2dSMing Lei t->nr_sects = 0; 61*0f3ebf2dSMing Lei t->cap = nr_vecs; 62*0f3ebf2dSMing Lei } 63*0f3ebf2dSMing Lei 64*0f3ebf2dSMing Lei return s; 65*0f3ebf2dSMing Lei } 66*0f3ebf2dSMing Lei 67*0f3ebf2dSMing Lei static void free_stripe_array(struct stripe_array *s) 68*0f3ebf2dSMing Lei { 69*0f3ebf2dSMing Lei free(s); 70*0f3ebf2dSMing Lei } 71*0f3ebf2dSMing Lei 72*0f3ebf2dSMing Lei static void calculate_stripe_array(const struct stripe_conf *conf, 73*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod, struct stripe_array *s) 74*0f3ebf2dSMing Lei { 75*0f3ebf2dSMing Lei const unsigned shift = conf->shift - 9; 76*0f3ebf2dSMing Lei const unsigned chunk_sects = 1 << shift; 77*0f3ebf2dSMing Lei const unsigned unit_sects = conf->nr_files << shift; 78*0f3ebf2dSMing Lei off64_t start = iod->start_sector; 79*0f3ebf2dSMing Lei off64_t end = start + iod->nr_sectors; 80*0f3ebf2dSMing Lei unsigned long done = 0; 81*0f3ebf2dSMing Lei unsigned idx = 0; 82*0f3ebf2dSMing Lei 83*0f3ebf2dSMing Lei while (start < end) { 84*0f3ebf2dSMing Lei unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1)); 85*0f3ebf2dSMing Lei loff_t unit_off = (start / unit_sects) * unit_sects; 86*0f3ebf2dSMing Lei unsigned seq = (start - unit_off) >> shift; 87*0f3ebf2dSMing Lei struct stripe *this = &s->s[idx]; 88*0f3ebf2dSMing Lei loff_t stripe_off = (unit_off / conf->nr_files) + 89*0f3ebf2dSMing Lei (start & (chunk_sects - 1)); 90*0f3ebf2dSMing Lei 91*0f3ebf2dSMing Lei if (nr_sects > end - start) 92*0f3ebf2dSMing Lei nr_sects = end - start; 93*0f3ebf2dSMing Lei if (this->nr_sects == 0) { 94*0f3ebf2dSMing Lei this->nr_sects = nr_sects; 95*0f3ebf2dSMing Lei this->start = stripe_off; 96*0f3ebf2dSMing Lei this->seq = seq; 97*0f3ebf2dSMing Lei s->nr += 1; 98*0f3ebf2dSMing Lei } else { 99*0f3ebf2dSMing Lei assert(seq == this->seq); 100*0f3ebf2dSMing Lei assert(this->start + this->nr_sects == stripe_off); 101*0f3ebf2dSMing Lei this->nr_sects += nr_sects; 102*0f3ebf2dSMing Lei } 103*0f3ebf2dSMing Lei 104*0f3ebf2dSMing Lei assert(this->nr_vec < this->cap); 105*0f3ebf2dSMing Lei this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done); 106*0f3ebf2dSMing Lei this->vec[this->nr_vec++].iov_len = nr_sects << 9; 107*0f3ebf2dSMing Lei 108*0f3ebf2dSMing Lei start += nr_sects; 109*0f3ebf2dSMing Lei done += nr_sects << 9; 110*0f3ebf2dSMing Lei idx = (idx + 1) % conf->nr_files; 111*0f3ebf2dSMing Lei } 112*0f3ebf2dSMing Lei } 113*0f3ebf2dSMing Lei 114*0f3ebf2dSMing Lei static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod) 115*0f3ebf2dSMing Lei { 116*0f3ebf2dSMing Lei unsigned ublk_op = ublksrv_get_op(iod); 117*0f3ebf2dSMing Lei 118*0f3ebf2dSMing Lei if (ublk_op == UBLK_IO_OP_READ) 119*0f3ebf2dSMing Lei return IORING_OP_READV; 120*0f3ebf2dSMing Lei else if (ublk_op == UBLK_IO_OP_WRITE) 121*0f3ebf2dSMing Lei return IORING_OP_WRITEV; 122*0f3ebf2dSMing Lei assert(0); 123*0f3ebf2dSMing Lei } 124*0f3ebf2dSMing Lei 125*0f3ebf2dSMing Lei static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) 126*0f3ebf2dSMing Lei { 127*0f3ebf2dSMing Lei const struct stripe_conf *conf = get_chunk_shift(q); 128*0f3ebf2dSMing Lei enum io_uring_op op = stripe_to_uring_op(iod); 129*0f3ebf2dSMing Lei struct io_uring_sqe *sqe[NR_STRIPE]; 130*0f3ebf2dSMing Lei struct stripe_array *s = alloc_stripe_array(conf, iod); 131*0f3ebf2dSMing Lei struct ublk_io *io = ublk_get_io(q, tag); 132*0f3ebf2dSMing Lei int i; 133*0f3ebf2dSMing Lei 134*0f3ebf2dSMing Lei io->private_data = s; 135*0f3ebf2dSMing Lei calculate_stripe_array(conf, iod, s); 136*0f3ebf2dSMing Lei 137*0f3ebf2dSMing Lei ublk_queue_alloc_sqes(q, sqe, s->nr); 138*0f3ebf2dSMing Lei for (i = 0; i < s->nr; i++) { 139*0f3ebf2dSMing Lei struct stripe *t = &s->s[i]; 140*0f3ebf2dSMing Lei 141*0f3ebf2dSMing Lei io_uring_prep_rw(op, sqe[i], 142*0f3ebf2dSMing Lei t->seq + 1, 143*0f3ebf2dSMing Lei (void *)t->vec, 144*0f3ebf2dSMing Lei t->nr_vec, 145*0f3ebf2dSMing Lei t->start << 9); 146*0f3ebf2dSMing Lei io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); 147*0f3ebf2dSMing Lei /* bit63 marks us as tgt io */ 148*0f3ebf2dSMing Lei sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1); 149*0f3ebf2dSMing Lei } 150*0f3ebf2dSMing Lei return s->nr; 151*0f3ebf2dSMing Lei } 152*0f3ebf2dSMing Lei 153*0f3ebf2dSMing Lei static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) 154*0f3ebf2dSMing Lei { 155*0f3ebf2dSMing Lei const struct stripe_conf *conf = get_chunk_shift(q); 156*0f3ebf2dSMing Lei struct io_uring_sqe *sqe[NR_STRIPE]; 157*0f3ebf2dSMing Lei int i; 158*0f3ebf2dSMing Lei 159*0f3ebf2dSMing Lei ublk_queue_alloc_sqes(q, sqe, conf->nr_files); 160*0f3ebf2dSMing Lei for (i = 0; i < conf->nr_files; i++) { 161*0f3ebf2dSMing Lei io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC); 162*0f3ebf2dSMing Lei io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); 163*0f3ebf2dSMing Lei sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, 1); 164*0f3ebf2dSMing Lei } 165*0f3ebf2dSMing Lei return conf->nr_files; 166*0f3ebf2dSMing Lei } 167*0f3ebf2dSMing Lei 168*0f3ebf2dSMing Lei static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) 169*0f3ebf2dSMing Lei { 170*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); 171*0f3ebf2dSMing Lei unsigned ublk_op = ublksrv_get_op(iod); 172*0f3ebf2dSMing Lei int ret = 0; 173*0f3ebf2dSMing Lei 174*0f3ebf2dSMing Lei switch (ublk_op) { 175*0f3ebf2dSMing Lei case UBLK_IO_OP_FLUSH: 176*0f3ebf2dSMing Lei ret = handle_flush(q, iod, tag); 177*0f3ebf2dSMing Lei break; 178*0f3ebf2dSMing Lei case UBLK_IO_OP_WRITE_ZEROES: 179*0f3ebf2dSMing Lei case UBLK_IO_OP_DISCARD: 180*0f3ebf2dSMing Lei ret = -ENOTSUP; 181*0f3ebf2dSMing Lei break; 182*0f3ebf2dSMing Lei case UBLK_IO_OP_READ: 183*0f3ebf2dSMing Lei case UBLK_IO_OP_WRITE: 184*0f3ebf2dSMing Lei ret = stripe_queue_tgt_rw_io(q, iod, tag); 185*0f3ebf2dSMing Lei break; 186*0f3ebf2dSMing Lei default: 187*0f3ebf2dSMing Lei ret = -EINVAL; 188*0f3ebf2dSMing Lei break; 189*0f3ebf2dSMing Lei } 190*0f3ebf2dSMing Lei ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag, 191*0f3ebf2dSMing Lei iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret); 192*0f3ebf2dSMing Lei return ret; 193*0f3ebf2dSMing Lei } 194*0f3ebf2dSMing Lei 195*0f3ebf2dSMing Lei static int ublk_stripe_queue_io(struct ublk_queue *q, int tag) 196*0f3ebf2dSMing Lei { 197*0f3ebf2dSMing Lei int queued = stripe_queue_tgt_io(q, tag); 198*0f3ebf2dSMing Lei 199*0f3ebf2dSMing Lei ublk_queued_tgt_io(q, tag, queued); 200*0f3ebf2dSMing Lei return 0; 201*0f3ebf2dSMing Lei } 202*0f3ebf2dSMing Lei 203*0f3ebf2dSMing Lei static void ublk_stripe_io_done(struct ublk_queue *q, int tag, 204*0f3ebf2dSMing Lei const struct io_uring_cqe *cqe) 205*0f3ebf2dSMing Lei { 206*0f3ebf2dSMing Lei const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); 207*0f3ebf2dSMing Lei unsigned op = user_data_to_op(cqe->user_data); 208*0f3ebf2dSMing Lei struct ublk_io *io = ublk_get_io(q, tag); 209*0f3ebf2dSMing Lei int res = cqe->res; 210*0f3ebf2dSMing Lei 211*0f3ebf2dSMing Lei if (res < 0) { 212*0f3ebf2dSMing Lei if (!io->result) 213*0f3ebf2dSMing Lei io->result = res; 214*0f3ebf2dSMing Lei ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); 215*0f3ebf2dSMing Lei } 216*0f3ebf2dSMing Lei 217*0f3ebf2dSMing Lei /* fail short READ/WRITE simply */ 218*0f3ebf2dSMing Lei if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) { 219*0f3ebf2dSMing Lei unsigned seq = user_data_to_tgt_data(cqe->user_data); 220*0f3ebf2dSMing Lei struct stripe_array *s = io->private_data; 221*0f3ebf2dSMing Lei 222*0f3ebf2dSMing Lei if (res < s->s[seq].vec->iov_len) 223*0f3ebf2dSMing Lei io->result = -EIO; 224*0f3ebf2dSMing Lei } 225*0f3ebf2dSMing Lei 226*0f3ebf2dSMing Lei if (ublk_completed_tgt_io(q, tag)) { 227*0f3ebf2dSMing Lei int res = io->result; 228*0f3ebf2dSMing Lei 229*0f3ebf2dSMing Lei if (!res) 230*0f3ebf2dSMing Lei res = iod->nr_sectors << 9; 231*0f3ebf2dSMing Lei 232*0f3ebf2dSMing Lei ublk_complete_io(q, tag, res); 233*0f3ebf2dSMing Lei 234*0f3ebf2dSMing Lei free_stripe_array(io->private_data); 235*0f3ebf2dSMing Lei io->private_data = NULL; 236*0f3ebf2dSMing Lei } 237*0f3ebf2dSMing Lei } 238*0f3ebf2dSMing Lei 239*0f3ebf2dSMing Lei static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) 240*0f3ebf2dSMing Lei { 241*0f3ebf2dSMing Lei struct ublk_params p = { 242*0f3ebf2dSMing Lei .types = UBLK_PARAM_TYPE_BASIC, 243*0f3ebf2dSMing Lei .basic = { 244*0f3ebf2dSMing Lei .attrs = UBLK_ATTR_VOLATILE_CACHE, 245*0f3ebf2dSMing Lei .logical_bs_shift = 9, 246*0f3ebf2dSMing Lei .physical_bs_shift = 12, 247*0f3ebf2dSMing Lei .io_opt_shift = 12, 248*0f3ebf2dSMing Lei .io_min_shift = 9, 249*0f3ebf2dSMing Lei .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, 250*0f3ebf2dSMing Lei }, 251*0f3ebf2dSMing Lei }; 252*0f3ebf2dSMing Lei unsigned chunk_size = ctx->chunk_size; 253*0f3ebf2dSMing Lei struct stripe_conf *conf; 254*0f3ebf2dSMing Lei unsigned chunk_shift; 255*0f3ebf2dSMing Lei loff_t bytes = 0; 256*0f3ebf2dSMing Lei int ret, i; 257*0f3ebf2dSMing Lei 258*0f3ebf2dSMing Lei if ((chunk_size & (chunk_size - 1)) || !chunk_size) { 259*0f3ebf2dSMing Lei ublk_err("invalid chunk size %u\n", chunk_size); 260*0f3ebf2dSMing Lei return -EINVAL; 261*0f3ebf2dSMing Lei } 262*0f3ebf2dSMing Lei 263*0f3ebf2dSMing Lei if (chunk_size < 4096 || chunk_size > 512 * 1024) { 264*0f3ebf2dSMing Lei ublk_err("invalid chunk size %u\n", chunk_size); 265*0f3ebf2dSMing Lei return -EINVAL; 266*0f3ebf2dSMing Lei } 267*0f3ebf2dSMing Lei 268*0f3ebf2dSMing Lei chunk_shift = ilog2(chunk_size); 269*0f3ebf2dSMing Lei 270*0f3ebf2dSMing Lei ret = backing_file_tgt_init(dev); 271*0f3ebf2dSMing Lei if (ret) 272*0f3ebf2dSMing Lei return ret; 273*0f3ebf2dSMing Lei 274*0f3ebf2dSMing Lei if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE) 275*0f3ebf2dSMing Lei return -EINVAL; 276*0f3ebf2dSMing Lei 277*0f3ebf2dSMing Lei assert(dev->nr_fds == dev->tgt.nr_backing_files + 1); 278*0f3ebf2dSMing Lei 279*0f3ebf2dSMing Lei for (i = 0; i < dev->tgt.nr_backing_files; i++) 280*0f3ebf2dSMing Lei dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1); 281*0f3ebf2dSMing Lei 282*0f3ebf2dSMing Lei for (i = 0; i < dev->tgt.nr_backing_files; i++) { 283*0f3ebf2dSMing Lei unsigned long size = dev->tgt.backing_file_size[i]; 284*0f3ebf2dSMing Lei 285*0f3ebf2dSMing Lei if (size != dev->tgt.backing_file_size[0]) 286*0f3ebf2dSMing Lei return -EINVAL; 287*0f3ebf2dSMing Lei bytes += size; 288*0f3ebf2dSMing Lei } 289*0f3ebf2dSMing Lei 290*0f3ebf2dSMing Lei conf = malloc(sizeof(*conf)); 291*0f3ebf2dSMing Lei conf->shift = chunk_shift; 292*0f3ebf2dSMing Lei conf->nr_files = dev->tgt.nr_backing_files; 293*0f3ebf2dSMing Lei 294*0f3ebf2dSMing Lei dev->private_data = conf; 295*0f3ebf2dSMing Lei dev->tgt.dev_size = bytes; 296*0f3ebf2dSMing Lei p.basic.dev_sectors = bytes >> 9; 297*0f3ebf2dSMing Lei dev->tgt.params = p; 298*0f3ebf2dSMing Lei dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files; 299*0f3ebf2dSMing Lei dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files; 300*0f3ebf2dSMing Lei 301*0f3ebf2dSMing Lei printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files); 302*0f3ebf2dSMing Lei 303*0f3ebf2dSMing Lei return 0; 304*0f3ebf2dSMing Lei } 305*0f3ebf2dSMing Lei 306*0f3ebf2dSMing Lei static void ublk_stripe_tgt_deinit(struct ublk_dev *dev) 307*0f3ebf2dSMing Lei { 308*0f3ebf2dSMing Lei free(dev->private_data); 309*0f3ebf2dSMing Lei backing_file_tgt_deinit(dev); 310*0f3ebf2dSMing Lei } 311*0f3ebf2dSMing Lei 312*0f3ebf2dSMing Lei const struct ublk_tgt_ops stripe_tgt_ops = { 313*0f3ebf2dSMing Lei .name = "stripe", 314*0f3ebf2dSMing Lei .init_tgt = ublk_stripe_tgt_init, 315*0f3ebf2dSMing Lei .deinit_tgt = ublk_stripe_tgt_deinit, 316*0f3ebf2dSMing Lei .queue_io = ublk_stripe_queue_io, 317*0f3ebf2dSMing Lei .tgt_io_done = ublk_stripe_io_done, 318*0f3ebf2dSMing Lei }; 319