1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "kublk.h" 4 5 #define NR_STRIPE MAX_BACK_FILES 6 7 struct stripe_conf { 8 unsigned nr_files; 9 unsigned shift; 10 }; 11 12 struct stripe { 13 loff_t start; 14 unsigned nr_sects; 15 int seq; 16 17 struct iovec *vec; 18 unsigned nr_vec; 19 unsigned cap; 20 }; 21 22 struct stripe_array { 23 struct stripe s[NR_STRIPE]; 24 unsigned nr; 25 struct iovec _vec[]; 26 }; 27 28 static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q) 29 { 30 return (struct stripe_conf *)q->dev->private_data; 31 } 32 33 static inline unsigned calculate_nr_vec(const struct stripe_conf *conf, 34 const struct ublksrv_io_desc *iod) 35 { 36 const unsigned shift = conf->shift - 9; 37 const unsigned unit_sects = conf->nr_files << shift; 38 loff_t start = iod->start_sector; 39 loff_t end = start + iod->nr_sectors; 40 41 return (end / unit_sects) - (start / unit_sects) + 1; 42 } 43 44 static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf, 45 const struct ublksrv_io_desc *iod) 46 { 47 unsigned nr_vecs = calculate_nr_vec(conf, iod); 48 unsigned total = nr_vecs * conf->nr_files; 49 struct stripe_array *s; 50 int i; 51 52 s = malloc(sizeof(*s) + total * sizeof(struct iovec)); 53 54 s->nr = 0; 55 for (i = 0; i < conf->nr_files; i++) { 56 struct stripe *t = &s->s[i]; 57 58 t->nr_vec = 0; 59 t->vec = &s->_vec[i * nr_vecs]; 60 t->nr_sects = 0; 61 t->cap = nr_vecs; 62 } 63 64 return s; 65 } 66 67 static void free_stripe_array(struct stripe_array *s) 68 { 69 free(s); 70 } 71 72 static void calculate_stripe_array(const struct stripe_conf *conf, 73 const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base) 74 { 75 const unsigned shift = conf->shift - 9; 76 const unsigned chunk_sects = 1 << shift; 77 const unsigned unit_sects = conf->nr_files << shift; 78 off64_t start = iod->start_sector; 79 off64_t end = start + iod->nr_sectors; 80 unsigned long done = 0; 81 unsigned idx = 0; 82 83 while (start < end) { 84 unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1)); 85 loff_t unit_off = (start / unit_sects) * unit_sects; 86 unsigned seq = (start - unit_off) >> shift; 87 struct stripe *this = &s->s[idx]; 88 loff_t stripe_off = (unit_off / conf->nr_files) + 89 (start & (chunk_sects - 1)); 90 91 if (nr_sects > end - start) 92 nr_sects = end - start; 93 if (this->nr_sects == 0) { 94 this->nr_sects = nr_sects; 95 this->start = stripe_off; 96 this->seq = seq; 97 s->nr += 1; 98 } else { 99 ublk_assert(seq == this->seq); 100 ublk_assert(this->start + this->nr_sects == stripe_off); 101 this->nr_sects += nr_sects; 102 } 103 104 ublk_assert(this->nr_vec < this->cap); 105 this->vec[this->nr_vec].iov_base = (void *)(base + done); 106 this->vec[this->nr_vec++].iov_len = nr_sects << 9; 107 108 start += nr_sects; 109 done += nr_sects << 9; 110 idx = (idx + 1) % conf->nr_files; 111 } 112 } 113 114 static inline enum io_uring_op stripe_to_uring_op( 115 const struct ublksrv_io_desc *iod, int zc) 116 { 117 unsigned ublk_op = ublksrv_get_op(iod); 118 119 if (ublk_op == UBLK_IO_OP_READ) 120 return zc ? IORING_OP_READV_FIXED : IORING_OP_READV; 121 else if (ublk_op == UBLK_IO_OP_WRITE) 122 return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV; 123 ublk_assert(0); 124 } 125 126 static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, 127 const struct ublksrv_io_desc *iod, int tag) 128 { 129 const struct stripe_conf *conf = get_chunk_shift(q); 130 unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0); 131 unsigned zc = (ublk_queue_use_zc(q) != 0); 132 enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc); 133 struct io_uring_sqe *sqe[NR_STRIPE]; 134 struct stripe_array *s = alloc_stripe_array(conf, iod); 135 struct ublk_io *io = ublk_get_io(q, tag); 136 int i, extra = zc ? 2 : 0; 137 void *base = io->buf_addr; 138 unsigned short buf_idx = ublk_io_buf_idx(t, q, tag); 139 140 io->private_data = s; 141 calculate_stripe_array(conf, iod, s, base); 142 143 ublk_io_alloc_sqes(t, sqe, s->nr + extra); 144 145 if (zc) { 146 io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_idx); 147 sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; 148 sqe[0]->user_data = build_user_data(tag, 149 ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); 150 } 151 152 for (i = zc; i < s->nr + extra - zc; i++) { 153 struct stripe *t = &s->s[i - zc]; 154 155 io_uring_prep_rw(op, sqe[i], 156 t->seq + 1, 157 (void *)t->vec, 158 t->nr_vec, 159 t->start << 9); 160 io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); 161 if (auto_zc || zc) { 162 sqe[i]->buf_index = buf_idx; 163 if (zc) 164 sqe[i]->flags |= IOSQE_IO_HARDLINK; 165 } 166 /* bit63 marks us as tgt io */ 167 sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, q->q_id, 1); 168 } 169 if (zc) { 170 struct io_uring_sqe *unreg = sqe[s->nr + 1]; 171 172 io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, buf_idx); 173 unreg->user_data = build_user_data( 174 tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1); 175 } 176 177 /* register buffer is skip_success */ 178 return s->nr + zc; 179 } 180 181 static int handle_flush(struct ublk_thread *t, struct ublk_queue *q, 182 const struct ublksrv_io_desc *iod, int tag) 183 { 184 const struct stripe_conf *conf = get_chunk_shift(q); 185 struct io_uring_sqe *sqe[NR_STRIPE]; 186 int i; 187 188 ublk_io_alloc_sqes(t, sqe, conf->nr_files); 189 for (i = 0; i < conf->nr_files; i++) { 190 io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC); 191 io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); 192 sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, q->q_id, 1); 193 } 194 return conf->nr_files; 195 } 196 197 static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, 198 int tag) 199 { 200 const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); 201 unsigned ublk_op = ublksrv_get_op(iod); 202 int ret = 0; 203 204 switch (ublk_op) { 205 case UBLK_IO_OP_FLUSH: 206 ret = handle_flush(t, q, iod, tag); 207 break; 208 case UBLK_IO_OP_WRITE_ZEROES: 209 case UBLK_IO_OP_DISCARD: 210 ret = -ENOTSUP; 211 break; 212 case UBLK_IO_OP_READ: 213 case UBLK_IO_OP_WRITE: 214 ret = stripe_queue_tgt_rw_io(t, q, iod, tag); 215 break; 216 default: 217 ret = -EINVAL; 218 break; 219 } 220 ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag, 221 iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret); 222 return ret; 223 } 224 225 static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q, 226 int tag) 227 { 228 int queued = stripe_queue_tgt_io(t, q, tag); 229 230 ublk_queued_tgt_io(t, q, tag, queued); 231 return 0; 232 } 233 234 static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q, 235 const struct io_uring_cqe *cqe) 236 { 237 unsigned tag = user_data_to_tag(cqe->user_data); 238 const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); 239 unsigned op = user_data_to_op(cqe->user_data); 240 struct ublk_io *io = ublk_get_io(q, tag); 241 int res = cqe->res; 242 243 if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { 244 if (!io->result) 245 io->result = res; 246 if (res < 0) 247 ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); 248 } 249 250 /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ 251 if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) 252 io->tgt_ios += 1; 253 254 /* fail short READ/WRITE simply */ 255 if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) { 256 unsigned seq = user_data_to_tgt_data(cqe->user_data); 257 struct stripe_array *s = io->private_data; 258 259 if (res < s->s[seq].nr_sects << 9) { 260 io->result = -EIO; 261 ublk_err("%s: short rw op %u res %d exp %u tag %u\n", 262 __func__, op, res, s->s[seq].vec->iov_len, tag); 263 } 264 } 265 266 if (ublk_completed_tgt_io(t, q, tag)) { 267 int res = io->result; 268 269 if (!res) 270 res = iod->nr_sectors << 9; 271 272 ublk_complete_io(t, q, tag, res); 273 274 free_stripe_array(io->private_data); 275 io->private_data = NULL; 276 } 277 } 278 279 static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) 280 { 281 struct ublk_params p = { 282 .types = UBLK_PARAM_TYPE_BASIC, 283 .basic = { 284 .attrs = UBLK_ATTR_VOLATILE_CACHE, 285 .logical_bs_shift = 9, 286 .physical_bs_shift = 12, 287 .io_opt_shift = 12, 288 .io_min_shift = 9, 289 .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, 290 }, 291 }; 292 unsigned chunk_size = ctx->stripe.chunk_size; 293 struct stripe_conf *conf; 294 unsigned chunk_shift; 295 loff_t bytes = 0; 296 int ret, i, mul = 1; 297 298 if (ctx->auto_zc_fallback) { 299 ublk_err("%s: not support auto_zc_fallback\n", __func__); 300 return -EINVAL; 301 } 302 if (ctx->metadata_size) { 303 ublk_err("%s: integrity not supported\n", __func__); 304 return -EINVAL; 305 } 306 307 if ((chunk_size & (chunk_size - 1)) || !chunk_size) { 308 ublk_err("invalid chunk size %u\n", chunk_size); 309 return -EINVAL; 310 } 311 312 if (chunk_size < 4096 || chunk_size > 512 * 1024) { 313 ublk_err("invalid chunk size %u\n", chunk_size); 314 return -EINVAL; 315 } 316 317 chunk_shift = ilog2(chunk_size); 318 319 ret = backing_file_tgt_init(dev, dev->tgt.nr_backing_files); 320 if (ret) 321 return ret; 322 323 if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE) 324 return -EINVAL; 325 326 ublk_assert(dev->nr_fds == dev->tgt.nr_backing_files + 1); 327 328 for (i = 0; i < dev->tgt.nr_backing_files; i++) 329 dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1); 330 331 for (i = 0; i < dev->tgt.nr_backing_files; i++) { 332 unsigned long size = dev->tgt.backing_file_size[i]; 333 334 if (size != dev->tgt.backing_file_size[0]) 335 return -EINVAL; 336 bytes += size; 337 } 338 339 conf = malloc(sizeof(*conf)); 340 conf->shift = chunk_shift; 341 conf->nr_files = dev->tgt.nr_backing_files; 342 343 dev->private_data = conf; 344 dev->tgt.dev_size = bytes; 345 p.basic.dev_sectors = bytes >> 9; 346 dev->tgt.params = p; 347 348 if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) 349 mul = 2; 350 dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; 351 dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; 352 353 printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files); 354 355 return 0; 356 } 357 358 static void ublk_stripe_tgt_deinit(struct ublk_dev *dev) 359 { 360 free(dev->private_data); 361 backing_file_tgt_deinit(dev); 362 } 363 364 static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) 365 { 366 static const struct option longopts[] = { 367 { "chunk_size", 1, NULL, 0 }, 368 { 0, 0, 0, 0 } 369 }; 370 int option_idx, opt; 371 372 ctx->stripe.chunk_size = 65536; 373 while ((opt = getopt_long(argc, argv, "", 374 longopts, &option_idx)) != -1) { 375 switch (opt) { 376 case 0: 377 if (!strcmp(longopts[option_idx].name, "chunk_size")) 378 ctx->stripe.chunk_size = strtol(optarg, NULL, 10); 379 } 380 } 381 } 382 383 static void ublk_stripe_usage(const struct ublk_tgt_ops *ops) 384 { 385 printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n"); 386 } 387 388 const struct ublk_tgt_ops stripe_tgt_ops = { 389 .name = "stripe", 390 .init_tgt = ublk_stripe_tgt_init, 391 .deinit_tgt = ublk_stripe_tgt_deinit, 392 .queue_io = ublk_stripe_queue_io, 393 .tgt_io_done = ublk_stripe_io_done, 394 .parse_cmd_line = ublk_stripe_cmd_line, 395 .usage = ublk_stripe_usage, 396 }; 397