1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "kublk.h" 4 5 static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc) 6 { 7 unsigned ublk_op = ublksrv_get_op(iod); 8 9 if (ublk_op == UBLK_IO_OP_READ) 10 return zc ? IORING_OP_READ_FIXED : IORING_OP_READ; 11 else if (ublk_op == UBLK_IO_OP_WRITE) 12 return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE; 13 ublk_assert(0); 14 } 15 16 static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, 17 const struct ublksrv_io_desc *iod, int tag) 18 { 19 unsigned ublk_op = ublksrv_get_op(iod); 20 struct io_uring_sqe *sqe[1]; 21 22 ublk_io_alloc_sqes(t, sqe, 1); 23 io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC); 24 io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); 25 /* bit63 marks us as tgt io */ 26 sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); 27 return 1; 28 } 29 30 /* 31 * Shared memory zero-copy I/O: when UBLK_IO_F_SHMEM_ZC is set, the 32 * request's data lives in a registered shared memory buffer. Decode 33 * index + offset from iod->addr and use the server's mmap of that 34 * buffer as the I/O buffer for the backing file. 35 */ 36 static int loop_queue_shmem_zc_io(struct ublk_thread *t, struct ublk_queue *q, 37 const struct ublksrv_io_desc *iod, int tag) 38 { 39 unsigned ublk_op = ublksrv_get_op(iod); 40 enum io_uring_op op = ublk_to_uring_op(iod, 0); 41 __u64 file_offset = iod->start_sector << 9; 42 __u32 len = iod->nr_sectors << 9; 43 __u32 shmem_idx = ublk_shmem_zc_index(iod->addr); 44 __u32 shmem_off = ublk_shmem_zc_offset(iod->addr); 45 struct io_uring_sqe *sqe[1]; 46 void *addr; 47 48 if (shmem_idx >= UBLK_BUF_MAX || !shmem_table[shmem_idx].mmap_base) 49 return -EINVAL; 50 51 addr = shmem_table[shmem_idx].mmap_base + shmem_off; 52 53 ublk_io_alloc_sqes(t, sqe, 1); 54 if (!sqe[0]) 55 return -ENOMEM; 56 57 io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1), 58 addr, len, file_offset); 59 io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); 60 sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); 61 return 1; 62 } 63 64 static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, 65 const struct ublksrv_io_desc *iod, int tag) 66 { 67 unsigned ublk_op = ublksrv_get_op(iod); 68 unsigned zc = ublk_queue_use_zc(q); 69 unsigned auto_zc = ublk_queue_use_auto_zc(q); 70 enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc); 71 struct ublk_io *io = ublk_get_io(q, tag); 72 __u64 offset = iod->start_sector << 9; 73 __u32 len = iod->nr_sectors << 9; 74 struct io_uring_sqe *sqe[3]; 75 void *addr = io->buf_addr; 76 unsigned short buf_index = ublk_io_buf_idx(t, q, tag); 77 78 /* shared memory zero-copy path */ 79 if (iod->op_flags & UBLK_IO_F_SHMEM_ZC) 80 return loop_queue_shmem_zc_io(t, q, iod, tag); 81 82 if (iod->op_flags & UBLK_IO_F_INTEGRITY) { 83 ublk_io_alloc_sqes(t, sqe, 1); 84 /* Use second backing file for integrity data */ 85 io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 2), 86 io->integrity_buf, 87 ublk_integrity_len(q, len), 88 ublk_integrity_len(q, offset)); 89 sqe[0]->flags = IOSQE_FIXED_FILE; 90 /* tgt_data = 1 indicates integrity I/O */ 91 sqe[0]->user_data = build_user_data(tag, ublk_op, 1, q->q_id, 1); 92 } 93 94 if (!zc || auto_zc) { 95 ublk_io_alloc_sqes(t, sqe, 1); 96 if (!sqe[0]) 97 return -ENOMEM; 98 99 io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, 100 addr, 101 len, 102 offset); 103 if (auto_zc) 104 sqe[0]->buf_index = buf_index; 105 io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); 106 /* bit63 marks us as tgt io */ 107 sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); 108 return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 1; 109 } 110 111 ublk_io_alloc_sqes(t, sqe, 3); 112 113 io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_index); 114 sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; 115 sqe[0]->user_data = build_user_data(tag, 116 ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); 117 118 io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0, 119 len, 120 offset); 121 sqe[1]->buf_index = buf_index; 122 sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; 123 sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); 124 125 io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, buf_index); 126 sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); 127 128 return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 2; 129 } 130 131 static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag) 132 { 133 const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); 134 unsigned ublk_op = ublksrv_get_op(iod); 135 int ret; 136 137 switch (ublk_op) { 138 case UBLK_IO_OP_FLUSH: 139 ret = loop_queue_flush_io(t, q, iod, tag); 140 break; 141 case UBLK_IO_OP_WRITE_ZEROES: 142 case UBLK_IO_OP_DISCARD: 143 ret = -ENOTSUP; 144 break; 145 case UBLK_IO_OP_READ: 146 case UBLK_IO_OP_WRITE: 147 ret = loop_queue_tgt_rw_io(t, q, iod, tag); 148 break; 149 default: 150 ret = -EINVAL; 151 break; 152 } 153 154 ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag, 155 iod->op_flags, iod->start_sector, iod->nr_sectors << 9); 156 return ret; 157 } 158 159 static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q, 160 int tag) 161 { 162 int queued = loop_queue_tgt_io(t, q, tag); 163 164 ublk_queued_tgt_io(t, q, tag, queued); 165 return 0; 166 } 167 168 static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q, 169 const struct io_uring_cqe *cqe) 170 { 171 unsigned tag = user_data_to_tag(cqe->user_data); 172 unsigned op = user_data_to_op(cqe->user_data); 173 struct ublk_io *io = ublk_get_io(q, tag); 174 175 if (cqe->res < 0) { 176 io->result = cqe->res; 177 ublk_err("%s: io failed op %x user_data %lx\n", 178 __func__, op, cqe->user_data); 179 } else if (op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) { 180 __s32 data_len = user_data_to_tgt_data(cqe->user_data) 181 ? ublk_integrity_data_len(q, cqe->res) 182 : cqe->res; 183 184 if (!io->result || data_len < io->result) 185 io->result = data_len; 186 } 187 188 /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ 189 if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) 190 io->tgt_ios += 1; 191 192 if (ublk_completed_tgt_io(t, q, tag)) 193 ublk_complete_io(t, q, tag, io->result); 194 } 195 196 static int ublk_loop_memset_file(int fd, __u8 byte, size_t len) 197 { 198 off_t offset = 0; 199 __u8 buf[4096]; 200 201 memset(buf, byte, sizeof(buf)); 202 while (len) { 203 int ret = pwrite(fd, buf, min(len, sizeof(buf)), offset); 204 205 if (ret < 0) 206 return -errno; 207 if (!ret) 208 return -EIO; 209 210 len -= ret; 211 offset += ret; 212 } 213 return 0; 214 } 215 216 static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) 217 { 218 unsigned long long bytes; 219 unsigned long blocks; 220 int ret; 221 struct ublk_params p = { 222 .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN, 223 .basic = { 224 .attrs = UBLK_ATTR_VOLATILE_CACHE, 225 .logical_bs_shift = 9, 226 .physical_bs_shift = 12, 227 .io_opt_shift = 12, 228 .io_min_shift = 9, 229 .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, 230 }, 231 .dma = { 232 .alignment = 511, 233 }, 234 }; 235 236 ublk_set_integrity_params(ctx, &p); 237 if (ctx->auto_zc_fallback) { 238 ublk_err("%s: not support auto_zc_fallback\n", __func__); 239 return -EINVAL; 240 } 241 242 /* Use O_DIRECT only for data file */ 243 ret = backing_file_tgt_init(dev, 1); 244 if (ret) 245 return ret; 246 247 /* Expect a second file for integrity data */ 248 if (dev->tgt.nr_backing_files != 1 + !!ctx->metadata_size) 249 return -EINVAL; 250 251 blocks = dev->tgt.backing_file_size[0] >> p.basic.logical_bs_shift; 252 if (ctx->metadata_size) { 253 unsigned long metadata_blocks = 254 dev->tgt.backing_file_size[1] / ctx->metadata_size; 255 unsigned long integrity_len; 256 257 /* Ensure both data and integrity data fit in backing files */ 258 blocks = min(blocks, metadata_blocks); 259 integrity_len = blocks * ctx->metadata_size; 260 /* 261 * Initialize PI app tag and ref tag to 0xFF 262 * to disable bio-integrity-auto checks 263 */ 264 ret = ublk_loop_memset_file(dev->fds[2], 0xFF, integrity_len); 265 if (ret) 266 return ret; 267 } 268 bytes = blocks << p.basic.logical_bs_shift; 269 dev->tgt.dev_size = bytes; 270 p.basic.dev_sectors = bytes >> 9; 271 dev->tgt.params = p; 272 273 return 0; 274 } 275 276 const struct ublk_tgt_ops loop_tgt_ops = { 277 .name = "loop", 278 .init_tgt = ublk_loop_tgt_init, 279 .deinit_tgt = backing_file_tgt_deinit, 280 .queue_io = ublk_loop_queue_io, 281 .tgt_io_done = ublk_loop_io_done, 282 }; 283