// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"

#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"

#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH)    | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ)       | \
				 (1ULL << VIRTIO_BLK_F_DISCARD)  | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))

#define VDPASIM_BLK_CAPACITY		0x40000
#define VDPASIM_BLK_SIZE_MAX		0x1000
#define VDPASIM_BLK_SEG_MAX		32
#define VDPASIM_BLK_DWZ_MAX_SECTORS	UINT_MAX

/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1

static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";

static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
				    u64 num_sectors, u64 max_sectors)
{
	if (start_sector > VDPASIM_BLK_CAPACITY) {
		dev_dbg(&vdpasim->vdpa.dev,
			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
			start_sector, VDPASIM_BLK_CAPACITY);
		return false;
	}

	if (num_sectors > max_sectors) {
		dev_dbg(&vdpasim->vdpa.dev,
			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
			num_sectors, max_sectors);
		return false;
	}

	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
		dev_dbg(&vdpasim->vdpa.dev,
			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
		return false;
	}

	return true;
}

/* Returns 'true' if the request is handled (with or without an I/O error)
 * and the status is correctly written in the last byte of the 'in iov',
 * 'false' otherwise.
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	bool handled = false;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;

	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
	if (ret != 1)
		return false;

	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		goto err;
	}

	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
		goto err;
	}
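	/* Per the virtio spec, a block request is laid out as a
	 * device-readable out header (struct virtio_blk_outhdr),
	 * optional data buffers, and a final device-writable status byte.
	 */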
	/* The last byte is the status and we checked if the last iov has
	 * enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);

	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
		goto err;
	}

	to_pull -= bytes;

	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;

	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
	    sector != 0) {
		dev_dbg(&vdpasim->vdpa.dev,
			"sector must be 0 for %u request - sector: 0x%llx\n",
			type, sector);
		status = VIRTIO_BLK_S_IOERR;
		goto err_status;
	}

	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_push >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim->buffer + offset,
					      to_push);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_pull >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      vdpasim->buffer + offset,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;

	case VIRTIO_BLK_T_GET_ID:
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      VIRTIO_BLK_ID_BYTES);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_FLUSH:
		/* nothing to do */
		break;

	case VIRTIO_BLK_T_DISCARD:
	case VIRTIO_BLK_T_WRITE_ZEROES: {
		struct virtio_blk_discard_write_zeroes range;
		u32 num_sectors, flags;

		if (to_pull != sizeof(range)) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
				to_pull, sizeof(range));
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		sector = le64_to_cpu(range.sector);
		offset = sector << SECTOR_SHIFT;
		num_sectors = le32_to_cpu(range.num_sectors);
		flags = le32_to_cpu(range.flags);

		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			dev_dbg(&vdpasim->vdpa.dev,
				"write_zeroes unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}
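		/* Unlike read/write, the sector range to discard or zero
		 * comes from the pulled range descriptor rather than the
		 * request header, so it is validated separately here.
		 */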
		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
			memset(vdpasim->buffer + offset, 0,
			       num_sectors << SECTOR_SHIFT);
		}

		break;
	}
	default:
		dev_dbg(&vdpasim->vdpa.dev,
			"Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}

err_status:
	/* If some operations fail, we need to skip the remaining bytes
	 * to put the status in the last byte
	 */
	if (to_push - pushed > 0)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);

	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		goto err;

	pushed += bytes;

	/* Make sure data is written before advancing index */
	smp_wmb();

	handled = true;

err:
	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return handled;
}

static void vdpasim_blk_work(struct vdpasim *vdpasim)
{
	bool reschedule = false;
	int i;

	mutex_lock(&vdpasim->mutex);

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!vdpasim->running)
		goto out;

	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
		int reqs = 0;

		if (!vq->ready)
			continue;

		while (vdpasim_blk_handle_req(vdpasim, vq)) {
			/* Make sure used is visible before raising the interrupt. */
			smp_wmb();

			local_bh_disable();
			if (vringh_need_notify_iotlb(&vq->vring) > 0)
				vringh_notify(&vq->vring);
			local_bh_enable();

			if (++reqs > 4) {
				reschedule = true;
				break;
			}
		}
	}
out:
	mutex_unlock(&vdpasim->mutex);

	if (reschedule)
		vdpasim_schedule_work(vdpasim);
}

static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *blk_config = config;

	memset(config, 0, sizeof(struct virtio_blk_config));

	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	/* VIRTIO_BLK_F_DISCARD */
	blk_config->discard_sector_alignment =
		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	blk_config->max_discard_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
	/* VIRTIO_BLK_F_WRITE_ZEROES */
	blk_config->max_write_zeroes_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
}

static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};

static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim *simdev;
	int ret;

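	/* Describe the simulated device: one virtqueue in a single group
	 * and address space, backed by an in-memory buffer sized to the
	 * advertised capacity.
	 */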
	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_BLOCK;
	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
	dev_attr.nas = VDPASIM_BLK_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim);
	dev_attr.config_size = sizeof(struct virtio_blk_config);
	dev_attr.get_config = vdpasim_blk_get_config;
	dev_attr.work_fn = vdpasim_blk_work;
	dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	put_device(&simdev->vdpa.dev);
	return ret;
}

static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};

static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	return 0;

parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}

static void __exit vdpasim_blk_exit(void)
{
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_blk_mgmtdev);
}

module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
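
/*
 * Usage sketch (assuming the iproute2 'vdpa' tool and a vdpa bus driver
 * such as virtio_vdpa are available):
 *
 *   # modprobe vdpa_sim_blk
 *   # vdpa dev add name blk0 mgmtdev vdpasim_blk
 *
 * The "blk0" device then appears on the vdpa bus and, once bound to
 * virtio_vdpa, shows up as a virtio-blk disk backed by the simulator's
 * in-memory buffer.
 */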