1 // SPDX-License-Identifier: GPL-2.0-only 2 //#define DEBUG 3 #include <linux/spinlock.h> 4 #include <linux/slab.h> 5 #include <linux/blkdev.h> 6 #include <linux/hdreg.h> 7 #include <linux/module.h> 8 #include <linux/mutex.h> 9 #include <linux/interrupt.h> 10 #include <linux/virtio.h> 11 #include <linux/virtio_blk.h> 12 #include <linux/scatterlist.h> 13 #include <linux/string_helpers.h> 14 #include <linux/idr.h> 15 #include <linux/blk-mq.h> 16 #include <linux/blk-mq-virtio.h> 17 #include <linux/numa.h> 18 #include <uapi/linux/virtio_ring.h> 19 20 #define PART_BITS 4 21 #define VQ_NAME_LEN 16 22 #define MAX_DISCARD_SEGMENTS 256u 23 24 static int major; 25 static DEFINE_IDA(vd_index_ida); 26 27 static struct workqueue_struct *virtblk_wq; 28 29 struct virtio_blk_vq { 30 struct virtqueue *vq; 31 spinlock_t lock; 32 char name[VQ_NAME_LEN]; 33 } ____cacheline_aligned_in_smp; 34 35 struct virtio_blk { 36 /* 37 * This mutex must be held by anything that may run after 38 * virtblk_remove() sets vblk->vdev to NULL. 39 * 40 * blk-mq, virtqueue processing, and sysfs attribute code paths are 41 * shut down before vblk->vdev is set to NULL and therefore do not need 42 * to hold this mutex. 43 */ 44 struct mutex vdev_mutex; 45 struct virtio_device *vdev; 46 47 /* The disk structure for the kernel. */ 48 struct gendisk *disk; 49 50 /* Block layer tags. */ 51 struct blk_mq_tag_set tag_set; 52 53 /* Process context for config space updates */ 54 struct work_struct config_work; 55 56 /* 57 * Tracks references from block_device_operations open/release and 58 * virtio_driver probe/remove so this object can be freed once no 59 * longer in use. 60 */ 61 refcount_t refs; 62 63 /* What host tells us, plus 2 for header & tailer. */ 64 unsigned int sg_elems; 65 66 /* Ida index - used to track minor number allocations. */ 67 int index; 68 69 /* num of vqs */ 70 int num_vqs; 71 struct virtio_blk_vq *vqs; 72 }; 73 74 struct virtblk_req { 75 struct virtio_blk_outhdr out_hdr; 76 u8 status; 77 struct scatterlist sg[]; 78 }; 79 80 static inline blk_status_t virtblk_result(struct virtblk_req *vbr) 81 { 82 switch (vbr->status) { 83 case VIRTIO_BLK_S_OK: 84 return BLK_STS_OK; 85 case VIRTIO_BLK_S_UNSUPP: 86 return BLK_STS_NOTSUPP; 87 default: 88 return BLK_STS_IOERR; 89 } 90 } 91 92 static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, 93 struct scatterlist *data_sg, bool have_data) 94 { 95 struct scatterlist hdr, status, *sgs[3]; 96 unsigned int num_out = 0, num_in = 0; 97 98 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); 99 sgs[num_out++] = &hdr; 100 101 if (have_data) { 102 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) 103 sgs[num_out++] = data_sg; 104 else 105 sgs[num_out + num_in++] = data_sg; 106 } 107 108 sg_init_one(&status, &vbr->status, sizeof(vbr->status)); 109 sgs[num_out + num_in++] = &status; 110 111 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); 112 } 113 114 static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) 115 { 116 unsigned short segments = blk_rq_nr_discard_segments(req); 117 unsigned short n = 0; 118 struct virtio_blk_discard_write_zeroes *range; 119 struct bio *bio; 120 u32 flags = 0; 121 122 if (unmap) 123 flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP; 124 125 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); 126 if (!range) 127 return -ENOMEM; 128 129 /* 130 * Single max discard segment means multi-range discard isn't 131 * supported, and block layer only runs contiguity merge like 132 * normal RW request. So we can't reply on bio for retrieving 133 * each range info. 134 */ 135 if (queue_max_discard_segments(req->q) == 1) { 136 range[0].flags = cpu_to_le32(flags); 137 range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req)); 138 range[0].sector = cpu_to_le64(blk_rq_pos(req)); 139 n = 1; 140 } else { 141 __rq_for_each_bio(bio, req) { 142 u64 sector = bio->bi_iter.bi_sector; 143 u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT; 144 145 range[n].flags = cpu_to_le32(flags); 146 range[n].num_sectors = cpu_to_le32(num_sectors); 147 range[n].sector = cpu_to_le64(sector); 148 n++; 149 } 150 } 151 152 WARN_ON_ONCE(n != segments); 153 154 req->special_vec.bv_page = virt_to_page(range); 155 req->special_vec.bv_offset = offset_in_page(range); 156 req->special_vec.bv_len = sizeof(*range) * segments; 157 req->rq_flags |= RQF_SPECIAL_PAYLOAD; 158 159 return 0; 160 } 161 162 static inline void virtblk_request_done(struct request *req) 163 { 164 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 165 166 if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { 167 kfree(page_address(req->special_vec.bv_page) + 168 req->special_vec.bv_offset); 169 } 170 171 blk_mq_end_request(req, virtblk_result(vbr)); 172 } 173 174 static void virtblk_done(struct virtqueue *vq) 175 { 176 struct virtio_blk *vblk = vq->vdev->priv; 177 bool req_done = false; 178 int qid = vq->index; 179 struct virtblk_req *vbr; 180 unsigned long flags; 181 unsigned int len; 182 183 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 184 do { 185 virtqueue_disable_cb(vq); 186 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { 187 struct request *req = blk_mq_rq_from_pdu(vbr); 188 189 if (likely(!blk_should_fake_timeout(req->q))) 190 blk_mq_complete_request(req); 191 req_done = true; 192 } 193 if (unlikely(virtqueue_is_broken(vq))) 194 break; 195 } while (!virtqueue_enable_cb(vq)); 196 197 /* In case queue is stopped waiting for more buffers. */ 198 if (req_done) 199 blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); 200 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 201 } 202 203 static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) 204 { 205 struct virtio_blk *vblk = hctx->queue->queuedata; 206 struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num]; 207 bool kick; 208 209 spin_lock_irq(&vq->lock); 210 kick = virtqueue_kick_prepare(vq->vq); 211 spin_unlock_irq(&vq->lock); 212 213 if (kick) 214 virtqueue_notify(vq->vq); 215 } 216 217 static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 218 const struct blk_mq_queue_data *bd) 219 { 220 struct virtio_blk *vblk = hctx->queue->queuedata; 221 struct request *req = bd->rq; 222 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 223 unsigned long flags; 224 unsigned int num; 225 int qid = hctx->queue_num; 226 int err; 227 bool notify = false; 228 bool unmap = false; 229 u32 type; 230 231 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); 232 233 switch (req_op(req)) { 234 case REQ_OP_READ: 235 case REQ_OP_WRITE: 236 type = 0; 237 break; 238 case REQ_OP_FLUSH: 239 type = VIRTIO_BLK_T_FLUSH; 240 break; 241 case REQ_OP_DISCARD: 242 type = VIRTIO_BLK_T_DISCARD; 243 break; 244 case REQ_OP_WRITE_ZEROES: 245 type = VIRTIO_BLK_T_WRITE_ZEROES; 246 unmap = !(req->cmd_flags & REQ_NOUNMAP); 247 break; 248 case REQ_OP_DRV_IN: 249 type = VIRTIO_BLK_T_GET_ID; 250 break; 251 default: 252 WARN_ON_ONCE(1); 253 return BLK_STS_IOERR; 254 } 255 256 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); 257 vbr->out_hdr.sector = type ? 258 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); 259 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); 260 261 blk_mq_start_request(req); 262 263 if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { 264 err = virtblk_setup_discard_write_zeroes(req, unmap); 265 if (err) 266 return BLK_STS_RESOURCE; 267 } 268 269 num = blk_rq_map_sg(hctx->queue, req, vbr->sg); 270 if (num) { 271 if (rq_data_dir(req) == WRITE) 272 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); 273 else 274 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); 275 } 276 277 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 278 err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); 279 if (err) { 280 virtqueue_kick(vblk->vqs[qid].vq); 281 /* Don't stop the queue if -ENOMEM: we may have failed to 282 * bounce the buffer due to global resource outage. 283 */ 284 if (err == -ENOSPC) 285 blk_mq_stop_hw_queue(hctx); 286 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 287 switch (err) { 288 case -ENOSPC: 289 return BLK_STS_DEV_RESOURCE; 290 case -ENOMEM: 291 return BLK_STS_RESOURCE; 292 default: 293 return BLK_STS_IOERR; 294 } 295 } 296 297 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 298 notify = true; 299 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 300 301 if (notify) 302 virtqueue_notify(vblk->vqs[qid].vq); 303 return BLK_STS_OK; 304 } 305 306 /* return id (s/n) string for *disk to *id_str 307 */ 308 static int virtblk_get_id(struct gendisk *disk, char *id_str) 309 { 310 struct virtio_blk *vblk = disk->private_data; 311 struct request_queue *q = vblk->disk->queue; 312 struct request *req; 313 int err; 314 315 req = blk_get_request(q, REQ_OP_DRV_IN, 0); 316 if (IS_ERR(req)) 317 return PTR_ERR(req); 318 319 err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); 320 if (err) 321 goto out; 322 323 blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); 324 err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); 325 out: 326 blk_put_request(req); 327 return err; 328 } 329 330 static void virtblk_get(struct virtio_blk *vblk) 331 { 332 refcount_inc(&vblk->refs); 333 } 334 335 static void virtblk_put(struct virtio_blk *vblk) 336 { 337 if (refcount_dec_and_test(&vblk->refs)) { 338 ida_simple_remove(&vd_index_ida, vblk->index); 339 mutex_destroy(&vblk->vdev_mutex); 340 kfree(vblk); 341 } 342 } 343 344 static int virtblk_open(struct block_device *bd, fmode_t mode) 345 { 346 struct virtio_blk *vblk = bd->bd_disk->private_data; 347 int ret = 0; 348 349 mutex_lock(&vblk->vdev_mutex); 350 351 if (vblk->vdev) 352 virtblk_get(vblk); 353 else 354 ret = -ENXIO; 355 356 mutex_unlock(&vblk->vdev_mutex); 357 return ret; 358 } 359 360 static void virtblk_release(struct gendisk *disk, fmode_t mode) 361 { 362 struct virtio_blk *vblk = disk->private_data; 363 364 virtblk_put(vblk); 365 } 366 367 /* We provide getgeo only to please some old bootloader/partitioning tools */ 368 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 369 { 370 struct virtio_blk *vblk = bd->bd_disk->private_data; 371 int ret = 0; 372 373 mutex_lock(&vblk->vdev_mutex); 374 375 if (!vblk->vdev) { 376 ret = -ENXIO; 377 goto out; 378 } 379 380 /* see if the host passed in geometry config */ 381 if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) { 382 virtio_cread(vblk->vdev, struct virtio_blk_config, 383 geometry.cylinders, &geo->cylinders); 384 virtio_cread(vblk->vdev, struct virtio_blk_config, 385 geometry.heads, &geo->heads); 386 virtio_cread(vblk->vdev, struct virtio_blk_config, 387 geometry.sectors, &geo->sectors); 388 } else { 389 /* some standard values, similar to sd */ 390 geo->heads = 1 << 6; 391 geo->sectors = 1 << 5; 392 geo->cylinders = get_capacity(bd->bd_disk) >> 11; 393 } 394 out: 395 mutex_unlock(&vblk->vdev_mutex); 396 return ret; 397 } 398 399 static const struct block_device_operations virtblk_fops = { 400 .owner = THIS_MODULE, 401 .open = virtblk_open, 402 .release = virtblk_release, 403 .getgeo = virtblk_getgeo, 404 }; 405 406 static int index_to_minor(int index) 407 { 408 return index << PART_BITS; 409 } 410 411 static int minor_to_index(int minor) 412 { 413 return minor >> PART_BITS; 414 } 415 416 static ssize_t serial_show(struct device *dev, 417 struct device_attribute *attr, char *buf) 418 { 419 struct gendisk *disk = dev_to_disk(dev); 420 int err; 421 422 /* sysfs gives us a PAGE_SIZE buffer */ 423 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); 424 425 buf[VIRTIO_BLK_ID_BYTES] = '\0'; 426 err = virtblk_get_id(disk, buf); 427 if (!err) 428 return strlen(buf); 429 430 if (err == -EIO) /* Unsupported? Make it empty. */ 431 return 0; 432 433 return err; 434 } 435 436 static DEVICE_ATTR_RO(serial); 437 438 /* The queue's logical block size must be set before calling this */ 439 static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) 440 { 441 struct virtio_device *vdev = vblk->vdev; 442 struct request_queue *q = vblk->disk->queue; 443 char cap_str_2[10], cap_str_10[10]; 444 unsigned long long nblocks; 445 u64 capacity; 446 447 /* Host must always specify the capacity. */ 448 virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); 449 450 /* If capacity is too big, truncate with warning. */ 451 if ((sector_t)capacity != capacity) { 452 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", 453 (unsigned long long)capacity); 454 capacity = (sector_t)-1; 455 } 456 457 nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); 458 459 string_get_size(nblocks, queue_logical_block_size(q), 460 STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); 461 string_get_size(nblocks, queue_logical_block_size(q), 462 STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); 463 464 dev_notice(&vdev->dev, 465 "[%s] %s%llu %d-byte logical blocks (%s/%s)\n", 466 vblk->disk->disk_name, 467 resize ? "new size: " : "", 468 nblocks, 469 queue_logical_block_size(q), 470 cap_str_10, 471 cap_str_2); 472 473 set_capacity_revalidate_and_notify(vblk->disk, capacity, true); 474 } 475 476 static void virtblk_config_changed_work(struct work_struct *work) 477 { 478 struct virtio_blk *vblk = 479 container_of(work, struct virtio_blk, config_work); 480 481 virtblk_update_capacity(vblk, true); 482 } 483 484 static void virtblk_config_changed(struct virtio_device *vdev) 485 { 486 struct virtio_blk *vblk = vdev->priv; 487 488 queue_work(virtblk_wq, &vblk->config_work); 489 } 490 491 static int init_vq(struct virtio_blk *vblk) 492 { 493 int err; 494 int i; 495 vq_callback_t **callbacks; 496 const char **names; 497 struct virtqueue **vqs; 498 unsigned short num_vqs; 499 struct virtio_device *vdev = vblk->vdev; 500 struct irq_affinity desc = { 0, }; 501 502 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, 503 struct virtio_blk_config, num_queues, 504 &num_vqs); 505 if (err) 506 num_vqs = 1; 507 508 num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); 509 510 vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); 511 if (!vblk->vqs) 512 return -ENOMEM; 513 514 names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL); 515 callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL); 516 vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL); 517 if (!names || !callbacks || !vqs) { 518 err = -ENOMEM; 519 goto out; 520 } 521 522 for (i = 0; i < num_vqs; i++) { 523 callbacks[i] = virtblk_done; 524 snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); 525 names[i] = vblk->vqs[i].name; 526 } 527 528 /* Discover virtqueues and write information to configuration. */ 529 err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); 530 if (err) 531 goto out; 532 533 for (i = 0; i < num_vqs; i++) { 534 spin_lock_init(&vblk->vqs[i].lock); 535 vblk->vqs[i].vq = vqs[i]; 536 } 537 vblk->num_vqs = num_vqs; 538 539 out: 540 kfree(vqs); 541 kfree(callbacks); 542 kfree(names); 543 if (err) 544 kfree(vblk->vqs); 545 return err; 546 } 547 548 /* 549 * Legacy naming scheme used for virtio devices. We are stuck with it for 550 * virtio blk but don't ever use it for any new driver. 551 */ 552 static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) 553 { 554 const int base = 'z' - 'a' + 1; 555 char *begin = buf + strlen(prefix); 556 char *end = buf + buflen; 557 char *p; 558 int unit; 559 560 p = end - 1; 561 *p = '\0'; 562 unit = base; 563 do { 564 if (p == begin) 565 return -EINVAL; 566 *--p = 'a' + (index % unit); 567 index = (index / unit) - 1; 568 } while (index >= 0); 569 570 memmove(begin, p, end - p); 571 memcpy(buf, prefix, strlen(prefix)); 572 573 return 0; 574 } 575 576 static int virtblk_get_cache_mode(struct virtio_device *vdev) 577 { 578 u8 writeback; 579 int err; 580 581 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, 582 struct virtio_blk_config, wce, 583 &writeback); 584 585 /* 586 * If WCE is not configurable and flush is not available, 587 * assume no writeback cache is in use. 588 */ 589 if (err) 590 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH); 591 592 return writeback; 593 } 594 595 static void virtblk_update_cache_mode(struct virtio_device *vdev) 596 { 597 u8 writeback = virtblk_get_cache_mode(vdev); 598 struct virtio_blk *vblk = vdev->priv; 599 600 blk_queue_write_cache(vblk->disk->queue, writeback, false); 601 revalidate_disk(vblk->disk); 602 } 603 604 static const char *const virtblk_cache_types[] = { 605 "write through", "write back" 606 }; 607 608 static ssize_t 609 cache_type_store(struct device *dev, struct device_attribute *attr, 610 const char *buf, size_t count) 611 { 612 struct gendisk *disk = dev_to_disk(dev); 613 struct virtio_blk *vblk = disk->private_data; 614 struct virtio_device *vdev = vblk->vdev; 615 int i; 616 617 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); 618 i = sysfs_match_string(virtblk_cache_types, buf); 619 if (i < 0) 620 return i; 621 622 virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i); 623 virtblk_update_cache_mode(vdev); 624 return count; 625 } 626 627 static ssize_t 628 cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) 629 { 630 struct gendisk *disk = dev_to_disk(dev); 631 struct virtio_blk *vblk = disk->private_data; 632 u8 writeback = virtblk_get_cache_mode(vblk->vdev); 633 634 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); 635 return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); 636 } 637 638 static DEVICE_ATTR_RW(cache_type); 639 640 static struct attribute *virtblk_attrs[] = { 641 &dev_attr_serial.attr, 642 &dev_attr_cache_type.attr, 643 NULL, 644 }; 645 646 static umode_t virtblk_attrs_are_visible(struct kobject *kobj, 647 struct attribute *a, int n) 648 { 649 struct device *dev = container_of(kobj, struct device, kobj); 650 struct gendisk *disk = dev_to_disk(dev); 651 struct virtio_blk *vblk = disk->private_data; 652 struct virtio_device *vdev = vblk->vdev; 653 654 if (a == &dev_attr_cache_type.attr && 655 !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) 656 return S_IRUGO; 657 658 return a->mode; 659 } 660 661 static const struct attribute_group virtblk_attr_group = { 662 .attrs = virtblk_attrs, 663 .is_visible = virtblk_attrs_are_visible, 664 }; 665 666 static const struct attribute_group *virtblk_attr_groups[] = { 667 &virtblk_attr_group, 668 NULL, 669 }; 670 671 static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, 672 unsigned int hctx_idx, unsigned int numa_node) 673 { 674 struct virtio_blk *vblk = set->driver_data; 675 struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); 676 677 sg_init_table(vbr->sg, vblk->sg_elems); 678 return 0; 679 } 680 681 static int virtblk_map_queues(struct blk_mq_tag_set *set) 682 { 683 struct virtio_blk *vblk = set->driver_data; 684 685 return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], 686 vblk->vdev, 0); 687 } 688 689 static const struct blk_mq_ops virtio_mq_ops = { 690 .queue_rq = virtio_queue_rq, 691 .commit_rqs = virtio_commit_rqs, 692 .complete = virtblk_request_done, 693 .init_request = virtblk_init_request, 694 .map_queues = virtblk_map_queues, 695 }; 696 697 static unsigned int virtblk_queue_depth; 698 module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); 699 700 static int virtblk_probe(struct virtio_device *vdev) 701 { 702 struct virtio_blk *vblk; 703 struct request_queue *q; 704 int err, index; 705 706 u32 v, blk_size, max_size, sg_elems, opt_io_size; 707 u16 min_io_size; 708 u8 physical_block_exp, alignment_offset; 709 710 if (!vdev->config->get) { 711 dev_err(&vdev->dev, "%s failure: config access disabled\n", 712 __func__); 713 return -EINVAL; 714 } 715 716 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), 717 GFP_KERNEL); 718 if (err < 0) 719 goto out; 720 index = err; 721 722 /* We need to know how many segments before we allocate. */ 723 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, 724 struct virtio_blk_config, seg_max, 725 &sg_elems); 726 727 /* We need at least one SG element, whatever they say. */ 728 if (err || !sg_elems) 729 sg_elems = 1; 730 731 /* We need an extra sg elements at head and tail. */ 732 sg_elems += 2; 733 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); 734 if (!vblk) { 735 err = -ENOMEM; 736 goto out_free_index; 737 } 738 739 /* This reference is dropped in virtblk_remove(). */ 740 refcount_set(&vblk->refs, 1); 741 mutex_init(&vblk->vdev_mutex); 742 743 vblk->vdev = vdev; 744 vblk->sg_elems = sg_elems; 745 746 INIT_WORK(&vblk->config_work, virtblk_config_changed_work); 747 748 err = init_vq(vblk); 749 if (err) 750 goto out_free_vblk; 751 752 /* FIXME: How many partitions? How long is a piece of string? */ 753 vblk->disk = alloc_disk(1 << PART_BITS); 754 if (!vblk->disk) { 755 err = -ENOMEM; 756 goto out_free_vq; 757 } 758 759 /* Default queue sizing is to fill the ring. */ 760 if (!virtblk_queue_depth) { 761 virtblk_queue_depth = vblk->vqs[0].vq->num_free; 762 /* ... but without indirect descs, we use 2 descs per req */ 763 if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) 764 virtblk_queue_depth /= 2; 765 } 766 767 memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); 768 vblk->tag_set.ops = &virtio_mq_ops; 769 vblk->tag_set.queue_depth = virtblk_queue_depth; 770 vblk->tag_set.numa_node = NUMA_NO_NODE; 771 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 772 vblk->tag_set.cmd_size = 773 sizeof(struct virtblk_req) + 774 sizeof(struct scatterlist) * sg_elems; 775 vblk->tag_set.driver_data = vblk; 776 vblk->tag_set.nr_hw_queues = vblk->num_vqs; 777 778 err = blk_mq_alloc_tag_set(&vblk->tag_set); 779 if (err) 780 goto out_put_disk; 781 782 q = blk_mq_init_queue(&vblk->tag_set); 783 if (IS_ERR(q)) { 784 err = -ENOMEM; 785 goto out_free_tags; 786 } 787 vblk->disk->queue = q; 788 789 q->queuedata = vblk; 790 791 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); 792 793 vblk->disk->major = major; 794 vblk->disk->first_minor = index_to_minor(index); 795 vblk->disk->private_data = vblk; 796 vblk->disk->fops = &virtblk_fops; 797 vblk->disk->flags |= GENHD_FL_EXT_DEVT; 798 vblk->index = index; 799 800 /* configure queue flush support */ 801 virtblk_update_cache_mode(vdev); 802 803 /* If disk is read-only in the host, the guest should obey */ 804 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) 805 set_disk_ro(vblk->disk, 1); 806 807 /* We can handle whatever the host told us to handle. */ 808 blk_queue_max_segments(q, vblk->sg_elems-2); 809 810 /* No real sector limit. */ 811 blk_queue_max_hw_sectors(q, -1U); 812 813 max_size = virtio_max_dma_size(vdev); 814 815 /* Host can optionally specify maximum segment size and number of 816 * segments. */ 817 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX, 818 struct virtio_blk_config, size_max, &v); 819 if (!err) 820 max_size = min(max_size, v); 821 822 blk_queue_max_segment_size(q, max_size); 823 824 /* Host can optionally specify the block size of the device */ 825 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, 826 struct virtio_blk_config, blk_size, 827 &blk_size); 828 if (!err) 829 blk_queue_logical_block_size(q, blk_size); 830 else 831 blk_size = queue_logical_block_size(q); 832 833 /* Use topology information if available */ 834 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 835 struct virtio_blk_config, physical_block_exp, 836 &physical_block_exp); 837 if (!err && physical_block_exp) 838 blk_queue_physical_block_size(q, 839 blk_size * (1 << physical_block_exp)); 840 841 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 842 struct virtio_blk_config, alignment_offset, 843 &alignment_offset); 844 if (!err && alignment_offset) 845 blk_queue_alignment_offset(q, blk_size * alignment_offset); 846 847 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 848 struct virtio_blk_config, min_io_size, 849 &min_io_size); 850 if (!err && min_io_size) 851 blk_queue_io_min(q, blk_size * min_io_size); 852 853 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 854 struct virtio_blk_config, opt_io_size, 855 &opt_io_size); 856 if (!err && opt_io_size) 857 blk_queue_io_opt(q, blk_size * opt_io_size); 858 859 if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { 860 q->limits.discard_granularity = blk_size; 861 862 virtio_cread(vdev, struct virtio_blk_config, 863 discard_sector_alignment, &v); 864 q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0; 865 866 virtio_cread(vdev, struct virtio_blk_config, 867 max_discard_sectors, &v); 868 blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); 869 870 virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 871 &v); 872 blk_queue_max_discard_segments(q, 873 min_not_zero(v, 874 MAX_DISCARD_SEGMENTS)); 875 876 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 877 } 878 879 if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { 880 virtio_cread(vdev, struct virtio_blk_config, 881 max_write_zeroes_sectors, &v); 882 blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); 883 } 884 885 virtblk_update_capacity(vblk, false); 886 virtio_device_ready(vdev); 887 888 device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); 889 return 0; 890 891 out_free_tags: 892 blk_mq_free_tag_set(&vblk->tag_set); 893 out_put_disk: 894 put_disk(vblk->disk); 895 out_free_vq: 896 vdev->config->del_vqs(vdev); 897 kfree(vblk->vqs); 898 out_free_vblk: 899 kfree(vblk); 900 out_free_index: 901 ida_simple_remove(&vd_index_ida, index); 902 out: 903 return err; 904 } 905 906 static void virtblk_remove(struct virtio_device *vdev) 907 { 908 struct virtio_blk *vblk = vdev->priv; 909 910 /* Make sure no work handler is accessing the device. */ 911 flush_work(&vblk->config_work); 912 913 del_gendisk(vblk->disk); 914 blk_cleanup_queue(vblk->disk->queue); 915 916 blk_mq_free_tag_set(&vblk->tag_set); 917 918 mutex_lock(&vblk->vdev_mutex); 919 920 /* Stop all the virtqueues. */ 921 vdev->config->reset(vdev); 922 923 /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */ 924 vblk->vdev = NULL; 925 926 put_disk(vblk->disk); 927 vdev->config->del_vqs(vdev); 928 kfree(vblk->vqs); 929 930 mutex_unlock(&vblk->vdev_mutex); 931 932 virtblk_put(vblk); 933 } 934 935 #ifdef CONFIG_PM_SLEEP 936 static int virtblk_freeze(struct virtio_device *vdev) 937 { 938 struct virtio_blk *vblk = vdev->priv; 939 940 /* Ensure we don't receive any more interrupts */ 941 vdev->config->reset(vdev); 942 943 /* Make sure no work handler is accessing the device. */ 944 flush_work(&vblk->config_work); 945 946 blk_mq_quiesce_queue(vblk->disk->queue); 947 948 vdev->config->del_vqs(vdev); 949 return 0; 950 } 951 952 static int virtblk_restore(struct virtio_device *vdev) 953 { 954 struct virtio_blk *vblk = vdev->priv; 955 int ret; 956 957 ret = init_vq(vdev->priv); 958 if (ret) 959 return ret; 960 961 virtio_device_ready(vdev); 962 963 blk_mq_unquiesce_queue(vblk->disk->queue); 964 return 0; 965 } 966 #endif 967 968 static const struct virtio_device_id id_table[] = { 969 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 970 { 0 }, 971 }; 972 973 static unsigned int features_legacy[] = { 974 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 975 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 976 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 977 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 978 } 979 ; 980 static unsigned int features[] = { 981 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 982 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 983 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 984 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 985 }; 986 987 static struct virtio_driver virtio_blk = { 988 .feature_table = features, 989 .feature_table_size = ARRAY_SIZE(features), 990 .feature_table_legacy = features_legacy, 991 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 992 .driver.name = KBUILD_MODNAME, 993 .driver.owner = THIS_MODULE, 994 .id_table = id_table, 995 .probe = virtblk_probe, 996 .remove = virtblk_remove, 997 .config_changed = virtblk_config_changed, 998 #ifdef CONFIG_PM_SLEEP 999 .freeze = virtblk_freeze, 1000 .restore = virtblk_restore, 1001 #endif 1002 }; 1003 1004 static int __init init(void) 1005 { 1006 int error; 1007 1008 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); 1009 if (!virtblk_wq) 1010 return -ENOMEM; 1011 1012 major = register_blkdev(0, "virtblk"); 1013 if (major < 0) { 1014 error = major; 1015 goto out_destroy_workqueue; 1016 } 1017 1018 error = register_virtio_driver(&virtio_blk); 1019 if (error) 1020 goto out_unregister_blkdev; 1021 return 0; 1022 1023 out_unregister_blkdev: 1024 unregister_blkdev(major, "virtblk"); 1025 out_destroy_workqueue: 1026 destroy_workqueue(virtblk_wq); 1027 return error; 1028 } 1029 1030 static void __exit fini(void) 1031 { 1032 unregister_virtio_driver(&virtio_blk); 1033 unregister_blkdev(major, "virtblk"); 1034 destroy_workqueue(virtblk_wq); 1035 } 1036 module_init(init); 1037 module_exit(fini); 1038 1039 MODULE_DEVICE_TABLE(virtio, id_table); 1040 MODULE_DESCRIPTION("Virtio block driver"); 1041 MODULE_LICENSE("GPL"); 1042