1 // SPDX-License-Identifier: GPL-2.0-only 2 //#define DEBUG 3 #include <linux/spinlock.h> 4 #include <linux/slab.h> 5 #include <linux/blkdev.h> 6 #include <linux/hdreg.h> 7 #include <linux/module.h> 8 #include <linux/mutex.h> 9 #include <linux/interrupt.h> 10 #include <linux/virtio.h> 11 #include <linux/virtio_blk.h> 12 #include <linux/scatterlist.h> 13 #include <linux/string_helpers.h> 14 #include <linux/idr.h> 15 #include <linux/blk-mq.h> 16 #include <linux/blk-mq-virtio.h> 17 #include <linux/numa.h> 18 #include <uapi/linux/virtio_ring.h> 19 20 #define PART_BITS 4 21 #define VQ_NAME_LEN 16 22 #define MAX_DISCARD_SEGMENTS 256u 23 24 /* The maximum number of sg elements that fit into a virtqueue */ 25 #define VIRTIO_BLK_MAX_SG_ELEMS 32768 26 27 static int major; 28 static DEFINE_IDA(vd_index_ida); 29 30 static struct workqueue_struct *virtblk_wq; 31 32 struct virtio_blk_vq { 33 struct virtqueue *vq; 34 spinlock_t lock; 35 char name[VQ_NAME_LEN]; 36 } ____cacheline_aligned_in_smp; 37 38 struct virtio_blk { 39 /* 40 * This mutex must be held by anything that may run after 41 * virtblk_remove() sets vblk->vdev to NULL. 42 * 43 * blk-mq, virtqueue processing, and sysfs attribute code paths are 44 * shut down before vblk->vdev is set to NULL and therefore do not need 45 * to hold this mutex. 46 */ 47 struct mutex vdev_mutex; 48 struct virtio_device *vdev; 49 50 /* The disk structure for the kernel. */ 51 struct gendisk *disk; 52 53 /* Block layer tags. */ 54 struct blk_mq_tag_set tag_set; 55 56 /* Process context for config space updates */ 57 struct work_struct config_work; 58 59 /* 60 * Tracks references from block_device_operations open/release and 61 * virtio_driver probe/remove so this object can be freed once no 62 * longer in use. 63 */ 64 refcount_t refs; 65 66 /* What host tells us, plus 2 for header & tailer. */ 67 unsigned int sg_elems; 68 69 /* Ida index - used to track minor number allocations. */ 70 int index; 71 72 /* num of vqs */ 73 int num_vqs; 74 struct virtio_blk_vq *vqs; 75 }; 76 77 struct virtblk_req { 78 struct virtio_blk_outhdr out_hdr; 79 u8 status; 80 struct scatterlist sg[]; 81 }; 82 83 static inline blk_status_t virtblk_result(struct virtblk_req *vbr) 84 { 85 switch (vbr->status) { 86 case VIRTIO_BLK_S_OK: 87 return BLK_STS_OK; 88 case VIRTIO_BLK_S_UNSUPP: 89 return BLK_STS_NOTSUPP; 90 default: 91 return BLK_STS_IOERR; 92 } 93 } 94 95 static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, 96 struct scatterlist *data_sg, bool have_data) 97 { 98 struct scatterlist hdr, status, *sgs[3]; 99 unsigned int num_out = 0, num_in = 0; 100 101 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); 102 sgs[num_out++] = &hdr; 103 104 if (have_data) { 105 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) 106 sgs[num_out++] = data_sg; 107 else 108 sgs[num_out + num_in++] = data_sg; 109 } 110 111 sg_init_one(&status, &vbr->status, sizeof(vbr->status)); 112 sgs[num_out + num_in++] = &status; 113 114 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); 115 } 116 117 static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) 118 { 119 unsigned short segments = blk_rq_nr_discard_segments(req); 120 unsigned short n = 0; 121 struct virtio_blk_discard_write_zeroes *range; 122 struct bio *bio; 123 u32 flags = 0; 124 125 if (unmap) 126 flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP; 127 128 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); 129 if (!range) 130 return -ENOMEM; 131 132 /* 133 * Single max discard segment means multi-range discard isn't 134 * supported, and block layer only runs contiguity merge like 135 * normal RW request. So we can't reply on bio for retrieving 136 * each range info. 137 */ 138 if (queue_max_discard_segments(req->q) == 1) { 139 range[0].flags = cpu_to_le32(flags); 140 range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req)); 141 range[0].sector = cpu_to_le64(blk_rq_pos(req)); 142 n = 1; 143 } else { 144 __rq_for_each_bio(bio, req) { 145 u64 sector = bio->bi_iter.bi_sector; 146 u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT; 147 148 range[n].flags = cpu_to_le32(flags); 149 range[n].num_sectors = cpu_to_le32(num_sectors); 150 range[n].sector = cpu_to_le64(sector); 151 n++; 152 } 153 } 154 155 WARN_ON_ONCE(n != segments); 156 157 req->special_vec.bv_page = virt_to_page(range); 158 req->special_vec.bv_offset = offset_in_page(range); 159 req->special_vec.bv_len = sizeof(*range) * segments; 160 req->rq_flags |= RQF_SPECIAL_PAYLOAD; 161 162 return 0; 163 } 164 165 static inline void virtblk_request_done(struct request *req) 166 { 167 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 168 169 if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { 170 kfree(page_address(req->special_vec.bv_page) + 171 req->special_vec.bv_offset); 172 } 173 174 blk_mq_end_request(req, virtblk_result(vbr)); 175 } 176 177 static void virtblk_done(struct virtqueue *vq) 178 { 179 struct virtio_blk *vblk = vq->vdev->priv; 180 bool req_done = false; 181 int qid = vq->index; 182 struct virtblk_req *vbr; 183 unsigned long flags; 184 unsigned int len; 185 186 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 187 do { 188 virtqueue_disable_cb(vq); 189 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { 190 struct request *req = blk_mq_rq_from_pdu(vbr); 191 192 if (likely(!blk_should_fake_timeout(req->q))) 193 blk_mq_complete_request(req); 194 req_done = true; 195 } 196 if (unlikely(virtqueue_is_broken(vq))) 197 break; 198 } while (!virtqueue_enable_cb(vq)); 199 200 /* In case queue is stopped waiting for more buffers. */ 201 if (req_done) 202 blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); 203 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 204 } 205 206 static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) 207 { 208 struct virtio_blk *vblk = hctx->queue->queuedata; 209 struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num]; 210 bool kick; 211 212 spin_lock_irq(&vq->lock); 213 kick = virtqueue_kick_prepare(vq->vq); 214 spin_unlock_irq(&vq->lock); 215 216 if (kick) 217 virtqueue_notify(vq->vq); 218 } 219 220 static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 221 const struct blk_mq_queue_data *bd) 222 { 223 struct virtio_blk *vblk = hctx->queue->queuedata; 224 struct request *req = bd->rq; 225 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 226 unsigned long flags; 227 unsigned int num; 228 int qid = hctx->queue_num; 229 int err; 230 bool notify = false; 231 bool unmap = false; 232 u32 type; 233 234 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); 235 236 switch (req_op(req)) { 237 case REQ_OP_READ: 238 case REQ_OP_WRITE: 239 type = 0; 240 break; 241 case REQ_OP_FLUSH: 242 type = VIRTIO_BLK_T_FLUSH; 243 break; 244 case REQ_OP_DISCARD: 245 type = VIRTIO_BLK_T_DISCARD; 246 break; 247 case REQ_OP_WRITE_ZEROES: 248 type = VIRTIO_BLK_T_WRITE_ZEROES; 249 unmap = !(req->cmd_flags & REQ_NOUNMAP); 250 break; 251 case REQ_OP_DRV_IN: 252 type = VIRTIO_BLK_T_GET_ID; 253 break; 254 default: 255 WARN_ON_ONCE(1); 256 return BLK_STS_IOERR; 257 } 258 259 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); 260 vbr->out_hdr.sector = type ? 261 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); 262 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); 263 264 blk_mq_start_request(req); 265 266 if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { 267 err = virtblk_setup_discard_write_zeroes(req, unmap); 268 if (err) 269 return BLK_STS_RESOURCE; 270 } 271 272 num = blk_rq_map_sg(hctx->queue, req, vbr->sg); 273 if (num) { 274 if (rq_data_dir(req) == WRITE) 275 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); 276 else 277 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); 278 } 279 280 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 281 err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); 282 if (err) { 283 virtqueue_kick(vblk->vqs[qid].vq); 284 /* Don't stop the queue if -ENOMEM: we may have failed to 285 * bounce the buffer due to global resource outage. 286 */ 287 if (err == -ENOSPC) 288 blk_mq_stop_hw_queue(hctx); 289 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 290 switch (err) { 291 case -ENOSPC: 292 return BLK_STS_DEV_RESOURCE; 293 case -ENOMEM: 294 return BLK_STS_RESOURCE; 295 default: 296 return BLK_STS_IOERR; 297 } 298 } 299 300 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 301 notify = true; 302 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 303 304 if (notify) 305 virtqueue_notify(vblk->vqs[qid].vq); 306 return BLK_STS_OK; 307 } 308 309 /* return id (s/n) string for *disk to *id_str 310 */ 311 static int virtblk_get_id(struct gendisk *disk, char *id_str) 312 { 313 struct virtio_blk *vblk = disk->private_data; 314 struct request_queue *q = vblk->disk->queue; 315 struct request *req; 316 int err; 317 318 req = blk_get_request(q, REQ_OP_DRV_IN, 0); 319 if (IS_ERR(req)) 320 return PTR_ERR(req); 321 322 err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); 323 if (err) 324 goto out; 325 326 blk_execute_rq(vblk->disk, req, false); 327 err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); 328 out: 329 blk_put_request(req); 330 return err; 331 } 332 333 static void virtblk_get(struct virtio_blk *vblk) 334 { 335 refcount_inc(&vblk->refs); 336 } 337 338 static void virtblk_put(struct virtio_blk *vblk) 339 { 340 if (refcount_dec_and_test(&vblk->refs)) { 341 ida_simple_remove(&vd_index_ida, vblk->index); 342 mutex_destroy(&vblk->vdev_mutex); 343 kfree(vblk); 344 } 345 } 346 347 static int virtblk_open(struct block_device *bd, fmode_t mode) 348 { 349 struct virtio_blk *vblk = bd->bd_disk->private_data; 350 int ret = 0; 351 352 mutex_lock(&vblk->vdev_mutex); 353 354 if (vblk->vdev) 355 virtblk_get(vblk); 356 else 357 ret = -ENXIO; 358 359 mutex_unlock(&vblk->vdev_mutex); 360 return ret; 361 } 362 363 static void virtblk_release(struct gendisk *disk, fmode_t mode) 364 { 365 struct virtio_blk *vblk = disk->private_data; 366 367 virtblk_put(vblk); 368 } 369 370 /* We provide getgeo only to please some old bootloader/partitioning tools */ 371 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 372 { 373 struct virtio_blk *vblk = bd->bd_disk->private_data; 374 int ret = 0; 375 376 mutex_lock(&vblk->vdev_mutex); 377 378 if (!vblk->vdev) { 379 ret = -ENXIO; 380 goto out; 381 } 382 383 /* see if the host passed in geometry config */ 384 if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) { 385 virtio_cread(vblk->vdev, struct virtio_blk_config, 386 geometry.cylinders, &geo->cylinders); 387 virtio_cread(vblk->vdev, struct virtio_blk_config, 388 geometry.heads, &geo->heads); 389 virtio_cread(vblk->vdev, struct virtio_blk_config, 390 geometry.sectors, &geo->sectors); 391 } else { 392 /* some standard values, similar to sd */ 393 geo->heads = 1 << 6; 394 geo->sectors = 1 << 5; 395 geo->cylinders = get_capacity(bd->bd_disk) >> 11; 396 } 397 out: 398 mutex_unlock(&vblk->vdev_mutex); 399 return ret; 400 } 401 402 static const struct block_device_operations virtblk_fops = { 403 .owner = THIS_MODULE, 404 .open = virtblk_open, 405 .release = virtblk_release, 406 .getgeo = virtblk_getgeo, 407 }; 408 409 static int index_to_minor(int index) 410 { 411 return index << PART_BITS; 412 } 413 414 static int minor_to_index(int minor) 415 { 416 return minor >> PART_BITS; 417 } 418 419 static ssize_t serial_show(struct device *dev, 420 struct device_attribute *attr, char *buf) 421 { 422 struct gendisk *disk = dev_to_disk(dev); 423 int err; 424 425 /* sysfs gives us a PAGE_SIZE buffer */ 426 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); 427 428 buf[VIRTIO_BLK_ID_BYTES] = '\0'; 429 err = virtblk_get_id(disk, buf); 430 if (!err) 431 return strlen(buf); 432 433 if (err == -EIO) /* Unsupported? Make it empty. */ 434 return 0; 435 436 return err; 437 } 438 439 static DEVICE_ATTR_RO(serial); 440 441 /* The queue's logical block size must be set before calling this */ 442 static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) 443 { 444 struct virtio_device *vdev = vblk->vdev; 445 struct request_queue *q = vblk->disk->queue; 446 char cap_str_2[10], cap_str_10[10]; 447 unsigned long long nblocks; 448 u64 capacity; 449 450 /* Host must always specify the capacity. */ 451 virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); 452 453 nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); 454 455 string_get_size(nblocks, queue_logical_block_size(q), 456 STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); 457 string_get_size(nblocks, queue_logical_block_size(q), 458 STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); 459 460 dev_notice(&vdev->dev, 461 "[%s] %s%llu %d-byte logical blocks (%s/%s)\n", 462 vblk->disk->disk_name, 463 resize ? "new size: " : "", 464 nblocks, 465 queue_logical_block_size(q), 466 cap_str_10, 467 cap_str_2); 468 469 set_capacity_and_notify(vblk->disk, capacity); 470 } 471 472 static void virtblk_config_changed_work(struct work_struct *work) 473 { 474 struct virtio_blk *vblk = 475 container_of(work, struct virtio_blk, config_work); 476 477 virtblk_update_capacity(vblk, true); 478 } 479 480 static void virtblk_config_changed(struct virtio_device *vdev) 481 { 482 struct virtio_blk *vblk = vdev->priv; 483 484 queue_work(virtblk_wq, &vblk->config_work); 485 } 486 487 static int init_vq(struct virtio_blk *vblk) 488 { 489 int err; 490 int i; 491 vq_callback_t **callbacks; 492 const char **names; 493 struct virtqueue **vqs; 494 unsigned short num_vqs; 495 struct virtio_device *vdev = vblk->vdev; 496 struct irq_affinity desc = { 0, }; 497 498 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, 499 struct virtio_blk_config, num_queues, 500 &num_vqs); 501 if (err) 502 num_vqs = 1; 503 504 num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); 505 506 vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); 507 if (!vblk->vqs) 508 return -ENOMEM; 509 510 names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL); 511 callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL); 512 vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL); 513 if (!names || !callbacks || !vqs) { 514 err = -ENOMEM; 515 goto out; 516 } 517 518 for (i = 0; i < num_vqs; i++) { 519 callbacks[i] = virtblk_done; 520 snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); 521 names[i] = vblk->vqs[i].name; 522 } 523 524 /* Discover virtqueues and write information to configuration. */ 525 err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); 526 if (err) 527 goto out; 528 529 for (i = 0; i < num_vqs; i++) { 530 spin_lock_init(&vblk->vqs[i].lock); 531 vblk->vqs[i].vq = vqs[i]; 532 } 533 vblk->num_vqs = num_vqs; 534 535 out: 536 kfree(vqs); 537 kfree(callbacks); 538 kfree(names); 539 if (err) 540 kfree(vblk->vqs); 541 return err; 542 } 543 544 /* 545 * Legacy naming scheme used for virtio devices. We are stuck with it for 546 * virtio blk but don't ever use it for any new driver. 547 */ 548 static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) 549 { 550 const int base = 'z' - 'a' + 1; 551 char *begin = buf + strlen(prefix); 552 char *end = buf + buflen; 553 char *p; 554 int unit; 555 556 p = end - 1; 557 *p = '\0'; 558 unit = base; 559 do { 560 if (p == begin) 561 return -EINVAL; 562 *--p = 'a' + (index % unit); 563 index = (index / unit) - 1; 564 } while (index >= 0); 565 566 memmove(begin, p, end - p); 567 memcpy(buf, prefix, strlen(prefix)); 568 569 return 0; 570 } 571 572 static int virtblk_get_cache_mode(struct virtio_device *vdev) 573 { 574 u8 writeback; 575 int err; 576 577 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, 578 struct virtio_blk_config, wce, 579 &writeback); 580 581 /* 582 * If WCE is not configurable and flush is not available, 583 * assume no writeback cache is in use. 584 */ 585 if (err) 586 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH); 587 588 return writeback; 589 } 590 591 static void virtblk_update_cache_mode(struct virtio_device *vdev) 592 { 593 u8 writeback = virtblk_get_cache_mode(vdev); 594 struct virtio_blk *vblk = vdev->priv; 595 596 blk_queue_write_cache(vblk->disk->queue, writeback, false); 597 } 598 599 static const char *const virtblk_cache_types[] = { 600 "write through", "write back" 601 }; 602 603 static ssize_t 604 cache_type_store(struct device *dev, struct device_attribute *attr, 605 const char *buf, size_t count) 606 { 607 struct gendisk *disk = dev_to_disk(dev); 608 struct virtio_blk *vblk = disk->private_data; 609 struct virtio_device *vdev = vblk->vdev; 610 int i; 611 612 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); 613 i = sysfs_match_string(virtblk_cache_types, buf); 614 if (i < 0) 615 return i; 616 617 virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i); 618 virtblk_update_cache_mode(vdev); 619 return count; 620 } 621 622 static ssize_t 623 cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) 624 { 625 struct gendisk *disk = dev_to_disk(dev); 626 struct virtio_blk *vblk = disk->private_data; 627 u8 writeback = virtblk_get_cache_mode(vblk->vdev); 628 629 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); 630 return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); 631 } 632 633 static DEVICE_ATTR_RW(cache_type); 634 635 static struct attribute *virtblk_attrs[] = { 636 &dev_attr_serial.attr, 637 &dev_attr_cache_type.attr, 638 NULL, 639 }; 640 641 static umode_t virtblk_attrs_are_visible(struct kobject *kobj, 642 struct attribute *a, int n) 643 { 644 struct device *dev = kobj_to_dev(kobj); 645 struct gendisk *disk = dev_to_disk(dev); 646 struct virtio_blk *vblk = disk->private_data; 647 struct virtio_device *vdev = vblk->vdev; 648 649 if (a == &dev_attr_cache_type.attr && 650 !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) 651 return S_IRUGO; 652 653 return a->mode; 654 } 655 656 static const struct attribute_group virtblk_attr_group = { 657 .attrs = virtblk_attrs, 658 .is_visible = virtblk_attrs_are_visible, 659 }; 660 661 static const struct attribute_group *virtblk_attr_groups[] = { 662 &virtblk_attr_group, 663 NULL, 664 }; 665 666 static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, 667 unsigned int hctx_idx, unsigned int numa_node) 668 { 669 struct virtio_blk *vblk = set->driver_data; 670 struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); 671 672 sg_init_table(vbr->sg, vblk->sg_elems); 673 return 0; 674 } 675 676 static int virtblk_map_queues(struct blk_mq_tag_set *set) 677 { 678 struct virtio_blk *vblk = set->driver_data; 679 680 return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], 681 vblk->vdev, 0); 682 } 683 684 static const struct blk_mq_ops virtio_mq_ops = { 685 .queue_rq = virtio_queue_rq, 686 .commit_rqs = virtio_commit_rqs, 687 .complete = virtblk_request_done, 688 .init_request = virtblk_init_request, 689 .map_queues = virtblk_map_queues, 690 }; 691 692 static unsigned int virtblk_queue_depth; 693 module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); 694 695 static int virtblk_probe(struct virtio_device *vdev) 696 { 697 struct virtio_blk *vblk; 698 struct request_queue *q; 699 int err, index; 700 701 u32 v, blk_size, max_size, sg_elems, opt_io_size; 702 u16 min_io_size; 703 u8 physical_block_exp, alignment_offset; 704 unsigned int queue_depth; 705 706 if (!vdev->config->get) { 707 dev_err(&vdev->dev, "%s failure: config access disabled\n", 708 __func__); 709 return -EINVAL; 710 } 711 712 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), 713 GFP_KERNEL); 714 if (err < 0) 715 goto out; 716 index = err; 717 718 /* We need to know how many segments before we allocate. */ 719 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, 720 struct virtio_blk_config, seg_max, 721 &sg_elems); 722 723 /* We need at least one SG element, whatever they say. */ 724 if (err || !sg_elems) 725 sg_elems = 1; 726 727 /* Prevent integer overflows and honor max vq size */ 728 sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); 729 730 /* We need extra sg elements at head and tail. */ 731 sg_elems += 2; 732 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); 733 if (!vblk) { 734 err = -ENOMEM; 735 goto out_free_index; 736 } 737 738 /* This reference is dropped in virtblk_remove(). */ 739 refcount_set(&vblk->refs, 1); 740 mutex_init(&vblk->vdev_mutex); 741 742 vblk->vdev = vdev; 743 vblk->sg_elems = sg_elems; 744 745 INIT_WORK(&vblk->config_work, virtblk_config_changed_work); 746 747 err = init_vq(vblk); 748 if (err) 749 goto out_free_vblk; 750 751 /* Default queue sizing is to fill the ring. */ 752 if (likely(!virtblk_queue_depth)) { 753 queue_depth = vblk->vqs[0].vq->num_free; 754 /* ... but without indirect descs, we use 2 descs per req */ 755 if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) 756 queue_depth /= 2; 757 } else { 758 queue_depth = virtblk_queue_depth; 759 } 760 761 memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); 762 vblk->tag_set.ops = &virtio_mq_ops; 763 vblk->tag_set.queue_depth = queue_depth; 764 vblk->tag_set.numa_node = NUMA_NO_NODE; 765 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 766 vblk->tag_set.cmd_size = 767 sizeof(struct virtblk_req) + 768 sizeof(struct scatterlist) * sg_elems; 769 vblk->tag_set.driver_data = vblk; 770 vblk->tag_set.nr_hw_queues = vblk->num_vqs; 771 772 err = blk_mq_alloc_tag_set(&vblk->tag_set); 773 if (err) 774 goto out_free_vq; 775 776 vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk); 777 if (IS_ERR(vblk->disk)) { 778 err = PTR_ERR(vblk->disk); 779 goto out_free_tags; 780 } 781 q = vblk->disk->queue; 782 783 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); 784 785 vblk->disk->major = major; 786 vblk->disk->first_minor = index_to_minor(index); 787 vblk->disk->minors = 1 << PART_BITS; 788 vblk->disk->private_data = vblk; 789 vblk->disk->fops = &virtblk_fops; 790 vblk->disk->flags |= GENHD_FL_EXT_DEVT; 791 vblk->index = index; 792 793 /* configure queue flush support */ 794 virtblk_update_cache_mode(vdev); 795 796 /* If disk is read-only in the host, the guest should obey */ 797 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) 798 set_disk_ro(vblk->disk, 1); 799 800 /* We can handle whatever the host told us to handle. */ 801 blk_queue_max_segments(q, vblk->sg_elems-2); 802 803 /* No real sector limit. */ 804 blk_queue_max_hw_sectors(q, -1U); 805 806 max_size = virtio_max_dma_size(vdev); 807 808 /* Host can optionally specify maximum segment size and number of 809 * segments. */ 810 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX, 811 struct virtio_blk_config, size_max, &v); 812 if (!err) 813 max_size = min(max_size, v); 814 815 blk_queue_max_segment_size(q, max_size); 816 817 /* Host can optionally specify the block size of the device */ 818 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, 819 struct virtio_blk_config, blk_size, 820 &blk_size); 821 if (!err) 822 blk_queue_logical_block_size(q, blk_size); 823 else 824 blk_size = queue_logical_block_size(q); 825 826 /* Use topology information if available */ 827 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 828 struct virtio_blk_config, physical_block_exp, 829 &physical_block_exp); 830 if (!err && physical_block_exp) 831 blk_queue_physical_block_size(q, 832 blk_size * (1 << physical_block_exp)); 833 834 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 835 struct virtio_blk_config, alignment_offset, 836 &alignment_offset); 837 if (!err && alignment_offset) 838 blk_queue_alignment_offset(q, blk_size * alignment_offset); 839 840 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 841 struct virtio_blk_config, min_io_size, 842 &min_io_size); 843 if (!err && min_io_size) 844 blk_queue_io_min(q, blk_size * min_io_size); 845 846 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 847 struct virtio_blk_config, opt_io_size, 848 &opt_io_size); 849 if (!err && opt_io_size) 850 blk_queue_io_opt(q, blk_size * opt_io_size); 851 852 if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { 853 q->limits.discard_granularity = blk_size; 854 855 virtio_cread(vdev, struct virtio_blk_config, 856 discard_sector_alignment, &v); 857 q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0; 858 859 virtio_cread(vdev, struct virtio_blk_config, 860 max_discard_sectors, &v); 861 blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); 862 863 virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 864 &v); 865 blk_queue_max_discard_segments(q, 866 min_not_zero(v, 867 MAX_DISCARD_SEGMENTS)); 868 869 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 870 } 871 872 if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { 873 virtio_cread(vdev, struct virtio_blk_config, 874 max_write_zeroes_sectors, &v); 875 blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); 876 } 877 878 virtblk_update_capacity(vblk, false); 879 virtio_device_ready(vdev); 880 881 device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); 882 return 0; 883 884 out_free_tags: 885 blk_mq_free_tag_set(&vblk->tag_set); 886 out_free_vq: 887 vdev->config->del_vqs(vdev); 888 kfree(vblk->vqs); 889 out_free_vblk: 890 kfree(vblk); 891 out_free_index: 892 ida_simple_remove(&vd_index_ida, index); 893 out: 894 return err; 895 } 896 897 static void virtblk_remove(struct virtio_device *vdev) 898 { 899 struct virtio_blk *vblk = vdev->priv; 900 901 /* Make sure no work handler is accessing the device. */ 902 flush_work(&vblk->config_work); 903 904 del_gendisk(vblk->disk); 905 blk_cleanup_disk(vblk->disk); 906 blk_mq_free_tag_set(&vblk->tag_set); 907 908 mutex_lock(&vblk->vdev_mutex); 909 910 /* Stop all the virtqueues. */ 911 vdev->config->reset(vdev); 912 913 /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */ 914 vblk->vdev = NULL; 915 916 vdev->config->del_vqs(vdev); 917 kfree(vblk->vqs); 918 919 mutex_unlock(&vblk->vdev_mutex); 920 921 virtblk_put(vblk); 922 } 923 924 #ifdef CONFIG_PM_SLEEP 925 static int virtblk_freeze(struct virtio_device *vdev) 926 { 927 struct virtio_blk *vblk = vdev->priv; 928 929 /* Ensure we don't receive any more interrupts */ 930 vdev->config->reset(vdev); 931 932 /* Make sure no work handler is accessing the device. */ 933 flush_work(&vblk->config_work); 934 935 blk_mq_quiesce_queue(vblk->disk->queue); 936 937 vdev->config->del_vqs(vdev); 938 kfree(vblk->vqs); 939 940 return 0; 941 } 942 943 static int virtblk_restore(struct virtio_device *vdev) 944 { 945 struct virtio_blk *vblk = vdev->priv; 946 int ret; 947 948 ret = init_vq(vdev->priv); 949 if (ret) 950 return ret; 951 952 virtio_device_ready(vdev); 953 954 blk_mq_unquiesce_queue(vblk->disk->queue); 955 return 0; 956 } 957 #endif 958 959 static const struct virtio_device_id id_table[] = { 960 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 961 { 0 }, 962 }; 963 964 static unsigned int features_legacy[] = { 965 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 966 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 967 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 968 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 969 } 970 ; 971 static unsigned int features[] = { 972 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 973 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 974 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 975 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 976 }; 977 978 static struct virtio_driver virtio_blk = { 979 .feature_table = features, 980 .feature_table_size = ARRAY_SIZE(features), 981 .feature_table_legacy = features_legacy, 982 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 983 .driver.name = KBUILD_MODNAME, 984 .driver.owner = THIS_MODULE, 985 .id_table = id_table, 986 .probe = virtblk_probe, 987 .remove = virtblk_remove, 988 .config_changed = virtblk_config_changed, 989 #ifdef CONFIG_PM_SLEEP 990 .freeze = virtblk_freeze, 991 .restore = virtblk_restore, 992 #endif 993 }; 994 995 static int __init init(void) 996 { 997 int error; 998 999 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); 1000 if (!virtblk_wq) 1001 return -ENOMEM; 1002 1003 major = register_blkdev(0, "virtblk"); 1004 if (major < 0) { 1005 error = major; 1006 goto out_destroy_workqueue; 1007 } 1008 1009 error = register_virtio_driver(&virtio_blk); 1010 if (error) 1011 goto out_unregister_blkdev; 1012 return 0; 1013 1014 out_unregister_blkdev: 1015 unregister_blkdev(major, "virtblk"); 1016 out_destroy_workqueue: 1017 destroy_workqueue(virtblk_wq); 1018 return error; 1019 } 1020 1021 static void __exit fini(void) 1022 { 1023 unregister_virtio_driver(&virtio_blk); 1024 unregister_blkdev(major, "virtblk"); 1025 destroy_workqueue(virtblk_wq); 1026 } 1027 module_init(init); 1028 module_exit(fini); 1029 1030 MODULE_DEVICE_TABLE(virtio, id_table); 1031 MODULE_DESCRIPTION("Virtio block driver"); 1032 MODULE_LICENSE("GPL"); 1033