// SPDX-License-Identifier: GPL-2.0-only
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-virtio.h>
#include <linux/numa.h>
#include <uapi/linux/virtio_ring.h>

#define PART_BITS 4
#define VQ_NAME_LEN 16
#define MAX_DISCARD_SEGMENTS 256u

static int major;
static DEFINE_IDA(vd_index_ida);

static struct workqueue_struct *virtblk_wq;

struct virtio_blk_vq {
	struct virtqueue *vq;
	spinlock_t lock;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

struct virtio_blk {
	/*
	 * This mutex must be held by anything that may run after
	 * virtblk_remove() sets vblk->vdev to NULL.
	 *
	 * blk-mq, virtqueue processing, and sysfs attribute code paths are
	 * shut down before vblk->vdev is set to NULL and therefore do not
	 * need to hold this mutex.
	 */
	struct mutex vdev_mutex;
	struct virtio_device *vdev;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Block layer tags. */
	struct blk_mq_tag_set tag_set;

	/* Process context for config space updates */
	struct work_struct config_work;

	/*
	 * Tracks references from block_device_operations open/release and
	 * virtio_driver probe/remove so this object can be freed once no
	 * longer in use.
	 */
	refcount_t refs;

	/* What host tells us, plus 2 for header & tailer. */
	unsigned int sg_elems;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* num of vqs */
	int num_vqs;
	struct virtio_blk_vq *vqs;
};

struct virtblk_req {
	struct virtio_blk_outhdr out_hdr;
	u8 status;
	struct scatterlist sg[];
};

static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
{
	switch (vbr->status) {
	case VIRTIO_BLK_S_OK:
		return BLK_STS_OK;
	case VIRTIO_BLK_S_UNSUPP:
		return BLK_STS_NOTSUPP;
	default:
		return BLK_STS_IOERR;
	}
}

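/*
 * Descriptive note on the buffer layout assembled by virtblk_add_req()
 * below: the sgs[] array holds at most three entries.  The out_hdr is
 * always the first driver->device buffer, the optional data scatterlist
 * follows in the direction implied by VIRTIO_BLK_T_OUT, and the single
 * status byte is always the last device->driver buffer.
 */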
static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
			   struct scatterlist *data_sg, bool have_data)
{
	struct scatterlist hdr, status, *sgs[3];
	unsigned int num_out = 0, num_in = 0;

	sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
	sgs[num_out++] = &hdr;

	if (have_data) {
		if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
			sgs[num_out++] = data_sg;
		else
			sgs[num_out + num_in++] = data_sg;
	}

	sg_init_one(&status, &vbr->status, sizeof(vbr->status));
	sgs[num_out + num_in++] = &status;

	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);
	unsigned short n = 0;
	struct virtio_blk_discard_write_zeroes *range;
	struct bio *bio;
	u32 flags = 0;

	if (unmap)
		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;

	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
	if (!range)
		return -ENOMEM;

	__rq_for_each_bio(bio, req) {
		u64 sector = bio->bi_iter.bi_sector;
		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;

		range[n].flags = cpu_to_le32(flags);
		range[n].num_sectors = cpu_to_le32(num_sectors);
		range[n].sector = cpu_to_le64(sector);
		n++;
	}

	req->special_vec.bv_page = virt_to_page(range);
	req->special_vec.bv_offset = offset_in_page(range);
	req->special_vec.bv_len = sizeof(*range) * segments;
	req->rq_flags |= RQF_SPECIAL_PAYLOAD;

	return 0;
}

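/*
 * The range[] array set up above travels with the request as its special
 * payload; virtblk_request_done() below frees it once the device has
 * completed the request.
 */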
static inline void virtblk_request_done(struct request *req)
{
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
		kfree(page_address(req->special_vec.bv_page) +
		      req->special_vec.bv_offset);
	}

	blk_mq_end_request(req, virtblk_result(vbr));
}

static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool req_done = false;
	int qid = vq->index;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
			struct request *req = blk_mq_rq_from_pdu(vbr);

			blk_mq_complete_request(req);
			req_done = true;
		}
		if (unlikely(virtqueue_is_broken(vq)))
			break;
	} while (!virtqueue_enable_cb(vq));

	/* In case queue is stopped waiting for more buffers. */
	if (req_done)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}

static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
	bool kick;

	spin_lock_irq(&vq->lock);
	kick = virtqueue_kick_prepare(vq->vq);
	spin_unlock_irq(&vq->lock);

	if (kick)
		virtqueue_notify(vq->vq);
}

static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *bd)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct request *req = bd->rq;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	unsigned int num;
	int qid = hctx->queue_num;
	int err;
	bool notify = false;
	bool unmap = false;
	u32 type;

	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

	switch (req_op(req)) {
	case REQ_OP_READ:
	case REQ_OP_WRITE:
		type = 0;
		break;
	case REQ_OP_FLUSH:
		type = VIRTIO_BLK_T_FLUSH;
		break;
	case REQ_OP_DISCARD:
		type = VIRTIO_BLK_T_DISCARD;
		break;
	case REQ_OP_WRITE_ZEROES:
		type = VIRTIO_BLK_T_WRITE_ZEROES;
		unmap = !(req->cmd_flags & REQ_NOUNMAP);
		break;
	case REQ_OP_DRV_IN:
		type = VIRTIO_BLK_T_GET_ID;
		break;
	default:
		WARN_ON_ONCE(1);
		return BLK_STS_IOERR;
	}

	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
	vbr->out_hdr.sector = type ?
		0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
	vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));

	blk_mq_start_request(req);

	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
		err = virtblk_setup_discard_write_zeroes(req, unmap);
		if (err)
			return BLK_STS_RESOURCE;
	}

	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
	if (num) {
		if (rq_data_dir(req) == WRITE)
			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
		else
			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
	}

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
	if (err) {
		virtqueue_kick(vblk->vqs[qid].vq);
		/* Don't stop the queue if -ENOMEM: we may have failed to
		 * bounce the buffer due to global resource outage.
		 */
		if (err == -ENOSPC)
			blk_mq_stop_hw_queue(hctx);
		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
		switch (err) {
		case -ENOSPC:
			return BLK_STS_DEV_RESOURCE;
		case -ENOMEM:
			return BLK_STS_RESOURCE;
		default:
			return BLK_STS_IOERR;
		}
	}

	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
		notify = true;
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

	if (notify)
		virtqueue_notify(vblk->vqs[qid].vq);
	return BLK_STS_OK;
}

/* return id (s/n) string for *disk to *id_str
 */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request_queue *q = vblk->disk->queue;
	struct request *req;
	int err;

	req = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
	if (err)
		goto out;

	blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
	err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
	blk_put_request(req);
	return err;
}

static void virtblk_get(struct virtio_blk *vblk)
{
	refcount_inc(&vblk->refs);
}

static void virtblk_put(struct virtio_blk *vblk)
{
	if (refcount_dec_and_test(&vblk->refs)) {
		ida_simple_remove(&vd_index_ida, vblk->index);
		mutex_destroy(&vblk->vdev_mutex);
		kfree(vblk);
	}
}

static int virtblk_open(struct block_device *bd, fmode_t mode)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	int ret = 0;

	mutex_lock(&vblk->vdev_mutex);

	if (vblk->vdev)
		virtblk_get(vblk);
	else
		ret = -ENXIO;

	mutex_unlock(&vblk->vdev_mutex);
	return ret;
}

static void virtblk_release(struct gendisk *disk, fmode_t mode)
{
	struct virtio_blk *vblk = disk->private_data;

	virtblk_put(vblk);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	int ret = 0;

	mutex_lock(&vblk->vdev_mutex);

	if (!vblk->vdev) {
		ret = -ENXIO;
		goto out;
	}

	/* see if the host passed in geometry config */
	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.cylinders, &geo->cylinders);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.heads, &geo->heads);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.sectors, &geo->sectors);
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
out:
	mutex_unlock(&vblk->vdev_mutex);
	return ret;
}

static const struct block_device_operations virtblk_fops = {
	.owner = THIS_MODULE,
	.open = virtblk_open,
	.release = virtblk_release,
	.getgeo = virtblk_getgeo,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static int minor_to_index(int minor)
{
	return minor >> PART_BITS;
}

static ssize_t serial_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}

static DEVICE_ATTR_RO(serial);

/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
{
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	unsigned long long nblocks;
	u64 capacity;

	/* Host must always specify the capacity. */
	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)capacity != capacity) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)capacity);
		capacity = (sector_t)-1;
	}

	nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);

	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		   "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
		   vblk->disk->disk_name,
		   resize ? "new size: " : "",
		   nblocks,
		   queue_logical_block_size(q),
		   cap_str_10,
		   cap_str_2);

	set_capacity_revalidate_and_notify(vblk->disk, capacity, true);
}

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);

	virtblk_update_capacity(vblk, true);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int init_vq(struct virtio_blk *vblk)
{
	int err;
	int i;
	vq_callback_t **callbacks;
	const char **names;
	struct virtqueue **vqs;
	unsigned short num_vqs;
	struct virtio_device *vdev = vblk->vdev;
	struct irq_affinity desc = { 0, };

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
				   struct virtio_blk_config, num_queues,
				   &num_vqs);
	if (err)
		num_vqs = 1;

	num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);

	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
	if (!vblk->vqs)
		return -ENOMEM;

	names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
	callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
	if (!names || !callbacks || !vqs) {
		err = -ENOMEM;
		goto out;
	}

	for (i = 0; i < num_vqs; i++) {
		callbacks[i] = virtblk_done;
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
		names[i] = vblk->vqs[i].name;
	}

	/* Discover virtqueues and write information to configuration. */
	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
	if (err)
		goto out;

	for (i = 0; i < num_vqs; i++) {
		spin_lock_init(&vblk->vqs[i].lock);
		vblk->vqs[i].vq = vqs[i];
	}
	vblk->num_vqs = num_vqs;

out:
	kfree(vqs);
	kfree(callbacks);
	kfree(names);
	if (err)
		kfree(vblk->vqs);
	return err;
}

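/*
 * Example of the names generated by virtblk_name_format() below, assuming
 * the "vd" prefix used in virtblk_probe(): index 0 -> "vda", 25 -> "vdz",
 * 26 -> "vdaa", 701 -> "vdzz", 702 -> "vdaaa".
 */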
/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}

static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
	u8 writeback;
	int err;

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
				   struct virtio_blk_config, wce,
				   &writeback);

	/*
	 * If WCE is not configurable and flush is not available,
	 * assume no writeback cache is in use.
	 */
	if (err)
		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);

	return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
	u8 writeback = virtblk_get_cache_mode(vdev);
	struct virtio_blk *vblk = vdev->priv;

	blk_queue_write_cache(vblk->disk->queue, writeback, false);
	revalidate_disk(vblk->disk);
}

static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};

static ssize_t
cache_type_store(struct device *dev, struct device_attribute *attr,
		 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;
	int i;

	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
	i = sysfs_match_string(virtblk_cache_types, buf);
	if (i < 0)
		return i;

	virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
	virtblk_update_cache_mode(vdev);
	return count;
}

static ssize_t
cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	u8 writeback = virtblk_get_cache_mode(vblk->vdev);

	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
}

static DEVICE_ATTR_RW(cache_type);

static struct attribute *virtblk_attrs[] = {
	&dev_attr_serial.attr,
	&dev_attr_cache_type.attr,
	NULL,
};

static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
					 struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;

	if (a == &dev_attr_cache_type.attr &&
	    !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		return S_IRUGO;

	return a->mode;
}

static const struct attribute_group virtblk_attr_group = {
	.attrs = virtblk_attrs,
	.is_visible = virtblk_attrs_are_visible,
};

static const struct attribute_group *virtblk_attr_groups[] = {
	&virtblk_attr_group,
	NULL,
};

static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
				unsigned int hctx_idx, unsigned int numa_node)
{
	struct virtio_blk *vblk = set->driver_data;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);

	sg_init_table(vbr->sg, vblk->sg_elems);
	return 0;
}

static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
	struct virtio_blk *vblk = set->driver_data;

	return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT],
					vblk->vdev, 0);
}

static const struct blk_mq_ops virtio_mq_ops = {
	.queue_rq	= virtio_queue_rq,
	.commit_rqs	= virtio_commit_rqs,
	.complete	= virtblk_request_done,
	.init_request	= virtblk_init_request,
	.map_queues	= virtblk_map_queues,
};

static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);

static int virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err, index;

	u32 v, blk_size, max_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;

	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
			     GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	/* We need to know how many segments before we allocate. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
				   struct virtio_blk_config, seg_max,
				   &sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	/* This reference is dropped in virtblk_remove(). */
	refcount_set(&vblk->refs, 1);
	mutex_init(&vblk->vdev_mutex);

	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_free_vq;
	}

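	/*
	 * Illustration of the default sizing below: a 256-entry virtqueue
	 * gives a default queue depth of 256 when indirect descriptors are
	 * negotiated, and 128 when each request has to consume at least two
	 * ring descriptors directly.
	 */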
	/* Default queue sizing is to fill the ring. */
	if (!virtblk_queue_depth) {
		virtblk_queue_depth = vblk->vqs[0].vq->num_free;
		/* ... but without indirect descs, we use 2 descs per req */
		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
			virtblk_queue_depth /= 2;
	}

	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
	vblk->tag_set.ops = &virtio_mq_ops;
	vblk->tag_set.queue_depth = virtblk_queue_depth;
	vblk->tag_set.numa_node = NUMA_NO_NODE;
	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	vblk->tag_set.cmd_size =
		sizeof(struct virtblk_req) +
		sizeof(struct scatterlist) * sg_elems;
	vblk->tag_set.driver_data = vblk;
	vblk->tag_set.nr_hw_queues = vblk->num_vqs;

	err = blk_mq_alloc_tag_set(&vblk->tag_set);
	if (err)
		goto out_put_disk;

	q = blk_mq_init_queue(&vblk->tag_set);
	if (IS_ERR(q)) {
		err = -ENOMEM;
		goto out_free_tags;
	}
	vblk->disk->queue = q;

	q->queuedata = vblk;

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->flags |= GENHD_FL_EXT_DEVT;
	vblk->index = index;

	/* configure queue flush support */
	virtblk_update_cache_mode(vdev);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems - 2);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

	max_size = virtio_max_dma_size(vdev);

	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
				   struct virtio_blk_config, size_max, &v);
	if (!err)
		max_size = min(max_size, v);

	blk_queue_max_segment_size(q, max_size);

	/* Host can optionally specify the block size of the device */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
				   struct virtio_blk_config, blk_size,
				   &blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, physical_block_exp,
				   &physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, alignment_offset,
				   &alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, min_io_size,
				   &min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, opt_io_size,
				   &opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		q->limits.discard_granularity = blk_size;

		virtio_cread(vdev, struct virtio_blk_config,
			     discard_sector_alignment, &v);
		q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;

		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_sectors, &v);
		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);

		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
			     &v);
		blk_queue_max_discard_segments(q,
					       min_not_zero(v,
							    MAX_DISCARD_SEGMENTS));

		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     max_write_zeroes_sectors, &v);
		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
	}

	virtblk_update_capacity(vblk, false);
	virtio_device_ready(vdev);

	device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
	return 0;

out_free_tags:
	blk_mq_free_tag_set(&vblk->tag_set);
out_put_disk:
	put_disk(vblk->disk);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_simple_remove(&vd_index_ida, index);
out:
	return err;
}

static void virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);

	blk_mq_free_tag_set(&vblk->tag_set);

	mutex_lock(&vblk->vdev_mutex);

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
	vblk->vdev = NULL;

	put_disk(vblk->disk);
	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	mutex_unlock(&vblk->vdev_mutex);

	virtblk_put(vblk);
}

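/*
 * Suspend/resume support: freeze resets the device so no further interrupts
 * arrive, drains the config work, quiesces blk-mq and tears down the
 * virtqueues; restore rebuilds them via init_vq() before unquiescing the
 * queue.
 */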
#ifdef CONFIG_PM_SLEEP
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure we don't receive any more interrupts */
	vdev->config->reset(vdev);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	blk_mq_quiesce_queue(vblk->disk->queue);

	vdev->config->del_vqs(vdev);
	/* Free the virtqueue array; init_vq() reallocates it on restore. */
	kfree(vblk->vqs);

	return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	ret = init_vq(vdev->priv);
	if (ret)
		return ret;

	virtio_device_ready(vdev);

	blk_mq_unquiesce_queue(vblk->disk->queue);
	return 0;
}
#endif

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features_legacy[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};

static struct virtio_driver virtio_blk = {
	.feature_table			= features,
	.feature_table_size		= ARRAY_SIZE(features),
	.feature_table_legacy		= features_legacy,
	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
	.driver.name			= KBUILD_MODNAME,
	.driver.owner			= THIS_MODULE,
	.id_table			= id_table,
	.probe				= virtblk_probe,
	.remove				= virtblk_remove,
	.config_changed			= virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze				= virtblk_freeze,
	.restore			= virtblk_restore,
#endif
};

static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_blk);
	unregister_blkdev(major, "virtblk");
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");