// SPDX-License-Identifier: GPL-2.0-only
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-virtio.h>
#include <linux/numa.h>
#include <linux/vmalloc.h>
#include <uapi/linux/virtio_ring.h>

#define PART_BITS 4
#define VQ_NAME_LEN 16
#define MAX_DISCARD_SEGMENTS 256u

/* The maximum number of sg elements that fit into a virtqueue */
#define VIRTIO_BLK_MAX_SG_ELEMS 32768

#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define VIRTIO_BLK_INLINE_SG_CNT	0
#else
#define VIRTIO_BLK_INLINE_SG_CNT	2
#endif

static unsigned int num_request_queues;
module_param(num_request_queues, uint, 0644);
MODULE_PARM_DESC(num_request_queues,
		 "Limit the number of request queues to use for blk device. "
		 "0 for no limit. "
		 "Values > nr_cpu_ids truncated to nr_cpu_ids.");

static unsigned int poll_queues;
module_param(poll_queues, uint, 0644);
MODULE_PARM_DESC(poll_queues, "The number of dedicated virtqueues for polling I/O");

static int major;
static DEFINE_IDA(vd_index_ida);

static struct workqueue_struct *virtblk_wq;

struct virtio_blk_vq {
	struct virtqueue *vq;
	spinlock_t lock;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

struct virtio_blk {
	/*
	 * This mutex must be held by anything that may run after
	 * virtblk_remove() sets vblk->vdev to NULL.
	 *
	 * blk-mq, virtqueue processing, and sysfs attribute code paths are
	 * shut down before vblk->vdev is set to NULL and therefore do not need
	 * to hold this mutex.
	 */
	struct mutex vdev_mutex;
	struct virtio_device *vdev;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Block layer tags. */
	struct blk_mq_tag_set tag_set;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* num of vqs */
	int num_vqs;
	int io_queues[HCTX_MAX_TYPES];
	struct virtio_blk_vq *vqs;

	/* For zoned device */
	unsigned int zone_sectors;
};

struct virtblk_req {
	/* Out header */
	struct virtio_blk_outhdr out_hdr;

	/* In header */
	union {
		u8 status;

		/*
		 * The zone append command has an extended in header.
		 * The status field in zone_append_in_hdr must have
		 * the same offset in virtblk_req as the non-zoned
		 * status field above.
		 */
		struct {
			u8 status;
			u8 reserved[7];
			__le64 append_sector;
		} zone_append_in_hdr;
	};

	size_t in_hdr_len;

	struct sg_table sg_table;
	struct scatterlist sg[];
};

static inline blk_status_t virtblk_result(u8 status)
{
	switch (status) {
	case VIRTIO_BLK_S_OK:
		return BLK_STS_OK;
	case VIRTIO_BLK_S_UNSUPP:
		return BLK_STS_NOTSUPP;
	case VIRTIO_BLK_S_ZONE_OPEN_RESOURCE:
		return BLK_STS_ZONE_OPEN_RESOURCE;
	case VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE:
		return BLK_STS_ZONE_ACTIVE_RESOURCE;
	case VIRTIO_BLK_S_IOERR:
	case VIRTIO_BLK_S_ZONE_UNALIGNED_WP:
	default:
		return BLK_STS_IOERR;
	}
}

static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];

	return vq;
}

static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
{
	struct scatterlist out_hdr, in_hdr, *sgs[3];
	unsigned int num_out = 0, num_in = 0;

	sg_init_one(&out_hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
	sgs[num_out++] = &out_hdr;

	if (vbr->sg_table.nents) {
		if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
			sgs[num_out++] = vbr->sg_table.sgl;
		else
			sgs[num_out + num_in++] = vbr->sg_table.sgl;
	}

	sg_init_one(&in_hdr, &vbr->status, vbr->in_hdr_len);
	sgs[num_out + num_in++] = &in_hdr;

	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool unmap)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);
	unsigned short n = 0;
	struct virtio_blk_discard_write_zeroes *range;
	struct bio *bio;
	u32 flags = 0;

	if (unmap)
		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;

	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
	if (!range)
		return -ENOMEM;

	/*
	 * Single max discard segment means multi-range discard isn't
	 * supported, and block layer only runs contiguity merge like
	 * normal RW request. So we can't rely on bio for retrieving
	 * each range info.
	 */
	if (queue_max_discard_segments(req->q) == 1) {
		range[0].flags = cpu_to_le32(flags);
		range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req));
		range[0].sector = cpu_to_le64(blk_rq_pos(req));
		n = 1;
	} else {
		__rq_for_each_bio(bio, req) {
			u64 sector = bio->bi_iter.bi_sector;
			u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;

			range[n].flags = cpu_to_le32(flags);
			range[n].num_sectors = cpu_to_le32(num_sectors);
			range[n].sector = cpu_to_le64(sector);
			n++;
		}
	}

	WARN_ON_ONCE(n != segments);

	bvec_set_virt(&req->special_vec, range, sizeof(*range) * segments);
	req->rq_flags |= RQF_SPECIAL_PAYLOAD;

	return 0;
}

static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr)
{
	if (blk_rq_nr_phys_segments(req))
		sg_free_table_chained(&vbr->sg_table,
				      VIRTIO_BLK_INLINE_SG_CNT);
}

static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
			    struct virtblk_req *vbr)
{
	int err;

	if (!blk_rq_nr_phys_segments(req))
		return 0;

	vbr->sg_table.sgl = vbr->sg;
	err = sg_alloc_table_chained(&vbr->sg_table,
				     blk_rq_nr_phys_segments(req),
				     vbr->sg_table.sgl,
				     VIRTIO_BLK_INLINE_SG_CNT);
	if (unlikely(err))
		return -ENOMEM;

	return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
}

static void virtblk_cleanup_cmd(struct request *req)
{
	if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
		kfree(bvec_virt(&req->special_vec));
}

static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
				      struct request *req,
				      struct virtblk_req *vbr)
{
	size_t in_hdr_len = sizeof(vbr->status);
	bool unmap = false;
	u32 type;
	u64 sector = 0;

	/* Set fields for all request types */
	vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));

	switch (req_op(req)) {
	case REQ_OP_READ:
		type = VIRTIO_BLK_T_IN;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_WRITE:
		type = VIRTIO_BLK_T_OUT;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_FLUSH:
		type = VIRTIO_BLK_T_FLUSH;
		break;
	case REQ_OP_DISCARD:
		type = VIRTIO_BLK_T_DISCARD;
		break;
	case REQ_OP_WRITE_ZEROES:
		type = VIRTIO_BLK_T_WRITE_ZEROES;
		unmap = !(req->cmd_flags & REQ_NOUNMAP);
		break;
	case REQ_OP_SECURE_ERASE:
		type = VIRTIO_BLK_T_SECURE_ERASE;
		break;
	case REQ_OP_ZONE_OPEN:
		type = VIRTIO_BLK_T_ZONE_OPEN;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_ZONE_CLOSE:
		type = VIRTIO_BLK_T_ZONE_CLOSE;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_ZONE_FINISH:
		type = VIRTIO_BLK_T_ZONE_FINISH;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_ZONE_APPEND:
		type = VIRTIO_BLK_T_ZONE_APPEND;
		sector = blk_rq_pos(req);
		in_hdr_len = sizeof(vbr->zone_append_in_hdr);
		break;
	case REQ_OP_ZONE_RESET:
		type = VIRTIO_BLK_T_ZONE_RESET;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_ZONE_RESET_ALL:
		type = VIRTIO_BLK_T_ZONE_RESET_ALL;
		break;
	case REQ_OP_DRV_IN:
		/* Out header already filled in, nothing to do */
		return 0;
	default:
		WARN_ON_ONCE(1);
		return BLK_STS_IOERR;
	}

	/* Set fields for non-REQ_OP_DRV_IN request types */
	vbr->in_hdr_len = in_hdr_len;
	vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
	vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector);

	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES ||
	    type == VIRTIO_BLK_T_SECURE_ERASE) {
		if (virtblk_setup_discard_write_zeroes_erase(req, unmap))
			return BLK_STS_RESOURCE;
	}

	return 0;
}

static inline void virtblk_request_done(struct request *req)
{
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	blk_status_t status = virtblk_result(vbr->status);

	virtblk_unmap_data(req, vbr);
	virtblk_cleanup_cmd(req);

	if (req_op(req) == REQ_OP_ZONE_APPEND)
		req->__sector = le64_to_cpu(vbr->zone_append_in_hdr.append_sector);

	blk_mq_end_request(req, status);
}

static void virtblk_complete_batch(struct io_comp_batch *iob)
{
	struct request *req;

	rq_list_for_each(&iob->req_list, req) {
		virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
		virtblk_cleanup_cmd(req);
	}
	blk_mq_end_request_batch(iob);
}

static int virtblk_handle_req(struct virtio_blk_vq *vq,
			      struct io_comp_batch *iob)
{
	struct virtblk_req *vbr;
	int req_done = 0;
	unsigned int len;

	while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
		struct request *req = blk_mq_rq_from_pdu(vbr);

		if (likely(!blk_should_fake_timeout(req->q)) &&
		    !blk_mq_complete_request_remote(req) &&
		    !blk_mq_add_to_batch(req, iob, vbr->status,
					 virtblk_complete_batch))
			virtblk_request_done(req);
		req_done++;
	}

	return req_done;
}

static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	struct virtio_blk_vq *vblk_vq = &vblk->vqs[vq->index];
	int req_done = 0;
	unsigned long flags;
	DEFINE_IO_COMP_BATCH(iob);

	spin_lock_irqsave(&vblk_vq->lock, flags);
	do {
		virtqueue_disable_cb(vq);
		req_done += virtblk_handle_req(vblk_vq, &iob);

		if (unlikely(virtqueue_is_broken(vq)))
			break;
	} while (!virtqueue_enable_cb(vq));

	if (req_done) {
		if (!rq_list_empty(iob.req_list))
			iob.complete(&iob);

		/* In case queue is stopped waiting for more buffers. */
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
	}
	spin_unlock_irqrestore(&vblk_vq->lock, flags);
}

static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
	bool kick;

	spin_lock_irq(&vq->lock);
	kick = virtqueue_kick_prepare(vq->vq);
	spin_unlock_irq(&vq->lock);

	if (kick)
		virtqueue_notify(vq->vq);
}

static blk_status_t virtblk_fail_to_queue(struct request *req, int rc)
{
	virtblk_cleanup_cmd(req);
	switch (rc) {
	case -ENOSPC:
		return BLK_STS_DEV_RESOURCE;
	case -ENOMEM:
		return BLK_STS_RESOURCE;
	default:
		return BLK_STS_IOERR;
	}
}

static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx,
				    struct virtio_blk *vblk,
				    struct request *req,
				    struct virtblk_req *vbr)
{
	blk_status_t status;
	int num;

	status = virtblk_setup_cmd(vblk->vdev, req, vbr);
	if (unlikely(status))
		return status;

	num = virtblk_map_data(hctx, req, vbr);
	if (unlikely(num < 0))
		return virtblk_fail_to_queue(req, -ENOMEM);
	vbr->sg_table.nents = num;

	blk_mq_start_request(req);

	return BLK_STS_OK;
}

static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *bd)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct request *req = bd->rq;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	int qid = hctx->queue_num;
	bool notify = false;
	blk_status_t status;
	int err;

	status = virtblk_prep_rq(hctx, vblk, req, vbr);
	if (unlikely(status))
		return status;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
	if (err) {
		virtqueue_kick(vblk->vqs[qid].vq);
		/* Don't stop the queue if -ENOMEM: we may have failed to
		 * bounce the buffer due to global resource outage.
		 */
		if (err == -ENOSPC)
			blk_mq_stop_hw_queue(hctx);
		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
		virtblk_unmap_data(req, vbr);
		return virtblk_fail_to_queue(req, err);
	}

	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
		notify = true;
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

	if (notify)
		virtqueue_notify(vblk->vqs[qid].vq);
	return BLK_STS_OK;
}

static bool virtblk_prep_rq_batch(struct request *req)
{
	struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

	req->mq_hctx->tags->rqs[req->tag] = req;

	return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
}

static bool virtblk_add_req_batch(struct virtio_blk_vq *vq,
				  struct request **rqlist)
{
	unsigned long flags;
	int err;
	bool kick;

	spin_lock_irqsave(&vq->lock, flags);

	while (!rq_list_empty(*rqlist)) {
		struct request *req = rq_list_pop(rqlist);
		struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

		err = virtblk_add_req(vq->vq, vbr);
		if (err) {
			virtblk_unmap_data(req, vbr);
			virtblk_cleanup_cmd(req);
			blk_mq_requeue_request(req, true);
		}
	}

	kick = virtqueue_kick_prepare(vq->vq);
	spin_unlock_irqrestore(&vq->lock, flags);

	return kick;
}

static void virtio_queue_rqs(struct request **rqlist)
{
	struct request *req, *next, *prev = NULL;
	struct request *requeue_list = NULL;

	rq_list_for_each_safe(rqlist, req, next) {
		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
		bool kick;

		if (!virtblk_prep_rq_batch(req)) {
			rq_list_move(rqlist, &requeue_list, req, prev);
			req = prev;
			if (!req)
				continue;
		}

		if (!next || req->mq_hctx != next->mq_hctx) {
			req->rq_next = NULL;
			kick = virtblk_add_req_batch(vq, rqlist);
			if (kick)
				virtqueue_notify(vq->vq);

			*rqlist = next;
			prev = NULL;
		} else
			prev = req;
	}

	*rqlist = requeue_list;
}

#ifdef CONFIG_BLK_DEV_ZONED
static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
					 unsigned int nr_zones,
					 unsigned int zone_sectors,
					 size_t *buflen)
{
	struct request_queue *q = vblk->disk->queue;
	size_t bufsize;
	void *buf;

	nr_zones = min_t(unsigned int, nr_zones,
			 get_capacity(vblk->disk) >> ilog2(zone_sectors));

	bufsize = sizeof(struct virtio_blk_zone_report) +
		nr_zones * sizeof(struct virtio_blk_zone_descriptor);
	bufsize = min_t(size_t, bufsize,
			queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

	while (bufsize >= sizeof(struct virtio_blk_zone_report)) {
		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
		if (buf) {
			*buflen = bufsize;
			return buf;
		}
		bufsize >>= 1;
	}

	return NULL;
}

static int virtblk_submit_zone_report(struct virtio_blk *vblk,
				      char *report_buf, size_t report_len,
				      sector_t sector)
{
	struct request_queue *q = vblk->disk->queue;
	struct request *req;
	struct virtblk_req *vbr;
	int err;

	req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	vbr = blk_mq_rq_to_pdu(req);
	vbr->in_hdr_len = sizeof(vbr->status);
	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT);
	vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector);

	err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL);
	if (err)
		goto out;

	blk_execute_rq(req, false);
	err = blk_status_to_errno(virtblk_result(vbr->status));
out:
	blk_mq_free_request(req);
	return err;
}

static int virtblk_parse_zone(struct virtio_blk *vblk,
			      struct virtio_blk_zone_descriptor *entry,
			      unsigned int idx, unsigned int zone_sectors,
			      report_zones_cb cb, void *data)
{
	struct blk_zone zone = { };

	if (entry->z_type != VIRTIO_BLK_ZT_SWR &&
	    entry->z_type != VIRTIO_BLK_ZT_SWP &&
	    entry->z_type != VIRTIO_BLK_ZT_CONV) {
		dev_err(&vblk->vdev->dev, "invalid zone type %#x\n",
			entry->z_type);
		return -EINVAL;
	}

	zone.type = entry->z_type;
	zone.cond = entry->z_state;
	zone.len = zone_sectors;
	zone.capacity = le64_to_cpu(entry->z_cap);
	zone.start = le64_to_cpu(entry->z_start);
	if (zone.cond == BLK_ZONE_COND_FULL)
		zone.wp = zone.start + zone.len;
	else
		zone.wp = le64_to_cpu(entry->z_wp);

	return cb(&zone, idx, data);
}

static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
				unsigned int nr_zones, report_zones_cb cb,
				void *data)
{
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_blk_zone_report *report;
	unsigned int zone_sectors = vblk->zone_sectors;
	unsigned int nz, i;
	int ret, zone_idx = 0;
	size_t buflen;

	if (WARN_ON_ONCE(!vblk->zone_sectors))
		return -EOPNOTSUPP;

	report = virtblk_alloc_report_buffer(vblk, nr_zones,
					     zone_sectors, &buflen);
	if (!report)
		return -ENOMEM;

	while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) {
		memset(report, 0, buflen);

		ret = virtblk_submit_zone_report(vblk, (char *)report,
						 buflen, sector);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out_free;
		}
		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
		if (!nz)
			break;

		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
			ret = virtblk_parse_zone(vblk, &report->zones[i],
						 zone_idx, zone_sectors, cb, data);
			if (ret)
				goto out_free;
			sector = le64_to_cpu(report->zones[i].z_start) + zone_sectors;
			zone_idx++;
		}
	}

	if (zone_idx > 0)
		ret = zone_idx;
	else
		ret = -EINVAL;
out_free:
	kvfree(report);
	return ret;
}

static void virtblk_revalidate_zones(struct virtio_blk *vblk)
{
	u8 model;

	if (!vblk->zone_sectors)
		return;

	virtio_cread(vblk->vdev, struct virtio_blk_config,
		     zoned.model, &model);
	if (!blk_revalidate_disk_zones(vblk->disk, NULL))
		set_capacity_and_notify(vblk->disk, 0);
}

static int virtblk_probe_zoned_device(struct virtio_device *vdev,
				      struct virtio_blk *vblk,
				      struct request_queue *q)
{
	u32 v;
	u8 model;
	int ret;

	virtio_cread(vdev, struct virtio_blk_config,
		     zoned.model, &model);

	switch (model) {
	case VIRTIO_BLK_Z_NONE:
		return 0;
	case VIRTIO_BLK_Z_HM:
		break;
	case VIRTIO_BLK_Z_HA:
		/*
		 * Present the host-aware device as a regular drive.
		 * TODO It is possible to add an option to make it appear
		 * in the system as a zoned drive.
		 */
		return 0;
	default:
		dev_err(&vdev->dev, "unsupported zone model %d\n", model);
		return -EINVAL;
	}

	dev_dbg(&vdev->dev, "probing host-managed zoned device\n");

	disk_set_zoned(vblk->disk, BLK_ZONED_HM);
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);

	virtio_cread(vdev, struct virtio_blk_config,
		     zoned.max_open_zones, &v);
	disk_set_max_open_zones(vblk->disk, le32_to_cpu(v));

	dev_dbg(&vdev->dev, "max open zones = %u\n", le32_to_cpu(v));

	virtio_cread(vdev, struct virtio_blk_config,
		     zoned.max_active_zones, &v);
	disk_set_max_active_zones(vblk->disk, le32_to_cpu(v));
	dev_dbg(&vdev->dev, "max active zones = %u\n", le32_to_cpu(v));

	virtio_cread(vdev, struct virtio_blk_config,
		     zoned.write_granularity, &v);
	if (!v) {
		dev_warn(&vdev->dev, "zero write granularity reported\n");
		return -ENODEV;
	}
	blk_queue_physical_block_size(q, le32_to_cpu(v));
	blk_queue_io_min(q, le32_to_cpu(v));

	dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v));

	/*
	 * virtio ZBD specification doesn't require zones to be a power of
	 * two sectors in size, but the code in this driver expects that.
	 */
	virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v);
	vblk->zone_sectors = le32_to_cpu(v);
	if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) {
		dev_err(&vdev->dev,
			"zoned device with non power of two zone size %u\n",
			vblk->zone_sectors);
		return -ENODEV;
	}
	dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors);

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		dev_warn(&vblk->vdev->dev,
			 "ignoring negotiated F_DISCARD for zoned device\n");
		blk_queue_max_discard_sectors(q, 0);
	}

	ret = blk_revalidate_disk_zones(vblk->disk, NULL);
	if (!ret) {
		virtio_cread(vdev, struct virtio_blk_config,
			     zoned.max_append_sectors, &v);
		if (!v) {
			dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
			return -ENODEV;
		}
		blk_queue_max_zone_append_sectors(q, le32_to_cpu(v));
		dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v));
	}

	return ret;
}

static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev)
{
	return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED);
}
#else

/*
 * Zoned block device support is not configured in this kernel.
 * We only need to define a few symbols to avoid compilation errors.
 */
#define virtblk_report_zones NULL
static inline void virtblk_revalidate_zones(struct virtio_blk *vblk)
{
}
static inline int virtblk_probe_zoned_device(struct virtio_device *vdev,
			struct virtio_blk *vblk, struct request_queue *q)
{
	return -EOPNOTSUPP;
}

static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev)
{
	return false;
}
#endif /* CONFIG_BLK_DEV_ZONED */

/* return id (s/n) string for *disk to *id_str */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request_queue *q = vblk->disk->queue;
	struct request *req;
	struct virtblk_req *vbr;
	int err;

	req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	vbr = blk_mq_rq_to_pdu(req);
	vbr->in_hdr_len = sizeof(vbr->status);
	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
	vbr->out_hdr.sector = 0;

	err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
	if (err)
		goto out;

	blk_execute_rq(req, false);
	err = blk_status_to_errno(virtblk_result(vbr->status));
out:
	blk_mq_free_request(req);
	return err;
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	int ret = 0;

	mutex_lock(&vblk->vdev_mutex);

	if (!vblk->vdev) {
		ret = -ENXIO;
		goto out;
	}

	/* see if the host passed in geometry config */
	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.cylinders, &geo->cylinders);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.heads, &geo->heads);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.sectors, &geo->sectors);
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
out:
	mutex_unlock(&vblk->vdev_mutex);
	return ret;
}

static void virtblk_free_disk(struct gendisk *disk)
{
	struct virtio_blk *vblk = disk->private_data;

	ida_free(&vd_index_ida, vblk->index);
	mutex_destroy(&vblk->vdev_mutex);
	kfree(vblk);
}

static const struct block_device_operations virtblk_fops = {
	.owner = THIS_MODULE,
	.getgeo = virtblk_getgeo,
	.free_disk = virtblk_free_disk,
	.report_zones = virtblk_report_zones,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static int minor_to_index(int minor)
{
	return minor >> PART_BITS;
}

static ssize_t serial_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}

static DEVICE_ATTR_RO(serial);

/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
{
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	unsigned long long nblocks;
	u64 capacity;

	/* Host must always specify the capacity. */
	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

	nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);

	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		   "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
		   vblk->disk->disk_name,
		   resize ? "new size: " : "",
		   nblocks,
		   queue_logical_block_size(q),
		   cap_str_10,
		   cap_str_2);

	set_capacity_and_notify(vblk->disk, capacity);
}

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);

	virtblk_revalidate_zones(vblk);
	virtblk_update_capacity(vblk, true);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int init_vq(struct virtio_blk *vblk)
{
	int err;
	int i;
	vq_callback_t **callbacks;
	const char **names;
	struct virtqueue **vqs;
	unsigned short num_vqs;
	unsigned int num_poll_vqs;
	struct virtio_device *vdev = vblk->vdev;
	struct irq_affinity desc = { 0, };

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
				   struct virtio_blk_config, num_queues,
				   &num_vqs);
	if (err)
		num_vqs = 1;

	if (!err && !num_vqs) {
		dev_err(&vdev->dev, "MQ advertised but zero queues reported\n");
		return -EINVAL;
	}

	num_vqs = min_t(unsigned int,
			min_not_zero(num_request_queues, nr_cpu_ids),
			num_vqs);

	num_poll_vqs = min_t(unsigned int, poll_queues, num_vqs - 1);

	vblk->io_queues[HCTX_TYPE_DEFAULT] = num_vqs - num_poll_vqs;
	vblk->io_queues[HCTX_TYPE_READ] = 0;
	vblk->io_queues[HCTX_TYPE_POLL] = num_poll_vqs;

	dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n",
		 vblk->io_queues[HCTX_TYPE_DEFAULT],
		 vblk->io_queues[HCTX_TYPE_READ],
		 vblk->io_queues[HCTX_TYPE_POLL]);

	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
	if (!vblk->vqs)
		return -ENOMEM;

	names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
	callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
	if (!names || !callbacks || !vqs) {
		err = -ENOMEM;
		goto out;
	}

	for (i = 0; i < num_vqs - num_poll_vqs; i++) {
		callbacks[i] = virtblk_done;
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
		names[i] = vblk->vqs[i].name;
	}

	for (; i < num_vqs; i++) {
		callbacks[i] = NULL;
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%d", i);
		names[i] = vblk->vqs[i].name;
	}

	/* Discover virtqueues and write information to configuration. */
	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
	if (err)
		goto out;

	for (i = 0; i < num_vqs; i++) {
		spin_lock_init(&vblk->vqs[i].lock);
		vblk->vqs[i].vq = vqs[i];
	}
	vblk->num_vqs = num_vqs;

out:
	kfree(vqs);
	kfree(callbacks);
	kfree(names);
	if (err)
		kfree(vblk->vqs);
	return err;
}

/*
 * Legacy naming scheme used for virtio devices. We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}

static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
	u8 writeback;
	int err;

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
				   struct virtio_blk_config, wce,
				   &writeback);

	/*
	 * If WCE is not configurable and flush is not available,
	 * assume no writeback cache is in use.
	 */
	if (err)
		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);

	return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
	u8 writeback = virtblk_get_cache_mode(vdev);
	struct virtio_blk *vblk = vdev->priv;

	blk_queue_write_cache(vblk->disk->queue, writeback, false);
}

static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};

static ssize_t
cache_type_store(struct device *dev, struct device_attribute *attr,
		 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;
	int i;

	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
	i = sysfs_match_string(virtblk_cache_types, buf);
	if (i < 0)
		return i;

	virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
	virtblk_update_cache_mode(vdev);
	return count;
}

static ssize_t
cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	u8 writeback = virtblk_get_cache_mode(vblk->vdev);

	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
	return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]);
}

static DEVICE_ATTR_RW(cache_type);

static struct attribute *virtblk_attrs[] = {
	&dev_attr_serial.attr,
	&dev_attr_cache_type.attr,
	NULL,
};

static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
		struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;

	if (a == &dev_attr_cache_type.attr &&
	    !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		return S_IRUGO;

	return a->mode;
}

static const struct attribute_group virtblk_attr_group = {
	.attrs = virtblk_attrs,
	.is_visible = virtblk_attrs_are_visible,
};

static const struct attribute_group *virtblk_attr_groups[] = {
	&virtblk_attr_group,
	NULL,
};

static void virtblk_map_queues(struct blk_mq_tag_set *set)
{
	struct virtio_blk *vblk = set->driver_data;
	int i, qoff;

	for (i = 0, qoff = 0; i < set->nr_maps; i++) {
		struct blk_mq_queue_map *map = &set->map[i];

		map->nr_queues = vblk->io_queues[i];
		map->queue_offset = qoff;
		qoff += map->nr_queues;

		if (map->nr_queues == 0)
			continue;

		/*
		 * Regular queues have interrupts and hence CPU affinity is
		 * defined by the core virtio code, but polling queues have
		 * no interrupts so we let the block layer assign CPU affinity.
		 */
		if (i == HCTX_TYPE_POLL)
			blk_mq_map_queues(&set->map[i]);
		else
			blk_mq_virtio_map_queues(&set->map[i], vblk->vdev, 0);
	}
}

static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
	unsigned long flags;
	int found = 0;

	spin_lock_irqsave(&vq->lock, flags);
	found = virtblk_handle_req(vq, iob);

	if (found)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);

	spin_unlock_irqrestore(&vq->lock, flags);

	return found;
}

static const struct blk_mq_ops virtio_mq_ops = {
	.queue_rq = virtio_queue_rq,
	.queue_rqs = virtio_queue_rqs,
	.commit_rqs = virtio_commit_rqs,
	.complete = virtblk_request_done,
	.map_queues = virtblk_map_queues,
	.poll = virtblk_poll,
};

static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);

static int virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err, index;

	u32 v, blk_size, max_size, sg_elems, opt_io_size;
	u32 max_discard_segs = 0;
	u32 discard_granularity = 0;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;
	unsigned int queue_depth;

	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	err = ida_alloc_range(&vd_index_ida, 0,
			      minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	/* We need to know how many segments before we allocate. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
				   struct virtio_blk_config, seg_max,
				   &sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* Prevent integer overflows and honor max vq size */
	sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);

	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	mutex_init(&vblk->vdev_mutex);

	vblk->vdev = vdev;

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	/* Default queue sizing is to fill the ring. */
	if (!virtblk_queue_depth) {
		queue_depth = vblk->vqs[0].vq->num_free;
		/* ... but without indirect descs, we use 2 descs per req */
		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
			queue_depth /= 2;
	} else {
		queue_depth = virtblk_queue_depth;
	}

	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
	vblk->tag_set.ops = &virtio_mq_ops;
	vblk->tag_set.queue_depth = queue_depth;
	vblk->tag_set.numa_node = NUMA_NO_NODE;
	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	vblk->tag_set.cmd_size =
		sizeof(struct virtblk_req) +
		sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
	vblk->tag_set.driver_data = vblk;
	vblk->tag_set.nr_hw_queues = vblk->num_vqs;
	vblk->tag_set.nr_maps = 1;
	if (vblk->io_queues[HCTX_TYPE_POLL])
		vblk->tag_set.nr_maps = 3;

	err = blk_mq_alloc_tag_set(&vblk->tag_set);
	if (err)
		goto out_free_vq;

	vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk);
	if (IS_ERR(vblk->disk)) {
		err = PTR_ERR(vblk->disk);
		goto out_free_tags;
	}
	q = vblk->disk->queue;

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->minors = 1 << PART_BITS;
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->index = index;

	/* configure queue flush support */
	virtblk_update_cache_mode(vdev);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, sg_elems);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, UINT_MAX);

	max_size = virtio_max_dma_size(vdev);

	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
				   struct virtio_blk_config, size_max, &v);
	if (!err)
		max_size = min(max_size, v);

	blk_queue_max_segment_size(q, max_size);

	/* Host can optionally specify the block size of the device */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
				   struct virtio_blk_config, blk_size,
				   &blk_size);
	if (!err) {
		err = blk_validate_block_size(blk_size);
		if (err) {
			dev_err(&vdev->dev,
				"virtio_blk: invalid block size: 0x%x\n",
				blk_size);
			goto out_cleanup_disk;
		}

		blk_queue_logical_block_size(q, blk_size);
	} else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, physical_block_exp,
				   &physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, alignment_offset,
				   &alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, min_io_size,
				   &min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, opt_io_size,
				   &opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     discard_sector_alignment, &discard_granularity);

		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_sectors, &v);
		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);

		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
			     &max_discard_segs);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     max_write_zeroes_sectors, &v);
		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
	}

	/* The discard and secure erase limits are combined since the Linux
	 * block layer uses the same limit for both commands.
	 *
	 * If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features
	 * are negotiated, we will use the minimum between the limits.
	 *
	 * discard sector alignment is set to the minimum between discard_sector_alignment
	 * and secure_erase_sector_alignment.
	 *
	 * max discard segments is set to the minimum between max_discard_seg and
	 * max_secure_erase_seg.
	 */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {

		virtio_cread(vdev, struct virtio_blk_config,
			     secure_erase_sector_alignment, &v);

		/* secure_erase_sector_alignment should not be zero, the device should set a
		 * valid number of sectors.
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: secure_erase_sector_alignment can't be 0\n");
			err = -EINVAL;
			goto out_cleanup_disk;
		}

		discard_granularity = min_not_zero(discard_granularity, v);

		virtio_cread(vdev, struct virtio_blk_config,
			     max_secure_erase_sectors, &v);

		/* max_secure_erase_sectors should not be zero, the device should set a
		 * valid number of sectors.
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: max_secure_erase_sectors can't be 0\n");
			err = -EINVAL;
			goto out_cleanup_disk;
		}

		blk_queue_max_secure_erase_sectors(q, v);

		virtio_cread(vdev, struct virtio_blk_config,
			     max_secure_erase_seg, &v);

		/* max_secure_erase_seg should not be zero, the device should set a
		 * valid number of segments
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: max_secure_erase_seg can't be 0\n");
			err = -EINVAL;
			goto out_cleanup_disk;
		}

		max_discard_segs = min_not_zero(max_discard_segs, v);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) ||
	    virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
		/* max_discard_seg and discard_granularity will be 0 only
		 * if max_discard_seg and discard_sector_alignment fields in the virtio
		 * config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated.
		 * In this case, we use default values.
		 */
		if (!max_discard_segs)
			max_discard_segs = sg_elems;

		blk_queue_max_discard_segments(q,
					       min(max_discard_segs, MAX_DISCARD_SEGMENTS));

		if (discard_granularity)
			q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT;
		else
			q->limits.discard_granularity = blk_size;
	}

	virtblk_update_capacity(vblk, false);
	virtio_device_ready(vdev);

	if (virtblk_has_zoned_feature(vdev)) {
		err = virtblk_probe_zoned_device(vdev, vblk, q);
		if (err)
			goto out_cleanup_disk;
	}

	dev_info(&vdev->dev, "blk config size: %zu\n",
		 sizeof(struct virtio_blk_config));

	err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
	if (err)
		goto out_cleanup_disk;

	return 0;

out_cleanup_disk:
	put_disk(vblk->disk);
out_free_tags:
	blk_mq_free_tag_set(&vblk->tag_set);
out_free_vq:
	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_free(&vd_index_ida, index);
out:
	return err;
}

static void virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	del_gendisk(vblk->disk);
	blk_mq_free_tag_set(&vblk->tag_set);

	mutex_lock(&vblk->vdev_mutex);

	/* Stop all the virtqueues. */
	virtio_reset_device(vdev);

	/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
	vblk->vdev = NULL;

	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	mutex_unlock(&vblk->vdev_mutex);

	put_disk(vblk->disk);
}

#ifdef CONFIG_PM_SLEEP
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure we don't receive any more interrupts */
	virtio_reset_device(vdev);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	blk_mq_quiesce_queue(vblk->disk->queue);

	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	ret = init_vq(vdev->priv);
	if (ret)
		return ret;

	virtio_device_ready(vdev);

	blk_mq_unquiesce_queue(vblk->disk->queue);
	return 0;
}
#endif

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features_legacy[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
	VIRTIO_BLK_F_SECURE_ERASE,
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
	VIRTIO_BLK_F_SECURE_ERASE,
#ifdef CONFIG_BLK_DEV_ZONED
	VIRTIO_BLK_F_ZONED,
#endif /* CONFIG_BLK_DEV_ZONED */
};

static struct virtio_driver virtio_blk = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.feature_table_legacy = features_legacy,
	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.probe = virtblk_probe,
	.remove = virtblk_remove,
	.config_changed = virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze = virtblk_freeze,
	.restore = virtblk_restore,
#endif
};

static int __init virtio_blk_init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit virtio_blk_fini(void)
{
	unregister_virtio_driver(&virtio_blk);
	unregister_blkdev(major, "virtblk");
	destroy_workqueue(virtblk_wq);
}
module_init(virtio_blk_init);
module_exit(virtio_blk_fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");