xref: /linux/drivers/block/virtio_blk.c (revision a67ff6a54095e27093ea501fb143fefe51a536c2)
1 //#define DEBUG
2 #include <linux/spinlock.h>
3 #include <linux/slab.h>
4 #include <linux/blkdev.h>
5 #include <linux/hdreg.h>
6 #include <linux/module.h>
7 #include <linux/virtio.h>
8 #include <linux/virtio_blk.h>
9 #include <linux/scatterlist.h>
10 #include <linux/string_helpers.h>
11 #include <scsi/scsi_cmnd.h>
12 #include <linux/idr.h>
13 
/* Number of minor-number bits reserved per disk (for its partitions). */
#define PART_BITS 4

/* Block major number returned by register_blkdev() at module init. */
static int major;
/* Allocator for per-disk indices; see index_to_minor()/minor_to_index(). */
static DEFINE_IDA(vd_index_ida);

/*
 * Workqueue on which config-space change handling runs.  Only used inside
 * this file, so keep it out of the global namespace.
 */
static struct workqueue_struct *virtblk_wq;
20 
/*
 * Per-device driver state.  Allocated in virtblk_probe() together with
 * sg_elems trailing scatterlist entries and freed in virtblk_remove().
 */
struct virtio_blk
{
	/* Taken in blk_done(); also handed to blk_init_queue() as queue lock. */
	spinlock_t lock;

	/* The virtio device and its single "requests" virtqueue. */
	struct virtio_device *vdev;
	struct virtqueue *vq;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/*
	 * Request tracking: requests added to the virtqueue in do_req() and
	 * not yet completed by blk_done().
	 */
	struct list_head reqs;

	/* Pool that struct virtblk_req objects are allocated from. */
	mempool_t *pool;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* What host tells us, plus 2 for header & tailer. */
	unsigned int sg_elems;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* Scatterlist: can be too big for stack. */
	struct scatterlist sg[/*sg_elems*/];
};
48 
/*
 * Driver-side bookkeeping for one request handed to the device.
 * Allocated from virtio_blk.pool in do_req() and released in blk_done().
 */
struct virtblk_req
{
	/* Link in virtio_blk.reqs while the request is in flight. */
	struct list_head list;
	/* The block layer request this entry describes. */
	struct request *req;
	/* Request header sent to the device (type, ioprio, sector). */
	struct virtio_blk_outhdr out_hdr;
	/* Extra result header, only placed in the scatterlist for REQ_TYPE_BLOCK_PC. */
	struct virtio_scsi_inhdr in_hdr;
	/* Completion status byte; compared against VIRTIO_BLK_S_* in blk_done(). */
	u8 status;
};
57 
58 static void blk_done(struct virtqueue *vq)
59 {
60 	struct virtio_blk *vblk = vq->vdev->priv;
61 	struct virtblk_req *vbr;
62 	unsigned int len;
63 	unsigned long flags;
64 
65 	spin_lock_irqsave(&vblk->lock, flags);
66 	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
67 		int error;
68 
69 		switch (vbr->status) {
70 		case VIRTIO_BLK_S_OK:
71 			error = 0;
72 			break;
73 		case VIRTIO_BLK_S_UNSUPP:
74 			error = -ENOTTY;
75 			break;
76 		default:
77 			error = -EIO;
78 			break;
79 		}
80 
81 		switch (vbr->req->cmd_type) {
82 		case REQ_TYPE_BLOCK_PC:
83 			vbr->req->resid_len = vbr->in_hdr.residual;
84 			vbr->req->sense_len = vbr->in_hdr.sense_len;
85 			vbr->req->errors = vbr->in_hdr.errors;
86 			break;
87 		case REQ_TYPE_SPECIAL:
88 			vbr->req->errors = (error != 0);
89 			break;
90 		default:
91 			break;
92 		}
93 
94 		__blk_end_request_all(vbr->req, error);
95 		list_del(&vbr->list);
96 		mempool_free(vbr, vblk->pool);
97 	}
98 	/* In case queue is stopped waiting for more buffers. */
99 	blk_start_queue(vblk->disk->queue);
100 	spin_unlock_irqrestore(&vblk->lock, flags);
101 }
102 
103 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
104 		   struct request *req)
105 {
106 	unsigned long num, out = 0, in = 0;
107 	struct virtblk_req *vbr;
108 
109 	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
110 	if (!vbr)
111 		/* When another request finishes we'll try again. */
112 		return false;
113 
114 	vbr->req = req;
115 
116 	if (req->cmd_flags & REQ_FLUSH) {
117 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
118 		vbr->out_hdr.sector = 0;
119 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
120 	} else {
121 		switch (req->cmd_type) {
122 		case REQ_TYPE_FS:
123 			vbr->out_hdr.type = 0;
124 			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
125 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
126 			break;
127 		case REQ_TYPE_BLOCK_PC:
128 			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
129 			vbr->out_hdr.sector = 0;
130 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
131 			break;
132 		case REQ_TYPE_SPECIAL:
133 			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
134 			vbr->out_hdr.sector = 0;
135 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
136 			break;
137 		default:
138 			/* We don't put anything else in the queue. */
139 			BUG();
140 		}
141 	}
142 
143 	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
144 
145 	/*
146 	 * If this is a packet command we need a couple of additional headers.
147 	 * Behind the normal outhdr we put a segment with the scsi command
148 	 * block, and before the normal inhdr we put the sense data and the
149 	 * inhdr with additional status information before the normal inhdr.
150 	 */
151 	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
152 		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
153 
154 	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
155 
156 	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
157 		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
158 		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
159 			   sizeof(vbr->in_hdr));
160 	}
161 
162 	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
163 		   sizeof(vbr->status));
164 
165 	if (num) {
166 		if (rq_data_dir(vbr->req) == WRITE) {
167 			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
168 			out += num;
169 		} else {
170 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
171 			in += num;
172 		}
173 	}
174 
175 	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
176 		mempool_free(vbr, vblk->pool);
177 		return false;
178 	}
179 
180 	list_add_tail(&vbr->list, &vblk->reqs);
181 	return true;
182 }
183 
184 static void do_virtblk_request(struct request_queue *q)
185 {
186 	struct virtio_blk *vblk = q->queuedata;
187 	struct request *req;
188 	unsigned int issued = 0;
189 
190 	while ((req = blk_peek_request(q)) != NULL) {
191 		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
192 
193 		/* If this request fails, stop queue and wait for something to
194 		   finish to restart it. */
195 		if (!do_req(q, vblk, req)) {
196 			blk_stop_queue(q);
197 			break;
198 		}
199 		blk_start_request(req);
200 		issued++;
201 	}
202 
203 	if (issued)
204 		virtqueue_kick(vblk->vq);
205 }
206 
207 /* return id (s/n) string for *disk to *id_str
208  */
209 static int virtblk_get_id(struct gendisk *disk, char *id_str)
210 {
211 	struct virtio_blk *vblk = disk->private_data;
212 	struct request *req;
213 	struct bio *bio;
214 	int err;
215 
216 	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
217 			   GFP_KERNEL);
218 	if (IS_ERR(bio))
219 		return PTR_ERR(bio);
220 
221 	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
222 	if (IS_ERR(req)) {
223 		bio_put(bio);
224 		return PTR_ERR(req);
225 	}
226 
227 	req->cmd_type = REQ_TYPE_SPECIAL;
228 	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
229 	blk_put_request(req);
230 
231 	return err;
232 }
233 
234 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
235 			     unsigned int cmd, unsigned long data)
236 {
237 	struct gendisk *disk = bdev->bd_disk;
238 	struct virtio_blk *vblk = disk->private_data;
239 
240 	/*
241 	 * Only allow the generic SCSI ioctls if the host can support it.
242 	 */
243 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
244 		return -ENOTTY;
245 
246 	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
247 			      (void __user *)data);
248 }
249 
250 /* We provide getgeo only to please some old bootloader/partitioning tools */
251 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
252 {
253 	struct virtio_blk *vblk = bd->bd_disk->private_data;
254 	struct virtio_blk_geometry vgeo;
255 	int err;
256 
257 	/* see if the host passed in geometry config */
258 	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
259 				offsetof(struct virtio_blk_config, geometry),
260 				&vgeo);
261 
262 	if (!err) {
263 		geo->heads = vgeo.heads;
264 		geo->sectors = vgeo.sectors;
265 		geo->cylinders = vgeo.cylinders;
266 	} else {
267 		/* some standard values, similar to sd */
268 		geo->heads = 1 << 6;
269 		geo->sectors = 1 << 5;
270 		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
271 	}
272 	return 0;
273 }
274 
275 static const struct block_device_operations virtblk_fops = {
276 	.ioctl  = virtblk_ioctl,
277 	.owner  = THIS_MODULE,
278 	.getgeo = virtblk_getgeo,
279 };
280 
281 static int index_to_minor(int index)
282 {
283 	return index << PART_BITS;
284 }
285 
286 static int minor_to_index(int minor)
287 {
288 	return minor >> PART_BITS;
289 }
290 
291 static ssize_t virtblk_serial_show(struct device *dev,
292 				struct device_attribute *attr, char *buf)
293 {
294 	struct gendisk *disk = dev_to_disk(dev);
295 	int err;
296 
297 	/* sysfs gives us a PAGE_SIZE buffer */
298 	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
299 
300 	buf[VIRTIO_BLK_ID_BYTES] = '\0';
301 	err = virtblk_get_id(disk, buf);
302 	if (!err)
303 		return strlen(buf);
304 
305 	if (err == -EIO) /* Unsupported? Make it empty. */
306 		return 0;
307 
308 	return err;
309 }
/* Read-only "serial" attribute; only referenced from this file, so keep it static. */
static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
311 
312 static void virtblk_config_changed_work(struct work_struct *work)
313 {
314 	struct virtio_blk *vblk =
315 		container_of(work, struct virtio_blk, config_work);
316 	struct virtio_device *vdev = vblk->vdev;
317 	struct request_queue *q = vblk->disk->queue;
318 	char cap_str_2[10], cap_str_10[10];
319 	u64 capacity, size;
320 
321 	/* Host must always specify the capacity. */
322 	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
323 			  &capacity, sizeof(capacity));
324 
325 	/* If capacity is too big, truncate with warning. */
326 	if ((sector_t)capacity != capacity) {
327 		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
328 			 (unsigned long long)capacity);
329 		capacity = (sector_t)-1;
330 	}
331 
332 	size = capacity * queue_logical_block_size(q);
333 	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
334 	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
335 
336 	dev_notice(&vdev->dev,
337 		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
338 		  (unsigned long long)capacity,
339 		  queue_logical_block_size(q),
340 		  cap_str_10, cap_str_2);
341 
342 	set_capacity(vblk->disk, capacity);
343 }
344 
345 static void virtblk_config_changed(struct virtio_device *vdev)
346 {
347 	struct virtio_blk *vblk = vdev->priv;
348 
349 	queue_work(virtblk_wq, &vblk->config_work);
350 }
351 
352 static int __devinit virtblk_probe(struct virtio_device *vdev)
353 {
354 	struct virtio_blk *vblk;
355 	struct request_queue *q;
356 	int err, index;
357 	u64 cap;
358 	u32 v, blk_size, sg_elems, opt_io_size;
359 	u16 min_io_size;
360 	u8 physical_block_exp, alignment_offset;
361 
362 	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
363 			     GFP_KERNEL);
364 	if (err < 0)
365 		goto out;
366 	index = err;
367 
368 	/* We need to know how many segments before we allocate. */
369 	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
370 				offsetof(struct virtio_blk_config, seg_max),
371 				&sg_elems);
372 
373 	/* We need at least one SG element, whatever they say. */
374 	if (err || !sg_elems)
375 		sg_elems = 1;
376 
377 	/* We need an extra sg elements at head and tail. */
378 	sg_elems += 2;
379 	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
380 				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
381 	if (!vblk) {
382 		err = -ENOMEM;
383 		goto out_free_index;
384 	}
385 
386 	INIT_LIST_HEAD(&vblk->reqs);
387 	spin_lock_init(&vblk->lock);
388 	vblk->vdev = vdev;
389 	vblk->sg_elems = sg_elems;
390 	sg_init_table(vblk->sg, vblk->sg_elems);
391 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
392 
393 	/* We expect one virtqueue, for output. */
394 	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
395 	if (IS_ERR(vblk->vq)) {
396 		err = PTR_ERR(vblk->vq);
397 		goto out_free_vblk;
398 	}
399 
400 	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
401 	if (!vblk->pool) {
402 		err = -ENOMEM;
403 		goto out_free_vq;
404 	}
405 
406 	/* FIXME: How many partitions?  How long is a piece of string? */
407 	vblk->disk = alloc_disk(1 << PART_BITS);
408 	if (!vblk->disk) {
409 		err = -ENOMEM;
410 		goto out_mempool;
411 	}
412 
413 	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
414 	if (!q) {
415 		err = -ENOMEM;
416 		goto out_put_disk;
417 	}
418 
419 	q->queuedata = vblk;
420 
421 	if (index < 26) {
422 		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
423 	} else if (index < (26 + 1) * 26) {
424 		sprintf(vblk->disk->disk_name, "vd%c%c",
425 			'a' + index / 26 - 1, 'a' + index % 26);
426 	} else {
427 		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
428 		const unsigned int m2 = (index / 26 - 1) % 26;
429 		const unsigned int m3 =  index % 26;
430 		sprintf(vblk->disk->disk_name, "vd%c%c%c",
431 			'a' + m1, 'a' + m2, 'a' + m3);
432 	}
433 
434 	vblk->disk->major = major;
435 	vblk->disk->first_minor = index_to_minor(index);
436 	vblk->disk->private_data = vblk;
437 	vblk->disk->fops = &virtblk_fops;
438 	vblk->disk->driverfs_dev = &vdev->dev;
439 	vblk->index = index;
440 
441 	/* configure queue flush support */
442 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
443 		blk_queue_flush(q, REQ_FLUSH);
444 
445 	/* If disk is read-only in the host, the guest should obey */
446 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
447 		set_disk_ro(vblk->disk, 1);
448 
449 	/* Host must always specify the capacity. */
450 	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
451 			  &cap, sizeof(cap));
452 
453 	/* If capacity is too big, truncate with warning. */
454 	if ((sector_t)cap != cap) {
455 		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
456 			 (unsigned long long)cap);
457 		cap = (sector_t)-1;
458 	}
459 	set_capacity(vblk->disk, cap);
460 
461 	/* We can handle whatever the host told us to handle. */
462 	blk_queue_max_segments(q, vblk->sg_elems-2);
463 
464 	/* No need to bounce any requests */
465 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
466 
467 	/* No real sector limit. */
468 	blk_queue_max_hw_sectors(q, -1U);
469 
470 	/* Host can optionally specify maximum segment size and number of
471 	 * segments. */
472 	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
473 				offsetof(struct virtio_blk_config, size_max),
474 				&v);
475 	if (!err)
476 		blk_queue_max_segment_size(q, v);
477 	else
478 		blk_queue_max_segment_size(q, -1U);
479 
480 	/* Host can optionally specify the block size of the device */
481 	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
482 				offsetof(struct virtio_blk_config, blk_size),
483 				&blk_size);
484 	if (!err)
485 		blk_queue_logical_block_size(q, blk_size);
486 	else
487 		blk_size = queue_logical_block_size(q);
488 
489 	/* Use topology information if available */
490 	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
491 			offsetof(struct virtio_blk_config, physical_block_exp),
492 			&physical_block_exp);
493 	if (!err && physical_block_exp)
494 		blk_queue_physical_block_size(q,
495 				blk_size * (1 << physical_block_exp));
496 
497 	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
498 			offsetof(struct virtio_blk_config, alignment_offset),
499 			&alignment_offset);
500 	if (!err && alignment_offset)
501 		blk_queue_alignment_offset(q, blk_size * alignment_offset);
502 
503 	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
504 			offsetof(struct virtio_blk_config, min_io_size),
505 			&min_io_size);
506 	if (!err && min_io_size)
507 		blk_queue_io_min(q, blk_size * min_io_size);
508 
509 	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
510 			offsetof(struct virtio_blk_config, opt_io_size),
511 			&opt_io_size);
512 	if (!err && opt_io_size)
513 		blk_queue_io_opt(q, blk_size * opt_io_size);
514 
515 
516 	add_disk(vblk->disk);
517 	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
518 	if (err)
519 		goto out_del_disk;
520 
521 	return 0;
522 
523 out_del_disk:
524 	del_gendisk(vblk->disk);
525 	blk_cleanup_queue(vblk->disk->queue);
526 out_put_disk:
527 	put_disk(vblk->disk);
528 out_mempool:
529 	mempool_destroy(vblk->pool);
530 out_free_vq:
531 	vdev->config->del_vqs(vdev);
532 out_free_vblk:
533 	kfree(vblk);
534 out_free_index:
535 	ida_simple_remove(&vd_index_ida, index);
536 out:
537 	return err;
538 }
539 
540 static void __devexit virtblk_remove(struct virtio_device *vdev)
541 {
542 	struct virtio_blk *vblk = vdev->priv;
543 	int index = vblk->index;
544 
545 	flush_work(&vblk->config_work);
546 
547 	/* Nothing should be pending. */
548 	BUG_ON(!list_empty(&vblk->reqs));
549 
550 	/* Stop all the virtqueues. */
551 	vdev->config->reset(vdev);
552 
553 	del_gendisk(vblk->disk);
554 	blk_cleanup_queue(vblk->disk->queue);
555 	put_disk(vblk->disk);
556 	mempool_destroy(vblk->pool);
557 	vdev->config->del_vqs(vdev);
558 	kfree(vblk);
559 	ida_simple_remove(&vd_index_ida, index);
560 }
561 
/*
 * Devices this driver binds to: virtio block devices from any vendor.
 * The all-zero entry terminates the table.
 */
static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
566 
/*
 * Feature bits the driver understands; handed to the virtio core through
 * virtio_blk.feature_table.  Each one is checked or read in this file.
 */
static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
};
572 
573 /*
574  * virtio_blk causes spurious section mismatch warning by
575  * simultaneously referring to a __devinit and a __devexit function.
576  * Use __refdata to avoid this warning.
577  */
578 static struct virtio_driver __refdata virtio_blk = {
579 	.feature_table		= features,
580 	.feature_table_size	= ARRAY_SIZE(features),
581 	.driver.name		= KBUILD_MODNAME,
582 	.driver.owner		= THIS_MODULE,
583 	.id_table		= id_table,
584 	.probe			= virtblk_probe,
585 	.remove			= __devexit_p(virtblk_remove),
586 	.config_changed		= virtblk_config_changed,
587 };
588 
589 static int __init init(void)
590 {
591 	int error;
592 
593 	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
594 	if (!virtblk_wq)
595 		return -ENOMEM;
596 
597 	major = register_blkdev(0, "virtblk");
598 	if (major < 0) {
599 		error = major;
600 		goto out_destroy_workqueue;
601 	}
602 
603 	error = register_virtio_driver(&virtio_blk);
604 	if (error)
605 		goto out_unregister_blkdev;
606 	return 0;
607 
608 out_unregister_blkdev:
609 	unregister_blkdev(major, "virtblk");
610 out_destroy_workqueue:
611 	destroy_workqueue(virtblk_wq);
612 	return error;
613 }
614 
615 static void __exit fini(void)
616 {
617 	unregister_blkdev(major, "virtblk");
618 	unregister_virtio_driver(&virtio_blk);
619 	destroy_workqueue(virtblk_wq);
620 }
/* Module entry and exit points. */
module_init(init);
module_exit(fini);

/* Publish the virtio id table for this module, plus module metadata. */
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");
627