xref: /linux/drivers/block/virtio_blk.c (revision d229807f669ba3dea9f64467ee965051c4366aed)
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>

#define PART_BITS 4

static int major, index;
struct workqueue_struct *virtblk_wq;

struct virtio_blk
{
	spinlock_t lock;

	struct virtio_device *vdev;
	struct virtqueue *vq;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Request tracking. */
	struct list_head reqs;

	mempool_t *pool;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* What the host tells us, plus 2 for the header & status trailer. */
	unsigned int sg_elems;

	/* Scatterlist: can be too big for stack. */
	struct scatterlist sg[/*sg_elems*/];
};

struct virtblk_req
{
	struct list_head list;
	struct request *req;
	struct virtio_blk_outhdr out_hdr;
	struct virtio_scsi_inhdr in_hdr;
	u8 status;
};
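
/*
 * Rough sketch of the buffer layout do_req() builds on the virtqueue for
 * a single request (the exact segment counts depend on the request type
 * and on what blk_rq_map_sg() produces):
 *
 *   out: [out_hdr] [scsi cmd, BLOCK_PC only] [write data ...]
 *   in:  [read data ...] [sense + in_hdr, BLOCK_PC only] [status]
 *
 * The out_hdr and the trailing status byte are the two extra sg elements
 * reserved beyond the host's advertised seg_max.
 */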

static void blk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	struct virtblk_req *vbr;
	unsigned int len;
	unsigned long flags;

	spin_lock_irqsave(&vblk->lock, flags);
	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
		int error;

		switch (vbr->status) {
		case VIRTIO_BLK_S_OK:
			error = 0;
			break;
		case VIRTIO_BLK_S_UNSUPP:
			error = -ENOTTY;
			break;
		default:
			error = -EIO;
			break;
		}

		switch (vbr->req->cmd_type) {
		case REQ_TYPE_BLOCK_PC:
			vbr->req->resid_len = vbr->in_hdr.residual;
			vbr->req->sense_len = vbr->in_hdr.sense_len;
			vbr->req->errors = vbr->in_hdr.errors;
			break;
		case REQ_TYPE_SPECIAL:
			vbr->req->errors = (error != 0);
			break;
		default:
			break;
		}

		__blk_end_request_all(vbr->req, error);
		list_del(&vbr->list);
		mempool_free(vbr, vblk->pool);
	}
	/* In case queue is stopped waiting for more buffers. */
	blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(&vblk->lock, flags);
}

static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
		   struct request *req)
{
	unsigned long num, out = 0, in = 0;
	struct virtblk_req *vbr;

	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
	if (!vbr)
		/* When another request finishes we'll try again. */
		return false;

	vbr->req = req;

	if (req->cmd_flags & REQ_FLUSH) {
		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
		vbr->out_hdr.sector = 0;
		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
	} else {
		switch (req->cmd_type) {
		case REQ_TYPE_FS:
			vbr->out_hdr.type = 0;
			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_BLOCK_PC:
			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_SPECIAL:
			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		default:
			/* We don't put anything else in the queue. */
			BUG();
		}
	}

	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));

	/*
	 * If this is a packet command we need a couple of additional headers.
	 * Behind the normal outhdr we put a segment with the scsi command
	 * block, and before the final status byte we put the sense data and
	 * an inhdr with additional status information.
	 */
	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);

	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);

	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
			   sizeof(vbr->in_hdr));
	}

	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
		   sizeof(vbr->status));

	if (num) {
		if (rq_data_dir(vbr->req) == WRITE) {
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
			out += num;
		} else {
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
			in += num;
		}
	}

	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
		mempool_free(vbr, vblk->pool);
		return false;
	}

	list_add_tail(&vbr->list, &vblk->reqs);
	return true;
}

static void do_virtblk_request(struct request_queue *q)
{
	struct virtio_blk *vblk = q->queuedata;
	struct request *req;
	unsigned int issued = 0;

	while ((req = blk_peek_request(q)) != NULL) {
		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

		/* If this request fails, stop queue and wait for something to
		   finish to restart it. */
		if (!do_req(q, vblk, req)) {
			blk_stop_queue(q);
			break;
		}
		blk_start_request(req);
		issued++;
	}

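	/*
	 * Kick the host only once for the whole batch queued above; each
	 * kick is a guest-to-host notification, so batching keeps that
	 * overhead down.
	 */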
	if (issued)
		virtqueue_kick(vblk->vq);
}

/* Return the serial number (ID) string for *disk in *id_str. */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request *req;
	struct bio *bio;
	int err;

	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
			   GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
	if (IS_ERR(req)) {
		bio_put(bio);
		return PTR_ERR(req);
	}

	req->cmd_type = REQ_TYPE_SPECIAL;
	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
	blk_put_request(req);

	return err;
}

static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long data)
{
	struct gendisk *disk = bdev->bd_disk;
	struct virtio_blk *vblk = disk->private_data;

	/*
	 * Only allow the generic SCSI ioctls if the host can support it.
	 */
	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
		return -ENOTTY;

	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
			      (void __user *)data);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	struct virtio_blk_geometry vgeo;
	int err;

	/* see if the host passed in geometry config */
	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
				offsetof(struct virtio_blk_config, geometry),
				&vgeo);

	if (!err) {
		geo->heads = vgeo.heads;
		geo->sectors = vgeo.sectors;
		geo->cylinders = vgeo.cylinders;
	} else {
		/* some standard values, similar to sd */
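		/* 64 heads * 32 sectors = 2048 sectors per cylinder,
		 * hence the capacity >> 11 below. */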
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
	return 0;
}

static const struct block_device_operations virtblk_fops = {
	.ioctl  = virtblk_ioctl,
	.owner  = THIS_MODULE,
	.getgeo = virtblk_getgeo,
};

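/*
 * Each disk reserves 1 << PART_BITS == 16 minor numbers: index 0 maps to
 * minor 0, index 1 to minor 16, index 2 to minor 32, and so on.
 */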
static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static ssize_t virtblk_serial_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
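/*
 * The attribute is registered against the gendisk in virtblk_probe(), so
 * the device serial number shows up in sysfs, e.g. as
 * /sys/block/vda/serial for the first disk.
 */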

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	u64 capacity, size;

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &capacity, sizeof(capacity));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)capacity != capacity) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)capacity);
		capacity = (sector_t)-1;
	}

	size = capacity * queue_logical_block_size(q);
	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
		  (unsigned long long)capacity,
		  queue_logical_block_size(q),
		  cap_str_10, cap_str_2);

	set_capacity(vblk->disk, capacity);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int __devinit virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err;
	u64 cap;
	u32 v, blk_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;

	if (index_to_minor(index) >= 1 << MINORBITS)
		return -ENOSPC;

	/* We need to know how many segments before we allocate. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
				offsetof(struct virtio_blk_config, seg_max),
				&sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&vblk->reqs);
	spin_lock_init(&vblk->lock);
	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;
	sg_init_table(vblk->sg, vblk->sg_elems);
	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	/* We expect one virtqueue, for output. */
	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
	if (IS_ERR(vblk->vq)) {
		err = PTR_ERR(vblk->vq);
		goto out_free_vblk;
	}

	vblk->pool = mempool_create_kmalloc_pool(1, sizeof(struct virtblk_req));
	if (!vblk->pool) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_mempool;
	}

	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
	if (!q) {
		err = -ENOMEM;
		goto out_put_disk;
	}

	q->queuedata = vblk;

	if (index < 26) {
		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
	} else if (index < (26 + 1) * 26) {
		sprintf(vblk->disk->disk_name, "vd%c%c",
			'a' + index / 26 - 1, 'a' + index % 26);
	} else {
		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
		const unsigned int m2 = (index / 26 - 1) % 26;
		const unsigned int m3 =  index % 26;
		sprintf(vblk->disk->disk_name, "vd%c%c%c",
			'a' + m1, 'a' + m2, 'a' + m3);
	}
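
	/*
	 * The scheme above yields vda..vdz for the first 26 disks, then
	 * vdaa..vdzz, then vdaaa and so on: for example index 0 -> vda,
	 * index 26 -> vdaa, index 702 -> vdaaa.
	 */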

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->driverfs_dev = &vdev->dev;
	index++;

	/* configure queue flush support */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
		blk_queue_flush(q, REQ_FLUSH);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &cap, sizeof(cap));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)cap);
		cap = (sector_t)-1;
	}
	set_capacity(vblk->disk, cap);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems-2);

	/* No need to bounce any requests */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

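	/*
	 * Each virtio_config_val() call below returns an error when the
	 * corresponding feature bit was not negotiated, in which case we
	 * simply keep a sensible default.
	 */
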
	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
				offsetof(struct virtio_blk_config, size_max),
				&v);
	if (!err)
		blk_queue_max_segment_size(q, v);
	else
		blk_queue_max_segment_size(q, -1U);

	/* Host can optionally specify the block size of the device */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
				offsetof(struct virtio_blk_config, blk_size),
				&blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, physical_block_exp),
			&physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, alignment_offset),
			&alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, min_io_size),
			&min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, opt_io_size),
			&opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	add_disk(vblk->disk);
	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
	if (err)
		goto out_del_disk;

	return 0;

out_del_disk:
	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
out_put_disk:
	put_disk(vblk->disk);
out_mempool:
	mempool_destroy(vblk->pool);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out:
	return err;
}

static void __devexit virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	flush_work(&vblk->config_work);

	/* Nothing should be pending. */
	BUG_ON(!list_empty(&vblk->reqs));

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
	put_disk(vblk->disk);
	mempool_destroy(vblk->pool);
	vdev->config->del_vqs(vdev);
	kfree(vblk);
}

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
};

/*
 * virtio_blk causes spurious section mismatch warning by
 * simultaneously referring to a __devinit and a __devexit function.
 * Use __refdata to avoid this warning.
 */
static struct virtio_driver __refdata virtio_blk = {
	.feature_table		= features,
	.feature_table_size	= ARRAY_SIZE(features),
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.probe			= virtblk_probe,
	.remove			= __devexit_p(virtblk_remove),
	.config_changed		= virtblk_config_changed,
};

static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit fini(void)
{
	unregister_blkdev(major, "virtblk");
	unregister_virtio_driver(&virtio_blk);
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");