/* linux/drivers/block/virtio_blk.c (revision b889fcf63cb62e7fdb7816565e28f44dbe4a76a5) */
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
#include <linux/idr.h>

#define PART_BITS 4

static bool use_bio;
module_param(use_bio, bool, S_IRUGO);

static int major;
static DEFINE_IDA(vd_index_ida);

struct workqueue_struct *virtblk_wq;

struct virtio_blk
{
	struct virtio_device *vdev;
	struct virtqueue *vq;
	wait_queue_head_t queue_wait;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	mempool_t *pool;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* Lock for config space updates */
	struct mutex config_lock;

	/* enable config space updates */
	bool config_enable;

	/* What host tells us, plus 2 for header & trailer. */
	unsigned int sg_elems;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* Scatterlist: can be too big for stack. */
	struct scatterlist sg[/*sg_elems*/];
};

struct virtblk_req
{
	struct request *req;
	struct bio *bio;
	struct virtio_blk_outhdr out_hdr;
	struct virtio_scsi_inhdr in_hdr;
	struct work_struct work;
	struct virtio_blk *vblk;
	int flags;
	u8 status;
	struct scatterlist sg[];
};
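
/*
 * Layout of a request on the virtqueue: the out_hdr always sits in the
 * first, device-readable descriptor and the status byte in the last,
 * device-writable one, with any data segments in between.  For SCSI
 * passthrough the command block, sense buffer and in_hdr are added
 * around the data as well; see do_req() below.
 */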

enum {
	VBLK_IS_FLUSH		= 1,
	VBLK_REQ_FLUSH		= 2,
	VBLK_REQ_DATA		= 4,
	VBLK_REQ_FUA		= 8,
};

static inline int virtblk_result(struct virtblk_req *vbr)
{
	switch (vbr->status) {
	case VIRTIO_BLK_S_OK:
		return 0;
	case VIRTIO_BLK_S_UNSUPP:
		return -ENOTTY;
	default:
		return -EIO;
	}
}

static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
						    gfp_t gfp_mask)
{
	struct virtblk_req *vbr;

	vbr = mempool_alloc(vblk->pool, gfp_mask);
	if (!vbr)
		return NULL;

	vbr->vblk = vblk;
	if (use_bio)
		sg_init_table(vbr->sg, vblk->sg_elems);

	return vbr;
}

static void virtblk_add_buf_wait(struct virtio_blk *vblk,
				 struct virtblk_req *vbr,
				 unsigned long out,
				 unsigned long in)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
					  TASK_UNINTERRUPTIBLE);

		spin_lock_irq(vblk->disk->queue->queue_lock);
		if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
				      GFP_ATOMIC) < 0) {
			spin_unlock_irq(vblk->disk->queue->queue_lock);
			io_schedule();
		} else {
			virtqueue_kick(vblk->vq);
			spin_unlock_irq(vblk->disk->queue->queue_lock);
			break;
		}
	}

	finish_wait(&vblk->queue_wait, &wait);
}

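/*
 * Fast path for request submission: try to post the buffers under the
 * queue lock and kick the host.  If the virtqueue is full, fall back to
 * the sleeping slow path above, which retries each time a completion
 * frees space in the ring.
 */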
static inline void virtblk_add_req(struct virtblk_req *vbr,
				   unsigned int out, unsigned int in)
{
	struct virtio_blk *vblk = vbr->vblk;

	spin_lock_irq(vblk->disk->queue->queue_lock);
	if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
					GFP_ATOMIC) < 0)) {
		spin_unlock_irq(vblk->disk->queue->queue_lock);
		virtblk_add_buf_wait(vblk, vbr, out, in);
		return;
	}
	virtqueue_kick(vblk->vq);
	spin_unlock_irq(vblk->disk->queue->queue_lock);
}

static int virtblk_bio_send_flush(struct virtblk_req *vbr)
{
	unsigned int out = 0, in = 0;

	vbr->flags |= VBLK_IS_FLUSH;
	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
	vbr->out_hdr.sector = 0;
	vbr->out_hdr.ioprio = 0;
	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
	sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));

	virtblk_add_req(vbr, out, in);

	return 0;
}
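
/*
 * A flush is the minimal two-descriptor request: the header going out,
 * the status byte coming back, and no data segments in between.
 */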

static int virtblk_bio_send_data(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;
	unsigned int num, out = 0, in = 0;
	struct bio *bio = vbr->bio;

	vbr->flags &= ~VBLK_IS_FLUSH;
	vbr->out_hdr.type = 0;
	vbr->out_hdr.sector = bio->bi_sector;
	vbr->out_hdr.ioprio = bio_prio(bio);

	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));

	num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);

	sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
		   sizeof(vbr->status));

	if (num) {
		if (bio->bi_rw & REQ_WRITE) {
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
			out += num;
		} else {
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
			in += num;
		}
	}

	virtblk_add_req(vbr, out, in);

	return 0;
}
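
/*
 * Note the direction accounting above: data segments count as out
 * descriptors for a write and as in descriptors for a read.  A read bio
 * mapping three segments, for example, is posted with out = 1 (header)
 * and in = 4 (three data segments plus the status byte).
 */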

static void virtblk_bio_send_data_work(struct work_struct *work)
{
	struct virtblk_req *vbr;

	vbr = container_of(work, struct virtblk_req, work);

	virtblk_bio_send_data(vbr);
}

static void virtblk_bio_send_flush_work(struct work_struct *work)
{
	struct virtblk_req *vbr;

	vbr = container_of(work, struct virtblk_req, work);

	virtblk_bio_send_flush(vbr);
}

static inline void virtblk_request_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;
	struct request *req = vbr->req;
	int error = virtblk_result(vbr);

	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
		req->resid_len = vbr->in_hdr.residual;
		req->sense_len = vbr->in_hdr.sense_len;
		req->errors = vbr->in_hdr.errors;
	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
		req->errors = (error != 0);
	}

	__blk_end_request_all(req, error);
	mempool_free(vbr, vblk->pool);
}

static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;

	if (vbr->flags & VBLK_REQ_DATA) {
		/* Send out the actual write data */
		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
		queue_work(virtblk_wq, &vbr->work);
	} else {
		bio_endio(vbr->bio, virtblk_result(vbr));
		mempool_free(vbr, vblk->pool);
	}
}

static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;

	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
		/* Send out a flush before we end the bio */
		vbr->flags &= ~VBLK_REQ_DATA;
		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
		queue_work(virtblk_wq, &vbr->work);
	} else {
		bio_endio(vbr->bio, virtblk_result(vbr));
		mempool_free(vbr, vblk->pool);
	}
}

static inline void virtblk_bio_done(struct virtblk_req *vbr)
{
	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
		virtblk_bio_flush_done(vbr);
	else
		virtblk_bio_data_done(vbr);
}
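
/*
 * Completion sequencing for the bio path: a REQ_FLUSH|REQ_FUA write is
 * driven as up to three back-to-back requests, chained via the flags
 * set in virtblk_make_request():
 *
 *	flush -> (flush done, VBLK_REQ_DATA set) -> write data
 *	      -> (data done, VBLK_REQ_FUA set) -> flush -> bio_endio()
 *
 * Each follow-up step is bounced to the workqueue because the previous
 * one completes in interrupt context.
 */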

static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool bio_done = false, req_done = false;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
			if (vbr->bio) {
				virtblk_bio_done(vbr);
				bio_done = true;
			} else {
				virtblk_request_done(vbr);
				req_done = true;
			}
		}
	} while (!virtqueue_enable_cb(vq));
	/* In case queue is stopped waiting for more buffers. */
	if (req_done)
		blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);

	if (bio_done)
		wake_up(&vblk->queue_wait);
}
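
/*
 * virtblk_done() runs from the vring interrupt.  The disable_cb /
 * get_buf / enable_cb loop closes the race where the host adds another
 * buffer between the last get_buf and re-enabling callbacks:
 * virtqueue_enable_cb() returns false in that case and we go around
 * again.
 */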

static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
		   struct request *req)
{
	unsigned long num, out = 0, in = 0;
	struct virtblk_req *vbr;

	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
	if (!vbr)
		/* When another request finishes we'll try again. */
		return false;

	vbr->req = req;
	vbr->bio = NULL;
	if (req->cmd_flags & REQ_FLUSH) {
		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
		vbr->out_hdr.sector = 0;
		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
	} else {
		switch (req->cmd_type) {
		case REQ_TYPE_FS:
			vbr->out_hdr.type = 0;
			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_BLOCK_PC:
			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_SPECIAL:
			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		default:
			/* We don't put anything else in the queue. */
			BUG();
		}
	}

	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));

	/*
	 * If this is a packet command we need a couple of additional headers.
	 * Behind the normal outhdr we put a segment with the scsi command
	 * block, and before the normal inhdr we put the sense data and an
	 * inhdr with additional SCSI status information.
	 */
	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);

	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);

	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense,
			   SCSI_SENSE_BUFFERSIZE);
		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
			   sizeof(vbr->in_hdr));
	}

	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
		   sizeof(vbr->status));

	if (num) {
		if (rq_data_dir(vbr->req) == WRITE) {
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
			out += num;
		} else {
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
			in += num;
		}
	}

	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
			      GFP_ATOMIC) < 0) {
		mempool_free(vbr, vblk->pool);
		return false;
	}

	return true;
}
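
/*
 * Descriptor layout built by do_req(), in ring order:
 *
 *	FS request:	out_hdr | data .. | status
 *	SCSI command:	out_hdr | cmd | data .. | sense | in_hdr | status
 *
 * out_hdr and cmd are device-readable; sense, in_hdr and status are
 * device-writable; the data segments go in either direction depending
 * on rq_data_dir().
 */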

static void virtblk_request(struct request_queue *q)
{
	struct virtio_blk *vblk = q->queuedata;
	struct request *req;
	unsigned int issued = 0;

	while ((req = blk_peek_request(q)) != NULL) {
		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

		/* If this request fails, stop queue and wait for something to
		   finish to restart it. */
		if (!do_req(q, vblk, req)) {
			blk_stop_queue(q);
			break;
		}
		blk_start_request(req);
		issued++;
	}

	if (issued)
		virtqueue_kick(vblk->vq);
}

static void virtblk_make_request(struct request_queue *q, struct bio *bio)
{
	struct virtio_blk *vblk = q->queuedata;
	struct virtblk_req *vbr;

	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);

	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
	if (!vbr) {
		bio_endio(bio, -ENOMEM);
		return;
	}

	vbr->bio = bio;
	vbr->flags = 0;
	if (bio->bi_rw & REQ_FLUSH)
		vbr->flags |= VBLK_REQ_FLUSH;
	if (bio->bi_rw & REQ_FUA)
		vbr->flags |= VBLK_REQ_FUA;
	if (bio->bi_size)
		vbr->flags |= VBLK_REQ_DATA;

	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
		virtblk_bio_send_flush(vbr);
	else
		virtblk_bio_send_data(vbr);
}

/* Return the disk's serial number (id) string in *id_str. */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request *req;
	struct bio *bio;
	int err;

	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
			   GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
	if (IS_ERR(req)) {
		bio_put(bio);
		return PTR_ERR(req);
	}

	req->cmd_type = REQ_TYPE_SPECIAL;
	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
	blk_put_request(req);

	return err;
}
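
/*
 * The GET_ID request travels through the normal request queue as a
 * REQ_TYPE_SPECIAL request, so it is serialized against ordinary I/O.
 * The returned string is what the "serial" sysfs attribute below
 * exposes, typically as /sys/block/vdX/serial.
 */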

static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long data)
{
	struct gendisk *disk = bdev->bd_disk;
	struct virtio_blk *vblk = disk->private_data;

	/*
	 * Only allow the generic SCSI ioctls if the host supports them.
	 */
	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
		return -ENOTTY;

	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
				  (void __user *)data);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	struct virtio_blk_geometry vgeo;
	int err;

	/* see if the host passed in geometry config */
	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
				offsetof(struct virtio_blk_config, geometry),
				&vgeo);

	if (!err) {
		geo->heads = vgeo.heads;
		geo->sectors = vgeo.sectors;
		geo->cylinders = vgeo.cylinders;
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
	return 0;
}

static const struct block_device_operations virtblk_fops = {
	.ioctl  = virtblk_ioctl,
	.owner  = THIS_MODULE,
	.getgeo = virtblk_getgeo,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static int minor_to_index(int minor)
{
	return minor >> PART_BITS;
}
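
/*
 * With PART_BITS == 4, each disk owns 16 minor numbers: index 0 maps to
 * minor 0 (the whole disk "vda" plus its partitions), index 1 to minor
 * 16 ("vdb"), and so on.
 */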

static ssize_t virtblk_serial_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	u64 capacity, size;

	mutex_lock(&vblk->config_lock);
	if (!vblk->config_enable)
		goto done;

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &capacity, sizeof(capacity));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)capacity != capacity) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)capacity);
		capacity = (sector_t)-1;
	}

	size = capacity * queue_logical_block_size(q);
	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
		  (unsigned long long)capacity,
		  queue_logical_block_size(q),
		  cap_str_10, cap_str_2);

	set_capacity(vblk->disk, capacity);
	revalidate_disk(vblk->disk);
done:
	mutex_unlock(&vblk->config_lock);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int init_vq(struct virtio_blk *vblk)
{
	int err = 0;

	/* We expect one virtqueue, for output. */
	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
	if (IS_ERR(vblk->vq))
		err = PTR_ERR(vblk->vq);

	return err;
}

/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}
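
/*
 * Example expansions of the scheme above: index 0 -> "vda",
 * 25 -> "vdz", 26 -> "vdaa", 27 -> "vdab", 701 -> "vdzz",
 * 702 -> "vdaaa".
 */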

static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
	u8 writeback;
	int err;

	err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
				offsetof(struct virtio_blk_config, wce),
				&writeback);
	if (err)
		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);

	return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
	u8 writeback = virtblk_get_cache_mode(vdev);
	struct virtio_blk *vblk = vdev->priv;

	if (writeback)
		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
	else
		blk_queue_flush(vblk->disk->queue, 0);

	revalidate_disk(vblk->disk);
}

static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};

static ssize_t
virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;
	int i;
	u8 writeback;

	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
		if (sysfs_streq(buf, virtblk_cache_types[i]))
			break;

	if (i < 0)
		return -EINVAL;

	writeback = i;
	vdev->config->set(vdev,
			  offsetof(struct virtio_blk_config, wce),
			  &writeback, sizeof(writeback));

	virtblk_update_cache_mode(vdev);
	return count;
}

static ssize_t
virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	u8 writeback = virtblk_get_cache_mode(vblk->vdev);

	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
}

static const struct device_attribute dev_attr_cache_type_ro =
	__ATTR(cache_type, S_IRUGO,
	       virtblk_cache_type_show, NULL);
static const struct device_attribute dev_attr_cache_type_rw =
	__ATTR(cache_type, S_IRUGO|S_IWUSR,
	       virtblk_cache_type_show, virtblk_cache_type_store);

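/*
 * Illustrative sysfs usage of the attributes above (assuming the disk
 * probed as vda):
 *
 *	# cat /sys/block/vda/cache_type
 *	write back
 *	# echo "write through" > /sys/block/vda/cache_type
 *
 * The store hook is only wired up when the host offers
 * VIRTIO_BLK_F_CONFIG_WCE; otherwise the attribute is read-only.
 */

/*
 * Device bring-up: allocate an index and the virtio_blk structure, set
 * up the single virtqueue, mempool, gendisk and request queue, read the
 * optional config fields (capacity, limits, topology), then register
 * the disk and its sysfs attributes.  The error labels at the bottom
 * unwind in exactly the reverse order.
 */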
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err, index;
	int pool_size;

	u64 cap;
	u32 v, blk_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;

	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
			     GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	/* We need to know how many segments before we allocate. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
				offsetof(struct virtio_blk_config, seg_max),
				&sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	init_waitqueue_head(&vblk->queue_wait);
	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;
	sg_init_table(vblk->sg, vblk->sg_elems);
	mutex_init(&vblk->config_lock);

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
	vblk->config_enable = true;

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	pool_size = sizeof(struct virtblk_req);
	if (use_bio)
		pool_size += sizeof(struct scatterlist) * sg_elems;
	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
	if (!vblk->pool) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_mempool;
	}

	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
	if (!q) {
		err = -ENOMEM;
		goto out_put_disk;
	}

	if (use_bio)
		blk_queue_make_request(q, virtblk_make_request);
	q->queuedata = vblk;

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->driverfs_dev = &vdev->dev;
	vblk->index = index;

	/* configure queue flush support */
	virtblk_update_cache_mode(vdev);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &cap, sizeof(cap));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)cap);
		cap = (sector_t)-1;
	}
	set_capacity(vblk->disk, cap);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems-2);

	/* No need to bounce any requests */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
				offsetof(struct virtio_blk_config, size_max),
				&v);
	if (!err)
		blk_queue_max_segment_size(q, v);
	else
		blk_queue_max_segment_size(q, -1U);

	/* Host can optionally specify the block size of the device */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
				offsetof(struct virtio_blk_config, blk_size),
				&blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, physical_block_exp),
			&physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, alignment_offset),
			&alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, min_io_size),
			&min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, opt_io_size),
			&opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	add_disk(vblk->disk);
	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
	if (err)
		goto out_del_disk;

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		err = device_create_file(disk_to_dev(vblk->disk),
					 &dev_attr_cache_type_rw);
	else
		err = device_create_file(disk_to_dev(vblk->disk),
					 &dev_attr_cache_type_ro);
	if (err)
		goto out_del_disk;
	return 0;

out_del_disk:
	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
out_put_disk:
	put_disk(vblk->disk);
out_mempool:
	mempool_destroy(vblk->pool);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_simple_remove(&vd_index_ida, index);
out:
	return err;
}

static void __devexit virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int index = vblk->index;

	/* Prevent config work handler from accessing the device. */
	mutex_lock(&vblk->config_lock);
	vblk->config_enable = false;
	mutex_unlock(&vblk->config_lock);

	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	flush_work(&vblk->config_work);

	put_disk(vblk->disk);
	mempool_destroy(vblk->pool);
	vdev->config->del_vqs(vdev);
	kfree(vblk);
	ida_simple_remove(&vd_index_ida, index);
}

#ifdef CONFIG_PM
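/*
 * Suspend/resume support: freeze resets the device so no further
 * interrupts arrive, stops and syncs the request queue, then deletes
 * the virtqueue; restore recreates the virtqueue and restarts the
 * queue.
 */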
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure we don't receive any more interrupts */
	vdev->config->reset(vdev);

	/* Prevent config work handler from accessing the device. */
	mutex_lock(&vblk->config_lock);
	vblk->config_enable = false;
	mutex_unlock(&vblk->config_lock);

	flush_work(&vblk->config_work);

	spin_lock_irq(vblk->disk->queue->queue_lock);
	blk_stop_queue(vblk->disk->queue);
	spin_unlock_irq(vblk->disk->queue->queue_lock);
	blk_sync_queue(vblk->disk->queue);

	vdev->config->del_vqs(vdev);
	return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	vblk->config_enable = true;
	ret = init_vq(vdev->priv);
	if (!ret) {
		spin_lock_irq(vblk->disk->queue->queue_lock);
		blk_start_queue(vblk->disk->queue);
		spin_unlock_irq(vblk->disk->queue->queue_lock);
	}
	return ret;
}
#endif

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
};
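
/*
 * Each feature bit gates a config field or code path above: SEG_MAX and
 * SIZE_MAX bound the scatterlist, GEOMETRY feeds getgeo, RO marks the
 * disk read-only, BLK_SIZE sets the logical block size, SCSI enables
 * the SG_IO ioctl passthrough, WCE/CONFIG_WCE control the writeback
 * cache, and TOPOLOGY supplies the I/O hints.
 */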

/*
 * virtio_blk causes spurious section mismatch warning by
 * simultaneously referring to a __devinit and a __devexit function.
 * Use __refdata to avoid this warning.
 */
static struct virtio_driver __refdata virtio_blk = {
	.feature_table		= features,
	.feature_table_size	= ARRAY_SIZE(features),
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.probe			= virtblk_probe,
	.remove			= __devexit_p(virtblk_remove),
	.config_changed		= virtblk_config_changed,
#ifdef CONFIG_PM
	.freeze			= virtblk_freeze,
	.restore		= virtblk_restore,
#endif
};

static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}
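
/*
 * Note the init ordering: the workqueue must exist before any device
 * can probe (config-change handling uses it), and the block major must
 * be registered before register_virtio_driver() can bind a device.
 * The error path unwinds in reverse.
 */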

static void __exit fini(void)
{
	unregister_blkdev(major, "virtblk");
	unregister_virtio_driver(&virtio_blk);
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");