xref: /linux/drivers/block/virtio_blk.c (revision 61d0b5a4b2777dcf5daef245e212b3c1fa8091ca)
1 //#define DEBUG
2 #include <linux/spinlock.h>
3 #include <linux/slab.h>
4 #include <linux/blkdev.h>
5 #include <linux/hdreg.h>
6 #include <linux/module.h>
7 #include <linux/mutex.h>
8 #include <linux/virtio.h>
9 #include <linux/virtio_blk.h>
10 #include <linux/scatterlist.h>
11 #include <linux/string_helpers.h>
12 #include <scsi/scsi_cmnd.h>
13 #include <linux/idr.h>
14 
15 #define PART_BITS 4
16 
17 static bool use_bio;
18 module_param(use_bio, bool, S_IRUGO);
19 
20 static int major;
21 static DEFINE_IDA(vd_index_ida);
22 
23 struct workqueue_struct *virtblk_wq;
24 
25 struct virtio_blk
26 {
27 	struct virtio_device *vdev;
28 	struct virtqueue *vq;
29 	wait_queue_head_t queue_wait;
30 
31 	/* The disk structure for the kernel. */
32 	struct gendisk *disk;
33 
34 	mempool_t *pool;
35 
36 	/* Process context for config space updates */
37 	struct work_struct config_work;
38 
39 	/* Lock for config space updates */
40 	struct mutex config_lock;
41 
42 	/* enable config space updates */
43 	bool config_enable;
44 
45 	/* What host tells us, plus 2 for header & tailer. */
46 	unsigned int sg_elems;
47 
48 	/* Ida index - used to track minor number allocations. */
49 	int index;
50 
51 	/* Scatterlist: can be too big for stack. */
52 	struct scatterlist sg[/*sg_elems*/];
53 };
54 
55 struct virtblk_req
56 {
57 	struct request *req;
58 	struct bio *bio;
59 	struct virtio_blk_outhdr out_hdr;
60 	struct virtio_scsi_inhdr in_hdr;
61 	struct work_struct work;
62 	struct virtio_blk *vblk;
63 	int flags;
64 	u8 status;
65 	struct scatterlist sg[];
66 };
67 
68 enum {
69 	VBLK_IS_FLUSH		= 1,
70 	VBLK_REQ_FLUSH		= 2,
71 	VBLK_REQ_DATA		= 4,
72 	VBLK_REQ_FUA		= 8,
73 };
74 
75 static inline int virtblk_result(struct virtblk_req *vbr)
76 {
77 	switch (vbr->status) {
78 	case VIRTIO_BLK_S_OK:
79 		return 0;
80 	case VIRTIO_BLK_S_UNSUPP:
81 		return -ENOTTY;
82 	default:
83 		return -EIO;
84 	}
85 }
86 
87 static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
88 						    gfp_t gfp_mask)
89 {
90 	struct virtblk_req *vbr;
91 
92 	vbr = mempool_alloc(vblk->pool, gfp_mask);
93 	if (!vbr)
94 		return NULL;
95 
96 	vbr->vblk = vblk;
97 	if (use_bio)
98 		sg_init_table(vbr->sg, vblk->sg_elems);
99 
100 	return vbr;
101 }
102 
103 static void virtblk_add_buf_wait(struct virtio_blk *vblk,
104 				 struct virtblk_req *vbr,
105 				 unsigned long out,
106 				 unsigned long in)
107 {
108 	DEFINE_WAIT(wait);
109 
110 	for (;;) {
111 		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
112 					  TASK_UNINTERRUPTIBLE);
113 
114 		spin_lock_irq(vblk->disk->queue->queue_lock);
115 		if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
116 				      GFP_ATOMIC) < 0) {
117 			spin_unlock_irq(vblk->disk->queue->queue_lock);
118 			io_schedule();
119 		} else {
120 			virtqueue_kick(vblk->vq);
121 			spin_unlock_irq(vblk->disk->queue->queue_lock);
122 			break;
123 		}
124 
125 	}
126 
127 	finish_wait(&vblk->queue_wait, &wait);
128 }
129 
130 static inline void virtblk_add_req(struct virtblk_req *vbr,
131 				   unsigned int out, unsigned int in)
132 {
133 	struct virtio_blk *vblk = vbr->vblk;
134 
135 	spin_lock_irq(vblk->disk->queue->queue_lock);
136 	if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
137 					GFP_ATOMIC) < 0)) {
138 		spin_unlock_irq(vblk->disk->queue->queue_lock);
139 		virtblk_add_buf_wait(vblk, vbr, out, in);
140 		return;
141 	}
142 	virtqueue_kick(vblk->vq);
143 	spin_unlock_irq(vblk->disk->queue->queue_lock);
144 }
145 
146 static int virtblk_bio_send_flush(struct virtblk_req *vbr)
147 {
148 	unsigned int out = 0, in = 0;
149 
150 	vbr->flags |= VBLK_IS_FLUSH;
151 	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
152 	vbr->out_hdr.sector = 0;
153 	vbr->out_hdr.ioprio = 0;
154 	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
155 	sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));
156 
157 	virtblk_add_req(vbr, out, in);
158 
159 	return 0;
160 }
161 
162 static int virtblk_bio_send_data(struct virtblk_req *vbr)
163 {
164 	struct virtio_blk *vblk = vbr->vblk;
165 	unsigned int num, out = 0, in = 0;
166 	struct bio *bio = vbr->bio;
167 
168 	vbr->flags &= ~VBLK_IS_FLUSH;
169 	vbr->out_hdr.type = 0;
170 	vbr->out_hdr.sector = bio->bi_sector;
171 	vbr->out_hdr.ioprio = bio_prio(bio);
172 
173 	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
174 
175 	num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);
176 
177 	sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
178 		   sizeof(vbr->status));
179 
180 	if (num) {
181 		if (bio->bi_rw & REQ_WRITE) {
182 			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
183 			out += num;
184 		} else {
185 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
186 			in += num;
187 		}
188 	}
189 
190 	virtblk_add_req(vbr, out, in);
191 
192 	return 0;
193 }
194 
195 static void virtblk_bio_send_data_work(struct work_struct *work)
196 {
197 	struct virtblk_req *vbr;
198 
199 	vbr = container_of(work, struct virtblk_req, work);
200 
201 	virtblk_bio_send_data(vbr);
202 }
203 
204 static void virtblk_bio_send_flush_work(struct work_struct *work)
205 {
206 	struct virtblk_req *vbr;
207 
208 	vbr = container_of(work, struct virtblk_req, work);
209 
210 	virtblk_bio_send_flush(vbr);
211 }
212 
213 static inline void virtblk_request_done(struct virtblk_req *vbr)
214 {
215 	struct virtio_blk *vblk = vbr->vblk;
216 	struct request *req = vbr->req;
217 	int error = virtblk_result(vbr);
218 
219 	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
220 		req->resid_len = vbr->in_hdr.residual;
221 		req->sense_len = vbr->in_hdr.sense_len;
222 		req->errors = vbr->in_hdr.errors;
223 	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
224 		req->errors = (error != 0);
225 	}
226 
227 	__blk_end_request_all(req, error);
228 	mempool_free(vbr, vblk->pool);
229 }
230 
231 static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
232 {
233 	struct virtio_blk *vblk = vbr->vblk;
234 
235 	if (vbr->flags & VBLK_REQ_DATA) {
236 		/* Send out the actual write data */
237 		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
238 		queue_work(virtblk_wq, &vbr->work);
239 	} else {
240 		bio_endio(vbr->bio, virtblk_result(vbr));
241 		mempool_free(vbr, vblk->pool);
242 	}
243 }
244 
245 static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
246 {
247 	struct virtio_blk *vblk = vbr->vblk;
248 
249 	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
250 		/* Send out a flush before end the bio */
251 		vbr->flags &= ~VBLK_REQ_DATA;
252 		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
253 		queue_work(virtblk_wq, &vbr->work);
254 	} else {
255 		bio_endio(vbr->bio, virtblk_result(vbr));
256 		mempool_free(vbr, vblk->pool);
257 	}
258 }
259 
260 static inline void virtblk_bio_done(struct virtblk_req *vbr)
261 {
262 	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
263 		virtblk_bio_flush_done(vbr);
264 	else
265 		virtblk_bio_data_done(vbr);
266 }
267 
268 static void virtblk_done(struct virtqueue *vq)
269 {
270 	struct virtio_blk *vblk = vq->vdev->priv;
271 	bool bio_done = false, req_done = false;
272 	struct virtblk_req *vbr;
273 	unsigned long flags;
274 	unsigned int len;
275 
276 	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
277 	do {
278 		virtqueue_disable_cb(vq);
279 		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
280 			if (vbr->bio) {
281 				virtblk_bio_done(vbr);
282 				bio_done = true;
283 			} else {
284 				virtblk_request_done(vbr);
285 				req_done = true;
286 			}
287 		}
288 	} while (!virtqueue_enable_cb(vq));
289 	/* In case queue is stopped waiting for more buffers. */
290 	if (req_done)
291 		blk_start_queue(vblk->disk->queue);
292 	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
293 
294 	if (bio_done)
295 		wake_up(&vblk->queue_wait);
296 }
297 
298 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
299 		   struct request *req)
300 {
301 	unsigned long num, out = 0, in = 0;
302 	struct virtblk_req *vbr;
303 
304 	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
305 	if (!vbr)
306 		/* When another request finishes we'll try again. */
307 		return false;
308 
309 	vbr->req = req;
310 	vbr->bio = NULL;
311 	if (req->cmd_flags & REQ_FLUSH) {
312 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
313 		vbr->out_hdr.sector = 0;
314 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
315 	} else {
316 		switch (req->cmd_type) {
317 		case REQ_TYPE_FS:
318 			vbr->out_hdr.type = 0;
319 			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
320 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
321 			break;
322 		case REQ_TYPE_BLOCK_PC:
323 			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
324 			vbr->out_hdr.sector = 0;
325 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
326 			break;
327 		case REQ_TYPE_SPECIAL:
328 			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
329 			vbr->out_hdr.sector = 0;
330 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
331 			break;
332 		default:
333 			/* We don't put anything else in the queue. */
334 			BUG();
335 		}
336 	}
337 
338 	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
339 
340 	/*
341 	 * If this is a packet command we need a couple of additional headers.
342 	 * Behind the normal outhdr we put a segment with the scsi command
343 	 * block, and before the normal inhdr we put the sense data and the
344 	 * inhdr with additional status information before the normal inhdr.
345 	 */
346 	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
347 		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
348 
349 	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
350 
351 	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
352 		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
353 		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
354 			   sizeof(vbr->in_hdr));
355 	}
356 
357 	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
358 		   sizeof(vbr->status));
359 
360 	if (num) {
361 		if (rq_data_dir(vbr->req) == WRITE) {
362 			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
363 			out += num;
364 		} else {
365 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
366 			in += num;
367 		}
368 	}
369 
370 	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
371 			      GFP_ATOMIC) < 0) {
372 		mempool_free(vbr, vblk->pool);
373 		return false;
374 	}
375 
376 	return true;
377 }
378 
379 static void virtblk_request(struct request_queue *q)
380 {
381 	struct virtio_blk *vblk = q->queuedata;
382 	struct request *req;
383 	unsigned int issued = 0;
384 
385 	while ((req = blk_peek_request(q)) != NULL) {
386 		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
387 
388 		/* If this request fails, stop queue and wait for something to
389 		   finish to restart it. */
390 		if (!do_req(q, vblk, req)) {
391 			blk_stop_queue(q);
392 			break;
393 		}
394 		blk_start_request(req);
395 		issued++;
396 	}
397 
398 	if (issued)
399 		virtqueue_kick(vblk->vq);
400 }
401 
402 static void virtblk_make_request(struct request_queue *q, struct bio *bio)
403 {
404 	struct virtio_blk *vblk = q->queuedata;
405 	struct virtblk_req *vbr;
406 
407 	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
408 
409 	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
410 	if (!vbr) {
411 		bio_endio(bio, -ENOMEM);
412 		return;
413 	}
414 
415 	vbr->bio = bio;
416 	vbr->flags = 0;
417 	if (bio->bi_rw & REQ_FLUSH)
418 		vbr->flags |= VBLK_REQ_FLUSH;
419 	if (bio->bi_rw & REQ_FUA)
420 		vbr->flags |= VBLK_REQ_FUA;
421 	if (bio->bi_size)
422 		vbr->flags |= VBLK_REQ_DATA;
423 
424 	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
425 		virtblk_bio_send_flush(vbr);
426 	else
427 		virtblk_bio_send_data(vbr);
428 }
429 
430 /* return id (s/n) string for *disk to *id_str
431  */
432 static int virtblk_get_id(struct gendisk *disk, char *id_str)
433 {
434 	struct virtio_blk *vblk = disk->private_data;
435 	struct request *req;
436 	struct bio *bio;
437 	int err;
438 
439 	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
440 			   GFP_KERNEL);
441 	if (IS_ERR(bio))
442 		return PTR_ERR(bio);
443 
444 	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
445 	if (IS_ERR(req)) {
446 		bio_put(bio);
447 		return PTR_ERR(req);
448 	}
449 
450 	req->cmd_type = REQ_TYPE_SPECIAL;
451 	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
452 	blk_put_request(req);
453 
454 	return err;
455 }
456 
457 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
458 			     unsigned int cmd, unsigned long data)
459 {
460 	struct gendisk *disk = bdev->bd_disk;
461 	struct virtio_blk *vblk = disk->private_data;
462 
463 	/*
464 	 * Only allow the generic SCSI ioctls if the host can support it.
465 	 */
466 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
467 		return -ENOTTY;
468 
469 	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
470 				  (void __user *)data);
471 }
472 
473 /* We provide getgeo only to please some old bootloader/partitioning tools */
474 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
475 {
476 	struct virtio_blk *vblk = bd->bd_disk->private_data;
477 	struct virtio_blk_geometry vgeo;
478 	int err;
479 
480 	/* see if the host passed in geometry config */
481 	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
482 				offsetof(struct virtio_blk_config, geometry),
483 				&vgeo);
484 
485 	if (!err) {
486 		geo->heads = vgeo.heads;
487 		geo->sectors = vgeo.sectors;
488 		geo->cylinders = vgeo.cylinders;
489 	} else {
490 		/* some standard values, similar to sd */
491 		geo->heads = 1 << 6;
492 		geo->sectors = 1 << 5;
493 		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
494 	}
495 	return 0;
496 }
497 
498 static const struct block_device_operations virtblk_fops = {
499 	.ioctl  = virtblk_ioctl,
500 	.owner  = THIS_MODULE,
501 	.getgeo = virtblk_getgeo,
502 };
503 
504 static int index_to_minor(int index)
505 {
506 	return index << PART_BITS;
507 }
508 
509 static int minor_to_index(int minor)
510 {
511 	return minor >> PART_BITS;
512 }
513 
514 static ssize_t virtblk_serial_show(struct device *dev,
515 				struct device_attribute *attr, char *buf)
516 {
517 	struct gendisk *disk = dev_to_disk(dev);
518 	int err;
519 
520 	/* sysfs gives us a PAGE_SIZE buffer */
521 	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
522 
523 	buf[VIRTIO_BLK_ID_BYTES] = '\0';
524 	err = virtblk_get_id(disk, buf);
525 	if (!err)
526 		return strlen(buf);
527 
528 	if (err == -EIO) /* Unsupported? Make it empty. */
529 		return 0;
530 
531 	return err;
532 }
533 DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
534 
535 static void virtblk_config_changed_work(struct work_struct *work)
536 {
537 	struct virtio_blk *vblk =
538 		container_of(work, struct virtio_blk, config_work);
539 	struct virtio_device *vdev = vblk->vdev;
540 	struct request_queue *q = vblk->disk->queue;
541 	char cap_str_2[10], cap_str_10[10];
542 	char *envp[] = { "RESIZE=1", NULL };
543 	u64 capacity, size;
544 
545 	mutex_lock(&vblk->config_lock);
546 	if (!vblk->config_enable)
547 		goto done;
548 
549 	/* Host must always specify the capacity. */
550 	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
551 			  &capacity, sizeof(capacity));
552 
553 	/* If capacity is too big, truncate with warning. */
554 	if ((sector_t)capacity != capacity) {
555 		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
556 			 (unsigned long long)capacity);
557 		capacity = (sector_t)-1;
558 	}
559 
560 	size = capacity * queue_logical_block_size(q);
561 	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
562 	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
563 
564 	dev_notice(&vdev->dev,
565 		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
566 		  (unsigned long long)capacity,
567 		  queue_logical_block_size(q),
568 		  cap_str_10, cap_str_2);
569 
570 	set_capacity(vblk->disk, capacity);
571 	revalidate_disk(vblk->disk);
572 	kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
573 done:
574 	mutex_unlock(&vblk->config_lock);
575 }
576 
577 static void virtblk_config_changed(struct virtio_device *vdev)
578 {
579 	struct virtio_blk *vblk = vdev->priv;
580 
581 	queue_work(virtblk_wq, &vblk->config_work);
582 }
583 
584 static int init_vq(struct virtio_blk *vblk)
585 {
586 	int err = 0;
587 
588 	/* We expect one virtqueue, for output. */
589 	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
590 	if (IS_ERR(vblk->vq))
591 		err = PTR_ERR(vblk->vq);
592 
593 	return err;
594 }
595 
596 /*
597  * Legacy naming scheme used for virtio devices.  We are stuck with it for
598  * virtio blk but don't ever use it for any new driver.
599  */
600 static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
601 {
602 	const int base = 'z' - 'a' + 1;
603 	char *begin = buf + strlen(prefix);
604 	char *end = buf + buflen;
605 	char *p;
606 	int unit;
607 
608 	p = end - 1;
609 	*p = '\0';
610 	unit = base;
611 	do {
612 		if (p == begin)
613 			return -EINVAL;
614 		*--p = 'a' + (index % unit);
615 		index = (index / unit) - 1;
616 	} while (index >= 0);
617 
618 	memmove(begin, p, end - p);
619 	memcpy(buf, prefix, strlen(prefix));
620 
621 	return 0;
622 }
623 
624 static int virtblk_get_cache_mode(struct virtio_device *vdev)
625 {
626 	u8 writeback;
627 	int err;
628 
629 	err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
630 				offsetof(struct virtio_blk_config, wce),
631 				&writeback);
632 	if (err)
633 		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
634 
635 	return writeback;
636 }
637 
638 static void virtblk_update_cache_mode(struct virtio_device *vdev)
639 {
640 	u8 writeback = virtblk_get_cache_mode(vdev);
641 	struct virtio_blk *vblk = vdev->priv;
642 
643 	if (writeback)
644 		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
645 	else
646 		blk_queue_flush(vblk->disk->queue, 0);
647 
648 	revalidate_disk(vblk->disk);
649 }
650 
651 static const char *const virtblk_cache_types[] = {
652 	"write through", "write back"
653 };
654 
655 static ssize_t
656 virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
657 			 const char *buf, size_t count)
658 {
659 	struct gendisk *disk = dev_to_disk(dev);
660 	struct virtio_blk *vblk = disk->private_data;
661 	struct virtio_device *vdev = vblk->vdev;
662 	int i;
663 	u8 writeback;
664 
665 	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
666 	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
667 		if (sysfs_streq(buf, virtblk_cache_types[i]))
668 			break;
669 
670 	if (i < 0)
671 		return -EINVAL;
672 
673 	writeback = i;
674 	vdev->config->set(vdev,
675 			  offsetof(struct virtio_blk_config, wce),
676 			  &writeback, sizeof(writeback));
677 
678 	virtblk_update_cache_mode(vdev);
679 	return count;
680 }
681 
682 static ssize_t
683 virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
684 			 char *buf)
685 {
686 	struct gendisk *disk = dev_to_disk(dev);
687 	struct virtio_blk *vblk = disk->private_data;
688 	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
689 
690 	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
691 	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
692 }
693 
694 static const struct device_attribute dev_attr_cache_type_ro =
695 	__ATTR(cache_type, S_IRUGO,
696 	       virtblk_cache_type_show, NULL);
697 static const struct device_attribute dev_attr_cache_type_rw =
698 	__ATTR(cache_type, S_IRUGO|S_IWUSR,
699 	       virtblk_cache_type_show, virtblk_cache_type_store);
700 
701 static int virtblk_probe(struct virtio_device *vdev)
702 {
703 	struct virtio_blk *vblk;
704 	struct request_queue *q;
705 	int err, index;
706 	int pool_size;
707 
708 	u64 cap;
709 	u32 v, blk_size, sg_elems, opt_io_size;
710 	u16 min_io_size;
711 	u8 physical_block_exp, alignment_offset;
712 
713 	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
714 			     GFP_KERNEL);
715 	if (err < 0)
716 		goto out;
717 	index = err;
718 
719 	/* We need to know how many segments before we allocate. */
720 	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
721 				offsetof(struct virtio_blk_config, seg_max),
722 				&sg_elems);
723 
724 	/* We need at least one SG element, whatever they say. */
725 	if (err || !sg_elems)
726 		sg_elems = 1;
727 
728 	/* We need an extra sg elements at head and tail. */
729 	sg_elems += 2;
730 	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
731 				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
732 	if (!vblk) {
733 		err = -ENOMEM;
734 		goto out_free_index;
735 	}
736 
737 	init_waitqueue_head(&vblk->queue_wait);
738 	vblk->vdev = vdev;
739 	vblk->sg_elems = sg_elems;
740 	sg_init_table(vblk->sg, vblk->sg_elems);
741 	mutex_init(&vblk->config_lock);
742 
743 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
744 	vblk->config_enable = true;
745 
746 	err = init_vq(vblk);
747 	if (err)
748 		goto out_free_vblk;
749 
750 	pool_size = sizeof(struct virtblk_req);
751 	if (use_bio)
752 		pool_size += sizeof(struct scatterlist) * sg_elems;
753 	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
754 	if (!vblk->pool) {
755 		err = -ENOMEM;
756 		goto out_free_vq;
757 	}
758 
759 	/* FIXME: How many partitions?  How long is a piece of string? */
760 	vblk->disk = alloc_disk(1 << PART_BITS);
761 	if (!vblk->disk) {
762 		err = -ENOMEM;
763 		goto out_mempool;
764 	}
765 
766 	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
767 	if (!q) {
768 		err = -ENOMEM;
769 		goto out_put_disk;
770 	}
771 
772 	if (use_bio)
773 		blk_queue_make_request(q, virtblk_make_request);
774 	q->queuedata = vblk;
775 
776 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
777 
778 	vblk->disk->major = major;
779 	vblk->disk->first_minor = index_to_minor(index);
780 	vblk->disk->private_data = vblk;
781 	vblk->disk->fops = &virtblk_fops;
782 	vblk->disk->driverfs_dev = &vdev->dev;
783 	vblk->index = index;
784 
785 	/* configure queue flush support */
786 	virtblk_update_cache_mode(vdev);
787 
788 	/* If disk is read-only in the host, the guest should obey */
789 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
790 		set_disk_ro(vblk->disk, 1);
791 
792 	/* Host must always specify the capacity. */
793 	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
794 			  &cap, sizeof(cap));
795 
796 	/* If capacity is too big, truncate with warning. */
797 	if ((sector_t)cap != cap) {
798 		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
799 			 (unsigned long long)cap);
800 		cap = (sector_t)-1;
801 	}
802 	set_capacity(vblk->disk, cap);
803 
804 	/* We can handle whatever the host told us to handle. */
805 	blk_queue_max_segments(q, vblk->sg_elems-2);
806 
807 	/* No need to bounce any requests */
808 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
809 
810 	/* No real sector limit. */
811 	blk_queue_max_hw_sectors(q, -1U);
812 
813 	/* Host can optionally specify maximum segment size and number of
814 	 * segments. */
815 	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
816 				offsetof(struct virtio_blk_config, size_max),
817 				&v);
818 	if (!err)
819 		blk_queue_max_segment_size(q, v);
820 	else
821 		blk_queue_max_segment_size(q, -1U);
822 
823 	/* Host can optionally specify the block size of the device */
824 	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
825 				offsetof(struct virtio_blk_config, blk_size),
826 				&blk_size);
827 	if (!err)
828 		blk_queue_logical_block_size(q, blk_size);
829 	else
830 		blk_size = queue_logical_block_size(q);
831 
832 	/* Use topology information if available */
833 	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
834 			offsetof(struct virtio_blk_config, physical_block_exp),
835 			&physical_block_exp);
836 	if (!err && physical_block_exp)
837 		blk_queue_physical_block_size(q,
838 				blk_size * (1 << physical_block_exp));
839 
840 	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
841 			offsetof(struct virtio_blk_config, alignment_offset),
842 			&alignment_offset);
843 	if (!err && alignment_offset)
844 		blk_queue_alignment_offset(q, blk_size * alignment_offset);
845 
846 	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
847 			offsetof(struct virtio_blk_config, min_io_size),
848 			&min_io_size);
849 	if (!err && min_io_size)
850 		blk_queue_io_min(q, blk_size * min_io_size);
851 
852 	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
853 			offsetof(struct virtio_blk_config, opt_io_size),
854 			&opt_io_size);
855 	if (!err && opt_io_size)
856 		blk_queue_io_opt(q, blk_size * opt_io_size);
857 
858 	add_disk(vblk->disk);
859 	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
860 	if (err)
861 		goto out_del_disk;
862 
863 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
864 		err = device_create_file(disk_to_dev(vblk->disk),
865 					 &dev_attr_cache_type_rw);
866 	else
867 		err = device_create_file(disk_to_dev(vblk->disk),
868 					 &dev_attr_cache_type_ro);
869 	if (err)
870 		goto out_del_disk;
871 	return 0;
872 
873 out_del_disk:
874 	del_gendisk(vblk->disk);
875 	blk_cleanup_queue(vblk->disk->queue);
876 out_put_disk:
877 	put_disk(vblk->disk);
878 out_mempool:
879 	mempool_destroy(vblk->pool);
880 out_free_vq:
881 	vdev->config->del_vqs(vdev);
882 out_free_vblk:
883 	kfree(vblk);
884 out_free_index:
885 	ida_simple_remove(&vd_index_ida, index);
886 out:
887 	return err;
888 }
889 
890 static void virtblk_remove(struct virtio_device *vdev)
891 {
892 	struct virtio_blk *vblk = vdev->priv;
893 	int index = vblk->index;
894 	int refc;
895 
896 	/* Prevent config work handler from accessing the device. */
897 	mutex_lock(&vblk->config_lock);
898 	vblk->config_enable = false;
899 	mutex_unlock(&vblk->config_lock);
900 
901 	del_gendisk(vblk->disk);
902 	blk_cleanup_queue(vblk->disk->queue);
903 
904 	/* Stop all the virtqueues. */
905 	vdev->config->reset(vdev);
906 
907 	flush_work(&vblk->config_work);
908 
909 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
910 	put_disk(vblk->disk);
911 	mempool_destroy(vblk->pool);
912 	vdev->config->del_vqs(vdev);
913 	kfree(vblk);
914 
915 	/* Only free device id if we don't have any users */
916 	if (refc == 1)
917 		ida_simple_remove(&vd_index_ida, index);
918 }
919 
920 #ifdef CONFIG_PM
921 static int virtblk_freeze(struct virtio_device *vdev)
922 {
923 	struct virtio_blk *vblk = vdev->priv;
924 
925 	/* Ensure we don't receive any more interrupts */
926 	vdev->config->reset(vdev);
927 
928 	/* Prevent config work handler from accessing the device. */
929 	mutex_lock(&vblk->config_lock);
930 	vblk->config_enable = false;
931 	mutex_unlock(&vblk->config_lock);
932 
933 	flush_work(&vblk->config_work);
934 
935 	spin_lock_irq(vblk->disk->queue->queue_lock);
936 	blk_stop_queue(vblk->disk->queue);
937 	spin_unlock_irq(vblk->disk->queue->queue_lock);
938 	blk_sync_queue(vblk->disk->queue);
939 
940 	vdev->config->del_vqs(vdev);
941 	return 0;
942 }
943 
944 static int virtblk_restore(struct virtio_device *vdev)
945 {
946 	struct virtio_blk *vblk = vdev->priv;
947 	int ret;
948 
949 	vblk->config_enable = true;
950 	ret = init_vq(vdev->priv);
951 	if (!ret) {
952 		spin_lock_irq(vblk->disk->queue->queue_lock);
953 		blk_start_queue(vblk->disk->queue);
954 		spin_unlock_irq(vblk->disk->queue->queue_lock);
955 	}
956 	return ret;
957 }
958 #endif
959 
960 static const struct virtio_device_id id_table[] = {
961 	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
962 	{ 0 },
963 };
964 
965 static unsigned int features[] = {
966 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
967 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
968 	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
969 };
970 
971 static struct virtio_driver virtio_blk = {
972 	.feature_table		= features,
973 	.feature_table_size	= ARRAY_SIZE(features),
974 	.driver.name		= KBUILD_MODNAME,
975 	.driver.owner		= THIS_MODULE,
976 	.id_table		= id_table,
977 	.probe			= virtblk_probe,
978 	.remove			= virtblk_remove,
979 	.config_changed		= virtblk_config_changed,
980 #ifdef CONFIG_PM
981 	.freeze			= virtblk_freeze,
982 	.restore		= virtblk_restore,
983 #endif
984 };
985 
986 static int __init init(void)
987 {
988 	int error;
989 
990 	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
991 	if (!virtblk_wq)
992 		return -ENOMEM;
993 
994 	major = register_blkdev(0, "virtblk");
995 	if (major < 0) {
996 		error = major;
997 		goto out_destroy_workqueue;
998 	}
999 
1000 	error = register_virtio_driver(&virtio_blk);
1001 	if (error)
1002 		goto out_unregister_blkdev;
1003 	return 0;
1004 
1005 out_unregister_blkdev:
1006 	unregister_blkdev(major, "virtblk");
1007 out_destroy_workqueue:
1008 	destroy_workqueue(virtblk_wq);
1009 	return error;
1010 }
1011 
1012 static void __exit fini(void)
1013 {
1014 	unregister_blkdev(major, "virtblk");
1015 	unregister_virtio_driver(&virtio_blk);
1016 	destroy_workqueue(virtblk_wq);
1017 }
1018 module_init(init);
1019 module_exit(fini);
1020 
1021 MODULE_DEVICE_TABLE(virtio, id_table);
1022 MODULE_DESCRIPTION("Virtio block driver");
1023 MODULE_LICENSE("GPL");
1024