xref: /freebsd/sys/dev/xen/blkfront/blkfront.c (revision 9999d2cb7248eb9be1a287a25f2d1fbb64044091)
189e0f4d2SKip Macy /*
28698b76cSKip Macy  * XenBSD block device driver
38698b76cSKip Macy  *
48698b76cSKip Macy  * Copyright (c) 2009 Frank Suchomel, Citrix
59999d2cbSKip Macy  * Copyright (c) 2009 Doug F. Rabson, Citrix
69999d2cbSKip Macy  * Copyright (c) 2005 Kip Macy
79999d2cbSKip Macy  * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
89999d2cbSKip Macy  * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
99999d2cbSKip Macy  *
109999d2cbSKip Macy  *
119999d2cbSKip Macy  * Permission is hereby granted, free of charge, to any person obtaining a copy
129999d2cbSKip Macy  * of this software and associated documentation files (the "Software"), to
139999d2cbSKip Macy  * deal in the Software without restriction, including without limitation the
149999d2cbSKip Macy  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
159999d2cbSKip Macy  * sell copies of the Software, and to permit persons to whom the Software is
169999d2cbSKip Macy  * furnished to do so, subject to the following conditions:
179999d2cbSKip Macy  *
189999d2cbSKip Macy  * The above copyright notice and this permission notice shall be included in
199999d2cbSKip Macy  * all copies or substantial portions of the Software.
209999d2cbSKip Macy  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
219999d2cbSKip Macy  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
229999d2cbSKip Macy  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
239999d2cbSKip Macy  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
249999d2cbSKip Macy  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
259999d2cbSKip Macy  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
269999d2cbSKip Macy  * DEALINGS IN THE SOFTWARE.
2789e0f4d2SKip Macy  */
2889e0f4d2SKip Macy 
2989e0f4d2SKip Macy #include <sys/cdefs.h>
3089e0f4d2SKip Macy __FBSDID("$FreeBSD$");
3189e0f4d2SKip Macy 
3289e0f4d2SKip Macy #include <sys/param.h>
3389e0f4d2SKip Macy #include <sys/systm.h>
3489e0f4d2SKip Macy #include <sys/malloc.h>
3589e0f4d2SKip Macy #include <sys/kernel.h>
3689e0f4d2SKip Macy #include <vm/vm.h>
3789e0f4d2SKip Macy #include <vm/pmap.h>
3889e0f4d2SKip Macy 
3989e0f4d2SKip Macy #include <sys/bio.h>
4089e0f4d2SKip Macy #include <sys/bus.h>
4189e0f4d2SKip Macy #include <sys/conf.h>
4289e0f4d2SKip Macy #include <sys/module.h>
4389e0f4d2SKip Macy 
4489e0f4d2SKip Macy #include <machine/bus.h>
4589e0f4d2SKip Macy #include <sys/rman.h>
4689e0f4d2SKip Macy #include <machine/resource.h>
4789e0f4d2SKip Macy #include <machine/intr_machdep.h>
4889e0f4d2SKip Macy #include <machine/vmparam.h>
4989e0f4d2SKip Macy 
5089e0f4d2SKip Macy #include <machine/xen/xen-os.h>
5112678024SDoug Rabson #include <machine/xen/xenfunc.h>
5212678024SDoug Rabson #include <xen/hypervisor.h>
533a6d1fcfSKip Macy #include <xen/xen_intr.h>
543a6d1fcfSKip Macy #include <xen/evtchn.h>
5512678024SDoug Rabson #include <xen/gnttab.h>
5689e0f4d2SKip Macy #include <xen/interface/grant_table.h>
5723dc5621SKip Macy #include <xen/interface/io/protocols.h>
5823dc5621SKip Macy #include <xen/xenbus/xenbusvar.h>
5989e0f4d2SKip Macy 
6089e0f4d2SKip Macy #include <geom/geom_disk.h>
6189e0f4d2SKip Macy 
6289e0f4d2SKip Macy #include <dev/xen/blkfront/block.h>
6389e0f4d2SKip Macy 
6423dc5621SKip Macy #include "xenbus_if.h"
6523dc5621SKip Macy 
6689e0f4d2SKip Macy #define    ASSERT(S)       KASSERT(S, (#S))
6789e0f4d2SKip Macy /* prototypes */
6889e0f4d2SKip Macy struct xb_softc;
6989e0f4d2SKip Macy static void xb_startio(struct xb_softc *sc);
7023dc5621SKip Macy static void connect(device_t, struct blkfront_info *);
7123dc5621SKip Macy static void blkfront_closing(device_t);
7223dc5621SKip Macy static int blkfront_detach(device_t);
7323dc5621SKip Macy static int talk_to_backend(device_t, struct blkfront_info *);
7423dc5621SKip Macy static int setup_blkring(device_t, struct blkfront_info *);
7589e0f4d2SKip Macy static void blkif_int(void *);
7689e0f4d2SKip Macy #if 0
7789e0f4d2SKip Macy static void blkif_restart_queue(void *arg);
7889e0f4d2SKip Macy #endif
7989e0f4d2SKip Macy static void blkif_recover(struct blkfront_info *);
8089e0f4d2SKip Macy static void blkif_completion(struct blk_shadow *);
8189e0f4d2SKip Macy static void blkif_free(struct blkfront_info *, int);
8289e0f4d2SKip Macy 
8389e0f4d2SKip Macy #define GRANT_INVALID_REF 0
8489e0f4d2SKip Macy #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
8589e0f4d2SKip Macy 
8689e0f4d2SKip Macy LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;
8789e0f4d2SKip Macy 
8889e0f4d2SKip Macy /* Control whether runtime update of vbds is enabled. */
8989e0f4d2SKip Macy #define ENABLE_VBD_UPDATE 0
9089e0f4d2SKip Macy 
9189e0f4d2SKip Macy #if ENABLE_VBD_UPDATE
9289e0f4d2SKip Macy static void vbd_update(void);
9389e0f4d2SKip Macy #endif
9489e0f4d2SKip Macy 
9589e0f4d2SKip Macy 
9689e0f4d2SKip Macy #define BLKIF_STATE_DISCONNECTED 0
9789e0f4d2SKip Macy #define BLKIF_STATE_CONNECTED    1
9889e0f4d2SKip Macy #define BLKIF_STATE_SUSPENDED    2
9989e0f4d2SKip Macy 
10089e0f4d2SKip Macy #ifdef notyet
10189e0f4d2SKip Macy static char *blkif_state_name[] = {
10289e0f4d2SKip Macy 	[BLKIF_STATE_DISCONNECTED] = "disconnected",
10389e0f4d2SKip Macy 	[BLKIF_STATE_CONNECTED]    = "connected",
10489e0f4d2SKip Macy 	[BLKIF_STATE_SUSPENDED]    = "closed",
10589e0f4d2SKip Macy };
10689e0f4d2SKip Macy 
10789e0f4d2SKip Macy static char * blkif_status_name[] = {
10889e0f4d2SKip Macy 	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
10989e0f4d2SKip Macy 	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
11089e0f4d2SKip Macy 	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
11189e0f4d2SKip Macy 	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
11289e0f4d2SKip Macy };
11389e0f4d2SKip Macy #endif
11489e0f4d2SKip Macy #define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
11589e0f4d2SKip Macy #if 0
11612678024SDoug Rabson #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
11789e0f4d2SKip Macy #else
11889e0f4d2SKip Macy #define DPRINTK(fmt, args...)
11989e0f4d2SKip Macy #endif
12089e0f4d2SKip Macy 
12189e0f4d2SKip Macy static grant_ref_t gref_head;
12289e0f4d2SKip Macy #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
12389e0f4d2SKip Macy     (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
12489e0f4d2SKip Macy 
12589e0f4d2SKip Macy static void kick_pending_request_queues(struct blkfront_info *);
12689e0f4d2SKip Macy static int blkif_open(struct disk *dp);
12789e0f4d2SKip Macy static int blkif_close(struct disk *dp);
12889e0f4d2SKip Macy static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
12989e0f4d2SKip Macy static int blkif_queue_request(struct bio *bp);
13089e0f4d2SKip Macy static void xb_strategy(struct bio *bp);
13189e0f4d2SKip Macy 
1328698b76cSKip Macy // In order to quiesce the device during kernel dumps, outstanding requests to
1338698b76cSKip Macy // DOM0 for disk reads/writes need to be accounted for.
1348698b76cSKip Macy static	int	blkif_queued_requests;
1358698b76cSKip Macy static	int	xb_dump(void *, void *, vm_offset_t, off_t, size_t);
13689e0f4d2SKip Macy 
13789e0f4d2SKip Macy 
13889e0f4d2SKip Macy /* XXX move to xb_vbd.c when VBD update support is added */
13989e0f4d2SKip Macy #define MAX_VBDS 64
14089e0f4d2SKip Macy 
14189e0f4d2SKip Macy #define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
14289e0f4d2SKip Macy #define XBD_SECTOR_SHFT		9
14389e0f4d2SKip Macy 
14489e0f4d2SKip Macy static struct mtx blkif_io_lock;
14589e0f4d2SKip Macy 
1465d254c04SKip Macy static vm_paddr_t
1475d254c04SKip Macy pfn_to_mfn(vm_paddr_t pfn)
14889e0f4d2SKip Macy {
14989e0f4d2SKip Macy 	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
15089e0f4d2SKip Macy }
15189e0f4d2SKip Macy 
/*
 * Translate a virtual device number into a disk name and unit number.
 *
 * Two encodings are recognised:
 *  - Extended IDs (bit 28 set): the unit is taken from bits 8..27 and
 *    the name is always "xbd".
 *  - Linux major/minor pairs (major in bits 8 and up, minor in the low
 *    8 bits): known majors are mapped through the table below.  For HVM
 *    guests, this allows us to use the same drive names with blkfront
 *    as the emulated drives, easing transition slightly.
 *
 * Anything else falls back to "xbd" with unit (minor >> 4).
 *
 * vdevice: virtual device number to decode.
 * unit:    out parameter, receives the unit number.
 * name:    out parameter, receives a pointer to a static name string.
 */
static void
blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
{
	/* major: Linux major; shift: minor bits per disk; base: first unit. */
	static const struct vdev_info {
		int major;
		int shift;
		int base;
		const char *name;
	} info[] = {
		{3,	6,	0,	"ad"},	/* ide0 */
		{22,	6,	2,	"ad"},	/* ide1 */
		{33,	6,	4,	"ad"},	/* ide2 */
		{34,	6,	6,	"ad"},	/* ide3 */
		{56,	6,	8,	"ad"},	/* ide4 */
		{57,	6,	10,	"ad"},	/* ide5 */
		{88,	6,	12,	"ad"},	/* ide6 */
		{89,	6,	14,	"ad"},	/* ide7 */
		{90,	6,	16,	"ad"},	/* ide8 */
		{91,	6,	18,	"ad"},	/* ide9 */

		{8,	4,	0,	"da"},	/* scsi disk0 */
		{65,	4,	16,	"da"},	/* scsi disk1 */
		{66,	4,	32,	"da"},	/* scsi disk2 */
		{67,	4,	48,	"da"},	/* scsi disk3 */
		{68,	4,	64,	"da"},	/* scsi disk4 */
		{69,	4,	80,	"da"},	/* scsi disk5 */
		{70,	4,	96,	"da"},	/* scsi disk6 */
		{71,	4,	112,	"da"},	/* scsi disk7 */
		{128,	4,	128,	"da"},	/* scsi disk8 */
		{129,	4,	144,	"da"},	/* scsi disk9 */
		{130,	4,	160,	"da"},	/* scsi disk10 */
		{131,	4,	176,	"da"},	/* scsi disk11 */
		{132,	4,	192,	"da"},	/* scsi disk12 */
		{133,	4,	208,	"da"},	/* scsi disk13 */
		{134,	4,	224,	"da"},	/* scsi disk14 */
		{135,	4,	240,	"da"},	/* scsi disk15 */

		{202,	4,	0,	"xbd"},	/* xbd */

		{0,	0,	0,	NULL},	/* terminator */
	};
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1 << 28)) {
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
		*name = "xbd";
		/*
		 * Extended IDs are fully decoded here.  Without this
		 * return the legacy lookup below would overwrite the
		 * unit that was just computed.
		 */
		return;
	}

	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}

	/* Unknown major: treat it like an xbd device with 16 minors per disk. */
	*unit = minor >> 4;
	*name = "xbd";
}
21823dc5621SKip Macy 
21989e0f4d2SKip Macy int
22023dc5621SKip Macy xlvbd_add(device_t dev, blkif_sector_t capacity,
22123dc5621SKip Macy     int vdevice, uint16_t vdisk_info, uint16_t sector_size,
22289e0f4d2SKip Macy     struct blkfront_info *info)
22389e0f4d2SKip Macy {
22489e0f4d2SKip Macy 	struct xb_softc	*sc;
22523dc5621SKip Macy 	int	unit, error = 0;
22623dc5621SKip Macy 	const char *name;
22723dc5621SKip Macy 
22823dc5621SKip Macy 	blkfront_vdevice_to_unit(vdevice, &unit, &name);
22989e0f4d2SKip Macy 
23089e0f4d2SKip Macy 	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
23123dc5621SKip Macy 	sc->xb_unit = unit;
23289e0f4d2SKip Macy 	sc->xb_info = info;
23389e0f4d2SKip Macy 	info->sc = sc;
23489e0f4d2SKip Macy 
23523dc5621SKip Macy 	if (strcmp(name, "xbd"))
23623dc5621SKip Macy 		device_printf(dev, "attaching as %s%d\n", name, unit);
23723dc5621SKip Macy 
23889e0f4d2SKip Macy 	memset(&sc->xb_disk, 0, sizeof(sc->xb_disk));
23989e0f4d2SKip Macy 	sc->xb_disk = disk_alloc();
2403a6d1fcfSKip Macy 	sc->xb_disk->d_unit = sc->xb_unit;
24189e0f4d2SKip Macy 	sc->xb_disk->d_open = blkif_open;
24289e0f4d2SKip Macy 	sc->xb_disk->d_close = blkif_close;
24389e0f4d2SKip Macy 	sc->xb_disk->d_ioctl = blkif_ioctl;
24489e0f4d2SKip Macy 	sc->xb_disk->d_strategy = xb_strategy;
2458698b76cSKip Macy 	sc->xb_disk->d_dump = xb_dump;
2463a6d1fcfSKip Macy 	sc->xb_disk->d_name = name;
24789e0f4d2SKip Macy 	sc->xb_disk->d_drv1 = sc;
24889e0f4d2SKip Macy 	sc->xb_disk->d_sectorsize = sector_size;
24989e0f4d2SKip Macy 
25089e0f4d2SKip Macy 	/* XXX */
25189e0f4d2SKip Macy 	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
25289e0f4d2SKip Macy #if 0
25389e0f4d2SKip Macy 	sc->xb_disk->d_maxsize = DFLTPHYS;
25489e0f4d2SKip Macy #else /* XXX: xen can't handle large single i/o requests */
25589e0f4d2SKip Macy 	sc->xb_disk->d_maxsize = 4096;
25689e0f4d2SKip Macy #endif
25789e0f4d2SKip Macy #ifdef notyet
25889e0f4d2SKip Macy 	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
25989e0f4d2SKip Macy 		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
26089e0f4d2SKip Macy 		  sc->xb_disk->d_mediasize);
26189e0f4d2SKip Macy #endif
26289e0f4d2SKip Macy 	sc->xb_disk->d_flags = 0;
26389e0f4d2SKip Macy 	disk_create(sc->xb_disk, DISK_VERSION_00);
26489e0f4d2SKip Macy 	bioq_init(&sc->xb_bioq);
26589e0f4d2SKip Macy 
26689e0f4d2SKip Macy 	return error;
26789e0f4d2SKip Macy }
26889e0f4d2SKip Macy 
26989e0f4d2SKip Macy void
27089e0f4d2SKip Macy xlvbd_del(struct blkfront_info *info)
27189e0f4d2SKip Macy {
27289e0f4d2SKip Macy 	struct xb_softc	*sc;
27389e0f4d2SKip Macy 
27489e0f4d2SKip Macy 	sc = info->sc;
27589e0f4d2SKip Macy 	disk_destroy(sc->xb_disk);
27689e0f4d2SKip Macy }
27789e0f4d2SKip Macy /************************ end VBD support *****************/
27889e0f4d2SKip Macy 
27989e0f4d2SKip Macy /*
28089e0f4d2SKip Macy  * Read/write routine for a buffer.  Finds the proper unit, place it on
28189e0f4d2SKip Macy  * the sortq and kick the controller.
28289e0f4d2SKip Macy  */
28389e0f4d2SKip Macy static void
28489e0f4d2SKip Macy xb_strategy(struct bio *bp)
28589e0f4d2SKip Macy {
28689e0f4d2SKip Macy 	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;
28789e0f4d2SKip Macy 
28889e0f4d2SKip Macy 	/* bogus disk? */
28989e0f4d2SKip Macy 	if (sc == NULL) {
29089e0f4d2SKip Macy 		bp->bio_error = EINVAL;
29189e0f4d2SKip Macy 		bp->bio_flags |= BIO_ERROR;
29289e0f4d2SKip Macy 		goto bad;
29389e0f4d2SKip Macy 	}
29489e0f4d2SKip Macy 
29589e0f4d2SKip Macy 	DPRINTK("");
29689e0f4d2SKip Macy 
29789e0f4d2SKip Macy 	/*
29889e0f4d2SKip Macy 	 * Place it in the queue of disk activities for this disk
29989e0f4d2SKip Macy 	 */
30089e0f4d2SKip Macy 	mtx_lock(&blkif_io_lock);
30189e0f4d2SKip Macy 
3028698b76cSKip Macy 	bioq_disksort(&sc->xb_bioq, bp);
30389e0f4d2SKip Macy 	xb_startio(sc);
3048698b76cSKip Macy 
30589e0f4d2SKip Macy 	mtx_unlock(&blkif_io_lock);
30689e0f4d2SKip Macy 	return;
30789e0f4d2SKip Macy 
30889e0f4d2SKip Macy  bad:
30989e0f4d2SKip Macy 	/*
31089e0f4d2SKip Macy 	 * Correctly set the bio to indicate a failed tranfer.
31189e0f4d2SKip Macy 	 */
31289e0f4d2SKip Macy 	bp->bio_resid = bp->bio_bcount;
31389e0f4d2SKip Macy 	biodone(bp);
31489e0f4d2SKip Macy 	return;
31589e0f4d2SKip Macy }
31689e0f4d2SKip Macy 
3178698b76cSKip Macy static void xb_quiesce(struct blkfront_info *info);
3188698b76cSKip Macy // Quiesce the disk writes for a dump file before allowing the next buffer.
3198698b76cSKip Macy static void
3208698b76cSKip Macy xb_quiesce(struct blkfront_info *info)
3218698b76cSKip Macy {
3228698b76cSKip Macy 	int		mtd;
3238698b76cSKip Macy 
3248698b76cSKip Macy 	// While there are outstanding requests
3258698b76cSKip Macy 	while (blkif_queued_requests) {
3268698b76cSKip Macy 		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, mtd);
3278698b76cSKip Macy 		if (mtd) {
3288698b76cSKip Macy 			// Recieved request completions, update queue.
3298698b76cSKip Macy 			blkif_int(info);
3308698b76cSKip Macy 		}
3318698b76cSKip Macy 		if (blkif_queued_requests) {
3328698b76cSKip Macy 			// Still pending requests, wait for the disk i/o to complete
333be7747b4SKip Macy 			HYPERVISOR_yield();
3348698b76cSKip Macy 		}
3358698b76cSKip Macy 	}
3368698b76cSKip Macy }
3378698b76cSKip Macy 
3388698b76cSKip Macy // Some bio structures for dumping core
3398698b76cSKip Macy #define DUMP_BIO_NO 16				// 16 * 4KB = 64KB dump block
3408698b76cSKip Macy static	struct bio		xb_dump_bp[DUMP_BIO_NO];
3418698b76cSKip Macy 
3428698b76cSKip Macy // Kernel dump function for a paravirtualized disk device
3438698b76cSKip Macy static int
3448698b76cSKip Macy xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
3458698b76cSKip Macy         size_t length)
3468698b76cSKip Macy {
3478698b76cSKip Macy 			int				 sbp;
3488698b76cSKip Macy   			int			     mbp;
3498698b76cSKip Macy 			size_t			 chunk;
3508698b76cSKip Macy 	struct	disk   			*dp = arg;
3518698b76cSKip Macy 	struct	xb_softc		*sc = (struct xb_softc *) dp->d_drv1;
3528698b76cSKip Macy 	        int	    		 rc = 0;
3538698b76cSKip Macy 
3548698b76cSKip Macy 	xb_quiesce(sc->xb_info);		// All quiet on the western front.
3558698b76cSKip Macy 	if (length > 0) {
3568698b76cSKip Macy 		// If this lock is held, then this module is failing, and a successful
3578698b76cSKip Macy 		// kernel dump is highly unlikely anyway.
3588698b76cSKip Macy 		mtx_lock(&blkif_io_lock);
3598698b76cSKip Macy 		// Split the 64KB block into 16 4KB blocks
3608698b76cSKip Macy 		for (sbp=0; length>0 && sbp<DUMP_BIO_NO; sbp++) {
3618698b76cSKip Macy 			chunk = length > PAGE_SIZE ? PAGE_SIZE : length;
3628698b76cSKip Macy 			xb_dump_bp[sbp].bio_disk   = dp;
3638698b76cSKip Macy 			xb_dump_bp[sbp].bio_pblkno = offset / dp->d_sectorsize;
3648698b76cSKip Macy 			xb_dump_bp[sbp].bio_bcount = chunk;
3658698b76cSKip Macy 			xb_dump_bp[sbp].bio_resid  = chunk;
3668698b76cSKip Macy 			xb_dump_bp[sbp].bio_data   = virtual;
3678698b76cSKip Macy 			xb_dump_bp[sbp].bio_cmd    = BIO_WRITE;
3688698b76cSKip Macy 			xb_dump_bp[sbp].bio_done   = NULL;
3698698b76cSKip Macy 
3708698b76cSKip Macy 			bioq_disksort(&sc->xb_bioq, &xb_dump_bp[sbp]);
3718698b76cSKip Macy 
3728698b76cSKip Macy 			length -= chunk;
3738698b76cSKip Macy 			offset += chunk;
3748698b76cSKip Macy 			virtual = (char *) virtual + chunk;
3758698b76cSKip Macy 		}
3768698b76cSKip Macy 		// Tell DOM0 to do the I/O
3778698b76cSKip Macy 		xb_startio(sc);
3788698b76cSKip Macy 		mtx_unlock(&blkif_io_lock);
3798698b76cSKip Macy 
3808698b76cSKip Macy 		// Must wait for the completion: the dump routine reuses the same
3818698b76cSKip Macy 		//                               16 x 4KB buffer space.
3828698b76cSKip Macy 		xb_quiesce(sc->xb_info);	// All quite on the eastern front
3838698b76cSKip Macy 		// If there were any errors, bail out...
3848698b76cSKip Macy 		for (mbp=0; mbp<sbp; mbp++) {
3858698b76cSKip Macy 			if ((rc = xb_dump_bp[mbp].bio_error)) break;
3868698b76cSKip Macy 		}
3878698b76cSKip Macy 	}
3888698b76cSKip Macy 	return (rc);
3898698b76cSKip Macy }
3908698b76cSKip Macy 
3918698b76cSKip Macy 
39223dc5621SKip Macy static int
39323dc5621SKip Macy blkfront_probe(device_t dev)
39489e0f4d2SKip Macy {
39523dc5621SKip Macy 
39623dc5621SKip Macy 	if (!strcmp(xenbus_get_type(dev), "vbd")) {
39723dc5621SKip Macy 		device_set_desc(dev, "Virtual Block Device");
39823dc5621SKip Macy 		device_quiet(dev);
39923dc5621SKip Macy 		return (0);
40023dc5621SKip Macy 	}
40123dc5621SKip Macy 
40223dc5621SKip Macy 	return (ENXIO);
40323dc5621SKip Macy }
40423dc5621SKip Macy 
40523dc5621SKip Macy /*
40623dc5621SKip Macy  * Setup supplies the backend dir, virtual device.  We place an event
40723dc5621SKip Macy  * channel and shared frame entries.  We watch backend to wait if it's
40823dc5621SKip Macy  * ok.
40923dc5621SKip Macy  */
41023dc5621SKip Macy static int
41123dc5621SKip Macy blkfront_attach(device_t dev)
41223dc5621SKip Macy {
41312678024SDoug Rabson 	int error, vdevice, i, unit;
41489e0f4d2SKip Macy 	struct blkfront_info *info;
41523dc5621SKip Macy 	const char *name;
41689e0f4d2SKip Macy 
41789e0f4d2SKip Macy 	/* FIXME: Use dynamic device id if this is not set. */
41812678024SDoug Rabson 	error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
4193a6d1fcfSKip Macy 	    "virtual-device", NULL, "%i", &vdevice);
42012678024SDoug Rabson 	if (error) {
42112678024SDoug Rabson 		xenbus_dev_fatal(dev, error, "reading virtual-device");
4225d254c04SKip Macy 		printf("couldn't find virtual device");
42312678024SDoug Rabson 		return (error);
42489e0f4d2SKip Macy 	}
42589e0f4d2SKip Macy 
42623dc5621SKip Macy 	blkfront_vdevice_to_unit(vdevice, &unit, &name);
42723dc5621SKip Macy 	if (!strcmp(name, "xbd"))
42823dc5621SKip Macy 		device_set_unit(dev, unit);
42923dc5621SKip Macy 
43023dc5621SKip Macy 	info = device_get_softc(dev);
43189e0f4d2SKip Macy 
43289e0f4d2SKip Macy 	/*
43389e0f4d2SKip Macy 	 * XXX debug only
43489e0f4d2SKip Macy 	 */
43589e0f4d2SKip Macy 	for (i = 0; i < sizeof(*info); i++)
43689e0f4d2SKip Macy 			if (((uint8_t *)info)[i] != 0)
43789e0f4d2SKip Macy 					panic("non-null memory");
43889e0f4d2SKip Macy 
43989e0f4d2SKip Macy 	info->shadow_free = 0;
44089e0f4d2SKip Macy 	info->xbdev = dev;
44189e0f4d2SKip Macy 	info->vdevice = vdevice;
44289e0f4d2SKip Macy 	info->connected = BLKIF_STATE_DISCONNECTED;
44389e0f4d2SKip Macy 
44489e0f4d2SKip Macy 	/* work queue needed ? */
44589e0f4d2SKip Macy 	for (i = 0; i < BLK_RING_SIZE; i++)
44689e0f4d2SKip Macy 		info->shadow[i].req.id = i+1;
44789e0f4d2SKip Macy 	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
44889e0f4d2SKip Macy 
44989e0f4d2SKip Macy 	/* Front end dir is a number, which is used as the id. */
45023dc5621SKip Macy 	info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
45189e0f4d2SKip Macy 
45212678024SDoug Rabson 	error = talk_to_backend(dev, info);
45312678024SDoug Rabson 	if (error)
45412678024SDoug Rabson 		return (error);
45512678024SDoug Rabson 
45612678024SDoug Rabson 	return (0);
45712678024SDoug Rabson }
45812678024SDoug Rabson 
45912678024SDoug Rabson static int
46012678024SDoug Rabson blkfront_suspend(device_t dev)
46112678024SDoug Rabson {
46212678024SDoug Rabson 	struct blkfront_info *info = device_get_softc(dev);
46312678024SDoug Rabson 
46412678024SDoug Rabson 	/* Prevent new requests being issued until we fix things up. */
46512678024SDoug Rabson 	mtx_lock(&blkif_io_lock);
46612678024SDoug Rabson 	info->connected = BLKIF_STATE_SUSPENDED;
46712678024SDoug Rabson 	mtx_unlock(&blkif_io_lock);
46889e0f4d2SKip Macy 
46923dc5621SKip Macy 	return (0);
47089e0f4d2SKip Macy }
47189e0f4d2SKip Macy 
47223dc5621SKip Macy static int
47323dc5621SKip Macy blkfront_resume(device_t dev)
47489e0f4d2SKip Macy {
47523dc5621SKip Macy 	struct blkfront_info *info = device_get_softc(dev);
47689e0f4d2SKip Macy 	int err;
47789e0f4d2SKip Macy 
47812678024SDoug Rabson 	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
47989e0f4d2SKip Macy 
48089e0f4d2SKip Macy 	blkif_free(info, 1);
48189e0f4d2SKip Macy 	err = talk_to_backend(dev, info);
4823a6d1fcfSKip Macy 	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
48389e0f4d2SKip Macy 		blkif_recover(info);
48489e0f4d2SKip Macy 
48512678024SDoug Rabson 	return (err);
48689e0f4d2SKip Macy }
48789e0f4d2SKip Macy 
48889e0f4d2SKip Macy /* Common code used when first setting up, and when resuming. */
48923dc5621SKip Macy static int
49023dc5621SKip Macy talk_to_backend(device_t dev, struct blkfront_info *info)
49189e0f4d2SKip Macy {
49289e0f4d2SKip Macy 	const char *message = NULL;
49389e0f4d2SKip Macy 	struct xenbus_transaction xbt;
49489e0f4d2SKip Macy 	int err;
49589e0f4d2SKip Macy 
49689e0f4d2SKip Macy 	/* Create shared ring, alloc event channel. */
49789e0f4d2SKip Macy 	err = setup_blkring(dev, info);
49889e0f4d2SKip Macy 	if (err)
49989e0f4d2SKip Macy 		goto out;
50089e0f4d2SKip Macy 
50189e0f4d2SKip Macy  again:
50289e0f4d2SKip Macy 	err = xenbus_transaction_start(&xbt);
50389e0f4d2SKip Macy 	if (err) {
50489e0f4d2SKip Macy 		xenbus_dev_fatal(dev, err, "starting transaction");
50589e0f4d2SKip Macy 		goto destroy_blkring;
50689e0f4d2SKip Macy 	}
50789e0f4d2SKip Macy 
50823dc5621SKip Macy 	err = xenbus_printf(xbt, xenbus_get_node(dev),
50989e0f4d2SKip Macy 			    "ring-ref","%u", info->ring_ref);
51089e0f4d2SKip Macy 	if (err) {
51189e0f4d2SKip Macy 		message = "writing ring-ref";
51289e0f4d2SKip Macy 		goto abort_transaction;
51389e0f4d2SKip Macy 	}
51423dc5621SKip Macy 	err = xenbus_printf(xbt, xenbus_get_node(dev),
51589e0f4d2SKip Macy 		"event-channel", "%u", irq_to_evtchn_port(info->irq));
51689e0f4d2SKip Macy 	if (err) {
51789e0f4d2SKip Macy 		message = "writing event-channel";
51889e0f4d2SKip Macy 		goto abort_transaction;
51989e0f4d2SKip Macy 	}
52023dc5621SKip Macy 	err = xenbus_printf(xbt, xenbus_get_node(dev),
52123dc5621SKip Macy 		"protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
52223dc5621SKip Macy 	if (err) {
52323dc5621SKip Macy 		message = "writing protocol";
52423dc5621SKip Macy 		goto abort_transaction;
52523dc5621SKip Macy 	}
52612678024SDoug Rabson 
52789e0f4d2SKip Macy 	err = xenbus_transaction_end(xbt, 0);
52889e0f4d2SKip Macy 	if (err) {
5293a6d1fcfSKip Macy 		if (err == EAGAIN)
53089e0f4d2SKip Macy 			goto again;
53189e0f4d2SKip Macy 		xenbus_dev_fatal(dev, err, "completing transaction");
53289e0f4d2SKip Macy 		goto destroy_blkring;
53389e0f4d2SKip Macy 	}
53423dc5621SKip Macy 	xenbus_set_state(dev, XenbusStateInitialised);
53589e0f4d2SKip Macy 
53689e0f4d2SKip Macy 	return 0;
53789e0f4d2SKip Macy 
53889e0f4d2SKip Macy  abort_transaction:
53989e0f4d2SKip Macy 	xenbus_transaction_end(xbt, 1);
54089e0f4d2SKip Macy 	if (message)
54189e0f4d2SKip Macy 		xenbus_dev_fatal(dev, err, "%s", message);
54289e0f4d2SKip Macy  destroy_blkring:
54389e0f4d2SKip Macy 	blkif_free(info, 0);
54489e0f4d2SKip Macy  out:
54589e0f4d2SKip Macy 	return err;
54689e0f4d2SKip Macy }
54789e0f4d2SKip Macy 
54889e0f4d2SKip Macy static int
54923dc5621SKip Macy setup_blkring(device_t dev, struct blkfront_info *info)
55089e0f4d2SKip Macy {
55189e0f4d2SKip Macy 	blkif_sring_t *sring;
5523a6d1fcfSKip Macy 	int error;
55389e0f4d2SKip Macy 
55489e0f4d2SKip Macy 	info->ring_ref = GRANT_INVALID_REF;
55589e0f4d2SKip Macy 
55689e0f4d2SKip Macy 	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
55789e0f4d2SKip Macy 	if (sring == NULL) {
55889e0f4d2SKip Macy 		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
55989e0f4d2SKip Macy 		return ENOMEM;
56089e0f4d2SKip Macy 	}
56189e0f4d2SKip Macy 	SHARED_RING_INIT(sring);
56289e0f4d2SKip Macy 	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
56389e0f4d2SKip Macy 
56412678024SDoug Rabson 	error = xenbus_grant_ring(dev,
56512678024SDoug Rabson 	    (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
5663a6d1fcfSKip Macy 	if (error) {
56789e0f4d2SKip Macy 		free(sring, M_DEVBUF);
56889e0f4d2SKip Macy 		info->ring.sring = NULL;
56989e0f4d2SKip Macy 		goto fail;
57089e0f4d2SKip Macy 	}
57189e0f4d2SKip Macy 
5723a6d1fcfSKip Macy 	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
57389e0f4d2SKip Macy 	    "xbd", (driver_intr_t *)blkif_int, info,
5743a6d1fcfSKip Macy 	    INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
5753a6d1fcfSKip Macy 	if (error) {
5763a6d1fcfSKip Macy 		xenbus_dev_fatal(dev, error,
57789e0f4d2SKip Macy 		    "bind_evtchn_to_irqhandler failed");
57889e0f4d2SKip Macy 		goto fail;
57989e0f4d2SKip Macy 	}
58089e0f4d2SKip Macy 
5813a6d1fcfSKip Macy 	return (0);
58289e0f4d2SKip Macy  fail:
58389e0f4d2SKip Macy 	blkif_free(info, 0);
5843a6d1fcfSKip Macy 	return (error);
58589e0f4d2SKip Macy }
58689e0f4d2SKip Macy 
58789e0f4d2SKip Macy 
58889e0f4d2SKip Macy /**
58989e0f4d2SKip Macy  * Callback received when the backend's state changes.
59089e0f4d2SKip Macy  */
591cfed3783SKip Macy static int
59223dc5621SKip Macy blkfront_backend_changed(device_t dev, XenbusState backend_state)
59389e0f4d2SKip Macy {
59423dc5621SKip Macy 	struct blkfront_info *info = device_get_softc(dev);
59589e0f4d2SKip Macy 
59612678024SDoug Rabson 	DPRINTK("backend_state=%d\n", backend_state);
59789e0f4d2SKip Macy 
59889e0f4d2SKip Macy 	switch (backend_state) {
59989e0f4d2SKip Macy 	case XenbusStateUnknown:
60089e0f4d2SKip Macy 	case XenbusStateInitialising:
60189e0f4d2SKip Macy 	case XenbusStateInitWait:
60289e0f4d2SKip Macy 	case XenbusStateInitialised:
60389e0f4d2SKip Macy 	case XenbusStateClosed:
604920ba15bSKip Macy 	case XenbusStateReconfigured:
605920ba15bSKip Macy 	case XenbusStateReconfiguring:
60689e0f4d2SKip Macy 		break;
60789e0f4d2SKip Macy 
60889e0f4d2SKip Macy 	case XenbusStateConnected:
60923dc5621SKip Macy 		connect(dev, info);
61089e0f4d2SKip Macy 		break;
61189e0f4d2SKip Macy 
61289e0f4d2SKip Macy 	case XenbusStateClosing:
61389e0f4d2SKip Macy 		if (info->users > 0)
61489e0f4d2SKip Macy 			xenbus_dev_error(dev, -EBUSY,
61589e0f4d2SKip Macy 					 "Device in use; refusing to close");
61689e0f4d2SKip Macy 		else
61789e0f4d2SKip Macy 			blkfront_closing(dev);
61889e0f4d2SKip Macy #ifdef notyet
61989e0f4d2SKip Macy 		bd = bdget(info->dev);
62089e0f4d2SKip Macy 		if (bd == NULL)
62189e0f4d2SKip Macy 			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
62289e0f4d2SKip Macy 
62389e0f4d2SKip Macy 		down(&bd->bd_sem);
62489e0f4d2SKip Macy 		if (info->users > 0)
62589e0f4d2SKip Macy 			xenbus_dev_error(dev, -EBUSY,
62689e0f4d2SKip Macy 					 "Device in use; refusing to close");
62789e0f4d2SKip Macy 		else
62889e0f4d2SKip Macy 			blkfront_closing(dev);
62989e0f4d2SKip Macy 		up(&bd->bd_sem);
63089e0f4d2SKip Macy 		bdput(bd);
63189e0f4d2SKip Macy #endif
63289e0f4d2SKip Macy 	}
633cfed3783SKip Macy 
634cfed3783SKip Macy 	return (0);
63589e0f4d2SKip Macy }
63689e0f4d2SKip Macy 
63789e0f4d2SKip Macy /*
63889e0f4d2SKip Macy ** Invoked when the backend is finally 'ready' (and has told produced
63989e0f4d2SKip Macy ** the details about the physical device - #sectors, size, etc).
64089e0f4d2SKip Macy */
64189e0f4d2SKip Macy static void
64223dc5621SKip Macy connect(device_t dev, struct blkfront_info *info)
64389e0f4d2SKip Macy {
64489e0f4d2SKip Macy 	unsigned long sectors, sector_size;
64589e0f4d2SKip Macy 	unsigned int binfo;
64689e0f4d2SKip Macy 	int err;
64789e0f4d2SKip Macy 
64889e0f4d2SKip Macy         if( (info->connected == BLKIF_STATE_CONNECTED) ||
64989e0f4d2SKip Macy 	    (info->connected == BLKIF_STATE_SUSPENDED) )
65089e0f4d2SKip Macy 		return;
65189e0f4d2SKip Macy 
65223dc5621SKip Macy 	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
65389e0f4d2SKip Macy 
65423dc5621SKip Macy 	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
65589e0f4d2SKip Macy 			    "sectors", "%lu", &sectors,
65689e0f4d2SKip Macy 			    "info", "%u", &binfo,
65789e0f4d2SKip Macy 			    "sector-size", "%lu", &sector_size,
65889e0f4d2SKip Macy 			    NULL);
65989e0f4d2SKip Macy 	if (err) {
66023dc5621SKip Macy 		xenbus_dev_fatal(dev, err,
66189e0f4d2SKip Macy 		    "reading backend fields at %s",
66223dc5621SKip Macy 		    xenbus_get_otherend_path(dev));
66389e0f4d2SKip Macy 		return;
66489e0f4d2SKip Macy 	}
66523dc5621SKip Macy 	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
66689e0f4d2SKip Macy 			    "feature-barrier", "%lu", &info->feature_barrier,
66789e0f4d2SKip Macy 			    NULL);
66889e0f4d2SKip Macy 	if (err)
66989e0f4d2SKip Macy 		info->feature_barrier = 0;
67089e0f4d2SKip Macy 
67123dc5621SKip Macy 	device_printf(dev, "%juMB <%s> at %s",
67223dc5621SKip Macy 	    (uintmax_t) sectors / (1048576 / sector_size),
67323dc5621SKip Macy 	    device_get_desc(dev),
67423dc5621SKip Macy 	    xenbus_get_node(dev));
67523dc5621SKip Macy 	bus_print_child_footer(device_get_parent(dev), dev);
67689e0f4d2SKip Macy 
67723dc5621SKip Macy 	xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info);
67823dc5621SKip Macy 
67923dc5621SKip Macy 	(void)xenbus_set_state(dev, XenbusStateConnected);
68089e0f4d2SKip Macy 
68189e0f4d2SKip Macy 	/* Kick pending requests. */
68289e0f4d2SKip Macy 	mtx_lock(&blkif_io_lock);
68389e0f4d2SKip Macy 	info->connected = BLKIF_STATE_CONNECTED;
68489e0f4d2SKip Macy 	kick_pending_request_queues(info);
68589e0f4d2SKip Macy 	mtx_unlock(&blkif_io_lock);
6864615bad1SKip Macy 	info->is_ready = 1;
68789e0f4d2SKip Macy 
68889e0f4d2SKip Macy #if 0
68989e0f4d2SKip Macy 	add_disk(info->gd);
69089e0f4d2SKip Macy #endif
69189e0f4d2SKip Macy }
69289e0f4d2SKip Macy 
69389e0f4d2SKip Macy /**
69489e0f4d2SKip Macy  * Handle the change of state of the backend to Closing.  We must delete our
69589e0f4d2SKip Macy  * device-layer structures now, to ensure that writes are flushed through to
69689e0f4d2SKip Macy  * the backend.  Once is this done, we can switch to Closed in
69789e0f4d2SKip Macy  * acknowledgement.
69889e0f4d2SKip Macy  */
69923dc5621SKip Macy static void
70023dc5621SKip Macy blkfront_closing(device_t dev)
70189e0f4d2SKip Macy {
70223dc5621SKip Macy 	struct blkfront_info *info = device_get_softc(dev);
70389e0f4d2SKip Macy 
70423dc5621SKip Macy 	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));
70589e0f4d2SKip Macy 
70689e0f4d2SKip Macy 	if (info->mi) {
70789e0f4d2SKip Macy 		DPRINTK("Calling xlvbd_del\n");
70889e0f4d2SKip Macy 		xlvbd_del(info);
70989e0f4d2SKip Macy 		info->mi = NULL;
71089e0f4d2SKip Macy 	}
71189e0f4d2SKip Macy 
71223dc5621SKip Macy 	xenbus_set_state(dev, XenbusStateClosed);
71389e0f4d2SKip Macy }
71489e0f4d2SKip Macy 
71589e0f4d2SKip Macy 
71623dc5621SKip Macy static int
71723dc5621SKip Macy blkfront_detach(device_t dev)
71889e0f4d2SKip Macy {
71923dc5621SKip Macy 	struct blkfront_info *info = device_get_softc(dev);
72089e0f4d2SKip Macy 
72123dc5621SKip Macy 	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
72289e0f4d2SKip Macy 
72389e0f4d2SKip Macy 	blkif_free(info, 0);
72489e0f4d2SKip Macy 
72589e0f4d2SKip Macy 	return 0;
72689e0f4d2SKip Macy }
72789e0f4d2SKip Macy 
72889e0f4d2SKip Macy 
72989e0f4d2SKip Macy static inline int
73089e0f4d2SKip Macy GET_ID_FROM_FREELIST(struct blkfront_info *info)
73189e0f4d2SKip Macy {
73289e0f4d2SKip Macy 	unsigned long nfree = info->shadow_free;
73389e0f4d2SKip Macy 
73489e0f4d2SKip Macy 	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
73589e0f4d2SKip Macy 	info->shadow_free = info->shadow[nfree].req.id;
73689e0f4d2SKip Macy 	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
7378698b76cSKip Macy 	atomic_add_int(&blkif_queued_requests, 1);
73889e0f4d2SKip Macy 	return nfree;
73989e0f4d2SKip Macy }
74089e0f4d2SKip Macy 
74189e0f4d2SKip Macy static inline void
74289e0f4d2SKip Macy ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
74389e0f4d2SKip Macy {
74489e0f4d2SKip Macy 	info->shadow[id].req.id  = info->shadow_free;
74589e0f4d2SKip Macy 	info->shadow[id].request = 0;
74689e0f4d2SKip Macy 	info->shadow_free = id;
7478698b76cSKip Macy 	atomic_subtract_int(&blkif_queued_requests, 1);
74889e0f4d2SKip Macy }
74989e0f4d2SKip Macy 
75089e0f4d2SKip Macy static inline void
75189e0f4d2SKip Macy flush_requests(struct blkfront_info *info)
75289e0f4d2SKip Macy {
75389e0f4d2SKip Macy 	int notify;
75489e0f4d2SKip Macy 
75589e0f4d2SKip Macy 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
75689e0f4d2SKip Macy 
75789e0f4d2SKip Macy 	if (notify)
75889e0f4d2SKip Macy 		notify_remote_via_irq(info->irq);
75989e0f4d2SKip Macy }
76089e0f4d2SKip Macy 
76189e0f4d2SKip Macy static void
76289e0f4d2SKip Macy kick_pending_request_queues(struct blkfront_info *info)
76389e0f4d2SKip Macy {
76489e0f4d2SKip Macy 	/* XXX check if we can't simplify */
76589e0f4d2SKip Macy #if 0
76689e0f4d2SKip Macy 	if (!RING_FULL(&info->ring)) {
76789e0f4d2SKip Macy 		/* Re-enable calldowns. */
76889e0f4d2SKip Macy 		blk_start_queue(info->rq);
76989e0f4d2SKip Macy 		/* Kick things off immediately. */
77089e0f4d2SKip Macy 		do_blkif_request(info->rq);
77189e0f4d2SKip Macy 	}
77289e0f4d2SKip Macy #endif
77389e0f4d2SKip Macy 	if (!RING_FULL(&info->ring)) {
77489e0f4d2SKip Macy #if 0
77589e0f4d2SKip Macy 		sc = LIST_FIRST(&xbsl_head);
77689e0f4d2SKip Macy 		LIST_REMOVE(sc, entry);
77789e0f4d2SKip Macy 		/* Re-enable calldowns. */
77889e0f4d2SKip Macy 		blk_start_queue(di->rq);
77989e0f4d2SKip Macy #endif
78089e0f4d2SKip Macy 		/* Kick things off immediately. */
78189e0f4d2SKip Macy 		xb_startio(info->sc);
78289e0f4d2SKip Macy 	}
78389e0f4d2SKip Macy }
78489e0f4d2SKip Macy 
#if 0
/*
 * XXX Compiled out.  Would restart the request queue for 'arg' (a
 * struct blkfront_info) under blkif_io_lock.
 */
static void
blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif
79689e0f4d2SKip Macy 
/*
 * Callback registered with gnttab_request_free_callback() by
 * blkif_queue_request().  It is currently a no-op: the only body is
 * compiled out.
 */
static void
blkif_restart_queue_callback(void *arg)
{
#if 0
	/* XXX BSD equiv ? */
	struct blkfront_info *info = arg;

	schedule_work(&info->work);
#endif
}
80689e0f4d2SKip Macy 
80789e0f4d2SKip Macy static int
80889e0f4d2SKip Macy blkif_open(struct disk *dp)
80989e0f4d2SKip Macy {
81089e0f4d2SKip Macy 	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;
81189e0f4d2SKip Macy 
81289e0f4d2SKip Macy 	if (sc == NULL) {
81312678024SDoug Rabson 		printf("xb%d: not found", sc->xb_unit);
81489e0f4d2SKip Macy 		return (ENXIO);
81589e0f4d2SKip Macy 	}
81689e0f4d2SKip Macy 
81789e0f4d2SKip Macy 	sc->xb_flags |= XB_OPEN;
81889e0f4d2SKip Macy 	sc->xb_info->users++;
81989e0f4d2SKip Macy 	return (0);
82089e0f4d2SKip Macy }
82189e0f4d2SKip Macy 
82289e0f4d2SKip Macy static int
82389e0f4d2SKip Macy blkif_close(struct disk *dp)
82489e0f4d2SKip Macy {
82589e0f4d2SKip Macy 	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;
82689e0f4d2SKip Macy 
82789e0f4d2SKip Macy 	if (sc == NULL)
82889e0f4d2SKip Macy 		return (ENXIO);
82989e0f4d2SKip Macy 	sc->xb_flags &= ~XB_OPEN;
83089e0f4d2SKip Macy 	if (--(sc->xb_info->users) == 0) {
83189e0f4d2SKip Macy 		/* Check whether we have been instructed to close.  We will
83289e0f4d2SKip Macy 		   have ignored this request initially, as the device was
83389e0f4d2SKip Macy 		   still mounted. */
83423dc5621SKip Macy 		device_t dev = sc->xb_info->xbdev;
83523dc5621SKip Macy 		XenbusState state =
83623dc5621SKip Macy 			xenbus_read_driver_state(xenbus_get_otherend_path(dev));
83789e0f4d2SKip Macy 
83889e0f4d2SKip Macy 		if (state == XenbusStateClosing)
83989e0f4d2SKip Macy 			blkfront_closing(dev);
84089e0f4d2SKip Macy 	}
84189e0f4d2SKip Macy 	return (0);
84289e0f4d2SKip Macy }
84389e0f4d2SKip Macy 
84489e0f4d2SKip Macy static int
84589e0f4d2SKip Macy blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
84689e0f4d2SKip Macy {
84789e0f4d2SKip Macy 	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;
84889e0f4d2SKip Macy 
84989e0f4d2SKip Macy 	if (sc == NULL)
85089e0f4d2SKip Macy 		return (ENXIO);
85189e0f4d2SKip Macy 
85289e0f4d2SKip Macy 	return (ENOTTY);
85389e0f4d2SKip Macy }
85489e0f4d2SKip Macy 
85589e0f4d2SKip Macy 
85689e0f4d2SKip Macy /*
85789e0f4d2SKip Macy  * blkif_queue_request
85889e0f4d2SKip Macy  *
85989e0f4d2SKip Macy  * request block io
86089e0f4d2SKip Macy  *
86189e0f4d2SKip Macy  * id: for guest use only.
86289e0f4d2SKip Macy  * operation: BLKIF_OP_{READ,WRITE,PROBE}
86389e0f4d2SKip Macy  * buffer: buffer to read/write into. this should be a
86489e0f4d2SKip Macy  *   virtual address in the guest os.
86589e0f4d2SKip Macy  */
86689e0f4d2SKip Macy static int blkif_queue_request(struct bio *bp)
86789e0f4d2SKip Macy {
86889e0f4d2SKip Macy 	caddr_t alignbuf;
8695d254c04SKip Macy 	vm_paddr_t buffer_ma;
87089e0f4d2SKip Macy 	blkif_request_t     *ring_req;
87189e0f4d2SKip Macy 	unsigned long id;
8725d254c04SKip Macy 	uint64_t fsect, lsect;
87389e0f4d2SKip Macy 	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
87489e0f4d2SKip Macy 	struct blkfront_info *info = sc->xb_info;
87589e0f4d2SKip Macy 	int ref;
87689e0f4d2SKip Macy 
87789e0f4d2SKip Macy 	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
87889e0f4d2SKip Macy 		return 1;
87989e0f4d2SKip Macy 
88089e0f4d2SKip Macy 	if (gnttab_alloc_grant_references(
88189e0f4d2SKip Macy 		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
88289e0f4d2SKip Macy 		gnttab_request_free_callback(
88389e0f4d2SKip Macy 			&info->callback,
88489e0f4d2SKip Macy 			blkif_restart_queue_callback,
88589e0f4d2SKip Macy 			info,
88689e0f4d2SKip Macy 			BLKIF_MAX_SEGMENTS_PER_REQUEST);
88789e0f4d2SKip Macy 		return 1;
88889e0f4d2SKip Macy 	}
88989e0f4d2SKip Macy 
89089e0f4d2SKip Macy 	/* Check if the buffer is properly aligned */
89189e0f4d2SKip Macy 	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
89289e0f4d2SKip Macy 		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
89389e0f4d2SKip Macy 			PAGE_SIZE;
89489e0f4d2SKip Macy 		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
89589e0f4d2SKip Macy 					M_NOWAIT);
89689e0f4d2SKip Macy 
89789e0f4d2SKip Macy 		alignbuf = (char *)roundup2((u_long)newbuf, align);
89889e0f4d2SKip Macy 
89989e0f4d2SKip Macy 		/* save a copy of the current buffer */
90089e0f4d2SKip Macy 		bp->bio_driver1 = newbuf;
90189e0f4d2SKip Macy 		bp->bio_driver2 = alignbuf;
90289e0f4d2SKip Macy 
90389e0f4d2SKip Macy 		/* Copy the data for a write */
90489e0f4d2SKip Macy 		if (bp->bio_cmd == BIO_WRITE)
90589e0f4d2SKip Macy 			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
90689e0f4d2SKip Macy 	} else
90789e0f4d2SKip Macy 		alignbuf = bp->bio_data;
90889e0f4d2SKip Macy 
90989e0f4d2SKip Macy 	/* Fill out a communications ring structure. */
91089e0f4d2SKip Macy 	ring_req 	         = RING_GET_REQUEST(&info->ring,
91189e0f4d2SKip Macy 						    info->ring.req_prod_pvt);
91289e0f4d2SKip Macy 	id		         = GET_ID_FROM_FREELIST(info);
91389e0f4d2SKip Macy 	info->shadow[id].request = (unsigned long)bp;
91489e0f4d2SKip Macy 
91589e0f4d2SKip Macy 	ring_req->id 	         = id;
91689e0f4d2SKip Macy 	ring_req->operation 	 = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
91789e0f4d2SKip Macy 		BLKIF_OP_WRITE;
91889e0f4d2SKip Macy 
91989e0f4d2SKip Macy 	ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno;
92089e0f4d2SKip Macy 	ring_req->handle 	  = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
92189e0f4d2SKip Macy 
92289e0f4d2SKip Macy 	ring_req->nr_segments  = 0;	/* XXX not doing scatter/gather since buffer
92389e0f4d2SKip Macy 					 * chaining is not supported.
92489e0f4d2SKip Macy 					 */
92589e0f4d2SKip Macy 
92689e0f4d2SKip Macy 	buffer_ma = vtomach(alignbuf);
92789e0f4d2SKip Macy 	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
92889e0f4d2SKip Macy 	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
92989e0f4d2SKip Macy 	/* install a grant reference. */
93089e0f4d2SKip Macy 	ref = gnttab_claim_grant_reference(&gref_head);
93189e0f4d2SKip Macy 	KASSERT( ref != -ENOSPC, ("grant_reference failed") );
93289e0f4d2SKip Macy 
93389e0f4d2SKip Macy 	gnttab_grant_foreign_access_ref(
93489e0f4d2SKip Macy 		ref,
93523dc5621SKip Macy 		xenbus_get_otherend_id(info->xbdev),
93689e0f4d2SKip Macy 		buffer_ma >> PAGE_SHIFT,
93789e0f4d2SKip Macy 		ring_req->operation & 1 ); /* ??? */
93889e0f4d2SKip Macy 	info->shadow[id].frame[ring_req->nr_segments] =
93989e0f4d2SKip Macy 		buffer_ma >> PAGE_SHIFT;
94089e0f4d2SKip Macy 
94189e0f4d2SKip Macy 	ring_req->seg[ring_req->nr_segments] =
94289e0f4d2SKip Macy 		(struct blkif_request_segment) {
94389e0f4d2SKip Macy 			.gref       = ref,
94489e0f4d2SKip Macy 			.first_sect = fsect,
94589e0f4d2SKip Macy 			.last_sect  = lsect };
94689e0f4d2SKip Macy 
94789e0f4d2SKip Macy 	ring_req->nr_segments++;
94889e0f4d2SKip Macy 	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
94989e0f4d2SKip Macy 		("XEN buffer must be sector aligned"));
95089e0f4d2SKip Macy 	KASSERT(lsect <= 7,
95189e0f4d2SKip Macy 		("XEN disk driver data cannot cross a page boundary"));
95289e0f4d2SKip Macy 
95389e0f4d2SKip Macy 	buffer_ma &= ~PAGE_MASK;
95489e0f4d2SKip Macy 
95589e0f4d2SKip Macy 	info->ring.req_prod_pvt++;
95689e0f4d2SKip Macy 
95789e0f4d2SKip Macy 	/* Keep a private copy so we can reissue requests when recovering. */
95889e0f4d2SKip Macy 	info->shadow[id].req = *ring_req;
95989e0f4d2SKip Macy 
96089e0f4d2SKip Macy 	gnttab_free_grant_references(gref_head);
96189e0f4d2SKip Macy 
96289e0f4d2SKip Macy 	return 0;
96389e0f4d2SKip Macy }
96489e0f4d2SKip Macy 
96589e0f4d2SKip Macy 
96689e0f4d2SKip Macy 
96789e0f4d2SKip Macy /*
96889e0f4d2SKip Macy  * Dequeue buffers and place them in the shared communication ring.
96989e0f4d2SKip Macy  * Return when no more requests can be accepted or all buffers have
97089e0f4d2SKip Macy  * been queued.
97189e0f4d2SKip Macy  *
97289e0f4d2SKip Macy  * Signal XEN once the ring has been filled out.
97389e0f4d2SKip Macy  */
97489e0f4d2SKip Macy static void
97589e0f4d2SKip Macy xb_startio(struct xb_softc *sc)
97689e0f4d2SKip Macy {
97789e0f4d2SKip Macy 	struct bio		*bp;
97889e0f4d2SKip Macy 	int			queued = 0;
97989e0f4d2SKip Macy 	struct blkfront_info *info = sc->xb_info;
98089e0f4d2SKip Macy 	DPRINTK("");
98189e0f4d2SKip Macy 
98289e0f4d2SKip Macy 	mtx_assert(&blkif_io_lock, MA_OWNED);
98389e0f4d2SKip Macy 
98489e0f4d2SKip Macy 	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {
98589e0f4d2SKip Macy 
98689e0f4d2SKip Macy 		if (RING_FULL(&info->ring))
98789e0f4d2SKip Macy 			goto wait;
98889e0f4d2SKip Macy 
98989e0f4d2SKip Macy 		if (blkif_queue_request(bp)) {
99089e0f4d2SKip Macy 		wait:
99189e0f4d2SKip Macy 			bioq_insert_head(&sc->xb_bioq, bp);
99289e0f4d2SKip Macy 			break;
99389e0f4d2SKip Macy 		}
99489e0f4d2SKip Macy 		queued++;
99589e0f4d2SKip Macy 	}
99689e0f4d2SKip Macy 
99789e0f4d2SKip Macy 	if (queued != 0)
99889e0f4d2SKip Macy 		flush_requests(sc->xb_info);
99989e0f4d2SKip Macy }
100089e0f4d2SKip Macy 
100189e0f4d2SKip Macy static void
100289e0f4d2SKip Macy blkif_int(void *xsc)
100389e0f4d2SKip Macy {
100489e0f4d2SKip Macy 	struct xb_softc *sc = NULL;
100589e0f4d2SKip Macy 	struct bio *bp;
100689e0f4d2SKip Macy 	blkif_response_t *bret;
100789e0f4d2SKip Macy 	RING_IDX i, rp;
100889e0f4d2SKip Macy 	struct blkfront_info *info = xsc;
100989e0f4d2SKip Macy 	DPRINTK("");
101089e0f4d2SKip Macy 
101189e0f4d2SKip Macy 	TRACE_ENTER;
101289e0f4d2SKip Macy 
101389e0f4d2SKip Macy 	mtx_lock(&blkif_io_lock);
101489e0f4d2SKip Macy 
101589e0f4d2SKip Macy 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
101689e0f4d2SKip Macy 		mtx_unlock(&blkif_io_lock);
101789e0f4d2SKip Macy 		return;
101889e0f4d2SKip Macy 	}
101989e0f4d2SKip Macy 
102089e0f4d2SKip Macy  again:
102189e0f4d2SKip Macy 	rp = info->ring.sring->rsp_prod;
102289e0f4d2SKip Macy 	rmb(); /* Ensure we see queued responses up to 'rp'. */
102389e0f4d2SKip Macy 
102489e0f4d2SKip Macy 	for (i = info->ring.rsp_cons; i != rp; i++) {
102589e0f4d2SKip Macy 		unsigned long id;
102689e0f4d2SKip Macy 
102789e0f4d2SKip Macy 		bret = RING_GET_RESPONSE(&info->ring, i);
102889e0f4d2SKip Macy 		id   = bret->id;
102989e0f4d2SKip Macy 		bp   = (struct bio *)info->shadow[id].request;
103089e0f4d2SKip Macy 
103189e0f4d2SKip Macy 		blkif_completion(&info->shadow[id]);
103289e0f4d2SKip Macy 
103389e0f4d2SKip Macy 		ADD_ID_TO_FREELIST(info, id);
103489e0f4d2SKip Macy 
103589e0f4d2SKip Macy 		switch (bret->operation) {
103689e0f4d2SKip Macy 		case BLKIF_OP_READ:
103789e0f4d2SKip Macy 			/* had an unaligned buffer that needs to be copied */
103889e0f4d2SKip Macy 			if (bp->bio_driver1)
103989e0f4d2SKip Macy 				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
104089e0f4d2SKip Macy 			/* FALLTHROUGH */
104189e0f4d2SKip Macy 		case BLKIF_OP_WRITE:
104289e0f4d2SKip Macy 
104389e0f4d2SKip Macy 			/* free the copy buffer */
104489e0f4d2SKip Macy 			if (bp->bio_driver1) {
104589e0f4d2SKip Macy 				free(bp->bio_driver1, M_DEVBUF);
104689e0f4d2SKip Macy 				bp->bio_driver1 = NULL;
104789e0f4d2SKip Macy 			}
104889e0f4d2SKip Macy 
104989e0f4d2SKip Macy 			if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) {
10505d254c04SKip Macy 					printf("Bad return from blkdev data request: %x\n",
105189e0f4d2SKip Macy 					  bret->status);
105289e0f4d2SKip Macy 				bp->bio_flags |= BIO_ERROR;
105389e0f4d2SKip Macy 			}
105489e0f4d2SKip Macy 
105589e0f4d2SKip Macy 			sc = (struct xb_softc *)bp->bio_disk->d_drv1;
105689e0f4d2SKip Macy 
105789e0f4d2SKip Macy 			if (bp->bio_flags & BIO_ERROR)
105889e0f4d2SKip Macy 				bp->bio_error = EIO;
105989e0f4d2SKip Macy 			else
106089e0f4d2SKip Macy 				bp->bio_resid = 0;
106189e0f4d2SKip Macy 
106289e0f4d2SKip Macy 			biodone(bp);
106389e0f4d2SKip Macy 			break;
106489e0f4d2SKip Macy 		default:
106589e0f4d2SKip Macy 			panic("received invalid operation");
106689e0f4d2SKip Macy 			break;
106789e0f4d2SKip Macy 		}
106889e0f4d2SKip Macy 	}
106989e0f4d2SKip Macy 
107089e0f4d2SKip Macy 	info->ring.rsp_cons = i;
107189e0f4d2SKip Macy 
107289e0f4d2SKip Macy 	if (i != info->ring.req_prod_pvt) {
107389e0f4d2SKip Macy 		int more_to_do;
107489e0f4d2SKip Macy 		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
107589e0f4d2SKip Macy 		if (more_to_do)
107689e0f4d2SKip Macy 			goto again;
107789e0f4d2SKip Macy 	} else {
107889e0f4d2SKip Macy 		info->ring.sring->rsp_event = i + 1;
107989e0f4d2SKip Macy 	}
108089e0f4d2SKip Macy 
108189e0f4d2SKip Macy 	kick_pending_request_queues(info);
108289e0f4d2SKip Macy 
108389e0f4d2SKip Macy 	mtx_unlock(&blkif_io_lock);
108489e0f4d2SKip Macy }
108589e0f4d2SKip Macy 
108689e0f4d2SKip Macy static void
108789e0f4d2SKip Macy blkif_free(struct blkfront_info *info, int suspend)
108889e0f4d2SKip Macy {
108989e0f4d2SKip Macy 
109089e0f4d2SKip Macy /* Prevent new requests being issued until we fix things up. */
109189e0f4d2SKip Macy 	mtx_lock(&blkif_io_lock);
109289e0f4d2SKip Macy 	info->connected = suspend ?
109389e0f4d2SKip Macy 		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
109489e0f4d2SKip Macy 	mtx_unlock(&blkif_io_lock);
109589e0f4d2SKip Macy 
109689e0f4d2SKip Macy 	/* Free resources associated with old device channel. */
109789e0f4d2SKip Macy 	if (info->ring_ref != GRANT_INVALID_REF) {
1098920ba15bSKip Macy 		gnttab_end_foreign_access(info->ring_ref,
109989e0f4d2SKip Macy 					  info->ring.sring);
110089e0f4d2SKip Macy 		info->ring_ref = GRANT_INVALID_REF;
110189e0f4d2SKip Macy 		info->ring.sring = NULL;
110289e0f4d2SKip Macy 	}
110389e0f4d2SKip Macy 	if (info->irq)
11043a6d1fcfSKip Macy 		unbind_from_irqhandler(info->irq);
110589e0f4d2SKip Macy 	info->irq = 0;
110689e0f4d2SKip Macy 
110789e0f4d2SKip Macy }
110889e0f4d2SKip Macy 
110989e0f4d2SKip Macy static void
111089e0f4d2SKip Macy blkif_completion(struct blk_shadow *s)
111189e0f4d2SKip Macy {
111289e0f4d2SKip Macy 	int i;
111389e0f4d2SKip Macy 
111489e0f4d2SKip Macy 	for (i = 0; i < s->req.nr_segments; i++)
1115920ba15bSKip Macy 		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
111689e0f4d2SKip Macy }
111789e0f4d2SKip Macy 
111889e0f4d2SKip Macy static void
111989e0f4d2SKip Macy blkif_recover(struct blkfront_info *info)
112089e0f4d2SKip Macy {
112189e0f4d2SKip Macy 	int i, j;
112289e0f4d2SKip Macy 	blkif_request_t *req;
112389e0f4d2SKip Macy 	struct blk_shadow *copy;
112489e0f4d2SKip Macy 
112512678024SDoug Rabson 	if (!info->sc)
112612678024SDoug Rabson 		return;
112712678024SDoug Rabson 
112889e0f4d2SKip Macy 	/* Stage 1: Make a safe copy of the shadow state. */
112989e0f4d2SKip Macy 	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
113089e0f4d2SKip Macy 	memcpy(copy, info->shadow, sizeof(info->shadow));
113189e0f4d2SKip Macy 
113289e0f4d2SKip Macy 	/* Stage 2: Set up free list. */
113389e0f4d2SKip Macy 	memset(&info->shadow, 0, sizeof(info->shadow));
113489e0f4d2SKip Macy 	for (i = 0; i < BLK_RING_SIZE; i++)
113589e0f4d2SKip Macy 		info->shadow[i].req.id = i+1;
113689e0f4d2SKip Macy 	info->shadow_free = info->ring.req_prod_pvt;
113789e0f4d2SKip Macy 	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
113889e0f4d2SKip Macy 
113989e0f4d2SKip Macy 	/* Stage 3: Find pending requests and requeue them. */
114089e0f4d2SKip Macy 	for (i = 0; i < BLK_RING_SIZE; i++) {
114189e0f4d2SKip Macy 		/* Not in use? */
114289e0f4d2SKip Macy 		if (copy[i].request == 0)
114389e0f4d2SKip Macy 			continue;
114489e0f4d2SKip Macy 
114589e0f4d2SKip Macy 		/* Grab a request slot and copy shadow state into it. */
114689e0f4d2SKip Macy 		req = RING_GET_REQUEST(
114789e0f4d2SKip Macy 			&info->ring, info->ring.req_prod_pvt);
114889e0f4d2SKip Macy 		*req = copy[i].req;
114989e0f4d2SKip Macy 
115089e0f4d2SKip Macy 		/* We get a new request id, and must reset the shadow state. */
115189e0f4d2SKip Macy 		req->id = GET_ID_FROM_FREELIST(info);
115289e0f4d2SKip Macy 		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
115389e0f4d2SKip Macy 
115489e0f4d2SKip Macy 		/* Rewrite any grant references invalidated by suspend/resume. */
115589e0f4d2SKip Macy 		for (j = 0; j < req->nr_segments; j++)
115689e0f4d2SKip Macy 			gnttab_grant_foreign_access_ref(
115789e0f4d2SKip Macy 				req->seg[j].gref,
115823dc5621SKip Macy 				xenbus_get_otherend_id(info->xbdev),
115989e0f4d2SKip Macy 				pfn_to_mfn(info->shadow[req->id].frame[j]),
116089e0f4d2SKip Macy 				0 /* assume not readonly */);
116189e0f4d2SKip Macy 
116289e0f4d2SKip Macy 		info->shadow[req->id].req = *req;
116389e0f4d2SKip Macy 
116489e0f4d2SKip Macy 		info->ring.req_prod_pvt++;
116589e0f4d2SKip Macy 	}
116689e0f4d2SKip Macy 
116789e0f4d2SKip Macy 	free(copy, M_DEVBUF);
116889e0f4d2SKip Macy 
116923dc5621SKip Macy 	xenbus_set_state(info->xbdev, XenbusStateConnected);
117089e0f4d2SKip Macy 
117189e0f4d2SKip Macy 	/* Now safe for us to use the shared ring */
117289e0f4d2SKip Macy 	mtx_lock(&blkif_io_lock);
117389e0f4d2SKip Macy 	info->connected = BLKIF_STATE_CONNECTED;
117489e0f4d2SKip Macy 	mtx_unlock(&blkif_io_lock);
117589e0f4d2SKip Macy 
117689e0f4d2SKip Macy 	/* Send off requeued requests */
117789e0f4d2SKip Macy 	mtx_lock(&blkif_io_lock);
117889e0f4d2SKip Macy 	flush_requests(info);
117989e0f4d2SKip Macy 
118089e0f4d2SKip Macy 	/* Kick any other new requests queued since we resumed */
118189e0f4d2SKip Macy 	kick_pending_request_queues(info);
118289e0f4d2SKip Macy 	mtx_unlock(&blkif_io_lock);
118389e0f4d2SKip Macy }
118489e0f4d2SKip Macy 
118523dc5621SKip Macy /* ** Driver registration ** */
118623dc5621SKip Macy static device_method_t blkfront_methods[] = {
118723dc5621SKip Macy 	/* Device interface */
118823dc5621SKip Macy 	DEVMETHOD(device_probe,         blkfront_probe),
118923dc5621SKip Macy 	DEVMETHOD(device_attach,        blkfront_attach),
119023dc5621SKip Macy 	DEVMETHOD(device_detach,        blkfront_detach),
119123dc5621SKip Macy 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
119212678024SDoug Rabson 	DEVMETHOD(device_suspend,       blkfront_suspend),
119323dc5621SKip Macy 	DEVMETHOD(device_resume,        blkfront_resume),
119489e0f4d2SKip Macy 
119523dc5621SKip Macy 	/* Xenbus interface */
119623dc5621SKip Macy 	DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed),
119789e0f4d2SKip Macy 
119823dc5621SKip Macy 	{ 0, 0 }
119989e0f4d2SKip Macy };
120089e0f4d2SKip Macy 
120123dc5621SKip Macy static driver_t blkfront_driver = {
120223dc5621SKip Macy 	"xbd",
120323dc5621SKip Macy 	blkfront_methods,
120423dc5621SKip Macy 	sizeof(struct blkfront_info),
120589e0f4d2SKip Macy };
120623dc5621SKip Macy devclass_t blkfront_devclass;
120789e0f4d2SKip Macy 
120823dc5621SKip Macy DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0);
120989e0f4d2SKip Macy 
121089e0f4d2SKip Macy MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
121189e0f4d2SKip Macy 
1212