xref: /freebsd/sys/dev/xen/blkfront/blkfront.c (revision cdf5d66f2fa350cfc3b9bb57939a785cfb570027)
1 /*
2  * XenBSD block device driver
3  *
4  * Copyright (c) 2010-2013 Spectra Logic Corporation
5  * Copyright (c) 2009 Scott Long, Yahoo!
6  * Copyright (c) 2009 Frank Suchomel, Citrix
7  * Copyright (c) 2009 Doug F. Rabson, Citrix
8  * Copyright (c) 2005 Kip Macy
9  * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
10  * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
11  *
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this software and associated documentation files (the "Software"), to
15  * deal in the Software without restriction, including without limitation the
16  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
17  * sell copies of the Software, and to permit persons to whom the Software is
18  * furnished to do so, subject to the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/kernel.h>
38 #include <vm/vm.h>
39 #include <vm/pmap.h>
40 
41 #include <sys/bio.h>
42 #include <sys/bus.h>
43 #include <sys/conf.h>
44 #include <sys/module.h>
45 #include <sys/sysctl.h>
46 
47 #include <machine/bus.h>
48 #include <sys/rman.h>
49 #include <machine/resource.h>
50 #include <machine/intr_machdep.h>
51 #include <machine/vmparam.h>
52 #include <sys/bus_dma.h>
53 
54 #include <machine/_inttypes.h>
55 #include <machine/xen/xen-os.h>
56 #include <machine/xen/xenvar.h>
57 #include <machine/xen/xenfunc.h>
58 
59 #include <xen/hypervisor.h>
60 #include <xen/xen_intr.h>
61 #include <xen/evtchn.h>
62 #include <xen/gnttab.h>
63 #include <xen/interface/grant_table.h>
64 #include <xen/interface/io/protocols.h>
65 #include <xen/xenbus/xenbusvar.h>
66 
67 #include <geom/geom_disk.h>
68 
69 #include <dev/xen/blkfront/block.h>
70 
71 #include "xenbus_if.h"
72 
73 /*--------------------------- Forward Declarations ---------------------------*/
74 static void xbd_closing(device_t);
75 static void xbd_startio(struct xbd_softc *sc);
76 
77 /*---------------------------------- Macros ----------------------------------*/
78 #if 0
79 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
80 #else
81 #define DPRINTK(fmt, args...)
82 #endif
83 
84 /* XXX move to xbd_vbd.c when VBD update support is added */
85 #define MAX_VBDS 64
86 
87 #define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
88 #define XBD_SECTOR_SHFT		9
89 
90 #define GRANT_INVALID_REF 0
91 
92 /* Control whether runtime update of vbds is enabled. */
93 #define ENABLE_VBD_UPDATE 0
94 
95 #if ENABLE_VBD_UPDATE
96 static void vbd_update(void);
97 #endif
98 
99 #define XBD_STATE_DISCONNECTED 0
100 #define XBD_STATE_CONNECTED    1
101 #define XBD_STATE_SUSPENDED    2
102 
103 /*---------------------------- Global Static Data ----------------------------*/
104 static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
105 
106 /*---------------------------- Command Processing ----------------------------*/
107 static inline void
108 xbd_flush_requests(struct xbd_softc *sc)
109 {
110 	int notify;
111 
112 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify);
113 
114 	if (notify)
115 		notify_remote_via_irq(sc->xbd_irq);
116 }
117 
118 static void
119 xbd_free_command(struct xbd_command *cm)
120 {
121 
122 	KASSERT((cm->cm_flags & XBD_ON_XBDQ_MASK) == 0,
123 	    ("Freeing command that is still on a queue\n"));
124 
125 	cm->cm_flags = 0;
126 	cm->cm_bp = NULL;
127 	cm->cm_complete = NULL;
128 	xbd_enqueue_free(cm);
129 }
130 
131 static void
132 xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
133 {
134 	struct xbd_softc *sc;
135 	struct xbd_command *cm;
136 	blkif_request_t	*ring_req;
137 	struct blkif_request_segment *sg;
138 	struct blkif_request_segment *last_block_sg;
139 	grant_ref_t *sg_ref;
140 	vm_paddr_t buffer_ma;
141 	uint64_t fsect, lsect;
142 	int ref;
143 	int op;
144 	int block_segs;
145 
146 	cm = arg;
147 	sc = cm->cm_sc;
148 
149 	if (error) {
150 		printf("error %d in xbd_queue_cb\n", error);
151 		cm->cm_bp->bio_error = EIO;
152 		biodone(cm->cm_bp);
153 		xbd_free_command(cm);
154 		return;
155 	}
156 
157 	/* Fill out a communications ring structure. */
158 	ring_req = RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
159 	sc->xbd_ring.req_prod_pvt++;
160 	ring_req->id = cm->cm_id;
161 	ring_req->operation = cm->cm_operation;
162 	ring_req->sector_number = cm->cm_sector_number;
163 	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
164 	ring_req->nr_segments = nsegs;
165 	cm->cm_nseg = nsegs;
166 
167 	block_segs    = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK);
168 	sg            = ring_req->seg;
169 	last_block_sg = sg + block_segs;
170 	sg_ref        = cm->cm_sg_refs;
171 
172 	while (1) {
173 
174 		while (sg < last_block_sg) {
175 			buffer_ma = segs->ds_addr;
176 			fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
177 			lsect = fsect + (segs->ds_len  >> XBD_SECTOR_SHFT) - 1;
178 
179 			KASSERT(lsect <= 7, ("XEN disk driver data cannot "
180 			    "cross a page boundary"));
181 
182 			/* install a grant reference. */
183 			ref = gnttab_claim_grant_reference(&cm->cm_gref_head);
184 
185 			/*
186 			 * GNTTAB_LIST_END == 0xffffffff, but it is private
187 			 * to gnttab.c.
188 			 */
189 			KASSERT(ref != ~0, ("grant_reference failed"));
190 
191 			gnttab_grant_foreign_access_ref(
192 			    ref,
193 			    xenbus_get_otherend_id(sc->xbd_dev),
194 			    buffer_ma >> PAGE_SHIFT,
195 			    ring_req->operation == BLKIF_OP_WRITE);
196 
197 			*sg_ref = ref;
198 			*sg = (struct blkif_request_segment) {
199 				.gref       = ref,
200 				.first_sect = fsect,
201 				.last_sect  = lsect
202 			};
203 			sg++;
204 			sg_ref++;
205 			segs++;
206 			nsegs--;
207 		}
208 		block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
209 		if (block_segs == 0)
210 			break;
211 
212 		sg = BLKRING_GET_SEG_BLOCK(&sc->xbd_ring,
213 		    sc->xbd_ring.req_prod_pvt);
214 		sc->xbd_ring.req_prod_pvt++;
215 		last_block_sg = sg + block_segs;
216 	}
217 
218 	if (cm->cm_operation == BLKIF_OP_READ)
219 		op = BUS_DMASYNC_PREREAD;
220 	else if (cm->cm_operation == BLKIF_OP_WRITE)
221 		op = BUS_DMASYNC_PREWRITE;
222 	else
223 		op = 0;
224 	bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op);
225 
226 	gnttab_free_grant_references(cm->cm_gref_head);
227 
228 	xbd_enqueue_busy(cm);
229 
230 	/*
231 	 * This flag means that we're probably executing in the busdma swi
232 	 * instead of in the startio context, so an explicit flush is needed.
233 	 */
234 	if (cm->cm_flags & XBD_CMD_FROZEN)
235 		xbd_flush_requests(sc);
236 
237 	return;
238 }
239 
240 static int
241 xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm)
242 {
243 	int error;
244 
245 	error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data,
246 	    cm->cm_datalen, xbd_queue_cb, cm, 0);
247 	if (error == EINPROGRESS) {
248 		printf("EINPROGRESS\n");
249 		sc->xbd_flags |= XBD_FROZEN;
250 		cm->cm_flags |= XBD_CMD_FROZEN;
251 		return (0);
252 	}
253 
254 	return (error);
255 }
256 
257 static void
258 xbd_restart_queue_callback(void *arg)
259 {
260 	struct xbd_softc *sc = arg;
261 
262 	mtx_lock(&sc->xbd_io_lock);
263 
264 	xbd_startio(sc);
265 
266 	mtx_unlock(&sc->xbd_io_lock);
267 }
268 
269 static struct xbd_command *
270 xbd_bio_command(struct xbd_softc *sc)
271 {
272 	struct xbd_command *cm;
273 	struct bio *bp;
274 
275 	if (unlikely(sc->xbd_connected != XBD_STATE_CONNECTED))
276 		return (NULL);
277 
278 	bp = xbd_dequeue_bio(sc);
279 	if (bp == NULL)
280 		return (NULL);
281 
282 	if ((cm = xbd_dequeue_free(sc)) == NULL) {
283 		xbd_requeue_bio(sc, bp);
284 		return (NULL);
285 	}
286 
287 	if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
288 	    &cm->cm_gref_head) != 0) {
289 		gnttab_request_free_callback(&sc->xbd_callback,
290 		    xbd_restart_queue_callback, sc,
291 		    sc->xbd_max_request_segments);
292 		xbd_requeue_bio(sc, bp);
293 		xbd_enqueue_free(cm);
294 		sc->xbd_flags |= XBD_FROZEN;
295 		return (NULL);
296 	}
297 
298 	cm->cm_bp = bp;
299 	cm->cm_data = bp->bio_data;
300 	cm->cm_datalen = bp->bio_bcount;
301 	cm->cm_operation = (bp->bio_cmd == BIO_READ) ?
302 	    BLKIF_OP_READ : BLKIF_OP_WRITE;
303 	cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;
304 
305 	return (cm);
306 }
307 
308 /*
309  * Dequeue buffers and place them in the shared communication ring.
310  * Return when no more requests can be accepted or all buffers have
311  * been queued.
312  *
313  * Signal XEN once the ring has been filled out.
314  */
315 static void
316 xbd_startio(struct xbd_softc *sc)
317 {
318 	struct xbd_command *cm;
319 	int error, queued = 0;
320 
321 	mtx_assert(&sc->xbd_io_lock, MA_OWNED);
322 
323 	if (sc->xbd_connected != XBD_STATE_CONNECTED)
324 		return;
325 
326 	while (RING_FREE_REQUESTS(&sc->xbd_ring) >=
327 	    sc->xbd_max_request_blocks) {
328 		if (sc->xbd_flags & XBD_FROZEN)
329 			break;
330 
331 		cm = xbd_dequeue_ready(sc);
332 
333 		if (cm == NULL)
334 		    cm = xbd_bio_command(sc);
335 
336 		if (cm == NULL)
337 			break;
338 
339 		if ((error = xbd_queue_request(sc, cm)) != 0) {
340 			printf("xbd_queue_request returned %d\n", error);
341 			break;
342 		}
343 		queued++;
344 	}
345 
346 	if (queued != 0)
347 		xbd_flush_requests(sc);
348 }
349 
350 static void
351 xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm)
352 {
353 	struct bio *bp;
354 
355 	bp = cm->cm_bp;
356 
357 	if (unlikely(cm->cm_status != BLKIF_RSP_OKAY)) {
358 		disk_err(bp, "disk error" , -1, 0);
359 		printf(" status: %x\n", cm->cm_status);
360 		bp->bio_flags |= BIO_ERROR;
361 	}
362 
363 	if (bp->bio_flags & BIO_ERROR)
364 		bp->bio_error = EIO;
365 	else
366 		bp->bio_resid = 0;
367 
368 	xbd_free_command(cm);
369 	biodone(bp);
370 }
371 
372 static int
373 xbd_completion(struct xbd_command *cm)
374 {
375 	gnttab_end_foreign_access_references(cm->cm_nseg, cm->cm_sg_refs);
376 	return (BLKIF_SEGS_TO_BLOCKS(cm->cm_nseg));
377 }
378 
379 static void
380 xbd_int(void *xsc)
381 {
382 	struct xbd_softc *sc = xsc;
383 	struct xbd_command *cm;
384 	blkif_response_t *bret;
385 	RING_IDX i, rp;
386 	int op;
387 
388 	mtx_lock(&sc->xbd_io_lock);
389 
390 	if (unlikely(sc->xbd_connected == XBD_STATE_DISCONNECTED)) {
391 		mtx_unlock(&sc->xbd_io_lock);
392 		return;
393 	}
394 
395  again:
396 	rp = sc->xbd_ring.sring->rsp_prod;
397 	rmb(); /* Ensure we see queued responses up to 'rp'. */
398 
399 	for (i = sc->xbd_ring.rsp_cons; i != rp;) {
400 		bret = RING_GET_RESPONSE(&sc->xbd_ring, i);
401 		cm   = &sc->xbd_shadow[bret->id];
402 
403 		xbd_remove_busy(cm);
404 		i += xbd_completion(cm);
405 
406 		if (cm->cm_operation == BLKIF_OP_READ)
407 			op = BUS_DMASYNC_POSTREAD;
408 		else if (cm->cm_operation == BLKIF_OP_WRITE)
409 			op = BUS_DMASYNC_POSTWRITE;
410 		else
411 			op = 0;
412 		bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op);
413 		bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map);
414 
415 		/*
416 		 * If commands are completing then resources are probably
417 		 * being freed as well.  It's a cheap assumption even when
418 		 * wrong.
419 		 */
420 		sc->xbd_flags &= ~XBD_FROZEN;
421 
422 		/*
423 		 * Directly call the i/o complete routine to save an
424 		 * an indirection in the common case.
425 		 */
426 		cm->cm_status = bret->status;
427 		if (cm->cm_bp)
428 			xbd_bio_complete(sc, cm);
429 		else if (cm->cm_complete != NULL)
430 			cm->cm_complete(cm);
431 		else
432 			xbd_free_command(cm);
433 	}
434 
435 	sc->xbd_ring.rsp_cons = i;
436 
437 	if (i != sc->xbd_ring.req_prod_pvt) {
438 		int more_to_do;
439 		RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do);
440 		if (more_to_do)
441 			goto again;
442 	} else {
443 		sc->xbd_ring.sring->rsp_event = i + 1;
444 	}
445 
446 	xbd_startio(sc);
447 
448 	if (unlikely(sc->xbd_connected == XBD_STATE_SUSPENDED))
449 		wakeup(&sc->xbd_cm_busy);
450 
451 	mtx_unlock(&sc->xbd_io_lock);
452 }
453 
454 /*------------------------------- Dump Support -------------------------------*/
455 /**
456  * Quiesce the disk writes for a dump file before allowing the next buffer.
457  */
458 static void
459 xbd_quiesce(struct xbd_softc *sc)
460 {
461 	int mtd;
462 
463 	// While there are outstanding requests
464 	while (!TAILQ_EMPTY(&sc->xbd_cm_busy)) {
465 		RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd);
466 		if (mtd) {
467 			/* Recieved request completions, update queue. */
468 			xbd_int(sc);
469 		}
470 		if (!TAILQ_EMPTY(&sc->xbd_cm_busy)) {
471 			/*
472 			 * Still pending requests, wait for the disk i/o
473 			 * to complete.
474 			 */
475 			HYPERVISOR_yield();
476 		}
477 	}
478 }
479 
480 /* Kernel dump function for a paravirtualized disk device */
481 static void
482 xbd_dump_complete(struct xbd_command *cm)
483 {
484 
485 	xbd_enqueue_complete(cm);
486 }
487 
488 static int
489 xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
490     size_t length)
491 {
492 	struct disk *dp = arg;
493 	struct xbd_softc *sc = dp->d_drv1;
494 	struct xbd_command *cm;
495 	size_t chunk;
496 	int sbp;
497 	int rc = 0;
498 
499 	if (length <= 0)
500 		return (rc);
501 
502 	xbd_quiesce(sc);	/* All quiet on the western front. */
503 
504 	/*
505 	 * If this lock is held, then this module is failing, and a
506 	 * successful kernel dump is highly unlikely anyway.
507 	 */
508 	mtx_lock(&sc->xbd_io_lock);
509 
510 	/* Split the 64KB block as needed */
511 	for (sbp=0; length > 0; sbp++) {
512 		cm = xbd_dequeue_free(sc);
513 		if (cm == NULL) {
514 			mtx_unlock(&sc->xbd_io_lock);
515 			device_printf(sc->xbd_dev, "dump: no more commands?\n");
516 			return (EBUSY);
517 		}
518 
519 		if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
520 		    &cm->cm_gref_head) != 0) {
521 			xbd_free_command(cm);
522 			mtx_unlock(&sc->xbd_io_lock);
523 			device_printf(sc->xbd_dev, "no more grant allocs?\n");
524 			return (EBUSY);
525 		}
526 
527 		chunk = length > sc->xbd_max_request_size ?
528 		    sc->xbd_max_request_size : length;
529 		cm->cm_data = virtual;
530 		cm->cm_datalen = chunk;
531 		cm->cm_operation = BLKIF_OP_WRITE;
532 		cm->cm_sector_number = offset / dp->d_sectorsize;
533 		cm->cm_complete = xbd_dump_complete;
534 
535 		xbd_enqueue_ready(cm);
536 
537 		length -= chunk;
538 		offset += chunk;
539 		virtual = (char *) virtual + chunk;
540 	}
541 
542 	/* Tell DOM0 to do the I/O */
543 	xbd_startio(sc);
544 	mtx_unlock(&sc->xbd_io_lock);
545 
546 	/* Poll for the completion. */
547 	xbd_quiesce(sc);	/* All quite on the eastern front */
548 
549 	/* If there were any errors, bail out... */
550 	while ((cm = xbd_dequeue_complete(sc)) != NULL) {
551 		if (cm->cm_status != BLKIF_RSP_OKAY) {
552 			device_printf(sc->xbd_dev,
553 			    "Dump I/O failed at sector %jd\n",
554 			    cm->cm_sector_number);
555 			rc = EIO;
556 		}
557 		xbd_free_command(cm);
558 	}
559 
560 	return (rc);
561 }
562 
563 /*----------------------------- Disk Entrypoints -----------------------------*/
564 static int
565 xbd_open(struct disk *dp)
566 {
567 	struct xbd_softc *sc = dp->d_drv1;
568 
569 	if (sc == NULL) {
570 		printf("xb%d: not found", sc->xbd_unit);
571 		return (ENXIO);
572 	}
573 
574 	sc->xbd_flags |= XBD_OPEN;
575 	sc->xbd_users++;
576 	return (0);
577 }
578 
579 static int
580 xbd_close(struct disk *dp)
581 {
582 	struct xbd_softc *sc = dp->d_drv1;
583 
584 	if (sc == NULL)
585 		return (ENXIO);
586 	sc->xbd_flags &= ~XBD_OPEN;
587 	if (--(sc->xbd_users) == 0) {
588 		/*
589 		 * Check whether we have been instructed to close.  We will
590 		 * have ignored this request initially, as the device was
591 		 * still mounted.
592 		 */
593 		if (xenbus_get_otherend_state(sc->xbd_dev) ==
594 		    XenbusStateClosing)
595 			xbd_closing(sc->xbd_dev);
596 	}
597 	return (0);
598 }
599 
600 static int
601 xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
602 {
603 	struct xbd_softc *sc = dp->d_drv1;
604 
605 	if (sc == NULL)
606 		return (ENXIO);
607 
608 	return (ENOTTY);
609 }
610 
611 /*
612  * Read/write routine for a buffer.  Finds the proper unit, place it on
613  * the sortq and kick the controller.
614  */
615 static void
616 xbd_strategy(struct bio *bp)
617 {
618 	struct xbd_softc *sc = bp->bio_disk->d_drv1;
619 
620 	/* bogus disk? */
621 	if (sc == NULL) {
622 		bp->bio_error = EINVAL;
623 		bp->bio_flags |= BIO_ERROR;
624 		bp->bio_resid = bp->bio_bcount;
625 		biodone(bp);
626 		return;
627 	}
628 
629 	/*
630 	 * Place it in the queue of disk activities for this disk
631 	 */
632 	mtx_lock(&sc->xbd_io_lock);
633 
634 	xbd_enqueue_bio(sc, bp);
635 	xbd_startio(sc);
636 
637 	mtx_unlock(&sc->xbd_io_lock);
638 	return;
639 }
640 
641 /*------------------------------ Ring Management -----------------------------*/
642 static int
643 xbd_alloc_ring(struct xbd_softc *sc)
644 {
645 	blkif_sring_t *sring;
646 	uintptr_t sring_page_addr;
647 	int error;
648 	int i;
649 
650 	sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT,
651 	    M_NOWAIT|M_ZERO);
652 	if (sring == NULL) {
653 		xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring");
654 		return (ENOMEM);
655 	}
656 	SHARED_RING_INIT(sring);
657 	FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE);
658 
659 	for (i = 0, sring_page_addr = (uintptr_t)sring;
660 	     i < sc->xbd_ring_pages;
661 	     i++, sring_page_addr += PAGE_SIZE) {
662 
663 		error = xenbus_grant_ring(sc->xbd_dev,
664 		    (vtomach(sring_page_addr) >> PAGE_SHIFT),
665 		    &sc->xbd_ring_ref[i]);
666 		if (error) {
667 			xenbus_dev_fatal(sc->xbd_dev, error,
668 			    "granting ring_ref(%d)", i);
669 			return (error);
670 		}
671 	}
672 	if (sc->xbd_ring_pages == 1) {
673 		error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev),
674 		    "ring-ref", "%u", sc->xbd_ring_ref[0]);
675 		if (error) {
676 			xenbus_dev_fatal(sc->xbd_dev, error,
677 			    "writing %s/ring-ref",
678 			    xenbus_get_node(sc->xbd_dev));
679 			return (error);
680 		}
681 	} else {
682 		for (i = 0; i < sc->xbd_ring_pages; i++) {
683 			char ring_ref_name[]= "ring_refXX";
684 
685 			snprintf(ring_ref_name, sizeof(ring_ref_name),
686 			    "ring-ref%u", i);
687 			error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev),
688 			     ring_ref_name, "%u", sc->xbd_ring_ref[i]);
689 			if (error) {
690 				xenbus_dev_fatal(sc->xbd_dev, error,
691 				    "writing %s/%s",
692 				    xenbus_get_node(sc->xbd_dev),
693 				    ring_ref_name);
694 				return (error);
695 			}
696 		}
697 	}
698 
699 	error = bind_listening_port_to_irqhandler(
700 	    xenbus_get_otherend_id(sc->xbd_dev),
701 	    "xbd", (driver_intr_t *)xbd_int, sc,
702 	    INTR_TYPE_BIO | INTR_MPSAFE, &sc->xbd_irq);
703 	if (error) {
704 		xenbus_dev_fatal(sc->xbd_dev, error,
705 		    "bind_evtchn_to_irqhandler failed");
706 		return (error);
707 	}
708 
709 	return (0);
710 }
711 
712 /*-------------------------- Initialization/Teardown -------------------------*/
713 static void
714 xbd_setup_sysctl(struct xbd_softc *xbd)
715 {
716 	struct sysctl_ctx_list *sysctl_ctx = NULL;
717 	struct sysctl_oid *sysctl_tree = NULL;
718 
719 	sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
720 	if (sysctl_ctx == NULL)
721 		return;
722 
723 	sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev);
724 	if (sysctl_tree == NULL)
725 		return;
726 
727 	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
728 	    "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
729 	    "maximum outstanding requests (negotiated)");
730 
731 	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
732 	    "max_request_segments", CTLFLAG_RD,
733 	    &xbd->xbd_max_request_segments, 0,
734 	    "maximum number of pages per requests (negotiated)");
735 
736 	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
737 	    "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
738 	    "maximum size in bytes of a request (negotiated)");
739 
740 	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
741 	    "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
742 	    "communication channel pages (negotiated)");
743 }
744 
745 /*
746  * Translate Linux major/minor to an appropriate name and unit
747  * number. For HVM guests, this allows us to use the same drive names
748  * with blkfront as the emulated drives, easing transition slightly.
749  */
750 static void
751 xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
752 {
753 	static struct vdev_info {
754 		int major;
755 		int shift;
756 		int base;
757 		const char *name;
758 	} info[] = {
759 		{3,	6,	0,	"ada"},	/* ide0 */
760 		{22,	6,	2,	"ada"},	/* ide1 */
761 		{33,	6,	4,	"ada"},	/* ide2 */
762 		{34,	6,	6,	"ada"},	/* ide3 */
763 		{56,	6,	8,	"ada"},	/* ide4 */
764 		{57,	6,	10,	"ada"},	/* ide5 */
765 		{88,	6,	12,	"ada"},	/* ide6 */
766 		{89,	6,	14,	"ada"},	/* ide7 */
767 		{90,	6,	16,	"ada"},	/* ide8 */
768 		{91,	6,	18,	"ada"},	/* ide9 */
769 
770 		{8,	4,	0,	"da"},	/* scsi disk0 */
771 		{65,	4,	16,	"da"},	/* scsi disk1 */
772 		{66,	4,	32,	"da"},	/* scsi disk2 */
773 		{67,	4,	48,	"da"},	/* scsi disk3 */
774 		{68,	4,	64,	"da"},	/* scsi disk4 */
775 		{69,	4,	80,	"da"},	/* scsi disk5 */
776 		{70,	4,	96,	"da"},	/* scsi disk6 */
777 		{71,	4,	112,	"da"},	/* scsi disk7 */
778 		{128,	4,	128,	"da"},	/* scsi disk8 */
779 		{129,	4,	144,	"da"},	/* scsi disk9 */
780 		{130,	4,	160,	"da"},	/* scsi disk10 */
781 		{131,	4,	176,	"da"},	/* scsi disk11 */
782 		{132,	4,	192,	"da"},	/* scsi disk12 */
783 		{133,	4,	208,	"da"},	/* scsi disk13 */
784 		{134,	4,	224,	"da"},	/* scsi disk14 */
785 		{135,	4,	240,	"da"},	/* scsi disk15 */
786 
787 		{202,	4,	0,	"xbd"},	/* xbd */
788 
789 		{0,	0,	0,	NULL},
790 	};
791 	int major = vdevice >> 8;
792 	int minor = vdevice & 0xff;
793 	int i;
794 
795 	if (vdevice & (1 << 28)) {
796 		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
797 		*name = "xbd";
798 		return;
799 	}
800 
801 	for (i = 0; info[i].major; i++) {
802 		if (info[i].major == major) {
803 			*unit = info[i].base + (minor >> info[i].shift);
804 			*name = info[i].name;
805 			return;
806 		}
807 	}
808 
809 	*unit = minor >> 4;
810 	*name = "xbd";
811 }
812 
813 int
814 xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
815     int vdevice, uint16_t vdisk_info, unsigned long sector_size)
816 {
817 	int unit, error = 0;
818 	const char *name;
819 
820 	xbd_vdevice_to_unit(vdevice, &unit, &name);
821 
822 	sc->xbd_unit = unit;
823 
824 	if (strcmp(name, "xbd"))
825 		device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);
826 
827 	sc->xbd_disk = disk_alloc();
828 	sc->xbd_disk->d_unit = sc->xbd_unit;
829 	sc->xbd_disk->d_open = xbd_open;
830 	sc->xbd_disk->d_close = xbd_close;
831 	sc->xbd_disk->d_ioctl = xbd_ioctl;
832 	sc->xbd_disk->d_strategy = xbd_strategy;
833 	sc->xbd_disk->d_dump = xbd_dump;
834 	sc->xbd_disk->d_name = name;
835 	sc->xbd_disk->d_drv1 = sc;
836 	sc->xbd_disk->d_sectorsize = sector_size;
837 
838 	sc->xbd_disk->d_mediasize = sectors * sector_size;
839 	sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
840 	sc->xbd_disk->d_flags = 0;
841 	disk_create(sc->xbd_disk, DISK_VERSION);
842 
843 	return error;
844 }
845 
846 static void
847 xbd_free(struct xbd_softc *sc)
848 {
849 	uint8_t *sring_page_ptr;
850 	int i;
851 
852 	/* Prevent new requests being issued until we fix things up. */
853 	mtx_lock(&sc->xbd_io_lock);
854 	sc->xbd_connected = XBD_STATE_DISCONNECTED;
855 	mtx_unlock(&sc->xbd_io_lock);
856 
857 	/* Free resources associated with old device channel. */
858 	if (sc->xbd_ring.sring != NULL) {
859 		sring_page_ptr = (uint8_t *)sc->xbd_ring.sring;
860 		for (i = 0; i < sc->xbd_ring_pages; i++) {
861 			grant_ref_t *ref;
862 
863 			ref = &sc->xbd_ring_ref[i];
864 			if (*ref != GRANT_INVALID_REF) {
865 				gnttab_end_foreign_access_ref(*ref);
866 				*ref = GRANT_INVALID_REF;
867 			}
868 			sring_page_ptr += PAGE_SIZE;
869 		}
870 		free(sc->xbd_ring.sring, M_XENBLOCKFRONT);
871 		sc->xbd_ring.sring = NULL;
872 	}
873 
874 	if (sc->xbd_shadow) {
875 
876 		for (i = 0; i < sc->xbd_max_requests; i++) {
877 			struct xbd_command *cm;
878 
879 			cm = &sc->xbd_shadow[i];
880 			if (cm->cm_sg_refs != NULL) {
881 				free(cm->cm_sg_refs, M_XENBLOCKFRONT);
882 				cm->cm_sg_refs = NULL;
883 			}
884 
885 			bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map);
886 		}
887 		free(sc->xbd_shadow, M_XENBLOCKFRONT);
888 		sc->xbd_shadow = NULL;
889 
890 		bus_dma_tag_destroy(sc->xbd_io_dmat);
891 
892 		xbd_initq_free(sc);
893 		xbd_initq_ready(sc);
894 		xbd_initq_complete(sc);
895 	}
896 
897 	if (sc->xbd_irq) {
898 		unbind_from_irqhandler(sc->xbd_irq);
899 		sc->xbd_irq = 0;
900 	}
901 }
902 
903 /*--------------------------- State Change Handlers --------------------------*/
904 static void
905 xbd_initialize(struct xbd_softc *sc)
906 {
907 	const char *otherend_path;
908 	const char *node_path;
909 	uint32_t max_ring_page_order;
910 	int error;
911 	int i;
912 
913 	if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) {
914 		/* Initialization has already been performed. */
915 		return;
916 	}
917 
918 	/*
919 	 * Protocol defaults valid even if negotiation for a
920 	 * setting fails.
921 	 */
922 	max_ring_page_order = 0;
923 	sc->xbd_ring_pages = 1;
924 	sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
925 	sc->xbd_max_request_size =
926 	    XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
927 	sc->xbd_max_request_blocks =
928 	    BLKIF_SEGS_TO_BLOCKS(sc->xbd_max_request_segments);
929 
930 	/*
931 	 * Protocol negotiation.
932 	 *
933 	 * \note xs_gather() returns on the first encountered error, so
934 	 *       we must use independant calls in order to guarantee
935 	 *       we don't miss information in a sparsly populated back-end
936 	 *       tree.
937 	 *
938 	 * \note xs_scanf() does not update variables for unmatched
939 	 *	 fields.
940 	 */
941 	otherend_path = xenbus_get_otherend_path(sc->xbd_dev);
942 	node_path = xenbus_get_node(sc->xbd_dev);
943 
944 	/* Support both backend schemes for relaying ring page limits. */
945 	(void)xs_scanf(XST_NIL, otherend_path,
946 	    "max-ring-page-order", NULL, "%" PRIu32,
947 	    &max_ring_page_order);
948 	sc->xbd_ring_pages = 1 << max_ring_page_order;
949 	(void)xs_scanf(XST_NIL, otherend_path,
950 	    "max-ring-pages", NULL, "%" PRIu32,
951 	    &sc->xbd_ring_pages);
952 	if (sc->xbd_ring_pages < 1)
953 		sc->xbd_ring_pages = 1;
954 
955 	sc->xbd_max_requests =
956 	    BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE);
957 	(void)xs_scanf(XST_NIL, otherend_path,
958 	    "max-requests", NULL, "%" PRIu32,
959 	    &sc->xbd_max_requests);
960 
961 	(void)xs_scanf(XST_NIL, otherend_path,
962 	    "max-request-segments", NULL, "%" PRIu32,
963 	    &sc->xbd_max_request_segments);
964 
965 	(void)xs_scanf(XST_NIL, otherend_path,
966 	    "max-request-size", NULL, "%" PRIu32,
967 	    &sc->xbd_max_request_size);
968 
969 	if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) {
970 		device_printf(sc->xbd_dev,
971 		    "Back-end specified ring-pages of %u "
972 		    "limited to front-end limit of %zu.\n",
973 		    sc->xbd_ring_pages, XBD_MAX_RING_PAGES);
974 		sc->xbd_ring_pages = XBD_MAX_RING_PAGES;
975 	}
976 
977 	if (powerof2(sc->xbd_ring_pages) == 0) {
978 		uint32_t new_page_limit;
979 
980 		new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1);
981 		device_printf(sc->xbd_dev,
982 		    "Back-end specified ring-pages of %u "
983 		    "is not a power of 2. Limited to %u.\n",
984 		    sc->xbd_ring_pages, new_page_limit);
985 		sc->xbd_ring_pages = new_page_limit;
986 	}
987 
988 	if (sc->xbd_max_requests > XBD_MAX_REQUESTS) {
989 		device_printf(sc->xbd_dev,
990 		    "Back-end specified max_requests of %u "
991 		    "limited to front-end limit of %u.\n",
992 		    sc->xbd_max_requests, XBD_MAX_REQUESTS);
993 		sc->xbd_max_requests = XBD_MAX_REQUESTS;
994 	}
995 
996 	if (sc->xbd_max_request_segments > XBD_MAX_SEGMENTS_PER_REQUEST) {
997 		device_printf(sc->xbd_dev,
998 		    "Back-end specified max_request_segments of %u "
999 		    "limited to front-end limit of %u.\n",
1000 		    sc->xbd_max_request_segments,
1001 		    XBD_MAX_SEGMENTS_PER_REQUEST);
1002 		sc->xbd_max_request_segments = XBD_MAX_SEGMENTS_PER_REQUEST;
1003 	}
1004 
1005 	if (sc->xbd_max_request_size > XBD_MAX_REQUEST_SIZE) {
1006 		device_printf(sc->xbd_dev,
1007 		    "Back-end specified max_request_size of %u "
1008 		    "limited to front-end limit of %u.\n",
1009 		    sc->xbd_max_request_size,
1010 		    XBD_MAX_REQUEST_SIZE);
1011 		sc->xbd_max_request_size = XBD_MAX_REQUEST_SIZE;
1012 	}
1013 
1014  	if (sc->xbd_max_request_size >
1015 	    XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments)) {
1016  		device_printf(sc->xbd_dev,
1017 		    "Back-end specified max_request_size of %u "
1018 		    "limited to front-end limit of %u.  (Too few segments.)\n",
1019 		    sc->xbd_max_request_size,
1020 		    XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments));
1021  		sc->xbd_max_request_size =
1022  		    XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
1023  	}
1024 
1025 	sc->xbd_max_request_blocks =
1026 	    BLKIF_SEGS_TO_BLOCKS(sc->xbd_max_request_segments);
1027 
1028 	/* Allocate datastructures based on negotiated values. */
1029 	error = bus_dma_tag_create(
1030 	    bus_get_dma_tag(sc->xbd_dev),	/* parent */
1031 	    512, PAGE_SIZE,			/* algnmnt, boundary */
1032 	    BUS_SPACE_MAXADDR,			/* lowaddr */
1033 	    BUS_SPACE_MAXADDR,			/* highaddr */
1034 	    NULL, NULL,				/* filter, filterarg */
1035 	    sc->xbd_max_request_size,
1036 	    sc->xbd_max_request_segments,
1037 	    PAGE_SIZE,				/* maxsegsize */
1038 	    BUS_DMA_ALLOCNOW,			/* flags */
1039 	    busdma_lock_mutex,			/* lockfunc */
1040 	    &sc->xbd_io_lock,			/* lockarg */
1041 	    &sc->xbd_io_dmat);
1042 	if (error != 0) {
1043 		xenbus_dev_fatal(sc->xbd_dev, error,
1044 		    "Cannot allocate parent DMA tag\n");
1045 		return;
1046 	}
1047 
1048 	/* Per-transaction data allocation. */
1049 	sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests,
1050 	    M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
1051 	if (sc->xbd_shadow == NULL) {
1052 		bus_dma_tag_destroy(sc->xbd_io_dmat);
1053 		xenbus_dev_fatal(sc->xbd_dev, error,
1054 		    "Cannot allocate request structures\n");
1055 		return;
1056 	}
1057 
1058 	for (i = 0; i < sc->xbd_max_requests; i++) {
1059 		struct xbd_command *cm;
1060 
1061 		cm = &sc->xbd_shadow[i];
1062 		cm->cm_sg_refs = malloc(
1063 		    sizeof(grant_ref_t) * sc->xbd_max_request_segments,
1064 		    M_XENBLOCKFRONT, M_NOWAIT);
1065 		if (cm->cm_sg_refs == NULL)
1066 			break;
1067 		cm->cm_id = i;
1068 		cm->cm_sc = sc;
1069 		if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0)
1070 			break;
1071 		xbd_free_command(cm);
1072 	}
1073 
1074 	if (xbd_alloc_ring(sc) != 0)
1075 		return;
1076 
1077 	/* Support both backend schemes for relaying ring page limits. */
1078 	if (sc->xbd_ring_pages > 1) {
1079 		error = xs_printf(XST_NIL, node_path,
1080 		    "num-ring-pages","%u",
1081 		    sc->xbd_ring_pages);
1082 		if (error) {
1083 			xenbus_dev_fatal(sc->xbd_dev, error,
1084 			    "writing %s/num-ring-pages",
1085 			    node_path);
1086 			return;
1087 		}
1088 
1089 		error = xs_printf(XST_NIL, node_path,
1090 		    "ring-page-order", "%u",
1091 		    fls(sc->xbd_ring_pages) - 1);
1092 		if (error) {
1093 			xenbus_dev_fatal(sc->xbd_dev, error,
1094 			    "writing %s/ring-page-order",
1095 			    node_path);
1096 			return;
1097 		}
1098 	}
1099 
1100 	error = xs_printf(XST_NIL, node_path,
1101 	    "max-requests","%u",
1102 	    sc->xbd_max_requests);
1103 	if (error) {
1104 		xenbus_dev_fatal(sc->xbd_dev, error,
1105 		    "writing %s/max-requests",
1106 		    node_path);
1107 		return;
1108 	}
1109 
1110 	error = xs_printf(XST_NIL, node_path,
1111 	    "max-request-segments","%u",
1112 	    sc->xbd_max_request_segments);
1113 	if (error) {
1114 		xenbus_dev_fatal(sc->xbd_dev, error,
1115 		    "writing %s/max-request-segments",
1116 		    node_path);
1117 		return;
1118 	}
1119 
1120 	error = xs_printf(XST_NIL, node_path,
1121 	    "max-request-size","%u",
1122 	    sc->xbd_max_request_size);
1123 	if (error) {
1124 		xenbus_dev_fatal(sc->xbd_dev, error,
1125 		    "writing %s/max-request-size",
1126 		    node_path);
1127 		return;
1128 	}
1129 
1130 	error = xs_printf(XST_NIL, node_path, "event-channel",
1131 	    "%u", irq_to_evtchn_port(sc->xbd_irq));
1132 	if (error) {
1133 		xenbus_dev_fatal(sc->xbd_dev, error,
1134 		    "writing %s/event-channel",
1135 		    node_path);
1136 		return;
1137 	}
1138 
1139 	error = xs_printf(XST_NIL, node_path, "protocol",
1140 	    "%s", XEN_IO_PROTO_ABI_NATIVE);
1141 	if (error) {
1142 		xenbus_dev_fatal(sc->xbd_dev, error,
1143 		    "writing %s/protocol",
1144 		    node_path);
1145 		return;
1146 	}
1147 
1148 	xenbus_set_state(sc->xbd_dev, XenbusStateInitialised);
1149 }
1150 
1151 /*
1152  * Invoked when the backend is finally 'ready' (and has published
1153  * the details about the physical device - #sectors, size, etc).
1154  */
1155 static void
1156 xbd_connect(struct xbd_softc *sc)
1157 {
1158 	device_t dev = sc->xbd_dev;
1159 	unsigned long sectors, sector_size;
1160 	unsigned int binfo;
1161 	int err, feature_barrier;
1162 
1163 	if ((sc->xbd_connected == XBD_STATE_CONNECTED) ||
1164 	    (sc->xbd_connected == XBD_STATE_SUSPENDED))
1165 		return;
1166 
1167 	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
1168 
1169 	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
1170 	    "sectors", "%lu", &sectors,
1171 	    "info", "%u", &binfo,
1172 	    "sector-size", "%lu", &sector_size,
1173 	    NULL);
1174 	if (err) {
1175 		xenbus_dev_fatal(dev, err,
1176 		    "reading backend fields at %s",
1177 		    xenbus_get_otherend_path(dev));
1178 		return;
1179 	}
1180 	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
1181 	     "feature-barrier", "%lu", &feature_barrier,
1182 	     NULL);
1183 	if (!err || feature_barrier)
1184 		sc->xbd_flags |= XBD_BARRIER;
1185 
1186 	if (sc->xbd_disk == NULL) {
1187 		device_printf(dev, "%juMB <%s> at %s",
1188 		    (uintmax_t) sectors / (1048576 / sector_size),
1189 		    device_get_desc(dev),
1190 		    xenbus_get_node(dev));
1191 		bus_print_child_footer(device_get_parent(dev), dev);
1192 
1193 		xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo,
1194 		    sector_size);
1195 	}
1196 
1197 	(void)xenbus_set_state(dev, XenbusStateConnected);
1198 
1199 	/* Kick pending requests. */
1200 	mtx_lock(&sc->xbd_io_lock);
1201 	sc->xbd_connected = XBD_STATE_CONNECTED;
1202 	xbd_startio(sc);
1203 	sc->xbd_flags |= XBD_READY;
1204 	mtx_unlock(&sc->xbd_io_lock);
1205 }
1206 
1207 /**
1208  * Handle the change of state of the backend to Closing.  We must delete our
1209  * device-layer structures now, to ensure that writes are flushed through to
1210  * the backend.  Once this is done, we can switch to Closed in
1211  * acknowledgement.
1212  */
1213 static void
1214 xbd_closing(device_t dev)
1215 {
1216 	struct xbd_softc *sc = device_get_softc(dev);
1217 
1218 	xenbus_set_state(dev, XenbusStateClosing);
1219 
1220 	DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev));
1221 
1222 	if (sc->xbd_disk != NULL) {
1223 		disk_destroy(sc->xbd_disk);
1224 		sc->xbd_disk = NULL;
1225 	}
1226 
1227 	xenbus_set_state(dev, XenbusStateClosed);
1228 }
1229 
1230 /*---------------------------- NewBus Entrypoints ----------------------------*/
1231 static int
1232 xbd_probe(device_t dev)
1233 {
1234 
1235 	if (!strcmp(xenbus_get_type(dev), "vbd")) {
1236 		device_set_desc(dev, "Virtual Block Device");
1237 		device_quiet(dev);
1238 		return (0);
1239 	}
1240 
1241 	return (ENXIO);
1242 }
1243 
1244 /*
1245  * Setup supplies the backend dir, virtual device.  We place an event
1246  * channel and shared frame entries.  We watch backend to wait if it's
1247  * ok.
1248  */
1249 static int
1250 xbd_attach(device_t dev)
1251 {
1252 	struct xbd_softc *sc;
1253 	const char *name;
1254 	uint32_t vdevice;
1255 	int error;
1256 	int i;
1257 	int unit;
1258 
1259 	/* FIXME: Use dynamic device id if this is not set. */
1260 	error = xs_scanf(XST_NIL, xenbus_get_node(dev),
1261 	    "virtual-device", NULL, "%" PRIu32, &vdevice);
1262 	if (error) {
1263 		xenbus_dev_fatal(dev, error, "reading virtual-device");
1264 		device_printf(dev, "Couldn't determine virtual device.\n");
1265 		return (error);
1266 	}
1267 
1268 	xbd_vdevice_to_unit(vdevice, &unit, &name);
1269 	if (!strcmp(name, "xbd"))
1270 		device_set_unit(dev, unit);
1271 
1272 	sc = device_get_softc(dev);
1273 	mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
1274 	xbd_initq_free(sc);
1275 	xbd_initq_busy(sc);
1276 	xbd_initq_ready(sc);
1277 	xbd_initq_complete(sc);
1278 	xbd_initq_bio(sc);
1279 	for (i = 0; i < XBD_MAX_RING_PAGES; i++)
1280 		sc->xbd_ring_ref[i] = GRANT_INVALID_REF;
1281 
1282 	sc->xbd_dev = dev;
1283 	sc->xbd_vdevice = vdevice;
1284 	sc->xbd_connected = XBD_STATE_DISCONNECTED;
1285 
1286 	xbd_setup_sysctl(sc);
1287 
1288 	/* Wait for backend device to publish its protocol capabilities. */
1289 	xenbus_set_state(dev, XenbusStateInitialising);
1290 
1291 	return (0);
1292 }
1293 
1294 static int
1295 xbd_detach(device_t dev)
1296 {
1297 	struct xbd_softc *sc = device_get_softc(dev);
1298 
1299 	DPRINTK("xbd_remove: %s removed\n", xenbus_get_node(dev));
1300 
1301 	xbd_free(sc);
1302 	mtx_destroy(&sc->xbd_io_lock);
1303 
1304 	return 0;
1305 }
1306 
1307 static int
1308 xbd_suspend(device_t dev)
1309 {
1310 	struct xbd_softc *sc = device_get_softc(dev);
1311 	int retval;
1312 	int saved_state;
1313 
1314 	/* Prevent new requests being issued until we fix things up. */
1315 	mtx_lock(&sc->xbd_io_lock);
1316 	saved_state = sc->xbd_connected;
1317 	sc->xbd_connected = XBD_STATE_SUSPENDED;
1318 
1319 	/* Wait for outstanding I/O to drain. */
1320 	retval = 0;
1321 	while (TAILQ_EMPTY(&sc->xbd_cm_busy) == 0) {
1322 		if (msleep(&sc->xbd_cm_busy, &sc->xbd_io_lock,
1323 		    PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
1324 			retval = EBUSY;
1325 			break;
1326 		}
1327 	}
1328 	mtx_unlock(&sc->xbd_io_lock);
1329 
1330 	if (retval != 0)
1331 		sc->xbd_connected = saved_state;
1332 
1333 	return (retval);
1334 }
1335 
1336 static int
1337 xbd_resume(device_t dev)
1338 {
1339 	struct xbd_softc *sc = device_get_softc(dev);
1340 
1341 	DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev));
1342 
1343 	xbd_free(sc);
1344 	xbd_initialize(sc);
1345 	return (0);
1346 }
1347 
1348 /**
1349  * Callback received when the backend's state changes.
1350  */
1351 static void
1352 xbd_backend_changed(device_t dev, XenbusState backend_state)
1353 {
1354 	struct xbd_softc *sc = device_get_softc(dev);
1355 
1356 	DPRINTK("backend_state=%d\n", backend_state);
1357 
1358 	switch (backend_state) {
1359 	case XenbusStateUnknown:
1360 	case XenbusStateInitialising:
1361 	case XenbusStateReconfigured:
1362 	case XenbusStateReconfiguring:
1363 	case XenbusStateClosed:
1364 		break;
1365 
1366 	case XenbusStateInitWait:
1367 	case XenbusStateInitialised:
1368 		xbd_initialize(sc);
1369 		break;
1370 
1371 	case XenbusStateConnected:
1372 		xbd_initialize(sc);
1373 		xbd_connect(sc);
1374 		break;
1375 
1376 	case XenbusStateClosing:
1377 		if (sc->xbd_users > 0)
1378 			xenbus_dev_error(dev, -EBUSY,
1379 			    "Device in use; refusing to close");
1380 		else
1381 			xbd_closing(dev);
1382 		break;
1383 	}
1384 }
1385 
1386 /*---------------------------- NewBus Registration ---------------------------*/
1387 static device_method_t xbd_methods[] = {
1388 	/* Device interface */
1389 	DEVMETHOD(device_probe,         xbd_probe),
1390 	DEVMETHOD(device_attach,        xbd_attach),
1391 	DEVMETHOD(device_detach,        xbd_detach),
1392 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
1393 	DEVMETHOD(device_suspend,       xbd_suspend),
1394 	DEVMETHOD(device_resume,        xbd_resume),
1395 
1396 	/* Xenbus interface */
1397 	DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed),
1398 
1399 	{ 0, 0 }
1400 };
1401 
1402 static driver_t xbd_driver = {
1403 	"xbd",
1404 	xbd_methods,
1405 	sizeof(struct xbd_softc),
1406 };
1407 devclass_t xbd_devclass;
1408 
1409 DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0);
1410