xref: /freebsd/sys/dev/xen/blkfront/blkfront.c (revision 89e0f4d24c4a430a0893930e4400ff6a63e63864)
/*-
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * XenoBSD block device driver
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/hypervisor.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xen_intr.h>
#include <machine/xen/xenbus.h>
#include <machine/xen/evtchn.h>
#include <xen/interface/grant_table.h>

#include <geom/geom_disk.h>
#include <machine/xen/xenfunc.h>
#include <xen/gnttab.h>

#include <dev/xen/blkfront/block.h>

#define ASSERT(S)	KASSERT(S, (#S))
/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(struct blkfront_info *);
static void blkfront_closing(struct xenbus_device *);
static int blkfront_remove(struct xenbus_device *);
static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

#define GRANT_INVALID_REF 0
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif


#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char *blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
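/*
 * Note: __RING_SIZE() from the standard Xen ring macros yields the number
 * of request slots that fit in one shared page, so with at most
 * BLKIF_MAX_SEGMENTS_PER_REQUEST segments (pages) per request, the product
 * above bounds the number of segments that can be in flight at once.
 */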

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);


/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

static struct mtx blkif_io_lock;

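/*
 * Translate a guest pseudo-physical frame number into the machine frame
 * number the hypervisor expects.  phystomach() operates on byte addresses,
 * hence the PAGE_SHIFT conversions on both sides.
 */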
static unsigned long
pfn_to_mfn(unsigned long pfn)
{
	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}


int
xlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info, uint16_t sector_size,
	  struct blkfront_info *info)
{
	struct xb_softc	*sc;
	int		error = 0;

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unit;
	sc->xb_info = info;
	info->sc = sc;

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = unit;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_name = "xbd";
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
		  sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return (error);
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc	*sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer.  Find the proper unit, place the bio
 * on the unit's sort queue, and kick the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);

	xb_startio(sc);
	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}


/*
 * Setup supplies the backend directory and the virtual device number.
 * We allocate an event channel and the shared ring page, publish them
 * via the xenstore, and then watch the backend until it is ready.
 */
static int blkfront_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	int err, vdevice, i;
	struct blkfront_info *info;

	/* FIXME: Use dynamic device id if this is not set. */
	err = xenbus_scanf(XBT_NIL, dev->nodename,
			   "virtual-device", "%i", &vdevice);
	if (err != 1) {
		xenbus_dev_fatal(dev, err, "reading virtual-device");
		return err;
	}

	info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (info == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating info structure");
		return ENOMEM;
	}

	/*
	 * XXX debug only
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed ? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
	dev->dev_driver_data = info;

	err = talk_to_backend(dev, info);
	if (err) {
		free(info, M_DEVBUF);
		dev->dev_driver_data = NULL;
		return err;
	}

	return 0;
}


static int blkfront_resume(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;
	int err;

	DPRINTK("blkfront_resume: %s\n", dev->nodename);

	blkif_free(info, 1);

	err = talk_to_backend(dev, info);
	if (!err)
		blkif_recover(info);

	return err;
}
300 
301 /* Common code used when first setting up, and when resuming. */
302 static int talk_to_backend(struct xenbus_device *dev,
303 			   struct blkfront_info *info)
304 {
305 	const char *message = NULL;
306 	struct xenbus_transaction xbt;
307 	int err;
308 
309 	/* Create shared ring, alloc event channel. */
310 	err = setup_blkring(dev, info);
311 	if (err)
312 		goto out;
313 
314  again:
315 	err = xenbus_transaction_start(&xbt);
316 	if (err) {
317 		xenbus_dev_fatal(dev, err, "starting transaction");
318 		goto destroy_blkring;
319 	}
320 
321 	err = xenbus_printf(xbt, dev->nodename,
322 			    "ring-ref","%u", info->ring_ref);
323 	if (err) {
324 		message = "writing ring-ref";
325 		goto abort_transaction;
326 	}
327 	err = xenbus_printf(xbt, dev->nodename,
328 		"event-channel", "%u", irq_to_evtchn_port(info->irq));
329 	if (err) {
330 		message = "writing event-channel";
331 		goto abort_transaction;
332 	}
333 
334 	err = xenbus_transaction_end(xbt, 0);
335 	if (err) {
336 		if (err == -EAGAIN)
337 			goto again;
338 		xenbus_dev_fatal(dev, err, "completing transaction");
339 		goto destroy_blkring;
340 	}
341 	xenbus_switch_state(dev, XenbusStateInitialised);
342 
343 	return 0;
344 
345  abort_transaction:
346 	xenbus_transaction_end(xbt, 1);
347 	if (message)
348 		xenbus_dev_fatal(dev, err, "%s", message);
349  destroy_blkring:
350 	blkif_free(info, 0);
351  out:
352 	return err;
353 }
354 
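/*
 * Allocate the shared request/response ring, grant it to the backend,
 * and bind an interrupt handler to a fresh event channel.  On success,
 * info->ring_ref and info->irq identify the new device channel.
 */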
static int
setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int err;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return ENOMEM;
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT));
	if (err < 0) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}
	info->ring_ref = err;

	err = bind_listening_port_to_irqhandler(dev->otherend_id,
		"xbd", (driver_intr_t *)blkif_int, info,
		INTR_TYPE_BIO | INTR_MPSAFE, NULL);
	if (err <= 0) {
		xenbus_dev_fatal(dev, err,
				 "bind_listening_port_to_irqhandler failed");
		goto fail;
	}
	info->irq = err;

	return 0;
 fail:
	blkif_free(info, 0);
	return err;
}


/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
			    XenbusState backend_state)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront:backend_changed.\n");

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
		break;

	case XenbusStateConnected:
		connect(info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
		break;
	}
}

/*
 * Invoked when the backend is finally 'ready' (and has produced the
 * details about the physical device - #sectors, size, etc).
 */
static void
connect(struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "sectors", "%lu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err,
				 "reading backend fields at %s",
				 info->xbdev->otherend);
		return;
	}
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-barrier", "%lu", &info->feature_barrier,
			    NULL);
	if (err)
		info->feature_barrier = 0;

	xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void blkfront_closing(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_closing: %s removed\n", dev->nodename);

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_switch_state(dev, XenbusStateClosed);
}


static int blkfront_remove(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);

	blkif_free(info, 0);

	free(info, M_DEVBUF);

	return 0;
}


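/*
 * The shadow array doubles as a free list: the req.id field of a free
 * entry holds the index of the next free entry, shadow_free is the list
 * head, and the sentinel 0x0fffffff terminates the chain.  In-use entries
 * keep the copy needed to reissue a request after suspend/resume.
 */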
static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
	return nfree;
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

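/*
 * Push newly queued requests to the backend, and send an event-channel
 * notification only when RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() indicates
 * the backend asked to be woken; this keeps notification hypercalls off
 * the hot path while the backend is already polling the ring.
 */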
static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv ? */

	schedule_work(&info->work);
#endif
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		printk("xb%d: not found", dp->d_unit);
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/*
		 * Check whether we have been instructed to close.  We will
		 * have ignored this request initially, as the device was
		 * still mounted.
		 */
		struct xenbus_device *dev = sc->xb_info->xbdev;
		XenbusState state = xenbus_read_driver_state(dev->otherend);

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}


/*
 * blkif_queue_request
 *
 * Translate a bio into a single-segment blkif ring request and queue it
 * on the shared ring.  The operation becomes BLKIF_OP_READ or
 * BLKIF_OP_WRITE from bio_cmd; the data buffer must be a virtual address
 * in the guest OS.
 */
static int blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	unsigned long buffer_ma;
	blkif_request_t *ring_req;
	unsigned long id;
	unsigned int fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (gnttab_alloc_grant_references(
		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

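	/*
	 * The backend maps whole machine frames, so an I/O buffer that is
	 * not suitably aligned is bounced through a temporary aligned copy.
	 * The original and aligned pointers are stashed in bio_driver1 and
	 * bio_driver2 so that blkif_int() can copy read data back and free
	 * the bounce buffer on completion.
	 */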
	/* Check if the buffer is properly aligned. */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
			PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
					M_NOWAIT);

		/* M_NOWAIT may fail; give back the grants and retry later. */
		if (newbuf == NULL) {
			gnttab_free_grant_references(gref_head);
			return 1;
		}

		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* save a copy of the current buffer */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring,
				    info->ring.req_prod_pvt);
	id = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id = id;
	ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
		BLKIF_OP_WRITE;

	ring_req->sector_number = (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments = 0;	/* XXX not doing scatter/gather since buffer
					 * chaining is not supported.
					 */

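	/*
	 * fsect/lsect are 512-byte sector offsets within the granted frame:
	 * the segment covers sectors [fsect, lsect] of that single page,
	 * which is why a request here may never cross a page boundary.
	 */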
	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
	/* install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT(ref != -ENOSPC, ("grant_reference failed"));

	gnttab_grant_foreign_access_ref(
		ref,
		info->xbdev->otherend_id,
		buffer_ma >> PAGE_SHIFT,
		ring_req->operation & 1); /* read-only grant for writes:
					   * the backend only reads the page */
	info->shadow[id].frame[ring_req->nr_segments] =
		buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
		(struct blkif_request_segment) {
			.gref       = ref,
			.first_sect = fsect,
			.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
		("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
		("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return 0;
}



/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio		*bp;
	int			queued = 0;
	struct blkfront_info	*info = sc->xb_info;
	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {

		if (RING_FULL(&info->ring))
			goto wait;

		if (blkif_queue_request(bp)) {
		wait:
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

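/*
 * Interrupt handler for the block-device event channel.  Drain the
 * response ring, complete the corresponding bios, and re-check via
 * RING_FINAL_CHECK_FOR_RESPONSES() so that a response arriving while we
 * drain is not lost between the final read and re-arming rsp_event.
 */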
static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;
	DPRINTK("");

	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		bp   = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* had an unaligned buffer that needs to be copied */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:

			/* free the copy buffer */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				XENPRINTF("Bad return from blkdev data request: %x\n",
					  bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}

static void
blkif_free(struct blkfront_info *info, int suspend)
{

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref, 0,
					  info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq, info);
	info->irq = 0;
}

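/*
 * Release the grant references a completed request was holding so the
 * underlying pages can be reused for new I/O.
 */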
static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}

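/*
 * After a suspend/resume cycle (e.g. live migration) the old shared ring
 * is gone.  Replay every request still outstanding in the shadow copy
 * onto the fresh ring, rewriting the grant references invalidated by the
 * suspension, before declaring the device connected again.
 */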
static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
	PANIC_IF(copy == NULL);
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
			&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->seg[j].gref,
				info->xbdev->otherend_id,
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Send off requeued requests */
	mtx_lock(&blkif_io_lock);
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}

static int
blkfront_is_ready(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	return info->is_ready;
}

static struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};


static struct xenbus_driver blkfront = {
	.name             = "vbd",
	.ids              = blkfront_ids,
	.probe            = blkfront_probe,
	.remove           = blkfront_remove,
	.resume           = blkfront_resume,
	.otherend_changed = backend_changed,
	.is_ready         = blkfront_is_ready,
};



static void
xenbus_init(void)
{
	xenbus_register_frontend(&blkfront);
}

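/*
 * MTX_SYSINIT enrolls blkif_io_lock with a SYSINIT of its own, so the
 * lock is initialized early in boot, before xenbus_init() registers the
 * driver at SI_SUB_PSEUDO time.
 */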
MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS);
SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL);


/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 8
 * tab-width: 4
 * indent-tabs-mode: t
 * End:
 */