/*-
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * XenoBSD block device driver
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>

#include <machine/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>

#include <machine/xen/hypervisor.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xen_intr.h>
#include <machine/xen/xenbus.h>
#include <machine/xen/evtchn.h>
#include <xen/interface/grant_table.h>

#include <geom/geom_disk.h>
#include <machine/xen/xenfunc.h>
#include <xen/gnttab.h>

#include <dev/xen/blkfront/block.h>

#define	ASSERT(S)	KASSERT(S, (#S))
/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void connect(struct blkfront_info *);
static void blkfront_closing(struct xenbus_device *);
static int blkfront_remove(struct xenbus_device *);
static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
static void blkif_int(void *);
#if 0
static void blkif_restart_queue(void *arg);
#endif
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);

#define GRANT_INVALID_REF 0
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
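
/*
 * BLK_RING_SIZE is the number of request slots that fit in the single
 * shared page: __RING_SIZE() rounds the usable space down to a power of
 * two, which with 4 KB pages and the blkif request layout works out to
 * 32 entries.  (The exact value depends on PAGE_SIZE and the blkif ABI.)
 */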

LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#endif


#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#ifdef notyet
static char *blkif_state_name[] = {
	[BLKIF_STATE_DISCONNECTED] = "disconnected",
	[BLKIF_STATE_CONNECTED]    = "connected",
	[BLKIF_STATE_SUSPENDED]    = "closed",
};

static char *blkif_status_name[] = {
	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
#endif
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif

static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
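
/*
 * Upper bound on segments in flight: each ring slot can carry up to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) segments, so a 32-entry ring
 * allows 352 outstanding segments.  The driver below currently issues
 * single-segment requests only, so the practical limit on outstanding
 * requests is BLK_RING_SIZE.
 */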

static void kick_pending_request_queues(struct blkfront_info *);
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
static int blkif_queue_request(struct bio *bp);
static void xb_strategy(struct bio *bp);



/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64

#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
#define XBD_SECTOR_SHFT		9

static struct mtx blkif_io_lock;

static vm_paddr_t
pfn_to_mfn(vm_paddr_t pfn)
{
	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}
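
/*
 * pfn_to_mfn() translates a guest pseudo-physical frame number into the
 * machine frame number that the hypervisor and the backend actually see;
 * grant-table and ring operations must always use machine frames.  For
 * example, granting the frame backing a buffer at physical address pa
 * would use pfn_to_mfn(pa >> PAGE_SHIFT).
 */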


int
xlvbd_add(blkif_sector_t capacity, int unit, uint16_t vdisk_info, uint16_t sector_size,
	  struct blkfront_info *info)
{
	struct xb_softc	*sc;
	int			error = 0;
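	/*
	 * XXX unit - 767: the first Xen vbd (hda, device 768 in the
	 * classic numbering) becomes unit 1.  This assumes the backend
	 * uses the traditional hd-style device ids.
	 */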
	int unitno = unit - 767;

	sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	sc->xb_unit = unitno;
	sc->xb_info = info;
	info->sc = sc;

	sc->xb_disk = disk_alloc();
	sc->xb_disk->d_unit = unitno;
	sc->xb_disk->d_open = blkif_open;
	sc->xb_disk->d_close = blkif_close;
	sc->xb_disk->d_ioctl = blkif_ioctl;
	sc->xb_disk->d_strategy = xb_strategy;
	sc->xb_disk->d_name = "xbd";
	sc->xb_disk->d_drv1 = sc;
	sc->xb_disk->d_sectorsize = sector_size;

	/* XXX */
	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
#if 0
	sc->xb_disk->d_maxsize = DFLTPHYS;
#else /* XXX: xen can't handle large single i/o requests */
	sc->xb_disk->d_maxsize = 4096;
#endif
#ifdef notyet
	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
		  sc->xb_disk->d_mediasize);
#endif
	sc->xb_disk->d_flags = 0;
	disk_create(sc->xb_disk, DISK_VERSION_00);
	bioq_init(&sc->xb_bioq);

	return error;
}

void
xlvbd_del(struct blkfront_info *info)
{
	struct xb_softc	*sc;

	sc = info->sc;
	disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/

/*
 * Read/write routine for a buffer.  Find the proper unit, place the
 * buffer on the unit's sort queue, and kick the controller.
 */
static void
xb_strategy(struct bio *bp)
{
	struct xb_softc	*sc = (struct xb_softc *)bp->bio_disk->d_drv1;

	/* bogus disk? */
	if (sc == NULL) {
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	DPRINTK("");

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	mtx_lock(&blkif_io_lock);
	bioq_disksort(&sc->xb_bioq, bp);

	xb_startio(sc);
	mtx_unlock(&blkif_io_lock);
	return;

 bad:
	/*
	 * Correctly set the bio to indicate a failed transfer.
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}


/*
 * Setup supplies the backend directory and the virtual device number.
 * We allocate an event channel and the shared ring frame, then watch
 * the backend to learn when it is ready.
 */
static int blkfront_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	int err, vdevice, i;
	struct blkfront_info *info;

	/* FIXME: Use dynamic device id if this is not set. */
	err = xenbus_scanf(XBT_NIL, dev->nodename,
			   "virtual-device", "%i", &vdevice);
	if (err != 1) {
		xenbus_dev_fatal(dev, err, "reading virtual-device");
		printf("couldn't find virtual device\n");
		return (err);
	}

	info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (info == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating info structure");
		return ENOMEM;
	}

	/*
	 * XXX debug only
	 */
	for (i = 0; i < sizeof(*info); i++)
		if (((uint8_t *)info)[i] != 0)
			panic("non-null memory");

	info->shadow_free = 0;
	info->xbdev = dev;
	info->vdevice = vdevice;
	info->connected = BLKIF_STATE_DISCONNECTED;

	/* work queue needed ? */
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Front end dir is a number, which is used as the id. */
	info->handle = strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
	dev->dev_driver_data = info;

	err = talk_to_backend(dev, info);
	if (err) {
		free(info, M_DEVBUF);
		dev->dev_driver_data = NULL;
		return err;
	}

	return 0;
}


static int blkfront_resume(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;
	int err;

	DPRINTK("blkfront_resume: %s\n", dev->nodename);

	blkif_free(info, 1);

	err = talk_to_backend(dev, info);
	if (!err)
		blkif_recover(info);

	return err;
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
			   struct blkfront_info *info)
{
	const char *message = NULL;
	struct xenbus_transaction xbt;
	int err;

	/* Create shared ring, alloc event channel. */
	err = setup_blkring(dev, info);
	if (err)
		goto out;

 again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		goto destroy_blkring;
	}

	err = xenbus_printf(xbt, dev->nodename,
			    "ring-ref", "%u", info->ring_ref);
	if (err) {
		message = "writing ring-ref";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename,
			    "event-channel", "%u", irq_to_evtchn_port(info->irq));
	if (err) {
		message = "writing event-channel";
		goto abort_transaction;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
		if (err == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, err, "completing transaction");
		goto destroy_blkring;
	}
	xenbus_switch_state(dev, XenbusStateInitialised);

	return 0;

 abort_transaction:
	xenbus_transaction_end(xbt, 1);
	if (message)
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);
 out:
	return err;
}
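
/*
 * On success the frontend's xenstore directory ends up looking roughly
 * like the following (domain, device, and values are illustrative only):
 *
 *	/local/domain/1/device/vbd/768/ring-ref      = "8"
 *	/local/domain/1/device/vbd/768/event-channel = "11"
 *	/local/domain/1/device/vbd/768/state         = "3" (Initialised)
 *
 * The backend reads these keys, maps the ring page, binds the event
 * channel, and then moves itself to Connected.
 */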

static int
setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
{
	blkif_sring_t *sring;
	int err;

	info->ring_ref = GRANT_INVALID_REF;

	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sring == NULL) {
		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
		return ENOMEM;
	}
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

	err = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT));
	if (err < 0) {
		free(sring, M_DEVBUF);
		info->ring.sring = NULL;
		goto fail;
	}
	info->ring_ref = err;

	err = bind_listening_port_to_irqhandler(dev->otherend_id,
		"xbd", (driver_intr_t *)blkif_int, info,
		INTR_TYPE_BIO | INTR_MPSAFE, NULL);
	if (err <= 0) {
		xenbus_dev_fatal(dev, err,
				 "bind_listening_port_to_irqhandler failed");
		goto fail;
	}
	info->irq = err;

	return 0;
 fail:
	blkif_free(info, 0);
	return err;
}


/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
			    XenbusState backend_state)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront:backend_changed.\n");

	switch (backend_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
	case XenbusStateReconfigured:
	case XenbusStateReconfiguring:
		break;

	case XenbusStateConnected:
		connect(info);
		break;

	case XenbusStateClosing:
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
#ifdef notyet
		bd = bdget(info->dev);
		if (bd == NULL)
			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

		down(&bd->bd_sem);
		if (info->users > 0)
			xenbus_dev_error(dev, -EBUSY,
					 "Device in use; refusing to close");
		else
			blkfront_closing(dev);
		up(&bd->bd_sem);
		bdput(bd);
#endif
	}
}

/*
 * Invoked when the backend is finally 'ready' (and has provided the
 * details about the physical device - #sectors, size, etc).
 */
static void
connect(struct blkfront_info *info)
{
	unsigned long sectors, sector_size;
	unsigned int binfo;
	int err;

	if ((info->connected == BLKIF_STATE_CONNECTED) ||
	    (info->connected == BLKIF_STATE_SUSPENDED))
		return;

	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "sectors", "%lu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err,
				 "reading backend fields at %s",
				 info->xbdev->otherend);
		return;
	}
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-barrier", "%lu", &info->feature_barrier,
			    NULL);
	if (err)
		info->feature_barrier = 0;

	xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);

	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Kick pending requests. */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
	info->is_ready = 1;

#if 0
	add_disk(info->gd);
#endif
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void blkfront_closing(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_closing: %s removed\n", dev->nodename);

	if (info->mi) {
		DPRINTK("Calling xlvbd_del\n");
		xlvbd_del(info);
		info->mi = NULL;
	}

	xenbus_switch_state(dev, XenbusStateClosed);
}


static int blkfront_remove(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);

	blkif_free(info, 0);

	free(info, M_DEVBUF);

	return 0;
}


static inline int
GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
	unsigned long nfree = info->shadow_free;

	KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
	info->shadow_free = info->shadow[nfree].req.id;
	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
	return nfree;
}

static inline void
ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}
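
/*
 * The free list is threaded through the otherwise-unused req.id fields
 * of the shadow entries and behaves like a stack.  For example, right
 * after blkfront_probe() the list is
 *
 *	shadow_free = 0, shadow[0].req.id = 1, ...,
 *	shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff (end marker),
 *
 * so GET_ID_FROM_FREELIST() pops slot 0 and ADD_ID_TO_FREELIST(info, 0)
 * pushes it back.
 */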

static inline void
flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}
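
/*
 * RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() publishes req_prod to the
 * backend and sets 'notify' only if the backend could have gone to
 * sleep before seeing the new requests (i.e. its req_event falls within
 * the newly pushed range), so the event-channel kick is skipped when
 * the backend is already actively polling the ring.
 */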

static void
kick_pending_request_queues(struct blkfront_info *info)
{
	/* XXX check if we can't simplify */
#if 0
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
#endif
	if (!RING_FULL(&info->ring)) {
#if 0
		sc = LIST_FIRST(&xbsl_head);
		LIST_REMOVE(sc, entry);
		/* Re-enable calldowns. */
		blk_start_queue(di->rq);
#endif
		/* Kick things off immediately. */
		xb_startio(info->sc);
	}
}

#if 0
/* XXX */
static void blkif_restart_queue(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	mtx_lock(&blkif_io_lock);
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}
#endif

static void blkif_restart_queue_callback(void *arg)
{
#if 0
	struct blkfront_info *info = (struct blkfront_info *)arg;
	/* XXX BSD equiv ? */

	schedule_work(&info->work);
#endif
}

static int
blkif_open(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL) {
		/* Don't dereference sc here; it is NULL. */
		printf("xbd: opening nonexistent disk\n");
		return (ENXIO);
	}

	sc->xb_flags |= XB_OPEN;
	sc->xb_info->users++;
	return (0);
}

static int
blkif_close(struct disk *dp)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);
	sc->xb_flags &= ~XB_OPEN;
	if (--(sc->xb_info->users) == 0) {
		/* Check whether we have been instructed to close.  We will
		   have ignored this request initially, as the device was
		   still mounted. */
		struct xenbus_device *dev = sc->xb_info->xbdev;
		XenbusState state = xenbus_read_driver_state(dev->otherend);

		if (state == XenbusStateClosing)
			blkfront_closing(dev);
	}
	return (0);
}

static int
blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;

	if (sc == NULL)
		return (ENXIO);

	return (ENOTTY);
}


/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into.  This should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct bio *bp)
{
	caddr_t alignbuf;
	vm_paddr_t buffer_ma;
	blkif_request_t *ring_req;
	unsigned long id;
	uint64_t fsect, lsect;
	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
	struct blkfront_info *info = sc->xb_info;
	int ref;

	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (gnttab_alloc_grant_references(
		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

	/* Check if the buffer is properly aligned. */
	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
			PAGE_SIZE;
		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
					M_NOWAIT);

		if (newbuf == NULL) {
			/* No bounce buffer available; return the grant
			   references and let the caller retry the bio. */
			gnttab_free_grant_references(gref_head);
			return 1;
		}
		alignbuf = (char *)roundup2((u_long)newbuf, align);

		/* save a copy of the current buffer */
		bp->bio_driver1 = newbuf;
		bp->bio_driver2 = alignbuf;

		/* Copy the data for a write */
		if (bp->bio_cmd == BIO_WRITE)
			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
	} else
		alignbuf = bp->bio_data;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	id = GET_ID_FROM_FREELIST(info);
	info->shadow[id].request = (unsigned long)bp;

	ring_req->id = id;
	ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
		BLKIF_OP_WRITE;

	ring_req->sector_number = (blkif_sector_t)bp->bio_pblkno;
	ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;

	ring_req->nr_segments = 0;	/* XXX not doing scatter/gather since buffer
					 * chaining is not supported.
					 */

	buffer_ma = vtomach(alignbuf);
	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
	/* install a grant reference. */
	ref = gnttab_claim_grant_reference(&gref_head);
	KASSERT(ref != -ENOSPC, ("grant_reference failed"));

	gnttab_grant_foreign_access_ref(
		ref,
		info->xbdev->otherend_id,
		buffer_ma >> PAGE_SHIFT,
		ring_req->operation & 1); /* read-only grant for writes */
	info->shadow[id].frame[ring_req->nr_segments] =
		buffer_ma >> PAGE_SHIFT;

	ring_req->seg[ring_req->nr_segments] =
		(struct blkif_request_segment) {
			.gref       = ref,
			.first_sect = fsect,
			.last_sect  = lsect };

	ring_req->nr_segments++;
	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
		("XEN buffer must be sector aligned"));
	KASSERT(lsect <= 7,
		("XEN disk driver data cannot cross a page boundary"));

	buffer_ma &= ~PAGE_MASK;

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	gnttab_free_grant_references(gref_head);

	return 0;
}
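
/*
 * A note on sizing: with nr_segments fixed at one and d_maxsize clamped
 * to 4096 in xlvbd_add(), every bio handed to blkif_queue_request() fits
 * inside a single page, so fsect/lsect select 512-byte sectors within
 * one frame and the "lsect <= 7" assertion holds (assuming 4 KB pages).
 */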


/*
 * Dequeue buffers and place them in the shared communication ring.
 * Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 */
static void
xb_startio(struct xb_softc *sc)
{
	struct bio		*bp;
	int			queued = 0;
	struct blkfront_info *info = sc->xb_info;
	DPRINTK("");

	mtx_assert(&blkif_io_lock, MA_OWNED);

	while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {

		if (RING_FULL(&info->ring))
			goto wait;

		if (blkif_queue_request(bp)) {
		wait:
			bioq_insert_head(&sc->xb_bioq, bp);
			break;
		}
		queued++;
	}

	if (queued != 0)
		flush_requests(sc->xb_info);
}

static void
blkif_int(void *xsc)
{
	struct xb_softc *sc = NULL;
	struct bio *bp;
	blkif_response_t *bret;
	RING_IDX i, rp;
	struct blkfront_info *info = xsc;
	DPRINTK("");

	TRACE_ENTER;

	mtx_lock(&blkif_io_lock);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		mtx_unlock(&blkif_io_lock);
		return;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		bp   = (struct bio *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		ADD_ID_TO_FREELIST(info, id);

		switch (bret->operation) {
		case BLKIF_OP_READ:
			/* had an unaligned buffer that needs to be copied */
			if (bp->bio_driver1)
				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
			/* FALLTHROUGH */
		case BLKIF_OP_WRITE:

			/* free the copy buffer */
			if (bp->bio_driver1) {
				free(bp->bio_driver1, M_DEVBUF);
				bp->bio_driver1 = NULL;
			}

			if (unlikely(bret->status != BLKIF_RSP_OKAY)) {
				printf("Bad return from blkdev data request: %x\n",
				       bret->status);
				bp->bio_flags |= BIO_ERROR;
			}

			sc = (struct xb_softc *)bp->bio_disk->d_drv1;

			if (bp->bio_flags & BIO_ERROR)
				bp->bio_error = EIO;
			else
				bp->bio_resid = 0;

			biodone(bp);
			break;
		default:
			panic("received invalid operation");
			break;
		}
	}

	info->ring.rsp_cons = i;

	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else {
		info->ring.sring->rsp_event = i + 1;
	}

	kick_pending_request_queues(info);

	mtx_unlock(&blkif_io_lock);
}
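
/*
 * The response loop above follows the standard Xen shared-ring consumer
 * protocol: rsp_event is left one past the last response processed so
 * the backend only raises an event for genuinely new responses, and
 * RING_FINAL_CHECK_FOR_RESPONSES() closes the race where a response
 * slips in between the loop exiting and rsp_event being updated.
 */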

static void
blkif_free(struct blkfront_info *info, int suspend)
{
	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref,
					  info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq, info);
	info->irq = 0;
}

static void
blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
}

static void
blkif_recover(struct blkfront_info *info)
{
	int i, j;
	blkif_request_t *req;
	struct blk_shadow *copy;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
	PANIC_IF(copy == NULL);
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (copy[i].request == 0)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(
			&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = GET_ID_FROM_FREELIST(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/* Rewrite any grant references invalidated by suspend/resume. */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->seg[j].gref,
				info->xbdev->otherend_id,
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				0 /* assume not readonly */);

		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	free(copy, M_DEVBUF);

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Now safe for us to use the shared ring */
	mtx_lock(&blkif_io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	mtx_unlock(&blkif_io_lock);

	/* Send off requeued requests */
	mtx_lock(&blkif_io_lock);
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);
	mtx_unlock(&blkif_io_lock);
}

static int
blkfront_is_ready(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev->dev_driver_data;

	return info->is_ready;
}

static struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};


static struct xenbus_driver blkfront = {
	.name             = "vbd",
	.ids              = blkfront_ids,
	.probe            = blkfront_probe,
	.remove           = blkfront_remove,
	.resume           = blkfront_resume,
	.otherend_changed = backend_changed,
	.is_ready         = blkfront_is_ready,
};



static void
xenbus_init(void)
{
	xenbus_register_frontend(&blkfront);
}

MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_SECOND, xenbus_init, NULL);


/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 8
 * tab-width: 4
 * indent-tabs-mode: t
 * End:
 */