xref: /freebsd/sys/dev/xen/blkback/blkback.c (revision aa64588d28258aef88cc33b8043112e8856948d0)
1 /*
2  * Copyright (c) 2006, Cisco Systems, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/mbuf.h>
37 #include <sys/malloc.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/queue.h>
41 #include <sys/taskqueue.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/filedesc.h>
45 #include <sys/vnode.h>
46 #include <sys/fcntl.h>
47 #include <sys/disk.h>
48 #include <sys/bio.h>
49 
50 #include <sys/module.h>
51 #include <sys/bus.h>
52 #include <sys/sysctl.h>
53 
54 #include <geom/geom.h>
55 
56 #include <vm/vm_extern.h>
57 #include <vm/vm_kern.h>
58 
59 #include <machine/xen-os.h>
60 #include <machine/hypervisor.h>
61 #include <machine/hypervisor-ifs.h>
62 #include <machine/xen_intr.h>
63 #include <machine/evtchn.h>
64 #include <machine/xenbus.h>
65 #include <machine/gnttab.h>
66 #include <machine/xen-public/memory.h>
67 #include <dev/xen/xenbus/xenbus_comms.h>
68 
69 
70 #if XEN_BLKBACK_DEBUG
71 #define DPRINTF(fmt, args...) \
72     printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
73 #else
74 #define DPRINTF(fmt, args...) ((void)0)
75 #endif
76 
77 #define WPRINTF(fmt, args...) \
78     printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
79 
80 #define BLKBACK_INVALID_HANDLE (~0)
81 
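/*
 * Bookkeeping for the frontend's shared ring page once it has been mapped
 * into our address space through the grant table (see map_ring()/unmap_ring()).
 */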
82 struct ring_ref {
83 	vm_offset_t va;
84 	grant_handle_t handle;
85 	uint64_t bus_addr;
86 };
87 
88 typedef struct blkback_info {
89 
90 	/* Schedule lists */
91 	STAILQ_ENTRY(blkback_info) next_req;
92 	int on_req_sched_list;
93 
94 	struct xenbus_device *xdev;
95 	XenbusState frontend_state;
96 
97 	domid_t domid;
98 
99 	int state;
100 	int ring_connected;
101 	struct ring_ref rr;
102 	blkif_back_ring_t ring;
103 	evtchn_port_t evtchn;
104 	int irq;
105 	void *irq_cookie;
106 
107 	int ref_cnt;
108 
109 	int handle;
110 	char *mode;
111 	char *type;
112 	char *dev_name;
113 
114 	struct vnode *vn;
115 	struct cdev *cdev;
116 	struct cdevsw *csw;
117 	u_int sector_size;
118 	int sector_size_shift;
119 	off_t media_size;
120 	u_int media_num_sectors;
121 	int major;
122 	int minor;
123 	int read_only;
124 
125 	struct mtx blk_ring_lock;
126 
127 	device_t ndev;
128 
129 	/* Stats */
130 	int st_rd_req;
131 	int st_wr_req;
132 	int st_oo_req;
133 	int st_err_req;
134 } blkif_t;
135 
136 /*
137  * These are rather arbitrary. They are fairly large because adjacent requests
138  * pulled from a communication ring are quite likely to end up being part of
139  * the same scatter/gather request at the disc.
140  *
141  * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
142  *
143  * This will increase the chances of being able to write whole tracks.
144  * 64 should be enough to keep us competitive with Linux.
145  */
146 static int blkif_reqs = 64;
147 TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs);
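/*
 * The tunable above can be set at boot time, e.g. in /boot/loader.conf
 * (illustrative value):
 *
 *   xen.vbd.blkif_reqs="128"
 */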
148 
149 static int mmap_pages;
150 
151 /*
152  * Each outstanding request that we've passed to the lower device layers has a
153  * 'pending_req' allocated to it. Each bio that completes decrements
154  * the pendcnt towards zero. When it hits zero, the specified domain has a
155  * response queued for it, with the saved 'id' passed back.
156  */
157 typedef struct pending_req {
158 	blkif_t       *blkif;
159 	uint64_t       id;
160 	int            nr_pages;
161 	int            pendcnt;
162 	unsigned short operation;
163 	int            status;
164 	STAILQ_ENTRY(pending_req) free_list;
165 } pending_req_t;
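/*
 * Lifecycle: a pending_req is taken from the free list by alloc_req(), filled
 * in by dispatch_rw_block_io(), counted down in end_block_io_op() as each bio
 * completes, and finally returned via free_req(), which may re-kick
 * blk_req_task if the free list had been empty.
 */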
166 
167 static pending_req_t *pending_reqs;
168 static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free =
169 	STAILQ_HEAD_INITIALIZER(pending_free);
170 static struct mtx pending_free_lock;
171 
172 static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list =
173 	STAILQ_HEAD_INITIALIZER(req_sched_list);
174 static struct mtx req_sched_list_lock;
175 
176 static unsigned long mmap_vstart;
177 static unsigned long *pending_vaddrs;
178 static grant_handle_t *pending_grant_handles;
179 
180 static struct task blk_req_task;
181 
182 /* Protos */
183 static void disconnect_ring(blkif_t *blkif);
184 static int vbd_add_dev(struct xenbus_device *xdev);
185 
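/*
 * Each pending_req owns BLKIF_MAX_SEGMENTS_PER_REQUEST consecutive slots in
 * the pending_vaddrs[]/pending_grant_handles[] arrays.  Assuming the usual
 * value of 11 segments per request, request 2, segment 3 would map to slot
 * 2*11 + 3 = 25.
 */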
186 static inline int vaddr_pagenr(pending_req_t *req, int seg)
187 {
188 	return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
189 }
190 
191 static inline unsigned long vaddr(pending_req_t *req, int seg)
192 {
193 	return pending_vaddrs[vaddr_pagenr(req, seg)];
194 }
195 
196 #define pending_handle(_req, _seg) \
197 	(pending_grant_handles[vaddr_pagenr(_req, _seg)])
198 
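/*
 * Reserve a range of kernel virtual address space and return the machine
 * pages backing it to Xen (XENMEM_decrease_reservation) so that frontend
 * grants can later be mapped into the now-empty slots.  The hypercalls are
 * batched through multicalls, 16 pages at a time.
 */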
199 static unsigned long
200 alloc_empty_page_range(unsigned long nr_pages)
201 {
202 	void *pages;
203 	int i = 0, j = 0;
204 	multicall_entry_t mcl[17];
205 	unsigned long mfn_list[16];
206 	struct xen_memory_reservation reservation = {
207 		.extent_start = mfn_list,
208 		.nr_extents   = 0,
209 		.address_bits = 0,
210 		.extent_order = 0,
211 		.domid        = DOMID_SELF
212 	};
213 
214 	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
215 	if (pages == NULL)
216 		return 0;
217 
218 	memset(mcl, 0, sizeof(mcl));
219 
220 	while (i < nr_pages) {
221 		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
222 
223 		mcl[j].op = __HYPERVISOR_update_va_mapping;
224 		mcl[j].args[0] = va;
225 
226 		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
227 
228 		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
229 
230 		if (j == 16 || i == nr_pages) {
231 			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
232 
233 			reservation.nr_extents = j;
234 
235 			mcl[j].op = __HYPERVISOR_memory_op;
236 			mcl[j].args[0] = XENMEM_decrease_reservation;
237 			mcl[j].args[1] =  (unsigned long)&reservation;
238 
239 			(void)HYPERVISOR_multicall(mcl, j+1);
240 
241 			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
242 			j = 0;
243 		}
244 	}
245 
246 	return (unsigned long)pages;
247 }
248 
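/*
 * alloc_req()/free_req() manage the fixed pool of pending_req structures.
 * free_req() re-schedules blk_req_task when the pool transitions from empty
 * to non-empty, so interfaces parked in out_of_preqs get serviced again.
 */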
249 static pending_req_t *
250 alloc_req(void)
251 {
252 	pending_req_t *req;
253 	mtx_lock(&pending_free_lock);
254 	if ((req = STAILQ_FIRST(&pending_free))) {
255 		STAILQ_REMOVE(&pending_free, req, pending_req, free_list);
256 		STAILQ_NEXT(req, free_list) = NULL;
257 	}
258 	mtx_unlock(&pending_free_lock);
259 	return req;
260 }
261 
262 static void
263 free_req(pending_req_t *req)
264 {
265 	int was_empty;
266 
267 	mtx_lock(&pending_free_lock);
268 	was_empty = STAILQ_EMPTY(&pending_free);
269 	STAILQ_INSERT_TAIL(&pending_free, req, free_list);
270 	mtx_unlock(&pending_free_lock);
271 	if (was_empty)
272 		taskqueue_enqueue(taskqueue_swi, &blk_req_task);
273 }
274 
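/*
 * Unmap every grant still held by a completed or aborted request in a single
 * GNTTABOP_unmap_grant_ref batch.
 */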
275 static void
276 fast_flush_area(pending_req_t *req)
277 {
278 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
279 	unsigned int i, invcount = 0;
280 	grant_handle_t handle;
281 	int ret;
282 
283 	for (i = 0; i < req->nr_pages; i++) {
284 		handle = pending_handle(req, i);
285 		if (handle == BLKBACK_INVALID_HANDLE)
286 			continue;
287 		unmap[invcount].host_addr    = vaddr(req, i);
288 		unmap[invcount].dev_bus_addr = 0;
289 		unmap[invcount].handle       = handle;
290 		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
291 		invcount++;
292 	}
293 
294 	ret = HYPERVISOR_grant_table_op(
295 		GNTTABOP_unmap_grant_ref, unmap, invcount);
296 	PANIC_IF(ret);
297 }
298 
299 static void
300 blkif_get(blkif_t *blkif)
301 {
302 	atomic_add_int(&blkif->ref_cnt, 1);
303 }
304 
305 static void
306 blkif_put(blkif_t *blkif)
307 {
308 	if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) {
309 		DPRINTF("Removing %x\n", (unsigned int)blkif);
310 		disconnect_ring(blkif);
311 		if (blkif->mode)
312 			free(blkif->mode, M_DEVBUF);
313 		if (blkif->type)
314 			free(blkif->type, M_DEVBUF);
315 		if (blkif->dev_name)
316 			free(blkif->dev_name, M_DEVBUF);
317 		free(blkif, M_DEVBUF);
318 	}
319 }
320 
321 static int
322 blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params)
323 {
324 	blkif_t *blkif;
325 
326 	blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO);
327 	if (!blkif)
328 		return ENOMEM;
329 
330 	DPRINTF("Created %x\n", (unsigned int)blkif);
331 
332 	blkif->ref_cnt = 1;
333 	blkif->domid = xdev->otherend_id;
334 	blkif->handle = handle;
335 	blkif->mode = mode;
336 	blkif->type = type;
337 	blkif->dev_name = params;
338 	blkif->xdev = xdev;
339 	xdev->data = blkif;
340 
341 	mtx_init(&blkif->blk_ring_lock, "blk_ring_lock", "blkback ring lock", MTX_DEF);
342 
343 	if (strcmp(mode, "w"))
344 		blkif->read_only = 1;
345 
346 	return 0;
347 }
348 
349 static void
350 add_to_req_schedule_list_tail(blkif_t *blkif)
351 {
352 	if (!blkif->on_req_sched_list) {
353 		mtx_lock(&req_sched_list_lock);
354 		if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) {
355 			blkif_get(blkif);
356 			STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
357 			blkif->on_req_sched_list = 1;
358 			taskqueue_enqueue(taskqueue_swi, &blk_req_task);
359 		}
360 		mtx_unlock(&req_sched_list_lock);
361 	}
362 }
363 
364 /* This routine does not call blkif_get(), does not schedule the blk_req_task
365  * to run, and assumes that the state is connected. */
366 static void
367 add_to_req_schedule_list_tail2(blkif_t *blkif)
368 {
369 	mtx_lock(&req_sched_list_lock);
370 	if (!blkif->on_req_sched_list) {
371 		STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
372 		blkif->on_req_sched_list = 1;
373 	}
374 	mtx_unlock(&req_sched_list_lock);
375 }
376 
377 /* Removes blkif from front of list and does not call blkif_put() (caller must) */
378 static blkif_t *
379 remove_from_req_schedule_list(void)
380 {
381 	blkif_t *blkif;
382 
383 	mtx_lock(&req_sched_list_lock);
384 
385 	if ((blkif = STAILQ_FIRST(&req_sched_list))) {
386 		STAILQ_REMOVE(&req_sched_list, blkif, blkback_info, next_req);
387 		STAILQ_NEXT(blkif, next_req) = NULL;
388 		blkif->on_req_sched_list = 0;
389 	}
390 
391 	mtx_unlock(&req_sched_list_lock);
392 
393 	return blkif;
394 }
395 
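/*
 * Queue a response on the shared ring and, if the frontend asked for it,
 * notify it through the event channel.  Re-schedules this interface if more
 * unconsumed requests remain on the ring.
 */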
396 static void
397 make_response(blkif_t *blkif, uint64_t id,
398 			  unsigned short op, int st)
399 {
400 	blkif_response_t *resp;
401 	blkif_back_ring_t *blk_ring = &blkif->ring;
402 	int more_to_do = 0;
403 	int notify;
404 
405 	mtx_lock(&blkif->blk_ring_lock);
406 
407 
408 	/* Place on the response ring for the relevant domain. */
409 	resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
410 	resp->id        = id;
411 	resp->operation = op;
412 	resp->status    = st;
413 	blk_ring->rsp_prod_pvt++;
414 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
415 
416 	if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
417 		/*
418 		 * Tail check for pending requests. Allows frontend to avoid
419 		 * notifications if requests are already in flight (lower
420 		 * overheads and promotes batching).
421 		 */
422 		RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
423 
424 	} else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring))
425 		more_to_do = 1;
426 
427 	mtx_unlock(&blkif->blk_ring_lock);
428 
429 	if (more_to_do)
430 		add_to_req_schedule_list_tail(blkif);
431 
432 	if (notify)
433 		notify_remote_via_irq(blkif->irq);
434 }
435 
436 static void
437 end_block_io_op(struct bio *bio)
438 {
439 	pending_req_t *pending_req = bio->bio_caller2;
440 
441 	if (bio->bio_error) {
442 		DPRINTF("BIO returned error %d for operation on device %s\n",
443 				bio->bio_error, pending_req->blkif->dev_name);
444 		pending_req->status = BLKIF_RSP_ERROR;
445 		pending_req->blkif->st_err_req++;
446 	}
447 
448 #if 0
449 	printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n",
450 		   (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags);
451 #endif
452 
453 	if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) {
454 		fast_flush_area(pending_req);
455 		make_response(pending_req->blkif, pending_req->id,
456 			      pending_req->operation, pending_req->status);
457 		blkif_put(pending_req->blkif);
458 		free_req(pending_req);
459 	}
460 
461 	g_destroy_bio(bio);
462 }
463 
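/*
 * Translate one blkif request into bios: validate the segment list, map the
 * granted pages with GNTTABOP_map_grant_ref, build one bio per segment and
 * hand them to the backing device's d_strategy routine.  Completion is
 * tracked in end_block_io_op() via pending_req->pendcnt.
 */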
464 static void
465 dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req)
466 {
467 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
468 	struct {
469 		unsigned long buf; unsigned int nsec;
470 	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
471 	unsigned int nseg = req->nr_segments, nr_sects = 0;
472 	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
473 	int operation, ret, i, nbio = 0;
474 
475 	/* Check that number of segments is sane. */
476 	if (unlikely(nseg == 0) ||
477 	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
478 		DPRINTF("Bad number of segments in request (%d)\n", nseg);
479 		goto fail_response;
480 	}
481 
482 	if (req->operation == BLKIF_OP_WRITE) {
483 		if (blkif->read_only) {
484 			DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name);
485 			goto fail_response;
486 		}
487 		operation = BIO_WRITE;
488 	} else
489 		operation = BIO_READ;
490 
491 	pending_req->blkif     = blkif;
492 	pending_req->id        = req->id;
493 	pending_req->operation = req->operation;
494 	pending_req->status    = BLKIF_RSP_OKAY;
495 	pending_req->nr_pages  = nseg;
496 
497 	for (i = 0; i < nseg; i++) {
498 		seg[i].nsec = req->seg[i].last_sect -
499 			req->seg[i].first_sect + 1;
500 
501 		if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
502 		    (seg[i].nsec <= 0))
503 			goto fail_response;
504 		nr_sects += seg[i].nsec;
505 
506 		map[i].host_addr = vaddr(pending_req, i);
507 		map[i].dom = blkif->domid;
508 		map[i].ref = req->seg[i].gref;
509 		map[i].flags = GNTMAP_host_map;
510 		if (operation == BIO_WRITE)
511 			map[i].flags |= GNTMAP_readonly;
512 	}
513 
514 	/* Convert to the disk's sector size */
515 	nr_sects = (nr_sects << 9) >> blkif->sector_size_shift;
516 
517 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
518 	PANIC_IF(ret);
519 
520 	for (i = 0; i < nseg; i++) {
521 		if (unlikely(map[i].status != 0)) {
522 			DPRINTF("invalid buffer -- could not remap it\n");
523 			goto fail_flush;
524 		}
525 
526 		pending_handle(pending_req, i) = map[i].handle;
527 #if 0
528 		/* Can't do this in FreeBSD since vtophys() returns the pfn */
529 		/* of the remote domain who loaned us the machine page - DPT */
530 		xen_phys_machine[(vtophys(vaddr(pending_req, i)) >> PAGE_SHIFT)] =
531 			map[i].dev_bus_addr >> PAGE_SHIFT;
532 #endif
533 		seg[i].buf  = map[i].dev_bus_addr |
534 			(req->seg[i].first_sect << 9);
535 	}
536 
537 	if (req->sector_number + nr_sects > blkif->media_num_sectors) {
538 		DPRINTF("%s of [%llu,%llu] extends past end of device %s\n",
539 			operation == BIO_READ ? "read" : "write",
540 			req->sector_number,
541 			req->sector_number + nr_sects, blkif->dev_name);
542 		goto fail_flush;
543 	}
544 
545 	for (i = 0; i < nseg; i++) {
546 		struct bio *bio;
547 
548 		if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) {
549 			DPRINTF("Misaligned I/O request from domain %d\n", blkif->domid);
550 			goto fail_put_bio;
551 		}
552 
553 		bio = biolist[nbio++] = g_new_bio();
554 		if (unlikely(bio == NULL))
555 			goto fail_put_bio;
556 
557 		bio->bio_cmd = operation;
558 		bio->bio_offset = req->sector_number << blkif->sector_size_shift;
559 		bio->bio_length = seg[i].nsec << 9;
560 		bio->bio_bcount = bio->bio_length;
561 		bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK));
562 		bio->bio_done = end_block_io_op;
563 		bio->bio_caller2 = pending_req;
564 		bio->bio_dev = blkif->cdev;
565 
566 		req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift;
567 #if 0
568 		printf("new: bio=%x cmd=%d sect=%llu nsect=%u iosize_max=%u @ %08lx\n",
569 			(unsigned int)bio, req->operation, req->sector_number, seg[i].nsec,
570 			blkif->cdev->si_iosize_max, seg[i].buf);
571 #endif
572 	}
573 
574 	pending_req->pendcnt = nbio;
575 	blkif_get(blkif);
576 
577 	for (i = 0; i < nbio; i++)
578 		(*blkif->csw->d_strategy)(biolist[i]);
579 
580 	return;
581 
582  fail_put_bio:
583 	for (i = 0; i < nbio; i++)
584 		if (biolist[i] != NULL) g_destroy_bio(biolist[i]);	/* last slot is NULL if g_new_bio() failed */
585  fail_flush:
586 	fast_flush_area(pending_req);
587  fail_response:
588 	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
589 	free_req(pending_req);
590 }
591 
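/*
 * Taskqueue handler: drain the schedule list, consuming requests from each
 * interface's ring until the ring is empty or we run out of pending_req
 * structures (in which case the interface is re-queued in out_of_preqs).
 */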
592 static void
593 blk_req_action(void *context, int pending)
594 {
595 	blkif_t *blkif;
596 
597 	DPRINTF("\n");
598 
599 	while (!STAILQ_EMPTY(&req_sched_list)) {
600 		blkif_back_ring_t *blk_ring;
601 		RING_IDX rc, rp;
602 
603 		blkif = remove_from_req_schedule_list();
604 
605 		blk_ring = &blkif->ring;
606 		rc = blk_ring->req_cons;
607 		rp = blk_ring->sring->req_prod;
608 		rmb(); /* Ensure we see queued requests up to 'rp'. */
609 
610 		while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
611 			blkif_request_t *req;
612 			pending_req_t *pending_req;
613 
614 			pending_req = alloc_req();
615 			if (pending_req == NULL)
616 				goto out_of_preqs;
617 
618 			req = RING_GET_REQUEST(blk_ring, rc);
619 			blk_ring->req_cons = ++rc; /* before make_response() */
620 
621 			switch (req->operation) {
622 			case BLKIF_OP_READ:
623 				blkif->st_rd_req++;
624 				dispatch_rw_block_io(blkif, req, pending_req);
625 				break;
626 			case BLKIF_OP_WRITE:
627 				blkif->st_wr_req++;
628 				dispatch_rw_block_io(blkif, req, pending_req);
629 				break;
630 			default:
631 				blkif->st_err_req++;
632 				DPRINTF("error: unknown block io operation [%d]\n",
633 						req->operation);
634 				make_response(blkif, req->id, req->operation,
635 							  BLKIF_RSP_ERROR);
636 				free_req(pending_req);
637 				break;
638 			}
639 		}
640 
641 		blkif_put(blkif);
642 	}
643 
644 	return;
645 
646  out_of_preqs:
647 	/* We ran out of pending req structs */
648 	/* Just requeue interface and wait to be rescheduled to run when one is freed */
649 	add_to_req_schedule_list_tail2(blkif);
650 	blkif->st_oo_req++;
651 }
652 
653 /* Handle interrupt from a frontend */
654 static void
655 blkback_intr(void *arg)
656 {
657 	blkif_t *blkif = arg;
658 	DPRINTF("%x\n", (unsigned int)blkif);
659 	add_to_req_schedule_list_tail(blkif);
660 }
661 
662 /* Map grant ref for ring */
663 static int
664 map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
665 {
666 	struct gnttab_map_grant_ref op;
667 
668 	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
669 	if (ring->va == 0)
670 		return ENOMEM;
671 
672 	op.host_addr = ring->va;
673 	op.flags = GNTMAP_host_map;
674 	op.ref = ref;
675 	op.dom = dom;
676 	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
677 	if (op.status) {
678 		WPRINTF("grant table op err=%d\n", op.status);
679 		kmem_free(kernel_map, ring->va, PAGE_SIZE);
680 		ring->va = 0;
681 		return EACCES;
682 	}
683 
684 	ring->handle = op.handle;
685 	ring->bus_addr = op.dev_bus_addr;
686 
687 	return 0;
688 }
689 
690 /* Unmap grant ref for ring */
691 static void
692 unmap_ring(struct ring_ref *ring)
693 {
694 	struct gnttab_unmap_grant_ref op;
695 
696 	op.host_addr = ring->va;
697 	op.dev_bus_addr = ring->bus_addr;
698 	op.handle = ring->handle;
699 	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
700 	if (op.status)
701 		WPRINTF("grant table op err=%d\n", op.status);
702 
703 	kmem_free(kernel_map, ring->va, PAGE_SIZE);
704 	ring->va = 0;
705 }
706 
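/*
 * The frontend advertises its shared ring and event channel under its own
 * xenstore directory (xdev->otherend).  A typical layout, with illustrative
 * values, looks like:
 *
 *   <otherend>/ring-ref      = "8"
 *   <otherend>/event-channel = "11"
 */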
707 static int
708 connect_ring(blkif_t *blkif)
709 {
710 	struct xenbus_device *xdev = blkif->xdev;
711 	blkif_sring_t *ring;
712 	unsigned long ring_ref;
713 	evtchn_port_t evtchn;
714 	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
715 	int err;
716 
717 	if (blkif->ring_connected)
718 		return 0;
719 
720 	/* Grab frontend data and map its shared ring memory. */
721 	err = xenbus_gather(NULL, xdev->otherend,
722 			"ring-ref", "%lu", &ring_ref,
723 		    "event-channel", "%u", &evtchn, NULL);
724 	if (err) {
725 		xenbus_dev_fatal(xdev, err,
726 			"reading %s/ring-ref and event-channel",
727 			xdev->otherend);
728 		return err;
729 	}
730 
731 	err = map_ring(ring_ref, blkif->domid, &blkif->rr);
732 	if (err) {
733 		xenbus_dev_fatal(xdev, err, "mapping ring");
734 		return err;
735 	}
736 	ring = (blkif_sring_t *)blkif->rr.va;
737 	BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE);
738 
739 	op.u.bind_interdomain.remote_dom = blkif->domid;
740 	op.u.bind_interdomain.remote_port = evtchn;
741 	err = HYPERVISOR_event_channel_op(&op);
742 	if (err) {
743 		unmap_ring(&blkif->rr);
744 		xenbus_dev_fatal(xdev, err, "binding event channel");
745 		return err;
746 	}
747 	blkif->evtchn = op.u.bind_interdomain.local_port;
748 
749 	/* bind evtchn to irq handler */
750 	blkif->irq =
751 		bind_evtchn_to_irqhandler(blkif->evtchn, "blkback",
752 			blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie);
753 
754 	blkif->ring_connected = 1;
755 
756 	DPRINTF("%x rings connected! evtchn=%d irq=%d\n",
757 			(unsigned int)blkif, blkif->evtchn, blkif->irq);
758 
759 	return 0;
760 }
761 
762 static void
763 disconnect_ring(blkif_t *blkif)
764 {
765 	DPRINTF("\n");
766 
767 	if (blkif->ring_connected) {
768 		unbind_from_irqhandler(blkif->irq, blkif->irq_cookie);
769 		blkif->irq = 0;
770 		unmap_ring(&blkif->rr);
771 		blkif->ring_connected = 0;
772 	}
773 }
774 
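/*
 * Publish the disk geometry the frontend needs under our own xenstore node
 * (xdev->nodename): "sectors", "info" (e.g. VDISK_READONLY) and
 * "sector-size", then switch the backend to XenbusStateConnected.
 */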
775 static void
776 connect(blkif_t *blkif)
777 {
778 	struct xenbus_transaction *xbt;
779 	struct xenbus_device *xdev = blkif->xdev;
780 	int err;
781 
782 	if (!blkif->ring_connected ||
783 		blkif->vn == NULL ||
784 		blkif->state == XenbusStateConnected)
785 		return;
786 
787 	DPRINTF("%s\n", xdev->otherend);
788 
789 	/* Supply the information about the device the frontend needs */
790 again:
791 	xbt = xenbus_transaction_start();
792 	if (IS_ERR(xbt)) {
793 		xenbus_dev_fatal(xdev, PTR_ERR(xbt),
794 						 "Error writing configuration for backend "
795 						 "(start transaction)");
796 		return;
797 	}
798 
799 	err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u",
800 				blkif->media_num_sectors);
801 	if (err) {
802 		xenbus_dev_fatal(xdev, err, "writing %s/sectors",
803 				 xdev->nodename);
804 		goto abort;
805 	}
806 
807 	err = xenbus_printf(xbt, xdev->nodename, "info", "%u",
808 				blkif->read_only ? VDISK_READONLY : 0);
809 	if (err) {
810 		xenbus_dev_fatal(xdev, err, "writing %s/info",
811 				 xdev->nodename);
812 		goto abort;
813 	}
814 	err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u",
815 			    blkif->sector_size);
816 	if (err) {
817 		xenbus_dev_fatal(xdev, err, "writing %s/sector-size",
818 				 xdev->nodename);
819 		goto abort;
820 	}
821 
822 	err = xenbus_transaction_end(xbt, 0);
823 	if (err == -EAGAIN)
824 		goto again;
825 	if (err)
826 		xenbus_dev_fatal(xdev, err, "ending transaction");
827 
828 	err = xenbus_switch_state(xdev, NULL, XenbusStateConnected);
829 	if (err)
830 		xenbus_dev_fatal(xdev, err, "switching %s to Connected state",
831 				 xdev->nodename);
832 
833 	blkif->state = XenbusStateConnected;
834 
835 	return;
836 
837  abort:
838 	xenbus_transaction_end(xbt, 1);
839 }
840 
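/*
 * Probe: the virtual device handle is taken from the last component of the
 * frontend path, while "mode", "type" and "params" are read from our backend
 * node.  For a physical backing store, params is typically a device name such
 * as "ada0s1" or "/dev/ada0s1" (illustrative; see open_device()).
 */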
841 static int
842 blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
843 {
844 	int err;
845 	char *p, *mode = NULL, *type = NULL, *params = NULL;
846 	long handle;
847 
848 	DPRINTF("node=%s\n", xdev->nodename);
849 
850 	p = strrchr(xdev->otherend, '/') + 1;
851 	handle = strtoul(p, NULL, 0);
852 
853 	mode = xenbus_read(NULL, xdev->nodename, "mode", NULL);
854 	if (IS_ERR(mode)) {
855 		xenbus_dev_fatal(xdev, PTR_ERR(mode), "reading mode");
856 		err = PTR_ERR(mode);
857 		goto error;
858 	}
859 
860 	type = xenbus_read(NULL, xdev->nodename, "type", NULL);
861 	if (IS_ERR(type)) {
862 		xenbus_dev_fatal(xdev, PTR_ERR(type), "reading type");
863 		err = PTR_ERR(type);
864 		goto error;
865 	}
866 
867 	params = xenbus_read(NULL, xdev->nodename, "params", NULL);
868 	if (IS_ERR(params)) {
869 		xenbus_dev_fatal(xdev, PTR_ERR(params), "reading params");
870 		err = PTR_ERR(params);
871 		goto error;
872 	}
873 
874 	err = blkif_create(xdev, handle, mode, type, params);
875 	if (err) {
876 		xenbus_dev_fatal(xdev, err, "creating blkif");
877 		goto error;
878 	}
879 
880 	err = vbd_add_dev(xdev);
881 	if (err) {
882 		blkif_put((blkif_t *)xdev->data);
883 		xenbus_dev_fatal(xdev, err, "adding vbd device");
884 	}
885 
886 	return err;
887 
888  error:
889 	if (mode)
890 		free(mode, M_DEVBUF);
891 	if (type)
892 		free(type, M_DEVBUF);
893 	if (params)
894 		free(params, M_DEVBUF);
895 	return err;
896 }
897 
898 static int
899 blkback_remove(struct xenbus_device *xdev)
900 {
901 	blkif_t *blkif = xdev->data;
902 	device_t ndev;
903 
904 	DPRINTF("node=%s\n", xdev->nodename);
905 
906 	blkif->state = XenbusStateClosing;
907 
908 	if ((ndev = blkif->ndev)) {
909 		blkif->ndev = NULL;
910 		mtx_lock(&Giant);
911 		device_detach(ndev);
912 		mtx_unlock(&Giant);
913 	}
914 
915 	xdev->data = NULL;
916 	blkif->xdev = NULL;
917 	blkif_put(blkif);
918 
919 	return 0;
920 }
921 
922 static int
923 blkback_resume(struct xenbus_device *xdev)
924 {
925 	DPRINTF("node=%s\n", xdev->nodename);
926 	return 0;
927 }
928 
929 static void
930 frontend_changed(struct xenbus_device *xdev,
931 				 XenbusState frontend_state)
932 {
933 	blkif_t *blkif = xdev->data;
934 
935 	DPRINTF("state=%d\n", frontend_state);
936 
937 	blkif->frontend_state = frontend_state;
938 
939 	switch (frontend_state) {
940 	case XenbusStateInitialising:
941 		break;
942 	case XenbusStateInitialised:
943 	case XenbusStateConnected:
944 		connect_ring(blkif);
945 		connect(blkif);
946 		break;
947 	case XenbusStateClosing:
948 		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
949 		break;
950 	case XenbusStateClosed:
951 		xenbus_remove_device(xdev);
952 		break;
953 	case XenbusStateUnknown:
954 	case XenbusStateInitWait:
955 		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
956 						 frontend_state);
957 		break;
958 	}
959 }
960 
961 /* ** Driver registration ** */
962 
963 static struct xenbus_device_id blkback_ids[] = {
964 	{ "vbd" },
965 	{ "" }
966 };
967 
968 static struct xenbus_driver blkback = {
969 	.name = "blkback",
970 	.ids = blkback_ids,
971 	.probe = blkback_probe,
972 	.remove = blkback_remove,
973 	.resume = blkback_resume,
974 	.otherend_changed = frontend_changed,
975 };
976 
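/*
 * Module initialization: size the pending request pool (blkif_reqs entries,
 * BLKIF_MAX_SEGMENTS_PER_REQUEST pages each), pre-allocate the empty page
 * range used for grant mappings, and register the xenbus backend driver.
 */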
977 static void
978 blkback_init(void *unused)
979 {
980 	int i;
981 
982 	TASK_INIT(&blk_req_task, 0, blk_req_action, NULL);
983 	mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF);
984 
985 	mtx_init(&pending_free_lock, "blk_pending_req_lock", "blkback pending request lock", MTX_DEF);
986 
987 	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
988 	pending_reqs = malloc(sizeof(pending_reqs[0]) *
989 		blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT);
990 	pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) *
991 		mmap_pages, M_DEVBUF, M_NOWAIT);
992 	pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) *
993 		mmap_pages, M_DEVBUF, M_NOWAIT);
994 	mmap_vstart = alloc_empty_page_range(mmap_pages);
995 	if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) {
996 		if (pending_reqs)
997 			free(pending_reqs, M_DEVBUF);
998 		if (pending_grant_handles)
999 			free(pending_grant_handles, M_DEVBUF);
1000 		if (pending_vaddrs)
1001 			free(pending_vaddrs, M_DEVBUF);
1002 		WPRINTF("out of memory\n");
1003 		return;
1004 	}
1005 
1006 	for (i = 0; i < mmap_pages; i++) {
1007 		pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
1008 		pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
1009 	}
1010 
1011 	for (i = 0; i < blkif_reqs; i++) {
1012 		STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list);
1013 	}
1014 
1015 	DPRINTF("registering %s\n", blkback.name);
1016 	xenbus_register_backend(&blkback);
1017 }
1018 
1019 SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL)
1020 
1021 static void
1022 close_device(blkif_t *blkif)
1023 {
1024 	DPRINTF("closing dev=%s\n", blkif->dev_name);
1025 	if (blkif->vn) {
1026 		int flags = FREAD;
1027 
1028 		if (!blkif->read_only)
1029 			flags |= FWRITE;
1030 
1031 		if (blkif->csw) {
1032 			dev_relthread(blkif->cdev);
1033 			blkif->csw = NULL;
1034 		}
1035 
1036 		(void)vn_close(blkif->vn, flags, NOCRED, curthread);
1037 		blkif->vn = NULL;
1038 	}
1039 }
1040 
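/*
 * Open the backing store by path, insist that it is a disk device, and query
 * its geometry with the DIOCGSECTORSIZE and DIOCGMEDIASIZE ioctls.  If the
 * bare name cannot be opened, "/dev/" is prepended and the open is retried.
 */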
1041 static int
1042 open_device(blkif_t *blkif)
1043 {
1044 	struct nameidata nd;
1045 	struct vattr vattr;
1046 	struct cdev *dev;
1047 	struct cdevsw *devsw;
1048 	int flags = FREAD, err = 0;
1049 
1050 	DPRINTF("opening dev=%s\n", blkif->dev_name);
1051 
1052 	if (!blkif->read_only)
1053 		flags |= FWRITE;
1054 
1055 	if (!curthread->td_proc->p_fd->fd_cdir) {
1056 		curthread->td_proc->p_fd->fd_cdir = rootvnode;
1057 		VREF(rootvnode);
1058 	}
1059 	if (!curthread->td_proc->p_fd->fd_rdir) {
1060 		curthread->td_proc->p_fd->fd_rdir = rootvnode;
1061 		VREF(rootvnode);
1062 	}
1063 	if (!curthread->td_proc->p_fd->fd_jdir) {
1064 		curthread->td_proc->p_fd->fd_jdir = rootvnode;
1065 		VREF(rootvnode);
1066 	}
1067 
1068  again:
1069 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, curthread);
1070 	err = vn_open(&nd, &flags, 0, -1);
1071 	if (err) {
1072 		if (blkif->dev_name[0] != '/') {
1073 			char *dev_path = "/dev/";
1074 			char *dev_name;
1075 
1076 			/* Try adding device path at beginning of name */
1077 			dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT);
1078 			if (dev_name) {
1079 				sprintf(dev_name, "%s%s", dev_path, blkif->dev_name);
1080 				free(blkif->dev_name, M_DEVBUF);
1081 				blkif->dev_name = dev_name;
1082 				goto again;
1083 			}
1084 		}
1085 		xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name);
1086 		return err;
1087 	}
1088 	NDFREE(&nd, NDF_ONLY_PNBUF);
1089 
1090 	blkif->vn = nd.ni_vp;
1091 
1092 	/* We only support disks for now */
1093 	if (!vn_isdisk(blkif->vn, &err)) {
1094 		xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name);
1095 		VOP_UNLOCK(blkif->vn, 0, curthread);
1096 		goto error;
1097 	}
1098 
1099 	blkif->cdev = blkif->vn->v_rdev;
1100 	blkif->csw = dev_refthread(blkif->cdev);
1101 	PANIC_IF(blkif->csw == NULL);
1102 
1103 	err = VOP_GETATTR(blkif->vn, &vattr, NOCRED);
1104 	if (err) {
1105 		xenbus_dev_fatal(blkif->xdev, err,
1106 			"error getting vnode attributes for device %s", blkif->dev_name);
1107 		VOP_UNLOCK(blkif->vn, 0, curthread);
1108 		goto error;
1109 	}
1110 
1111 	VOP_UNLOCK(blkif->vn, 0, curthread);
1112 
1113 	dev = blkif->vn->v_rdev;
1114 	devsw = dev->si_devsw;
1115 	if (!devsw->d_ioctl) {
1116 		err = ENODEV;
1117 		xenbus_dev_fatal(blkif->xdev, err,
1118 			"no d_ioctl for device %s!", blkif->dev_name);
1119 		goto error;
1120 	}
1121 
1122 	err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread);
1123 	if (err) {
1124 		xenbus_dev_fatal(blkif->xdev, err,
1125 			"error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name);
1126 		goto error;
1127 	}
1128 	blkif->sector_size_shift = fls(blkif->sector_size) - 1;
1129 
1130 	err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, (caddr_t)&blkif->media_size, FREAD, curthread);
1131 	if (err) {
1132 		xenbus_dev_fatal(blkif->xdev, err,
1133 			"error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name);
1134 		goto error;
1135 	}
1136 	blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift;
1137 
1138 	blkif->major = major(vattr.va_rdev);
1139 	blkif->minor = minor(vattr.va_rdev);
1140 
1141 	DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n",
1142 			blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size);
1143 
1144 	return 0;
1145 
1146  error:
1147 	close_device(blkif);
1148 	return err;
1149 }
1150 
1151 static int
1152 vbd_add_dev(struct xenbus_device *xdev)
1153 {
1154 	blkif_t *blkif = xdev->data;
1155 	device_t nexus, ndev;
1156 	devclass_t dc;
1157 	int err = 0;
1158 
1159 	mtx_lock(&Giant);
1160 
1161 	/* We will add a vbd device as a child of nexus0 (for now) */
1162 	if (!(dc = devclass_find("nexus")) ||
1163 		!(nexus = devclass_get_device(dc, 0))) {
1164 		WPRINTF("could not find nexus0!\n");
1165 		err = ENOENT;
1166 		goto done;
1167 	}
1168 
1169 
1170 	/* Create a newbus device representing the vbd */
1171 	ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle);
1172 	if (!ndev) {
1173 		WPRINTF("could not create newbus device vbd%d!\n", blkif->handle);
1174 		err = EFAULT;
1175 		goto done;
1176 	}
1177 
1178 	blkif_get(blkif);
1179 	device_set_ivars(ndev, blkif);
1180 	blkif->ndev = ndev;
1181 
1182 	device_probe_and_attach(ndev);
1183 
1184  done:
1185 
1186 	mtx_unlock(&Giant);
1187 
1188 	return err;
1189 }
1190 
1191 enum {
1192 	VBD_SYSCTL_DOMID,
1193 	VBD_SYSCTL_ST_RD_REQ,
1194 	VBD_SYSCTL_ST_WR_REQ,
1195 	VBD_SYSCTL_ST_OO_REQ,
1196 	VBD_SYSCTL_ST_ERR_REQ,
1197 	VBD_SYSCTL_RING,
1198 };
1199 
1200 static char *
1201 vbd_sysctl_ring_info(blkif_t *blkif, int cmd)
1202 {
1203 	char *buf = malloc(256, M_DEVBUF, M_WAITOK);
1204 	if (buf) {
1205 		if (!blkif->ring_connected)
1206 			sprintf(buf, "ring not connected\n");
1207 		else {
1208 			blkif_back_ring_t *ring = &blkif->ring;
1209 			sprintf(buf, "nr_ents=%x req_cons=%x"
1210 					" req_prod=%x req_event=%x"
1211 					" rsp_prod=%x rsp_event=%x",
1212 					ring->nr_ents, ring->req_cons,
1213 					ring->sring->req_prod, ring->sring->req_event,
1214 					ring->sring->rsp_prod, ring->sring->rsp_event);
1215 		}
1216 	}
1217 	return buf;
1218 }
1219 
1220 static int
1221 vbd_sysctl_handler(SYSCTL_HANDLER_ARGS)
1222 {
1223 	device_t dev = (device_t)arg1;
1224 	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
1225 	const char *value;
1226 	char *buf = NULL;
1227 	int err;
1228 
1229 	switch (arg2) {
1230 	case VBD_SYSCTL_DOMID:
1231 		return sysctl_handle_int(oidp, NULL, blkif->domid, req);
1232 	case VBD_SYSCTL_ST_RD_REQ:
1233 		return sysctl_handle_int(oidp, NULL, blkif->st_rd_req, req);
1234 	case VBD_SYSCTL_ST_WR_REQ:
1235 		return sysctl_handle_int(oidp, NULL, blkif->st_wr_req, req);
1236 	case VBD_SYSCTL_ST_OO_REQ:
1237 		return sysctl_handle_int(oidp, NULL, blkif->st_oo_req, req);
1238 	case VBD_SYSCTL_ST_ERR_REQ:
1239 		return sysctl_handle_int(oidp, NULL, blkif->st_err_req, req);
1240 	case VBD_SYSCTL_RING:
1241 		value = buf = vbd_sysctl_ring_info(blkif, arg2);
1242 		break;
1243 	default:
1244 		return (EINVAL);
1245 	}
1246 
1247 	err = SYSCTL_OUT(req, value, strlen(value));
1248 	if (buf != NULL)
1249 		free(buf, M_DEVBUF);
1250 
1251 	return err;
1252 }
1253 
1254 /* Newbus vbd device driver probe */
1255 static int
1256 vbd_probe(device_t dev)
1257 {
1258 	DPRINTF("vbd%d\n", device_get_unit(dev));
1259 	return 0;
1260 }
1261 
1262 /* Newbus vbd device driver attach */
1263 static int
1264 vbd_attach(device_t dev)
1265 {
1266 	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
1267 
1268 	DPRINTF("%s\n", blkif->dev_name);
1269 
1270 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1271 	    OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
1272 	    dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I",
1273 	    "domid of frontend");
1274 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1275 	    OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD,
1276 	    dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I",
1277 	    "number of read reqs");
1278 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1279 	    OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD,
1280 	    dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I",
1281 	    "number of write reqs");
1282 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1283 	    OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD,
1284 	    dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I",
1285 	    "number of deferred reqs");
1286 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1287 	    OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD,
1288 	    dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I",
1289 	    "number of reqs that returned error");
1290 #if XEN_BLKBACK_DEBUG
1291 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1292 	    OID_AUTO, "ring", CTLTYPE_STRING|CTLFLAG_RD,
1293 	    dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A",
1294 	    "req ring info");
1295 #endif
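	/*
	 * The nodes above are expected to appear under the device's sysctl
	 * tree, e.g. dev.vbd.<unit>.rd_reqs (assumption based on the standard
	 * newbus per-device sysctl layout).
	 */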
1296 
1297 	if (!open_device(blkif))
1298 		connect(blkif);
1299 
1300 	return bus_generic_attach(dev);
1301 }
1302 
1303 /* Newbus vbd device driver detach */
1304 static int
1305 vbd_detach(device_t dev)
1306 {
1307 	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
1308 
1309 	DPRINTF("%s\n", blkif->dev_name);
1310 
1311 	close_device(blkif);
1312 
1313 	bus_generic_detach(dev);
1314 
1315 	blkif_put(blkif);
1316 
1317 	return 0;
1318 }
1319 
1320 static device_method_t vbd_methods[] = {
1321 	/* Device interface */
1322 	DEVMETHOD(device_probe,		vbd_probe),
1323 	DEVMETHOD(device_attach, 	vbd_attach),
1324 	DEVMETHOD(device_detach,	vbd_detach),
1325 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
1326 	DEVMETHOD(device_suspend,	bus_generic_suspend),
1327 	DEVMETHOD(device_resume,	bus_generic_resume),
1328 	{0, 0}
1329 };
1330 
1331 static devclass_t vbd_devclass;
1332 
1333 static driver_t vbd_driver = {
1334 	"vbd",
1335 	vbd_methods,
1336 	0,
1337 };
1338 
1339 DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0);
1340 
1341 /*
1342  * Local variables:
1343  * mode: C
1344  * c-set-style: "BSD"
1345  * c-basic-offset: 4
1346  * tab-width: 4
1347  * indent-tabs-mode: t
1348  * End:
1349  */
1350