xref: /illumos-gate/usr/src/uts/common/xen/io/blk_common.c (revision 5422785d352a2bb398daceab3d1898a8aa64d006)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 #include <sys/errno.h>
29 #include <sys/types.h>
30 #include <sys/conf.h>
31 #include <sys/kmem.h>
32 #include <sys/ddi.h>
33 #include <sys/stat.h>
34 #include <sys/sunddi.h>
35 #include <sys/file.h>
36 #include <sys/open.h>
37 #include <sys/modctl.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/sysmacros.h>
40 #include <sys/ddidevmap.h>
41 #include <sys/xendev.h>
42 #include <public/io/protocols.h>
43 #include <xen/io/blkif_impl.h>
44 
45 #include "blk_common.h"
46 
47 
/*
 * State of the blk interface, i.e. of the connection between this
 * backend and the frontend.
 */
enum blk_if_state {
	/* initial state; also restored once the frontend has closed */
	BLK_IF_UNKNOWN = 0,

	/* ring is bound to the frontend; set by blk_start_connect() */
	BLK_IF_CONNECTED = 1,

	/* interface has been torn down; set by blk_ring_close() */
	BLK_IF_DISCONNECTED = 2
};
65 
/*
 * State of the backend device.
 */
enum blk_be_state {
	/* initial state */
	BLK_BE_UNKNOWN = 0,

	/* hotplug script reported Connected; backend device is ready */
	BLK_BE_READY = 1
};
73 
/*
 * State of the frontend as seen by this backend.
 */
enum blk_fe_state {
	/* initial state */
	BLK_FE_UNKNOWN = 0,

	/*
	 * The frontend's xenbus state has moved to XenbusStateInitialised;
	 * it is ready for us to connect.
	 */
	BLK_FE_READY = 1
};
84 
85 typedef struct blk_ring_state_s {
86 	kmutex_t		rs_mutex;
87 	boolean_t		rs_sleeping_on_ring;
88 	boolean_t		rs_ring_up;
89 	kcondvar_t		rs_cv;
90 } blk_ring_state_t;
91 
/*
 * Names of the per-ring named kstats.  blk_kstat_init() creates one
 * kstat per entry and blk_kstat_update() fills them in this same order,
 * so the two must be kept in sync with this table.
 */
static char *blk_stats[] = {
	"rd_reqs",	/* read requests */
	"wr_reqs",	/* write requests */
	"br_reqs",	/* write barrier requests */
	"fl_reqs",	/* flush disk cache requests */
	"oo_reqs"	/* always reported as 0 by blk_kstat_update() */
};
100 
/*
 * Request counters, bumped by blk_ring_request_get() according to the
 * operation of each request it hands out.
 */
typedef struct blk_stats_s {
	uint64_t bs_req_reads;		/* BLKIF_OP_READ */
	uint64_t bs_req_writes;		/* BLKIF_OP_WRITE */
	uint64_t bs_req_barriers;	/* BLKIF_OP_WRITE_BARRIER */
	uint64_t bs_req_flushes;	/* BLKIF_OP_FLUSH_DISKCACHE */
} blk_stats_t;
107 
108 struct blk_ring_s {
109 	kmutex_t		ri_mutex;
110 	dev_info_t		*ri_dip;
111 
112 	kstat_t			*ri_kstats;
113 	blk_stats_t		ri_stats;
114 
115 	blk_intr_t		ri_intr;
116 	caddr_t			ri_intr_arg;
117 	blk_ring_cb_t		ri_ringup;
118 	caddr_t			ri_ringup_arg;
119 	blk_ring_cb_t		ri_ringdown;
120 	caddr_t			ri_ringdown_arg;
121 
122 	/* blk interface, backend, and frontend status */
123 	enum blk_if_state	ri_if_status;
124 	enum blk_be_state	ri_be_status;
125 	enum blk_fe_state	ri_fe_status;
126 
127 	domid_t			ri_fe;
128 
129 	enum blkif_protocol	ri_protocol;
130 	size_t			ri_nentry;
131 	size_t			ri_entrysize;
132 
133 	xendev_ring_t		*ri_ring;
134 	blk_ring_state_t	ri_state;
135 };
136 
137 
138 static void blk_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
139     void *arg, void *impl_data);
140 static void blk_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
141     void *arg, void *impl_data);
142 static int blk_check_state_transition(blk_ring_t ring, XenbusState oestate);
143 static int blk_start_connect(blk_ring_t ring);
144 static void blk_start_disconnect(blk_ring_t ring);
145 static void blk_ring_close(blk_ring_t ring);
146 static int blk_bindto_frontend(blk_ring_t ring);
147 static void blk_unbindfrom_frontend(blk_ring_t ring);
148 static uint_t blk_intr(caddr_t arg);
149 
150 static int blk_kstat_init(blk_ring_t ring);
151 static void blk_kstat_fini(blk_ring_t ring);
152 static int blk_kstat_update(kstat_t *ksp, int flag);
153 
154 static void blk_ring_request_32(blkif_request_t *dst,
155     blkif_x86_32_request_t *src);
156 static void blk_ring_request_64(blkif_request_t *dst,
157     blkif_x86_64_request_t *src);
158 
159 static void blk_ring_response_32(blkif_x86_32_response_t *dst,
160     blkif_response_t *src);
161 static void blk_ring_response_64(blkif_x86_64_response_t *dst,
162     blkif_response_t *src);
163 
164 
165 /*
166  * blk_ring_init()
167  */
168 int
169 blk_ring_init(blk_ringinit_args_t *args, blk_ring_t *ringp)
170 {
171 	blk_ring_t ring;
172 	int e;
173 
174 
175 	ring = kmem_zalloc(sizeof (struct blk_ring_s), KM_SLEEP);
176 	mutex_init(&ring->ri_mutex, NULL, MUTEX_DRIVER, NULL);
177 	ring->ri_dip = args->ar_dip;
178 	ring->ri_intr = args->ar_intr;
179 	ring->ri_intr_arg = args->ar_intr_arg;
180 	ring->ri_ringup = args->ar_ringup;
181 	ring->ri_ringup_arg = args->ar_ringup_arg;
182 	ring->ri_ringdown = args->ar_ringdown;
183 	ring->ri_ringdown_arg = args->ar_ringdown_arg;
184 
185 	ring->ri_if_status = BLK_IF_UNKNOWN;
186 	ring->ri_be_status = BLK_BE_UNKNOWN;
187 	ring->ri_fe_status = BLK_FE_UNKNOWN;
188 	ring->ri_state.rs_sleeping_on_ring = B_FALSE;
189 	ring->ri_state.rs_ring_up = B_FALSE;
190 
191 	mutex_init(&ring->ri_state.rs_mutex, NULL, MUTEX_DRIVER, NULL);
192 	cv_init(&ring->ri_state.rs_cv, NULL, CV_DRIVER, NULL);
193 
194 	e = blk_kstat_init(ring);
195 	if (e != DDI_SUCCESS) {
196 		goto ringinitfail_kstat;
197 	}
198 
199 	/* Watch frontend and hotplug state change */
200 	if (xvdi_add_event_handler(ring->ri_dip, XS_OE_STATE,
201 	    blk_oe_state_change, ring) != DDI_SUCCESS) {
202 		goto ringinitfail_oestate;
203 	}
204 	if (xvdi_add_event_handler(ring->ri_dip, XS_HP_STATE,
205 	    blk_hp_state_change, ring) != DDI_SUCCESS) {
206 		goto ringinitfail_hpstate;
207 	}
208 
209 	/*
210 	 * Kick-off hotplug script
211 	 */
212 	if (xvdi_post_event(ring->ri_dip, XEN_HP_ADD) != DDI_SUCCESS) {
213 		cmn_err(CE_WARN, "blk@%s: failed to start hotplug script",
214 		    ddi_get_name_addr(ring->ri_dip));
215 		goto ringinitfail_postevent;
216 	}
217 
218 	/*
219 	 * start waiting for hotplug event and otherend state event
220 	 * mainly for debugging, frontend will not take any op seeing this
221 	 */
222 	(void) xvdi_switch_state(ring->ri_dip, XBT_NULL, XenbusStateInitWait);
223 
224 	*ringp = ring;
225 	return (DDI_SUCCESS);
226 
227 ringinitfail_postevent:
228 	xvdi_remove_event_handler(ring->ri_dip, XS_HP_STATE);
229 ringinitfail_hpstate:
230 	xvdi_remove_event_handler(ring->ri_dip, XS_OE_STATE);
231 ringinitfail_oestate:
232 	blk_kstat_fini(ring);
233 ringinitfail_kstat:
234 	cv_destroy(&ring->ri_state.rs_cv);
235 	mutex_destroy(&ring->ri_state.rs_mutex);
236 	mutex_destroy(&ring->ri_mutex);
237 	kmem_free(ring, sizeof (struct blk_ring_s));
238 	return (DDI_FAILURE);
239 }
240 
241 
242 /*
243  * blk_ring_fini()
244  */
245 void
246 blk_ring_fini(blk_ring_t *ringp)
247 {
248 	blk_ring_t ring;
249 
250 
251 	ring = *ringp;
252 
253 	mutex_enter(&ring->ri_mutex);
254 	if (ring->ri_if_status != BLK_IF_DISCONNECTED) {
255 		blk_ring_close(ring);
256 	}
257 	mutex_exit(&ring->ri_mutex);
258 
259 	xvdi_remove_event_handler(ring->ri_dip, NULL);
260 	blk_kstat_fini(ring);
261 	cv_destroy(&ring->ri_state.rs_cv);
262 	mutex_destroy(&ring->ri_state.rs_mutex);
263 	mutex_destroy(&ring->ri_mutex);
264 	kmem_free(ring, sizeof (struct blk_ring_s));
265 
266 	*ringp = NULL;
267 }
268 
269 
270 /*
271  * blk_kstat_init()
272  */
273 static int
274 blk_kstat_init(blk_ring_t ring)
275 {
276 	int nstat = sizeof (blk_stats) / sizeof (blk_stats[0]);
277 	char **cp = blk_stats;
278 	kstat_named_t *knp;
279 
280 	ring->ri_kstats = kstat_create(ddi_get_name(ring->ri_dip),
281 	    ddi_get_instance(ring->ri_dip), "req_statistics", "block",
282 	    KSTAT_TYPE_NAMED, nstat, 0);
283 	if (ring->ri_kstats == NULL) {
284 		return (DDI_FAILURE);
285 	}
286 
287 	ring->ri_kstats->ks_private = ring;
288 	ring->ri_kstats->ks_update = blk_kstat_update;
289 
290 	knp = ring->ri_kstats->ks_data;
291 	while (nstat > 0) {
292 		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
293 		knp++;
294 		cp++;
295 		nstat--;
296 	}
297 
298 	kstat_install(ring->ri_kstats);
299 
300 	return (DDI_SUCCESS);
301 }
302 
303 
304 /*
305  * blk_kstat_fini()
306  */
307 static void
308 blk_kstat_fini(blk_ring_t ring)
309 {
310 	kstat_delete(ring->ri_kstats);
311 }
312 
313 
314 /*
315  * blk_kstat_update()
316  */
317 static int
318 blk_kstat_update(kstat_t *ksp, int flag)
319 {
320 	kstat_named_t *knp;
321 	blk_stats_t *stats;
322 	blk_ring_t ring;
323 
324 
325 	if (flag != KSTAT_READ) {
326 		return (EACCES);
327 	}
328 
329 	ring = ksp->ks_private;
330 	stats = &ring->ri_stats;
331 	knp = ksp->ks_data;
332 
333 	/*
334 	 * Assignment order should match that of the names in
335 	 * blk_stats.
336 	 */
337 	(knp++)->value.ui64 = stats->bs_req_reads;
338 	(knp++)->value.ui64 = stats->bs_req_writes;
339 	(knp++)->value.ui64 = stats->bs_req_barriers;
340 	(knp++)->value.ui64 = stats->bs_req_flushes;
341 	(knp++)->value.ui64 = 0; /* oo_req */
342 
343 	return (0);
344 }
345 
346 
347 /*
348  * blk_oe_state_change()
349  */
350 /*ARGSUSED*/
351 static void
352 blk_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
353     void *impl_data)
354 {
355 	XenbusState new_state;
356 	blk_ring_t ring;
357 
358 
359 	ring = (blk_ring_t)arg;
360 	new_state = *(XenbusState *)impl_data;
361 
362 	mutex_enter(&ring->ri_mutex);
363 
364 	if (blk_check_state_transition(ring, new_state) == DDI_FAILURE) {
365 		mutex_exit(&ring->ri_mutex);
366 		return;
367 	}
368 
369 	switch (new_state) {
370 	case XenbusStateInitialised:
371 		ASSERT(ring->ri_if_status == BLK_IF_UNKNOWN);
372 
373 		/* frontend is ready for connecting */
374 		ring->ri_fe_status = BLK_FE_READY;
375 
376 		if (ring->ri_be_status == BLK_BE_READY) {
377 			mutex_exit(&ring->ri_mutex);
378 			if (blk_start_connect(ring) != DDI_SUCCESS)
379 				(void) blk_start_disconnect(ring);
380 			mutex_enter(&ring->ri_mutex);
381 		}
382 		break;
383 	case XenbusStateClosing:
384 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
385 		break;
386 	case XenbusStateClosed:
387 		/* clean up */
388 		(void) xvdi_post_event(ring->ri_dip, XEN_HP_REMOVE);
389 		if (ring->ri_ringdown != NULL) {
390 			(*(ring->ri_ringdown))(ring->ri_ringdown_arg);
391 		}
392 		blk_ring_close(ring);
393 
394 		/* reset state in case of reconnect */
395 		ring->ri_if_status = BLK_IF_UNKNOWN;
396 		ring->ri_be_status = BLK_BE_UNKNOWN;
397 		ring->ri_fe_status = BLK_FE_UNKNOWN;
398 		ring->ri_state.rs_sleeping_on_ring = B_FALSE;
399 		ring->ri_state.rs_ring_up = B_FALSE;
400 
401 		break;
402 	default:
403 		ASSERT(0);
404 	}
405 
406 	mutex_exit(&ring->ri_mutex);
407 }
408 
409 
410 /*
411  * blk_hp_state_change()
412  */
413 /*ARGSUSED*/
414 static void
415 blk_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
416     void *impl_data)
417 {
418 	xendev_hotplug_state_t hpstate;
419 	blk_ring_t ring;
420 
421 
422 	ring = (blk_ring_t)arg;
423 	hpstate = *(xendev_hotplug_state_t *)impl_data;
424 
425 	mutex_enter(&ring->ri_mutex);
426 	if (hpstate == Connected) {
427 		/* Hotplug script has completed successfully */
428 		if (ring->ri_be_status == BLK_BE_UNKNOWN) {
429 			ring->ri_be_status = BLK_BE_READY;
430 			if (ring->ri_fe_status == BLK_FE_READY) {
431 				mutex_exit(&ring->ri_mutex);
432 				/* try to connect to frontend */
433 				if (blk_start_connect(ring) != DDI_SUCCESS)
434 					(void) blk_start_disconnect(ring);
435 				mutex_enter(&ring->ri_mutex);
436 			}
437 		}
438 	}
439 	mutex_exit(&ring->ri_mutex);
440 }
441 
442 
443 /*
444  * blk_check_state_transition()
445  *    check the XenbusState change to see if the change is a valid transition
446  *    or not. The new state is written by frontend domain, or by running
447  *    xenstore-write to change it manually in dom0.
448  */
449 static int
450 blk_check_state_transition(blk_ring_t ring, XenbusState oestate)
451 {
452 	switch (ring->ri_if_status) {
453 	case BLK_IF_UNKNOWN:
454 		if (ring->ri_fe_status == BLK_FE_UNKNOWN) {
455 			if ((oestate == XenbusStateUnknown)		||
456 			    (oestate == XenbusStateConnected))
457 				goto statechkfail_bug;
458 			else if ((oestate == XenbusStateInitialising)	||
459 			    (oestate == XenbusStateInitWait))
460 				goto statechkfail_nop;
461 		} else {
462 			if ((oestate == XenbusStateUnknown)		||
463 			    (oestate == XenbusStateInitialising)	||
464 			    (oestate == XenbusStateInitWait)		||
465 			    (oestate == XenbusStateConnected))
466 				goto statechkfail_bug;
467 			else if (oestate == XenbusStateInitialised)
468 				goto statechkfail_nop;
469 		}
470 		break;
471 
472 	case BLK_IF_CONNECTED:
473 		if ((oestate == XenbusStateUnknown)		||
474 		    (oestate == XenbusStateInitialising)	||
475 		    (oestate == XenbusStateInitWait)		||
476 		    (oestate == XenbusStateInitialised))
477 			goto statechkfail_bug;
478 		else if (oestate == XenbusStateConnected)
479 			goto statechkfail_nop;
480 		break;
481 
482 	case BLK_IF_DISCONNECTED:
483 	default:
484 		goto statechkfail_bug;
485 	}
486 
487 	return (DDI_SUCCESS);
488 
489 statechkfail_bug:
490 	cmn_err(CE_NOTE, "blk@%s: unexpected otherend "
491 	    "state change to %d!, when status is %d",
492 	    ddi_get_name_addr(ring->ri_dip), oestate,
493 	    ring->ri_if_status);
494 
495 statechkfail_nop:
496 	return (DDI_FAILURE);
497 }
498 
499 
500 /*
501  * blk_start_connect()
502  *    Kick-off connect process
503  *    If ri_fe_status == BLK_FE_READY and ri_be_status == BLK_BE_READY
504  *    the ri_if_status will be changed to BLK_IF_CONNECTED on success,
505  *    otherwise, ri_if_status will not be changed
506  */
507 static int
508 blk_start_connect(blk_ring_t ring)
509 {
510 	xenbus_transaction_t xbt;
511 	dev_info_t *dip;
512 	char *barrier;
513 	char *xsnode;
514 	uint_t len;
515 	int e;
516 
517 
518 	dip = ring->ri_dip;
519 
520 	/*
521 	 * Start connect to frontend only when backend device are ready
522 	 * and frontend has moved to XenbusStateInitialised, which means
523 	 * ready to connect
524 	 */
525 	ASSERT(ring->ri_fe_status == BLK_FE_READY);
526 	ASSERT(ring->ri_be_status == BLK_BE_READY);
527 
528 	xsnode = xvdi_get_xsname(dip);
529 	if (xsnode == NULL) {
530 		goto startconnectfail_get_xsname;
531 	}
532 
533 	ring->ri_fe = xvdi_get_oeid(dip);
534 	if (ring->ri_fe == (domid_t)-1) {
535 		goto startconnectfail_get_oeid;
536 	}
537 
538 	e =  xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);
539 	if (e > 0) {
540 		goto startconnectfail_switch_init;
541 	}
542 
543 	e = blk_bindto_frontend(ring);
544 	if (e != DDI_SUCCESS) {
545 		goto startconnectfail_bindto_frontend;
546 	}
547 	ring->ri_if_status = BLK_IF_CONNECTED;
548 
549 	e = ddi_add_intr(dip, 0, NULL, NULL, blk_intr, (caddr_t)ring);
550 	if (e != DDI_SUCCESS) {
551 		goto startconnectfail_add_intr;
552 	}
553 
554 trans_retry:
555 	e = xenbus_transaction_start(&xbt);
556 	if (e != 0) {
557 		xvdi_fatal_error(dip, e, "transaction start");
558 		goto startconnectfail_transaction_start;
559 	}
560 
561 	/* xentop requires the instance in xenstore */
562 	e = xenbus_printf(xbt, xsnode, "instance", "%d",
563 	    ddi_get_instance(ring->ri_dip));
564 	if (e != 0) {
565 		cmn_err(CE_WARN, "xdb@%s: failed to write 'instance'",
566 		    ddi_get_name_addr(dip));
567 		xvdi_fatal_error(dip, e, "writing 'instance'");
568 		(void) xenbus_transaction_end(xbt, 1);
569 		goto startconnectfail_xenbus_printf;
570 	}
571 
572 	/* If feature-barrier isn't present in xenstore, add it */
573 	e = xenbus_read(xbt, xsnode, "feature-barrier", (void **)&barrier,
574 	    &len);
575 	if (e != 0) {
576 		e = xenbus_printf(xbt, xsnode, "feature-barrier", "%d", 1);
577 		if (e != 0) {
578 			cmn_err(CE_WARN, "xdb@%s: failed to write "
579 			    "'feature-barrier'", ddi_get_name_addr(dip));
580 			xvdi_fatal_error(dip, e, "writing 'feature-barrier'");
581 			(void) xenbus_transaction_end(xbt, 1);
582 			goto startconnectfail_xenbus_printf;
583 		}
584 	} else {
585 		kmem_free(barrier, len);
586 	}
587 
588 	e = xvdi_switch_state(dip, xbt, XenbusStateConnected);
589 	if (e > 0) {
590 		xvdi_fatal_error(dip, e, "writing 'state'");
591 		(void) xenbus_transaction_end(xbt, 1);
592 		goto startconnectfail_switch_connected;
593 	}
594 
595 	e = xenbus_transaction_end(xbt, 0);
596 	if (e != 0) {
597 		if (e == EAGAIN) {
598 			/* transaction is ended, don't need to abort it */
599 			goto trans_retry;
600 		}
601 		xvdi_fatal_error(dip, e, "completing transaction");
602 		goto startconnectfail_transaction_end;
603 	}
604 
605 	mutex_enter(&ring->ri_state.rs_mutex);
606 	ring->ri_state.rs_ring_up = B_TRUE;
607 	if (ring->ri_state.rs_sleeping_on_ring) {
608 		ring->ri_state.rs_sleeping_on_ring = B_FALSE;
609 		cv_signal(&ring->ri_state.rs_cv);
610 	}
611 	mutex_exit(&ring->ri_state.rs_mutex);
612 
613 	if (ring->ri_ringup != NULL) {
614 		(*(ring->ri_ringup))(ring->ri_ringup_arg);
615 	}
616 
617 	return (DDI_SUCCESS);
618 
619 
620 startconnectfail_transaction_end:
621 startconnectfail_switch_connected:
622 startconnectfail_xenbus_printf:
623 startconnectfail_transaction_start:
624 	ddi_remove_intr(dip, 0, NULL);
625 startconnectfail_add_intr:
626 	blk_unbindfrom_frontend(ring);
627 	ring->ri_fe = (domid_t)-1;
628 startconnectfail_bindto_frontend:
629 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
630 startconnectfail_switch_init:
631 startconnectfail_get_oeid:
632 startconnectfail_get_xsname:
633 	return (DDI_FAILURE);
634 }
635 
636 
637 /*
638  * blk_start_disconnect()
639  *    Kick-off disconnect process. ri_if_status will not be changed
640  */
641 static void
642 blk_start_disconnect(blk_ring_t ring)
643 {
644 	/* Kick-off disconnect process */
645 	(void) xvdi_switch_state(ring->ri_dip, XBT_NULL, XenbusStateClosing);
646 }
647 
648 
649 /*
650  * blk_ring_close()
651  *    Disconnect from frontend and close backend device
652  *    ifstatus will be changed to BLK_DISCONNECTED
653  *    Xenbus state will be changed to XenbusStateClosed
654  */
655 static void
656 blk_ring_close(blk_ring_t ring)
657 {
658 	dev_info_t *dip;
659 
660 
661 	/* mutex protect ri_if_status only here */
662 	ASSERT(MUTEX_HELD(&ring->ri_mutex));
663 
664 	dip = ring->ri_dip;
665 
666 	if (ring->ri_if_status != BLK_IF_CONNECTED) {
667 		return;
668 	}
669 
670 	ring->ri_if_status = BLK_IF_DISCONNECTED;
671 	mutex_exit(&ring->ri_mutex);
672 
673 	/* stop accepting I/O request from frontend */
674 	ddi_remove_intr(dip, 0, NULL);
675 
676 	blk_unbindfrom_frontend(ring);
677 	ring->ri_fe = (domid_t)-1;
678 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
679 	mutex_enter(&ring->ri_mutex);
680 }
681 
682 
683 /*
684  * blk_bindto_frontend()
685  */
686 static int
687 blk_bindto_frontend(blk_ring_t ring)
688 {
689 	evtchn_port_t evtchn;
690 	char protocol[64];
691 	grant_ref_t gref;
692 	dev_info_t *dip;
693 	char *oename;
694 	int e;
695 
696 
697 	dip = ring->ri_dip;
698 	protocol[0] = 0x0;
699 
700 	/*
701 	 * Gather info from frontend
702 	 */
703 	oename = xvdi_get_oename(dip);
704 	if (oename == NULL) {
705 		return (DDI_FAILURE);
706 	}
707 
708 	e = xenbus_gather(XBT_NULL, oename, "ring-ref", "%lu", &gref,
709 	    "event-channel", "%u", &evtchn, NULL);
710 	if (e != 0) {
711 		xvdi_fatal_error(dip, e,
712 		    "Getting ring-ref and evtchn from frontend");
713 		return (DDI_FAILURE);
714 	}
715 
716 	e = xenbus_gather(XBT_NULL, oename, "protocol", "%63s",
717 	    protocol, NULL);
718 	if (e != 0) {
719 		(void) strcpy(protocol, "unspecified, assuming native");
720 	} else if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) == 0) {
721 		ring->ri_protocol = BLKIF_PROTOCOL_NATIVE;
722 		ring->ri_nentry = BLKIF_RING_SIZE;
723 		ring->ri_entrysize = sizeof (union blkif_sring_entry);
724 	} else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
725 		ring->ri_protocol = BLKIF_PROTOCOL_X86_32;
726 		ring->ri_nentry = BLKIF_X86_32_RING_SIZE;
727 		ring->ri_entrysize = sizeof (union blkif_x86_32_sring_entry);
728 	} else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
729 		ring->ri_protocol = BLKIF_PROTOCOL_X86_64;
730 		ring->ri_nentry = BLKIF_X86_64_RING_SIZE;
731 		ring->ri_entrysize = sizeof (union blkif_x86_64_sring_entry);
732 	} else {
733 		xvdi_fatal_error(dip, e, "unknown fe protocol");
734 		return (DDI_FAILURE);
735 	}
736 
737 	/*
738 	 * map and init ring
739 	 */
740 	e = xvdi_map_ring(dip, ring->ri_nentry, ring->ri_entrysize, gref,
741 	    &ring->ri_ring);
742 	if (e != DDI_SUCCESS) {
743 		return (DDI_FAILURE);
744 	}
745 
746 	/*
747 	 * bind event channel
748 	 */
749 	e = xvdi_bind_evtchn(dip, evtchn);
750 	if (e != DDI_SUCCESS) {
751 		xvdi_unmap_ring(ring->ri_ring);
752 		return (DDI_FAILURE);
753 	}
754 
755 
756 	return (DDI_SUCCESS);
757 }
758 
759 
760 /*
761  * blk_unbindfrom_frontend()
762  */
763 static void
764 blk_unbindfrom_frontend(blk_ring_t ring)
765 {
766 	xvdi_free_evtchn(ring->ri_dip);
767 	xvdi_unmap_ring(ring->ri_ring);
768 }
769 
770 
771 /*
772  * blk_intr()
773  */
774 static uint_t
775 blk_intr(caddr_t arg)
776 {
777 	blk_ring_t ring;
778 
779 	ring = (blk_ring_t)arg;
780 	if (ring->ri_if_status != BLK_IF_CONNECTED) {
781 		return (DDI_INTR_CLAIMED);
782 	}
783 
784 	(void) (*ring->ri_intr)(ring->ri_intr_arg);
785 	return (DDI_INTR_CLAIMED);
786 }
787 
788 
789 /*
790  * blk_ring_request_get()
791  */
792 boolean_t
793 blk_ring_request_get(blk_ring_t ring, blkif_request_t *req)
794 {
795 	blkif_request_t *src;
796 	blk_stats_t *stats;
797 
798 
799 	mutex_enter(&ring->ri_mutex);
800 
801 	if (ring->ri_if_status != BLK_IF_CONNECTED) {
802 		mutex_exit(&ring->ri_mutex);
803 		return (B_FALSE);
804 	}
805 
806 	src = xvdi_ring_get_request(ring->ri_ring);
807 	if (src == NULL) {
808 		mutex_exit(&ring->ri_mutex);
809 		return (B_FALSE);
810 	}
811 
812 	switch (ring->ri_protocol) {
813 	case BLKIF_PROTOCOL_NATIVE:
814 		bcopy(src, req, sizeof (*req));
815 		break;
816 	case BLKIF_PROTOCOL_X86_32:
817 		blk_ring_request_32(req, (blkif_x86_32_request_t *)src);
818 		break;
819 	case BLKIF_PROTOCOL_X86_64:
820 		blk_ring_request_64(req, (blkif_x86_64_request_t *)src);
821 		break;
822 	default:
823 		cmn_err(CE_WARN, "blkif@%s: unrecognised protocol: %d",
824 		    ddi_get_name_addr(ring->ri_dip),
825 		    ring->ri_protocol);
826 	}
827 	mutex_exit(&ring->ri_mutex);
828 
829 	stats = &ring->ri_stats;
830 	switch (req->operation) {
831 	case BLKIF_OP_READ:
832 		stats->bs_req_reads++;
833 		break;
834 	case BLKIF_OP_WRITE:
835 		stats->bs_req_writes++;
836 		break;
837 	case BLKIF_OP_WRITE_BARRIER:
838 		stats->bs_req_barriers++;
839 		break;
840 	case BLKIF_OP_FLUSH_DISKCACHE:
841 		stats->bs_req_flushes++;
842 		break;
843 	}
844 
845 	return (B_TRUE);
846 }
847 
848 
849 /*
850  * blk_ring_request_requeue()
851  *    if a request is requeued, caller will have to poll for request
852  *    later.
853  */
854 void
855 blk_ring_request_requeue(blk_ring_t ring)
856 {
857 	mutex_enter(&ring->ri_mutex);
858 
859 	if (ring->ri_if_status != BLK_IF_CONNECTED) {
860 		mutex_exit(&ring->ri_mutex);
861 		return;
862 	}
863 
864 	ring->ri_ring->xr_sring.br.req_cons--;
865 
866 	mutex_exit(&ring->ri_mutex);
867 }
868 
869 
870 /*
871  * blk_ring_response_put()
872  */
873 void
874 blk_ring_response_put(blk_ring_t ring, blkif_response_t *src)
875 {
876 	blkif_response_t *rsp;
877 	int e;
878 
879 
880 	mutex_enter(&ring->ri_mutex);
881 
882 	if (ring->ri_if_status != BLK_IF_CONNECTED) {
883 		mutex_exit(&ring->ri_mutex);
884 		return;
885 	}
886 
887 	rsp = xvdi_ring_get_response(ring->ri_ring);
888 	ASSERT(rsp);
889 
890 	switch (ring->ri_protocol) {
891 	case BLKIF_PROTOCOL_NATIVE:
892 		bcopy(src, rsp, sizeof (*rsp));
893 		break;
894 	case BLKIF_PROTOCOL_X86_32:
895 		blk_ring_response_32((blkif_x86_32_response_t *)rsp, src);
896 		break;
897 	case BLKIF_PROTOCOL_X86_64:
898 		blk_ring_response_64((blkif_x86_64_response_t *)rsp, src);
899 		break;
900 	default:
901 		cmn_err(CE_WARN, "blk@%s: unrecognised protocol: %d",
902 		    ddi_get_name_addr(ring->ri_dip),
903 		    ring->ri_protocol);
904 	}
905 
906 	e = xvdi_ring_push_response(ring->ri_ring);
907 	if (e != 0) {
908 		xvdi_notify_oe(ring->ri_dip);
909 	}
910 
911 	mutex_exit(&ring->ri_mutex);
912 }
913 
914 
915 /*
916  * blk_ring_request_32()
917  */
918 static void
919 blk_ring_request_32(blkif_request_t *dst, blkif_x86_32_request_t *src)
920 {
921 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
922 	dst->operation = src->operation;
923 	dst->nr_segments = src->nr_segments;
924 	dst->handle = src->handle;
925 	dst->id = src->id;
926 	dst->sector_number = src->sector_number;
927 	if (n > src->nr_segments)
928 		n = src->nr_segments;
929 	for (i = 0; i < n; i++)
930 		dst->seg[i] = src->seg[i];
931 }
932 
933 
934 /*
935  * blk_ring_request_64()
936  */
937 static void
938 blk_ring_request_64(blkif_request_t *dst, blkif_x86_64_request_t *src)
939 {
940 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
941 	dst->operation = src->operation;
942 	dst->nr_segments = src->nr_segments;
943 	dst->handle = src->handle;
944 	dst->id = src->id;
945 	dst->sector_number = src->sector_number;
946 	if (n > src->nr_segments)
947 		n = src->nr_segments;
948 	for (i = 0; i < n; i++)
949 		dst->seg[i] = src->seg[i];
950 }
951 
952 
953 /*
954  * blk_ring_response_32()
955  */
956 static void
957 blk_ring_response_32(blkif_x86_32_response_t *dst, blkif_response_t *src)
958 {
959 	dst->id = src->id;
960 	dst->operation = src->operation;
961 	dst->status = src->status;
962 }
963 
964 
965 /*
966  * blk_ring_response_64()
967  */
968 static void
969 blk_ring_response_64(blkif_x86_64_response_t *dst, blkif_response_t *src)
970 {
971 	dst->id = src->id;
972 	dst->operation = src->operation;
973 	dst->status = src->status;
974 }
975 
976 
977 /*
978  * blk_ring_request_dump()
979  */
980 void
981 blk_ring_request_dump(blkif_request_t *req)
982 {
983 	int i;
984 
985 	/*
986 	 * Exploit the public interface definitions for BLKIF_OP_READ
987 	 * etc..
988 	 */
989 	char *op_name[] = { "read", "write", "barrier", "flush" };
990 
991 	cmn_err(CE_NOTE, "   op=%s", op_name[req->operation]);
992 	cmn_err(CE_NOTE, "   num of segments=%d", req->nr_segments);
993 	cmn_err(CE_NOTE, "   handle=%d", req->handle);
994 	cmn_err(CE_NOTE, "   id=0x%llx", (unsigned long long)req->id);
995 	cmn_err(CE_NOTE, "   start sector=%llu",
996 	    (unsigned long long)req->sector_number);
997 	for (i = 0; i < req->nr_segments; i++) {
998 		cmn_err(CE_NOTE, "   gref=%d, first sec=%d,"
999 		    "last sec=%d", req->seg[i].gref, req->seg[i].first_sect,
1000 		    req->seg[i].last_sect);
1001 	}
1002 }
1003 
1004 
1005 /*
1006  * blk_ring_response_dump()
1007  */
1008 void
1009 blk_ring_response_dump(blkif_response_t *resp)
1010 {
1011 	/*
1012 	 * Exploit the public interface definitions for BLKIF_OP_READ
1013 	 * etc..
1014 	 */
1015 	char *op_name[] = { "read", "write", "barrier", "flush" };
1016 
1017 	cmn_err(CE_NOTE, "   op=%d:%s", resp->operation,
1018 	    op_name[resp->operation]);
1019 	cmn_err(CE_NOTE, "   op=%d", resp->operation);
1020 	cmn_err(CE_NOTE, "   status=%d", resp->status);
1021 }
1022