xref: /titanic_52/usr/src/uts/common/xen/io/xenbus_xs.c (revision 4c4c91100283a5863df30701c15987cf4d7ff9db)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *
29  * xenbus_xs.c
30  *
31  * This is the kernel equivalent of the "xs" library.  We don't need everything
32  * and we use xenbus_comms for communication.
33  *
34  * Copyright (C) 2005 Rusty Russell, IBM Corporation
35  *
36  * This file may be distributed separately from the Linux kernel, or
37  * incorporated into other software packages, subject to the following license:
38  *
39  * Permission is hereby granted, free of charge, to any person obtaining a copy
40  * of this source file (the "Software"), to deal in the Software without
41  * restriction, including without limitation the rights to use, copy, modify,
42  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
43  * and to permit persons to whom the Software is furnished to do so, subject to
44  * the following conditions:
45  *
46  * The above copyright notice and this permission notice shall be included in
47  * all copies or substantial portions of the Software.
48  *
49  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55  * IN THE SOFTWARE.
56  */
57 
58 /*
59  * NOTE: To future maintainers of the Solaris version of this file:
60  * I found the Linux version of this code to be very disgusting in
61  * overloading pointers and error codes into void * return values.
62  * The main difference you will find is that all such usage is changed
63  * to pass pointers to void* to be filled in with return values and
64  * the functions return error codes.
65  */
66 
67 #pragma ident	"%Z%%M%	%I%	%E% SMI"
68 
69 #include <sys/errno.h>
70 #include <sys/types.h>
71 #include <sys/sysmacros.h>
72 #include <sys/uio.h>
73 #include <sys/mutex.h>
74 #include <sys/condvar.h>
75 #include <sys/rwlock.h>
76 #include <sys/disp.h>
77 #include <sys/ddi.h>
78 #include <sys/sunddi.h>
79 #include <sys/avintr.h>
80 #include <sys/cmn_err.h>
81 #include <util/sscanf.h>
82 #define	_XSD_ERRORS_DEFINED
83 #include <sys/hypervisor.h>
84 #include <sys/mach_mmu.h>
85 #include <sys/taskq.h>
86 #include <sys/sdt.h>
87 #include <xen/sys/xenbus_impl.h>
88 #include <xen/sys/xenbus_comms.h>
89 #include <xen/sys/xendev.h>
90 #include <xen/public/io/xs_wire.h>
91 
/* True when the two NUL-terminated strings compare equal. */
#define	streq(a, b) (strcmp((a), (b)) == 0)

/* True when the given list_t has no elements (list_head() yields NULL). */
#define	list_empty(list) (list_head(list) == NULL)
95 
/*
 * One message read from xenstore by process_msg().  It is linked either
 * onto xs_state.reply_list or onto the watch_events list.  Which arm of
 * 'un' is meaningful follows hdr.type: XS_WATCH_EVENT messages fill in
 * 'watch', every other type fills in 'reply'.
 */
struct xs_stored_msg {
	/* Linkage for whichever list the message is queued on. */
	list_t list;

	/* Wire header as read from xenstore. */
	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			/* kmem_alloc'ed payload, hdr.len + 1 bytes long. */
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			/* Registered watch the event is for. */
			struct xenbus_watch *handle;
			/* Strings of the event, as produced by split(). */
			char **vec;
			/* Number of entries in vec. */
			unsigned int vec_size;
		} watch;
	} un;
};
115 
/*
 * Global state for talking to xenstore: the queue of received replies and
 * the locks serialising requests and suspend/resume.
 */
static struct xs_handle {
	/* A list of replies. Currently only one will ever be outstanding. */
	list_t reply_list;
	/* Protects reply_list; reply_cv is signalled when a reply is queued. */
	kmutex_t reply_lock;
	kcondvar_t reply_cv;

	/* One request at a time. */
	kmutex_t request_mutex;

	/* Protect transactions against save/restore. */
	krwlock_t suspend_lock;
} xs_state;
128 
/*
 * Request id stamped on each outgoing message; it is incremented while
 * holding xs_state.request_mutex.
 */
static int last_req_id;

/*
 * List of clients wanting a xenstore up notification, and a lock to protect it
 */
static boolean_t xenstore_up;
static list_t notify_list;
static kmutex_t notify_list_lock;
/* Task queue on which the up/down notification callbacks are run. */
static taskq_t *xenbus_taskq;

/* List of registered watches, and a lock to protect it. */
static list_t watches;
static kmutex_t watches_lock;

/* List of pending watch callback events, and a lock to protect it. */
static list_t watch_events;
static kmutex_t watch_events_lock;

/*
 * Details of the xenwatch callback kernel thread. The thread waits on the
 * watch_events_cv for work to do (queued on watch_events list). When it
 * wakes up it acquires the xenwatch_mutex before reading the list and
 * carrying out work.
 */
static kmutex_t xenwatch_mutex;
static kcondvar_t watch_events_cv;

/* Forward declaration: read_reply() calls this in polled mode. */
static int process_msg(void);
157 
158 static int
159 get_error(const char *errorstring)
160 {
161 	unsigned int i;
162 
163 	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
164 		if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
165 			cmn_err(CE_WARN,
166 			    "XENBUS xen store gave: unknown error %s",
167 			    errorstring);
168 			return (EINVAL);
169 		}
170 	}
171 	return (xsd_errors[i].errnum);
172 }
173 
174 /*
175  * Read a synchronous reply from xenstore.  Since we can return early before
176  * reading a relevant reply, we discard any messages not matching the request
177  * ID.  Caller must free returned message on success.
178  */
179 static int
180 read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
181 {
182 	extern int do_polled_io;
183 
184 	mutex_enter(&xs_state.reply_lock);
185 
186 	for (;;) {
187 		while (list_empty(&xs_state.reply_list)) {
188 			if (interrupts_unleashed && !do_polled_io) {
189 				if (cv_wait_sig(&xs_state.reply_cv,
190 				    &xs_state.reply_lock) == 0) {
191 					mutex_exit(&xs_state.reply_lock);
192 					*reply = NULL;
193 					return (EINTR);
194 				}
195 			} else { /* polled mode needed for early probes */
196 				mutex_exit(&xs_state.reply_lock);
197 				(void) HYPERVISOR_yield();
198 				(void) process_msg();
199 				mutex_enter(&xs_state.reply_lock);
200 			}
201 		}
202 
203 		*reply = list_head(&xs_state.reply_list);
204 		list_remove(&xs_state.reply_list, *reply);
205 
206 		if ((*reply)->hdr.req_id == req_hdr->req_id)
207 			break;
208 	}
209 
210 	mutex_exit(&xs_state.reply_lock);
211 	return (0);
212 }
213 
214 /* Emergency write. */
215 void
216 xenbus_debug_write(const char *str, unsigned int count)
217 {
218 	struct xsd_sockmsg msg = { 0 };
219 
220 	msg.type = XS_DEBUG;
221 	msg.len = sizeof ("print") + count + 1;
222 
223 	mutex_enter(&xs_state.request_mutex);
224 	(void) xb_write(&msg, sizeof (msg));
225 	(void) xb_write("print", sizeof ("print"));
226 	(void) xb_write(str, count);
227 	(void) xb_write("", 1);
228 	mutex_exit(&xs_state.request_mutex);
229 }
230 
231 /*
232  * This is pretty unpleasant.  First off, there's the horrible logic around
233  * suspend_lock and transactions.  Also, we can be interrupted either before we
234  * write a message, or before we receive a reply.  A client that wants to
235  * survive this can't know which case happened.  Luckily all clients don't care
236  * about signals currently, and the alternative (a hard wait on a userspace
237  * daemon) isn't exactly preferable.  Caller must free 'reply' on success.
238  */
239 int
240 xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
241 {
242 	struct xsd_sockmsg req_msg = *msg;
243 	struct xs_stored_msg *reply_msg = NULL;
244 	int err;
245 
246 	if (req_msg.type == XS_TRANSACTION_START)
247 		rw_enter(&xs_state.suspend_lock, RW_READER);
248 
249 	mutex_enter(&xs_state.request_mutex);
250 
251 	msg->req_id = last_req_id++;
252 
253 	err = xb_write(msg, sizeof (*msg) + msg->len);
254 	if (err) {
255 		if (req_msg.type == XS_TRANSACTION_START)
256 			rw_exit(&xs_state.suspend_lock);
257 		msg->type = XS_ERROR;
258 		*reply = NULL;
259 		goto out;
260 	}
261 
262 	err = read_reply(msg, &reply_msg);
263 
264 	if (err) {
265 		if (msg->type == XS_TRANSACTION_START)
266 			rw_exit(&xs_state.suspend_lock);
267 		*reply = NULL;
268 		goto out;
269 	}
270 
271 	*reply = reply_msg->un.reply.body;
272 	*msg = reply_msg->hdr;
273 
274 	if (reply_msg->hdr.type == XS_TRANSACTION_END)
275 		rw_exit(&xs_state.suspend_lock);
276 
277 out:
278 	if (reply_msg != NULL)
279 		kmem_free(reply_msg, sizeof (*reply_msg));
280 
281 	mutex_exit(&xs_state.request_mutex);
282 	return (err);
283 }
284 
285 /*
286  * Send message to xs, return errcode, rval filled in with pointer
287  * to kmem_alloc'ed reply.
288  */
289 static int
290 xs_talkv(xenbus_transaction_t t,
291 		    enum xsd_sockmsg_type type,
292 		    const iovec_t *iovec,
293 		    unsigned int num_vecs,
294 		    void **rval,
295 		    unsigned int *len)
296 {
297 	struct xsd_sockmsg msg;
298 	struct xs_stored_msg *reply_msg;
299 	char *reply;
300 	unsigned int i;
301 	int err;
302 
303 	msg.tx_id = (uint32_t)(unsigned long)t;
304 	msg.type = type;
305 	msg.len = 0;
306 	for (i = 0; i < num_vecs; i++)
307 		msg.len += iovec[i].iov_len;
308 
309 	mutex_enter(&xs_state.request_mutex);
310 
311 	msg.req_id = last_req_id++;
312 
313 	err = xb_write(&msg, sizeof (msg));
314 	if (err) {
315 		mutex_exit(&xs_state.request_mutex);
316 		return (err);
317 	}
318 
319 	for (i = 0; i < num_vecs; i++) {
320 		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
321 		if (err) {
322 			mutex_exit(&xs_state.request_mutex);
323 			return (err);
324 		}
325 	}
326 
327 	err = read_reply(&msg, &reply_msg);
328 
329 	mutex_exit(&xs_state.request_mutex);
330 
331 	if (err)
332 		return (err);
333 
334 	reply = reply_msg->un.reply.body;
335 
336 	if (reply_msg->hdr.type == XS_ERROR) {
337 		err = get_error(reply);
338 		kmem_free(reply, reply_msg->hdr.len + 1);
339 		goto out;
340 	}
341 
342 	if (len != NULL)
343 		*len = reply_msg->hdr.len + 1;
344 
345 	ASSERT(reply_msg->hdr.type == type);
346 
347 	if (rval != NULL)
348 		*rval = reply;
349 	else
350 		kmem_free(reply, reply_msg->hdr.len + 1);
351 
352 out:
353 	kmem_free(reply_msg, sizeof (*reply_msg));
354 	return (err);
355 }
356 
357 /* Simplified version of xs_talkv: single message. */
358 static int
359 xs_single(xenbus_transaction_t t,
360 			enum xsd_sockmsg_type type,
361 			const char *string, void **ret,
362 			unsigned int *len)
363 {
364 	iovec_t iovec;
365 
366 	iovec.iov_base = (char *)string;
367 	iovec.iov_len = strlen(string) + 1;
368 	return (xs_talkv(t, type, &iovec, 1, ret, len));
369 }
370 
/*
 * Count the NUL-terminated strings that start within the first 'len' bytes
 * of 'strings'.  Each string found must be NUL-terminated, since strlen()
 * is used to step over it.
 */
static unsigned int
count_strings(const char *strings, unsigned int len)
{
	const char *end = strings + len;
	const char *cur = strings;
	unsigned int count = 0;

	while (cur < end) {
		count++;
		cur += strlen(cur) + 1;
	}

	return (count);
}
382 
383 /* Return the path to dir with /name appended. Buffer must be kmem_free()'ed */
384 static char *
385 join(const char *dir, const char *name)
386 {
387 	char *buffer;
388 	size_t slashlen;
389 
390 	slashlen = streq(name, "") ? 0 : 1;
391 	buffer = kmem_alloc(strlen(dir) + slashlen + strlen(name) + 1,
392 	    KM_SLEEP);
393 
394 	(void) strcpy(buffer, dir);
395 	if (slashlen != 0) {
396 		(void) strcat(buffer, "/");
397 		(void) strcat(buffer, name);
398 	}
399 	return (buffer);
400 }
401 
402 static char **
403 split(char *strings, unsigned int len, unsigned int *num)
404 {
405 	char *p, **ret;
406 
407 	/* Count the strings. */
408 	if ((*num = count_strings(strings, len - 1)) == 0)
409 		return (NULL);
410 
411 	/* Transfer to one big alloc for easy freeing. */
412 	ret = kmem_alloc(*num * sizeof (char *) + (len - 1), KM_SLEEP);
413 	(void) memcpy(&ret[*num], strings, len - 1);
414 	kmem_free(strings, len);
415 
416 	strings = (char *)&ret[*num];
417 	for (p = strings, *num = 0; p < strings + (len - 1);
418 	    p += strlen(p) + 1) {
419 		ret[(*num)++] = p;
420 	}
421 
422 	return (ret);
423 }
424 
425 char **
426 xenbus_directory(xenbus_transaction_t t,
427 			const char *dir, const char *node, unsigned int *num)
428 {
429 	char *strings, *path;
430 	unsigned int len;
431 	int err;
432 
433 	path = join(dir, node);
434 	err = xs_single(t, XS_DIRECTORY, path, (void **)&strings, &len);
435 	kmem_free(path, strlen(path) + 1);
436 	if (err != 0 || strings == NULL) {
437 		/* sigh, we lose error code info here */
438 		*num = 0;
439 		return (NULL);
440 	}
441 
442 	return (split(strings, len, num));
443 }
444 
445 /* Check if a path exists. Return 1 if it does. */
446 int
447 xenbus_exists(xenbus_transaction_t t, const char *dir, const char *node)
448 {
449 	char **d;
450 	unsigned int dir_n;
451 	int i, len;
452 
453 	d = xenbus_directory(t, dir, node, &dir_n);
454 	if (d == NULL)
455 		return (0);
456 	for (i = 0, len = 0; i < dir_n; i++)
457 		len += strlen(d[i]) + 1 + sizeof (char *);
458 	kmem_free(d, len);
459 	return (1);
460 }
461 
462 /*
463  * Get the value of a single file.
464  * Returns a kmem_alloced value in retp: call kmem_free() on it after use.
465  * len indicates length in bytes.
466  */
467 int
468 xenbus_read(xenbus_transaction_t t,
469 	    const char *dir, const char *node, void **retp, unsigned int *len)
470 {
471 	char *path;
472 	int err;
473 
474 	path = join(dir, node);
475 	err = xs_single(t, XS_READ, path, retp, len);
476 	kmem_free(path, strlen(path) + 1);
477 	return (err);
478 }
479 
480 /*
481  * Write the value of a single file.
482  * Returns err on failure.
483  */
484 int
485 xenbus_write(xenbus_transaction_t t,
486 		const char *dir, const char *node, const char *string)
487 {
488 	char *path;
489 	iovec_t iovec[2];
490 	int ret;
491 
492 	path = join(dir, node);
493 
494 	iovec[0].iov_base = (void *)path;
495 	iovec[0].iov_len = strlen(path) + 1;
496 	iovec[1].iov_base = (void *)string;
497 	iovec[1].iov_len = strlen(string);
498 
499 	ret = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
500 	kmem_free(path, iovec[0].iov_len);
501 	return (ret);
502 }
503 
504 /* Create a new directory. */
505 int
506 xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node)
507 {
508 	char *path;
509 	int ret;
510 
511 	path = join(dir, node);
512 	ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
513 	kmem_free(path, strlen(path) + 1);
514 	return (ret);
515 }
516 
517 /* Destroy a file or directory (directories must be empty). */
518 int
519 xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
520 {
521 	char *path;
522 	int ret;
523 
524 	path = join(dir, node);
525 	ret = xs_single(t, XS_RM, path, NULL, NULL);
526 	kmem_free(path, strlen(path) + 1);
527 	return (ret);
528 }
529 
530 /*
531  * Start a transaction: changes by others will not be seen during this
532  * transaction, and changes will not be visible to others until end.
533  */
534 int
535 xenbus_transaction_start(xenbus_transaction_t *t)
536 {
537 	void *id_str;
538 	unsigned long id;
539 	int err;
540 	unsigned int len;
541 
542 	rw_enter(&xs_state.suspend_lock, RW_READER);
543 
544 	err = xs_single(XBT_NULL, XS_TRANSACTION_START, "", &id_str, &len);
545 	if (err) {
546 		rw_exit(&xs_state.suspend_lock);
547 		return (err);
548 	}
549 
550 	(void) ddi_strtoul((char *)id_str, NULL, 0, &id);
551 	*t = (xenbus_transaction_t)id;
552 	kmem_free(id_str, len);
553 
554 	return (0);
555 }
556 
557 /*
558  * End a transaction.
559  * If abandon is true, transaction is discarded instead of committed.
560  */
561 int
562 xenbus_transaction_end(xenbus_transaction_t t, int abort)
563 {
564 	char abortstr[2];
565 	int err;
566 
567 	if (abort)
568 		(void) strcpy(abortstr, "F");
569 	else
570 		(void) strcpy(abortstr, "T");
571 
572 	err = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
573 
574 	rw_exit(&xs_state.suspend_lock);
575 
576 	return (err);
577 }
578 
579 /*
580  * Single read and scanf: returns errno or 0.  This can only handle a single
581  * conversion specifier.
582  */
583 /* SCANFLIKE4 */
584 int
585 xenbus_scanf(xenbus_transaction_t t,
586 		const char *dir, const char *node, const char *fmt, ...)
587 {
588 	va_list ap;
589 	int ret;
590 	char *val;
591 	unsigned int len;
592 
593 	ret = xenbus_read(t, dir, node, (void **)&val, &len);
594 	if (ret)
595 		return (ret);
596 
597 	va_start(ap, fmt);
598 	if (vsscanf(val, fmt, ap) != 1)
599 		ret = ERANGE;
600 	va_end(ap);
601 	kmem_free(val, len);
602 	return (ret);
603 }
604 
605 /* Single printf and write: returns errno or 0. */
606 /* PRINTFLIKE4 */
607 int
608 xenbus_printf(xenbus_transaction_t t,
609 		const char *dir, const char *node, const char *fmt, ...)
610 {
611 	va_list ap;
612 	int ret;
613 #define	PRINTF_BUFFER_SIZE 4096
614 	char *printf_buffer;
615 
616 	printf_buffer = kmem_alloc(PRINTF_BUFFER_SIZE, KM_SLEEP);
617 
618 	va_start(ap, fmt);
619 	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
620 	va_end(ap);
621 
622 	ASSERT(ret <= PRINTF_BUFFER_SIZE-1);
623 	ret = xenbus_write(t, dir, node, printf_buffer);
624 
625 	kmem_free(printf_buffer, PRINTF_BUFFER_SIZE);
626 
627 	return (ret);
628 }
629 
630 
631 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
632 int
633 xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
634 {
635 	va_list ap;
636 	const char *name;
637 	int ret = 0;
638 	unsigned int len;
639 
640 	va_start(ap, dir);
641 	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
642 		const char *fmt = va_arg(ap, char *);
643 		void *result = va_arg(ap, void *);
644 		char *p;
645 
646 		ret = xenbus_read(t, dir, name, (void **)&p, &len);
647 		if (ret)
648 			break;
649 		if (fmt) {
650 			ASSERT(result != NULL);
651 			if (sscanf(p, fmt, result) != 1)
652 				ret = EINVAL;
653 			kmem_free(p, len);
654 		} else
655 			*(char **)result = p;
656 	}
657 	va_end(ap);
658 	return (ret);
659 }
660 
661 static int
662 xs_watch(const char *path, const char *token)
663 {
664 	iovec_t iov[2];
665 
666 	iov[0].iov_base = (void *)path;
667 	iov[0].iov_len = strlen(path) + 1;
668 	iov[1].iov_base = (void *)token;
669 	iov[1].iov_len = strlen(token) + 1;
670 
671 	return (xs_talkv(XBT_NULL, XS_WATCH, iov, 2, NULL, NULL));
672 }
673 
674 static int
675 xs_unwatch(const char *path, const char *token)
676 {
677 	iovec_t iov[2];
678 
679 	iov[0].iov_base = (char *)path;
680 	iov[0].iov_len = strlen(path) + 1;
681 	iov[1].iov_base = (char *)token;
682 	iov[1].iov_len = strlen(token) + 1;
683 
684 	return (xs_talkv(XBT_NULL, XS_UNWATCH, iov, 2, NULL, NULL));
685 }
686 
687 static struct xenbus_watch *
688 find_watch(const char *token)
689 {
690 	struct xenbus_watch *i, *cmp;
691 
692 	(void) ddi_strtoul(token, NULL, 16, (unsigned long *)&cmp);
693 
694 	for (i = list_head(&watches); i != NULL; i = list_next(&watches, i))
695 		if (i == cmp)
696 			break;
697 
698 	return (i);
699 }
700 
701 /* Register a xenstore state notify callback */
702 int
703 xs_register_xenbus_callback(void (*callback)(int))
704 {
705 	struct xenbus_notify *xbn, *xnp;
706 
707 	xbn = kmem_alloc(sizeof (struct xenbus_notify), KM_SLEEP);
708 	xbn->notify_func = callback;
709 	mutex_enter(&notify_list_lock);
710 	/*
711 	 * Make sure not already on the list
712 	 */
713 	xnp = list_head(&notify_list);
714 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
715 		if (xnp->notify_func == callback) {
716 			kmem_free(xbn, sizeof (struct xenbus_notify));
717 			mutex_exit(&notify_list_lock);
718 			return (EEXIST);
719 		}
720 	}
721 	xnp = xbn;
722 	list_insert_tail(&notify_list, xbn);
723 done:
724 	if (xenstore_up)
725 		xnp->notify_func(XENSTORE_UP);
726 	mutex_exit(&notify_list_lock);
727 	return (0);
728 }
729 
730 /*
731  * Notify clients of xenstore state
732  */
733 static void
734 do_notify_callbacks(void *arg)
735 {
736 	struct xenbus_notify *xnp;
737 
738 	mutex_enter(&notify_list_lock);
739 	xnp = list_head(&notify_list);
740 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
741 		xnp->notify_func((int)((uintptr_t)arg));
742 	}
743 	mutex_exit(&notify_list_lock);
744 }
745 
746 void
747 xs_notify_xenstore_up(void)
748 {
749 	xenstore_up = B_TRUE;
750 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
751 	    (void *)XENSTORE_UP, 0);
752 }
753 
754 void
755 xs_notify_xenstore_down(void)
756 {
757 	xenstore_up = B_FALSE;
758 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
759 	    (void *)XENSTORE_DOWN, 0);
760 }
761 
762 /* Register callback to watch this node. */
763 int
764 register_xenbus_watch(struct xenbus_watch *watch)
765 {
766 	/* Pointer in ascii is the token. */
767 	char token[sizeof (watch) * 2 + 1];
768 	int err;
769 
770 	ASSERT(xenstore_up);
771 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
772 
773 	rw_enter(&xs_state.suspend_lock, RW_READER);
774 
775 	mutex_enter(&watches_lock);
776 	/*
777 	 * May be re-registering a watch if xenstore daemon was restarted
778 	 */
779 	if (find_watch(token) == NULL)
780 		list_insert_tail(&watches, watch);
781 	mutex_exit(&watches_lock);
782 
783 	DTRACE_XPV3(xenbus__register__watch, const char *, watch->node,
784 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
785 
786 	err = xs_watch(watch->node, token);
787 
788 	/* Ignore errors due to multiple registration. */
789 	if ((err != 0) && (err != EEXIST)) {
790 		mutex_enter(&watches_lock);
791 		list_remove(&watches, watch);
792 		mutex_exit(&watches_lock);
793 	}
794 
795 	rw_exit(&xs_state.suspend_lock);
796 
797 	return (err);
798 }
799 
800 static void
801 free_stored_msg(struct xs_stored_msg *msg)
802 {
803 	int i, len = 0;
804 
805 	for (i = 0; i < msg->un.watch.vec_size; i++)
806 		len += strlen(msg->un.watch.vec[i]) + 1 + sizeof (char *);
807 	kmem_free(msg->un.watch.vec, len);
808 	kmem_free(msg, sizeof (*msg));
809 }
810 
811 void
812 unregister_xenbus_watch(struct xenbus_watch *watch)
813 {
814 	struct xs_stored_msg *msg;
815 	char token[sizeof (watch) * 2 + 1];
816 	int err;
817 
818 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
819 
820 	rw_enter(&xs_state.suspend_lock, RW_READER);
821 
822 	mutex_enter(&watches_lock);
823 	ASSERT(find_watch(token));
824 	list_remove(&watches, watch);
825 	mutex_exit(&watches_lock);
826 
827 	DTRACE_XPV3(xenbus__unregister__watch, const char *, watch->node,
828 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
829 
830 	err = xs_unwatch(watch->node, token);
831 	if (err)
832 		cmn_err(CE_WARN, "XENBUS Failed to release watch %s: %d",
833 		    watch->node, err);
834 
835 	rw_exit(&xs_state.suspend_lock);
836 
837 	/* Cancel pending watch events. */
838 	mutex_enter(&watch_events_lock);
839 	msg = list_head(&watch_events);
840 
841 	while (msg != NULL) {
842 		struct xs_stored_msg *tmp = list_next(&watch_events, msg);
843 		if (msg->un.watch.handle == watch) {
844 			list_remove(&watch_events, msg);
845 			free_stored_msg(msg);
846 		}
847 		msg = tmp;
848 	}
849 
850 	mutex_exit(&watch_events_lock);
851 
852 	/* Flush any currently-executing callback, unless we are it. :-) */
853 	if (mutex_owner(&xenwatch_mutex) != curthread) {
854 		mutex_enter(&xenwatch_mutex);
855 		mutex_exit(&xenwatch_mutex);
856 	}
857 }
858 
/*
 * Quiesce xenbus for suspend.  Takes suspend_lock as writer and then
 * request_mutex, and returns with BOTH still held; xenbus_resume() is the
 * function that releases them.
 */
void
xenbus_suspend(void)
{
	rw_enter(&xs_state.suspend_lock, RW_WRITER);
	mutex_enter(&xs_state.request_mutex);

	xb_suspend();
}
867 
868 void
869 xenbus_resume(void)
870 {
871 	struct xenbus_watch *watch;
872 	char token[sizeof (watch) * 2 + 1];
873 
874 	mutex_exit(&xs_state.request_mutex);
875 
876 	xb_init();
877 	xb_setup_intr();
878 
879 	/* No need for watches_lock: the suspend_lock is sufficient. */
880 	for (watch = list_head(&watches); watch != NULL;
881 	    watch = list_next(&watches, watch)) {
882 		(void) snprintf(token, sizeof (token), "%lX", (long)watch);
883 		(void) xs_watch(watch->node, token);
884 	}
885 
886 	rw_exit(&xs_state.suspend_lock);
887 }
888 
/*
 * Body of the xenwatch kernel thread.  Loops forever: waits for an event
 * to appear on the watch_events list, dequeues it, and runs the owning
 * watch's callback.
 */
static void
xenwatch_thread(void)
{
	struct xs_stored_msg *msg;
	struct xenbus_watch *watch;

	for (;;) {
		/* Sleep until process_msg() queues an event. */
		mutex_enter(&watch_events_lock);
		while (list_empty(&watch_events))
			cv_wait(&watch_events_cv, &watch_events_lock);
		msg = list_head(&watch_events);
		ASSERT(msg != NULL);
		list_remove(&watch_events, msg);
		watch = msg->un.watch.handle;
		mutex_exit(&watch_events_lock);

		/*
		 * The callback runs with xenwatch_mutex held, which is what
		 * lets unregister_xenbus_watch() wait for a running callback
		 * by acquiring and releasing the same mutex.
		 */
		mutex_enter(&xenwatch_mutex);

		DTRACE_XPV4(xenbus__fire__watch,
		    const char *, watch->node,
		    uintptr_t, watch->callback,
		    struct xenbus_watch *, watch,
		    const char *, msg->un.watch.vec[XS_WATCH_PATH]);

		watch->callback(watch, (const char **)msg->un.watch.vec,
		    msg->un.watch.vec_size);

		/* The event is consumed; release its vector and wrapper. */
		free_stored_msg(msg);
		mutex_exit(&xenwatch_mutex);
	}
}
920 
921 static int
922 process_msg(void)
923 {
924 	struct xs_stored_msg *msg;
925 	char *body;
926 	int err, mlen;
927 
928 	msg = kmem_alloc(sizeof (*msg), KM_SLEEP);
929 
930 	err = xb_read(&msg->hdr, sizeof (msg->hdr));
931 	if (err) {
932 		kmem_free(msg, sizeof (*msg));
933 		return (err);
934 	}
935 
936 	mlen = msg->hdr.len + 1;
937 	body = kmem_alloc(mlen, KM_SLEEP);
938 
939 	err = xb_read(body, msg->hdr.len);
940 	if (err) {
941 		kmem_free(body, mlen);
942 		kmem_free(msg, sizeof (*msg));
943 		return (err);
944 	}
945 
946 	body[mlen - 1] = '\0';
947 
948 	if (msg->hdr.type == XS_WATCH_EVENT) {
949 		const char *token;
950 		msg->un.watch.vec = split(body, msg->hdr.len + 1,
951 		    &msg->un.watch.vec_size);
952 		if (msg->un.watch.vec == NULL) {
953 			kmem_free(msg, sizeof (*msg));
954 			return (EIO);
955 		}
956 
957 		mutex_enter(&watches_lock);
958 		token = msg->un.watch.vec[XS_WATCH_TOKEN];
959 		if ((msg->un.watch.handle = find_watch(token)) != NULL) {
960 			mutex_enter(&watch_events_lock);
961 
962 			DTRACE_XPV4(xenbus__enqueue__watch,
963 			    const char *, msg->un.watch.handle->node,
964 			    uintptr_t, msg->un.watch.handle->callback,
965 			    struct xenbus_watch *, msg->un.watch.handle,
966 			    const char *, msg->un.watch.vec[XS_WATCH_PATH]);
967 
968 			list_insert_tail(&watch_events, msg);
969 			cv_broadcast(&watch_events_cv);
970 			mutex_exit(&watch_events_lock);
971 		} else {
972 			free_stored_msg(msg);
973 		}
974 		mutex_exit(&watches_lock);
975 	} else {
976 		msg->un.reply.body = body;
977 		mutex_enter(&xs_state.reply_lock);
978 		list_insert_tail(&xs_state.reply_list, msg);
979 		mutex_exit(&xs_state.reply_lock);
980 		cv_signal(&xs_state.reply_cv);
981 	}
982 
983 	return (0);
984 }
985 
986 static void
987 xenbus_thread(void)
988 {
989 	int err;
990 
991 	for (; interrupts_unleashed != 0; ) {
992 		err = process_msg();
993 		if (err)
994 			cmn_err(CE_WARN, "XENBUS error %d while reading "
995 			    "message", err);
996 	}
997 }
998 
999 /*
1000  * When setting up xenbus, dom0 and domU have to take different paths, which
1001  * makes this code a little confusing. For dom0:
1002  *
1003  * xs_early_init - mutex init only
1004  * xs_dom0_init - called on xenbus dev attach: set up our xenstore page and
1005  * event channel; start xenbus threads for responding to interrupts.
1006  *
1007  * And for domU:
1008  *
1009  * xs_early_init - mutex init; set up our xenstore page and event channel
1010  * xs_domu_init - installation of IRQ handler; start xenbus threads.
1011  *
1012  * We need an early init on domU so we can use xenbus in polled mode to
1013  * discover devices, VCPUs etc.
1014  *
1015  * On resume, we use xb_init() and xb_setup_intr() to restore xenbus to a
1016  * working state.
1017  */
1018 
1019 void
1020 xs_early_init(void)
1021 {
1022 	list_create(&xs_state.reply_list, sizeof (struct xs_stored_msg),
1023 	    offsetof(struct xs_stored_msg, list));
1024 	list_create(&watch_events, sizeof (struct xs_stored_msg),
1025 	    offsetof(struct xs_stored_msg, list));
1026 	list_create(&watches, sizeof (struct xenbus_watch),
1027 	    offsetof(struct xenbus_watch, list));
1028 	list_create(&notify_list, sizeof (struct xenbus_notify),
1029 	    offsetof(struct xenbus_notify, list));
1030 	mutex_init(&xs_state.reply_lock, NULL, MUTEX_DEFAULT, NULL);
1031 	mutex_init(&xs_state.request_mutex, NULL, MUTEX_DEFAULT, NULL);
1032 	mutex_init(&notify_list_lock, NULL, MUTEX_DEFAULT, NULL);
1033 	rw_init(&xs_state.suspend_lock, NULL, RW_DEFAULT, NULL);
1034 	cv_init(&xs_state.reply_cv, NULL, CV_DEFAULT, NULL);
1035 
1036 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1037 		return;
1038 
1039 	xb_init();
1040 	xenstore_up = B_TRUE;
1041 }
1042 
/*
 * Start the two xenbus service threads (xenwatch_thread and xenbus_thread)
 * and create the task queue used to deliver xenstore up/down
 * notifications.
 */
static void
xs_thread_init(void)
{
	(void) thread_create(NULL, 0, xenwatch_thread, NULL, 0, &p0,
	    TS_RUN, minclsyspri);
	(void) thread_create(NULL, 0, xenbus_thread, NULL, 0, &p0,
	    TS_RUN, minclsyspri);
	xenbus_taskq = taskq_create("xenbus_taskq", 1,
	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
	ASSERT(xenbus_taskq != NULL);
}
1054 
1055 void
1056 xs_domu_init(void)
1057 {
1058 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1059 		return;
1060 
1061 	/*
1062 	 * Add interrupt handler for xenbus now, must wait till after
1063 	 * psm module is loaded.  All use of xenbus is in polled mode
1064 	 * until xs_init is called since it is what kicks off the xs
1065 	 * server threads.
1066 	 */
1067 	xs_thread_init();
1068 	xb_setup_intr();
1069 }
1070 
1071 
1072 void
1073 xs_dom0_init(void)
1074 {
1075 	static boolean_t initialized = B_FALSE;
1076 
1077 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1078 
1079 	/*
1080 	 * The xenbus driver might be re-attaching.
1081 	 */
1082 	if (initialized)
1083 		return;
1084 
1085 	xb_init();
1086 	xs_thread_init();
1087 	xb_setup_intr();
1088 
1089 	initialized = B_TRUE;
1090 }
1091