xref: /titanic_50/usr/src/uts/common/xen/io/xenbus_xs.c (revision a574db851cdc636fc3939b68e80d79fe7fbd57f2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *
29  * xenbus_xs.c
30  *
31  * This is the kernel equivalent of the "xs" library.  We don't need everything
32  * and we use xenbus_comms for communication.
33  *
34  * Copyright (C) 2005 Rusty Russell, IBM Corporation
35  *
36  * This file may be distributed separately from the Linux kernel, or
37  * incorporated into other software packages, subject to the following license:
38  *
39  * Permission is hereby granted, free of charge, to any person obtaining a copy
40  * of this source file (the "Software"), to deal in the Software without
41  * restriction, including without limitation the rights to use, copy, modify,
42  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
43  * and to permit persons to whom the Software is furnished to do so, subject to
44  * the following conditions:
45  *
46  * The above copyright notice and this permission notice shall be included in
47  * all copies or substantial portions of the Software.
48  *
49  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55  * IN THE SOFTWARE.
56  */
57 
58 /*
59  * NOTE: To future maintainers of the Solaris version of this file:
60  * I found the Linux version of this code to be very disgusting in
61  * overloading pointers and error codes into void * return values.
62  * The main difference you will find is that all such usage is changed
63  * to pass pointers to void* to be filled in with return values and
64  * the functions return error codes.
65  */
66 
67 #pragma ident	"%Z%%M%	%I%	%E% SMI"
68 
69 #include <sys/errno.h>
70 #include <sys/types.h>
71 #include <sys/sysmacros.h>
72 #include <sys/uio.h>
73 #include <sys/mutex.h>
74 #include <sys/condvar.h>
75 #include <sys/rwlock.h>
76 #include <sys/disp.h>
77 #include <sys/ddi.h>
78 #include <sys/sunddi.h>
79 #include <sys/avintr.h>
80 #include <sys/cmn_err.h>
81 #include <sys/mach_mmu.h>
82 #include <util/sscanf.h>
83 #define	_XSD_ERRORS_DEFINED
84 #ifdef XPV_HVM_DRIVER
85 #include <sys/xpv_support.h>
86 #endif
87 #include <sys/hypervisor.h>
88 #include <sys/taskq.h>
89 #include <sys/sdt.h>
90 #include <xen/sys/xenbus_impl.h>
91 #include <xen/sys/xenbus_comms.h>
92 #include <xen/sys/xendev.h>
93 #include <xen/public/io/xs_wire.h>
94 
95 #define	streq(a, b) (strcmp((a), (b)) == 0)
96 
97 #define	list_empty(list) (list_head(list) == NULL)
98 
/*
 * One message read from xenstore by process_msg().  Watch events
 * (hdr.type == XS_WATCH_EVENT) use un.watch and are queued on watch_events;
 * every other message uses un.reply and is queued on xs_state.reply_list.
 */
99 struct xs_stored_msg {
100 	list_t list;	/* offset of this member is given to list_create() */
101 
102 	struct xsd_sockmsg hdr;	/* message header as read by xb_read() */
103 
104 	union {
105 		/* Queued replies. */
106 		struct {
107 			char *body;	/* hdr.len + 1 bytes, NUL-terminated */
108 		} reply;
109 
110 		/* Queued watch events. */
111 		struct {
112 			struct xenbus_watch *handle;	/* from find_watch() */
113 			char **vec;		/* built by split() */
114 			unsigned int vec_size;	/* number of strings in vec */
115 		} watch;
116 	} un;
117 };
118 
/*
 * Global state of the xenstore connection.  The members are initialised in
 * xs_early_init().
 */
119 static struct xs_handle {
120 	/* A list of replies. Currently only one will ever be outstanding. */
121 	list_t reply_list;
122 	kmutex_t reply_lock;	/* protects reply_list */
123 	kcondvar_t reply_cv;	/* signalled by process_msg(), see read_reply() */
124 
125 	/* One request at a time. */
126 	kmutex_t request_mutex;
127 
128 	/*
	 * Protect transactions against save/restore.  Taken as writer in
	 * xenbus_suspend() and dropped in xenbus_resume().
	 */
129 	krwlock_t suspend_lock;
130 } xs_state;
131 
132 static int last_req_id;
133 
134 /*
135  * List of clients wanting a xenstore up notification, and a lock to protect it
136  */
137 static boolean_t xenstore_up;
138 static list_t notify_list;
139 static kmutex_t notify_list_lock;
140 static taskq_t *xenbus_taskq;
141 
142 /* List of registered watches, and a lock to protect it. */
143 static list_t watches;
144 static kmutex_t watches_lock;
145 
146 /* List of pending watch callback events, and a lock to protect it. */
147 static list_t watch_events;
148 static kmutex_t watch_events_lock;
149 
150 /*
151  * Details of the xenwatch callback kernel thread. The thread waits on the
152  * watch_events_cv for work to do (queued on watch_events list). When it
153  * wakes up it acquires the xenwatch_mutex before reading the list and
154  * carrying out work.
155  */
156 static kmutex_t xenwatch_mutex;
157 static kcondvar_t watch_events_cv;
158 
159 static int process_msg(void);
160 
161 static int
162 get_error(const char *errorstring)
163 {
164 	unsigned int i;
165 
166 	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
167 		if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
168 			cmn_err(CE_WARN,
169 			    "XENBUS xen store gave: unknown error %s",
170 			    errorstring);
171 			return (EINVAL);
172 		}
173 	}
174 	return (xsd_errors[i].errnum);
175 }
176 
177 /*
178  * Read a synchronous reply from xenstore.  Since we can return early before
179  * reading a relevant reply, we discard any messages not matching the request
180  * ID.  Caller must free returned message on success.
181  */
182 static int
183 read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
184 {
185 	extern int do_polled_io;
186 
187 	mutex_enter(&xs_state.reply_lock);
188 
189 	for (;;) {
190 		while (list_empty(&xs_state.reply_list)) {
191 			if (interrupts_unleashed && !do_polled_io) {
192 				if (cv_wait_sig(&xs_state.reply_cv,
193 				    &xs_state.reply_lock) == 0) {
194 					mutex_exit(&xs_state.reply_lock);
195 					*reply = NULL;
196 					return (EINTR);
197 				}
198 			} else { /* polled mode needed for early probes */
199 				mutex_exit(&xs_state.reply_lock);
200 				(void) HYPERVISOR_yield();
201 				(void) process_msg();
202 				mutex_enter(&xs_state.reply_lock);
203 			}
204 		}
205 
206 		*reply = list_head(&xs_state.reply_list);
207 		list_remove(&xs_state.reply_list, *reply);
208 
209 		if ((*reply)->hdr.req_id == req_hdr->req_id)
210 			break;
211 	}
212 
213 	mutex_exit(&xs_state.reply_lock);
214 	return (0);
215 }
216 
217 /* Emergency write. */
218 void
219 xenbus_debug_write(const char *str, unsigned int count)
220 {
221 	struct xsd_sockmsg msg = { 0 };
222 
223 	msg.type = XS_DEBUG;
224 	msg.len = sizeof ("print") + count + 1;
225 
226 	mutex_enter(&xs_state.request_mutex);
227 	(void) xb_write(&msg, sizeof (msg));
228 	(void) xb_write("print", sizeof ("print"));
229 	(void) xb_write(str, count);
230 	(void) xb_write("", 1);
231 	mutex_exit(&xs_state.request_mutex);
232 }
233 
234 /*
235  * This is pretty unpleasant.  First off, there's the horrible logic around
236  * suspend_lock and transactions.  Also, we can be interrupted either before we
237  * write a message, or before we receive a reply.  A client that wants to
238  * survive this can't know which case happened.  Luckily all clients don't care
239  * about signals currently, and the alternative (a hard wait on a userspace
240  * daemon) isn't exactly preferable.  Caller must free 'reply' on success.
241  */
242 int
243 xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
244 {
245 	struct xsd_sockmsg req_msg = *msg;
246 	struct xs_stored_msg *reply_msg = NULL;
247 	int err;
248 
249 	if (req_msg.type == XS_TRANSACTION_START)
250 		rw_enter(&xs_state.suspend_lock, RW_READER);
251 
252 	mutex_enter(&xs_state.request_mutex);
253 
254 	msg->req_id = last_req_id++;
255 
256 	err = xb_write(msg, sizeof (*msg) + msg->len);
257 	if (err) {
258 		if (req_msg.type == XS_TRANSACTION_START)
259 			rw_exit(&xs_state.suspend_lock);
260 		msg->type = XS_ERROR;
261 		*reply = NULL;
262 		goto out;
263 	}
264 
265 	err = read_reply(msg, &reply_msg);
266 
267 	if (err) {
268 		if (msg->type == XS_TRANSACTION_START)
269 			rw_exit(&xs_state.suspend_lock);
270 		*reply = NULL;
271 		goto out;
272 	}
273 
274 	*reply = reply_msg->un.reply.body;
275 	*msg = reply_msg->hdr;
276 
277 	if (reply_msg->hdr.type == XS_TRANSACTION_END)
278 		rw_exit(&xs_state.suspend_lock);
279 
280 out:
281 	if (reply_msg != NULL)
282 		kmem_free(reply_msg, sizeof (*reply_msg));
283 
284 	mutex_exit(&xs_state.request_mutex);
285 	return (err);
286 }
287 
288 /*
289  * Send message to xs, return errcode, rval filled in with pointer
290  * to kmem_alloc'ed reply.
291  */
292 static int
293 xs_talkv(xenbus_transaction_t t,
294 		    enum xsd_sockmsg_type type,
295 		    const iovec_t *iovec,
296 		    unsigned int num_vecs,
297 		    void **rval,
298 		    unsigned int *len)
299 {
300 	struct xsd_sockmsg msg;
301 	struct xs_stored_msg *reply_msg;
302 	char *reply;
303 	unsigned int i;
304 	int err;
305 
306 	msg.tx_id = (uint32_t)(unsigned long)t;
307 	msg.type = type;
308 	msg.len = 0;
309 	for (i = 0; i < num_vecs; i++)
310 		msg.len += iovec[i].iov_len;
311 
312 	mutex_enter(&xs_state.request_mutex);
313 
314 	msg.req_id = last_req_id++;
315 
316 	err = xb_write(&msg, sizeof (msg));
317 	if (err) {
318 		mutex_exit(&xs_state.request_mutex);
319 		return (err);
320 	}
321 
322 	for (i = 0; i < num_vecs; i++) {
323 		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
324 		if (err) {
325 			mutex_exit(&xs_state.request_mutex);
326 			return (err);
327 		}
328 	}
329 
330 	err = read_reply(&msg, &reply_msg);
331 
332 	mutex_exit(&xs_state.request_mutex);
333 
334 	if (err)
335 		return (err);
336 
337 	reply = reply_msg->un.reply.body;
338 
339 	if (reply_msg->hdr.type == XS_ERROR) {
340 		err = get_error(reply);
341 		kmem_free(reply, reply_msg->hdr.len + 1);
342 		goto out;
343 	}
344 
345 	if (len != NULL)
346 		*len = reply_msg->hdr.len + 1;
347 
348 	ASSERT(reply_msg->hdr.type == type);
349 
350 	if (rval != NULL)
351 		*rval = reply;
352 	else
353 		kmem_free(reply, reply_msg->hdr.len + 1);
354 
355 out:
356 	kmem_free(reply_msg, sizeof (*reply_msg));
357 	return (err);
358 }
359 
360 /* Simplified version of xs_talkv: single message. */
361 static int
362 xs_single(xenbus_transaction_t t,
363 			enum xsd_sockmsg_type type,
364 			const char *string, void **ret,
365 			unsigned int *len)
366 {
367 	iovec_t iovec;
368 
369 	iovec.iov_base = (char *)string;
370 	iovec.iov_len = strlen(string) + 1;
371 	return (xs_talkv(t, type, &iovec, 1, ret, len));
372 }
373 
/*
 * Count the NUL-terminated strings packed back to back in the first len
 * bytes of 'strings'.  A string that starts inside the range is counted
 * even if it ends beyond it.
 */
static unsigned int
count_strings(const char *strings, unsigned int len)
{
	const char *end = strings + len;
	const char *cur = strings;
	unsigned int total = 0;

	while (cur < end) {
		total++;
		cur += strlen(cur) + 1;
	}

	return (total);
}
385 
386 /* Return the path to dir with /name appended. Buffer must be kmem_free()'ed */
387 static char *
388 join(const char *dir, const char *name)
389 {
390 	char *buffer;
391 	size_t slashlen;
392 
393 	slashlen = streq(name, "") ? 0 : 1;
394 	buffer = kmem_alloc(strlen(dir) + slashlen + strlen(name) + 1,
395 	    KM_SLEEP);
396 
397 	(void) strcpy(buffer, dir);
398 	if (slashlen != 0) {
399 		(void) strcat(buffer, "/");
400 		(void) strcat(buffer, name);
401 	}
402 	return (buffer);
403 }
404 
405 static char **
406 split(char *strings, unsigned int len, unsigned int *num)
407 {
408 	char *p, **ret;
409 
410 	/* Count the strings. */
411 	if ((*num = count_strings(strings, len - 1)) == 0)
412 		return (NULL);
413 
414 	/* Transfer to one big alloc for easy freeing. */
415 	ret = kmem_alloc(*num * sizeof (char *) + (len - 1), KM_SLEEP);
416 	(void) memcpy(&ret[*num], strings, len - 1);
417 	kmem_free(strings, len);
418 
419 	strings = (char *)&ret[*num];
420 	for (p = strings, *num = 0; p < strings + (len - 1);
421 	    p += strlen(p) + 1) {
422 		ret[(*num)++] = p;
423 	}
424 
425 	return (ret);
426 }
427 
428 char **
429 xenbus_directory(xenbus_transaction_t t,
430 			const char *dir, const char *node, unsigned int *num)
431 {
432 	char *strings, *path;
433 	unsigned int len;
434 	int err;
435 
436 	path = join(dir, node);
437 	err = xs_single(t, XS_DIRECTORY, path, (void **)&strings, &len);
438 	kmem_free(path, strlen(path) + 1);
439 	if (err != 0 || strings == NULL) {
440 		/* sigh, we lose error code info here */
441 		*num = 0;
442 		return (NULL);
443 	}
444 
445 	return (split(strings, len, num));
446 }
447 
448 /* Check if a path exists. Return 1 if it does. */
449 int
450 xenbus_exists(xenbus_transaction_t t, const char *dir, const char *node)
451 {
452 	char **d;
453 	unsigned int dir_n;
454 	int i, len;
455 
456 	d = xenbus_directory(t, dir, node, &dir_n);
457 	if (d == NULL)
458 		return (0);
459 	for (i = 0, len = 0; i < dir_n; i++)
460 		len += strlen(d[i]) + 1 + sizeof (char *);
461 	kmem_free(d, len);
462 	return (1);
463 }
464 
465 /*
466  * Get the value of a single file.
467  * Returns a kmem_alloced value in retp: call kmem_free() on it after use.
468  * len indicates length in bytes.
469  */
470 int
471 xenbus_read(xenbus_transaction_t t,
472 	    const char *dir, const char *node, void **retp, unsigned int *len)
473 {
474 	char *path;
475 	int err;
476 
477 	path = join(dir, node);
478 	err = xs_single(t, XS_READ, path, retp, len);
479 	kmem_free(path, strlen(path) + 1);
480 	return (err);
481 }
482 
483 /*
484  * Write the value of a single file.
485  * Returns err on failure.
486  */
487 int
488 xenbus_write(xenbus_transaction_t t,
489 		const char *dir, const char *node, const char *string)
490 {
491 	char *path;
492 	iovec_t iovec[2];
493 	int ret;
494 
495 	path = join(dir, node);
496 
497 	iovec[0].iov_base = (void *)path;
498 	iovec[0].iov_len = strlen(path) + 1;
499 	iovec[1].iov_base = (void *)string;
500 	iovec[1].iov_len = strlen(string);
501 
502 	ret = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
503 	kmem_free(path, iovec[0].iov_len);
504 	return (ret);
505 }
506 
507 /* Create a new directory. */
508 int
509 xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node)
510 {
511 	char *path;
512 	int ret;
513 
514 	path = join(dir, node);
515 	ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
516 	kmem_free(path, strlen(path) + 1);
517 	return (ret);
518 }
519 
520 /* Destroy a file or directory (directories must be empty). */
521 int
522 xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
523 {
524 	char *path;
525 	int ret;
526 
527 	path = join(dir, node);
528 	ret = xs_single(t, XS_RM, path, NULL, NULL);
529 	kmem_free(path, strlen(path) + 1);
530 	return (ret);
531 }
532 
533 /*
534  * Start a transaction: changes by others will not be seen during this
535  * transaction, and changes will not be visible to others until end.
536  */
537 int
538 xenbus_transaction_start(xenbus_transaction_t *t)
539 {
540 	void *id_str;
541 	unsigned long id;
542 	int err;
543 	unsigned int len;
544 
545 	rw_enter(&xs_state.suspend_lock, RW_READER);
546 
547 	err = xs_single(XBT_NULL, XS_TRANSACTION_START, "", &id_str, &len);
548 	if (err) {
549 		rw_exit(&xs_state.suspend_lock);
550 		return (err);
551 	}
552 
553 	(void) ddi_strtoul((char *)id_str, NULL, 0, &id);
554 	*t = (xenbus_transaction_t)id;
555 	kmem_free(id_str, len);
556 
557 	return (0);
558 }
559 
560 /*
561  * End a transaction.
562  * If abandon is true, transaction is discarded instead of committed.
563  */
564 int
565 xenbus_transaction_end(xenbus_transaction_t t, int abort)
566 {
567 	char abortstr[2];
568 	int err;
569 
570 	if (abort)
571 		(void) strcpy(abortstr, "F");
572 	else
573 		(void) strcpy(abortstr, "T");
574 
575 	err = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
576 
577 	rw_exit(&xs_state.suspend_lock);
578 
579 	return (err);
580 }
581 
582 /*
583  * Single read and scanf: returns errno or 0.  This can only handle a single
584  * conversion specifier.
585  */
586 /* SCANFLIKE4 */
587 int
588 xenbus_scanf(xenbus_transaction_t t,
589 		const char *dir, const char *node, const char *fmt, ...)
590 {
591 	va_list ap;
592 	int ret;
593 	char *val;
594 	unsigned int len;
595 
596 	ret = xenbus_read(t, dir, node, (void **)&val, &len);
597 	if (ret)
598 		return (ret);
599 
600 	va_start(ap, fmt);
601 	if (vsscanf(val, fmt, ap) != 1)
602 		ret = ERANGE;
603 	va_end(ap);
604 	kmem_free(val, len);
605 	return (ret);
606 }
607 
608 /* Single printf and write: returns errno or 0. */
609 /* PRINTFLIKE4 */
610 int
611 xenbus_printf(xenbus_transaction_t t,
612 		const char *dir, const char *node, const char *fmt, ...)
613 {
614 	va_list ap;
615 	int ret;
616 #define	PRINTF_BUFFER_SIZE 4096
617 	char *printf_buffer;
618 
619 	printf_buffer = kmem_alloc(PRINTF_BUFFER_SIZE, KM_SLEEP);
620 
621 	va_start(ap, fmt);
622 	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
623 	va_end(ap);
624 
625 	ASSERT(ret <= PRINTF_BUFFER_SIZE-1);
626 	ret = xenbus_write(t, dir, node, printf_buffer);
627 
628 	kmem_free(printf_buffer, PRINTF_BUFFER_SIZE);
629 
630 	return (ret);
631 }
632 
633 
634 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
635 int
636 xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
637 {
638 	va_list ap;
639 	const char *name;
640 	int ret = 0;
641 	unsigned int len;
642 
643 	va_start(ap, dir);
644 	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
645 		const char *fmt = va_arg(ap, char *);
646 		void *result = va_arg(ap, void *);
647 		char *p;
648 
649 		ret = xenbus_read(t, dir, name, (void **)&p, &len);
650 		if (ret)
651 			break;
652 		if (fmt) {
653 			ASSERT(result != NULL);
654 			if (sscanf(p, fmt, result) != 1)
655 				ret = EINVAL;
656 			kmem_free(p, len);
657 		} else
658 			*(char **)result = p;
659 	}
660 	va_end(ap);
661 	return (ret);
662 }
663 
664 static int
665 xs_watch(const char *path, const char *token)
666 {
667 	iovec_t iov[2];
668 
669 	iov[0].iov_base = (void *)path;
670 	iov[0].iov_len = strlen(path) + 1;
671 	iov[1].iov_base = (void *)token;
672 	iov[1].iov_len = strlen(token) + 1;
673 
674 	return (xs_talkv(XBT_NULL, XS_WATCH, iov, 2, NULL, NULL));
675 }
676 
677 static int
678 xs_unwatch(const char *path, const char *token)
679 {
680 	iovec_t iov[2];
681 
682 	iov[0].iov_base = (char *)path;
683 	iov[0].iov_len = strlen(path) + 1;
684 	iov[1].iov_base = (char *)token;
685 	iov[1].iov_len = strlen(token) + 1;
686 
687 	return (xs_talkv(XBT_NULL, XS_UNWATCH, iov, 2, NULL, NULL));
688 }
689 
690 static struct xenbus_watch *
691 find_watch(const char *token)
692 {
693 	struct xenbus_watch *i, *cmp;
694 
695 	(void) ddi_strtoul(token, NULL, 16, (unsigned long *)&cmp);
696 
697 	for (i = list_head(&watches); i != NULL; i = list_next(&watches, i))
698 		if (i == cmp)
699 			break;
700 
701 	return (i);
702 }
703 
704 /* Register a xenstore state notify callback */
705 int
706 xs_register_xenbus_callback(void (*callback)(int))
707 {
708 	struct xenbus_notify *xbn, *xnp;
709 
710 	xbn = kmem_alloc(sizeof (struct xenbus_notify), KM_SLEEP);
711 	xbn->notify_func = callback;
712 	mutex_enter(&notify_list_lock);
713 	/*
714 	 * Make sure not already on the list
715 	 */
716 	xnp = list_head(&notify_list);
717 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
718 		if (xnp->notify_func == callback) {
719 			kmem_free(xbn, sizeof (struct xenbus_notify));
720 			mutex_exit(&notify_list_lock);
721 			return (EEXIST);
722 		}
723 	}
724 	xnp = xbn;
725 	list_insert_tail(&notify_list, xbn);
726 done:
727 	if (xenstore_up)
728 		xnp->notify_func(XENSTORE_UP);
729 	mutex_exit(&notify_list_lock);
730 	return (0);
731 }
732 
733 /*
734  * Notify clients of xenstore state
735  */
736 static void
737 do_notify_callbacks(void *arg)
738 {
739 	struct xenbus_notify *xnp;
740 
741 	mutex_enter(&notify_list_lock);
742 	xnp = list_head(&notify_list);
743 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
744 		xnp->notify_func((int)((uintptr_t)arg));
745 	}
746 	mutex_exit(&notify_list_lock);
747 }
748 
749 void
750 xs_notify_xenstore_up(void)
751 {
752 	xenstore_up = B_TRUE;
753 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
754 	    (void *)XENSTORE_UP, 0);
755 }
756 
757 void
758 xs_notify_xenstore_down(void)
759 {
760 	xenstore_up = B_FALSE;
761 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
762 	    (void *)XENSTORE_DOWN, 0);
763 }
764 
765 /* Register callback to watch this node. */
766 int
767 register_xenbus_watch(struct xenbus_watch *watch)
768 {
769 	/* Pointer in ascii is the token. */
770 	char token[sizeof (watch) * 2 + 1];
771 	int err;
772 
773 	ASSERT(xenstore_up);
774 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
775 
776 	rw_enter(&xs_state.suspend_lock, RW_READER);
777 
778 	mutex_enter(&watches_lock);
779 	/*
780 	 * May be re-registering a watch if xenstore daemon was restarted
781 	 */
782 	if (find_watch(token) == NULL)
783 		list_insert_tail(&watches, watch);
784 	mutex_exit(&watches_lock);
785 
786 	DTRACE_XPV3(xenbus__register__watch, const char *, watch->node,
787 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
788 
789 	err = xs_watch(watch->node, token);
790 
791 	/* Ignore errors due to multiple registration. */
792 	if ((err != 0) && (err != EEXIST)) {
793 		mutex_enter(&watches_lock);
794 		list_remove(&watches, watch);
795 		mutex_exit(&watches_lock);
796 	}
797 
798 	rw_exit(&xs_state.suspend_lock);
799 
800 	return (err);
801 }
802 
803 static void
804 free_stored_msg(struct xs_stored_msg *msg)
805 {
806 	int i, len = 0;
807 
808 	for (i = 0; i < msg->un.watch.vec_size; i++)
809 		len += strlen(msg->un.watch.vec[i]) + 1 + sizeof (char *);
810 	kmem_free(msg->un.watch.vec, len);
811 	kmem_free(msg, sizeof (*msg));
812 }
813 
814 void
815 unregister_xenbus_watch(struct xenbus_watch *watch)
816 {
817 	struct xs_stored_msg *msg;
818 	char token[sizeof (watch) * 2 + 1];
819 	int err;
820 
821 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
822 
823 	rw_enter(&xs_state.suspend_lock, RW_READER);
824 
825 	mutex_enter(&watches_lock);
826 	ASSERT(find_watch(token));
827 	list_remove(&watches, watch);
828 	mutex_exit(&watches_lock);
829 
830 	DTRACE_XPV3(xenbus__unregister__watch, const char *, watch->node,
831 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
832 
833 	err = xs_unwatch(watch->node, token);
834 	if (err)
835 		cmn_err(CE_WARN, "XENBUS Failed to release watch %s: %d",
836 		    watch->node, err);
837 
838 	rw_exit(&xs_state.suspend_lock);
839 
840 	/* Cancel pending watch events. */
841 	mutex_enter(&watch_events_lock);
842 	msg = list_head(&watch_events);
843 
844 	while (msg != NULL) {
845 		struct xs_stored_msg *tmp = list_next(&watch_events, msg);
846 		if (msg->un.watch.handle == watch) {
847 			list_remove(&watch_events, msg);
848 			free_stored_msg(msg);
849 		}
850 		msg = tmp;
851 	}
852 
853 	mutex_exit(&watch_events_lock);
854 
855 	/* Flush any currently-executing callback, unless we are it. :-) */
856 	if (mutex_owner(&xenwatch_mutex) != curthread) {
857 		mutex_enter(&xenwatch_mutex);
858 		mutex_exit(&xenwatch_mutex);
859 	}
860 }
861 
862 void
863 xenbus_suspend(void)
864 {
865 	rw_enter(&xs_state.suspend_lock, RW_WRITER);
866 	mutex_enter(&xs_state.request_mutex);
867 
868 	xb_suspend();
869 }
870 
871 void
872 xenbus_resume(void)
873 {
874 	struct xenbus_watch *watch;
875 	char token[sizeof (watch) * 2 + 1];
876 
877 	mutex_exit(&xs_state.request_mutex);
878 
879 	xb_init();
880 	xb_setup_intr();
881 
882 	/* No need for watches_lock: the suspend_lock is sufficient. */
883 	for (watch = list_head(&watches); watch != NULL;
884 	    watch = list_next(&watches, watch)) {
885 		(void) snprintf(token, sizeof (token), "%lX", (long)watch);
886 		(void) xs_watch(watch->node, token);
887 	}
888 
889 	rw_exit(&xs_state.suspend_lock);
890 }
891 
892 static void
893 xenwatch_thread(void)
894 {
895 	struct xs_stored_msg *msg;
896 	struct xenbus_watch *watch;
897 
898 	for (;;) {
899 		mutex_enter(&watch_events_lock);
900 		while (list_empty(&watch_events))
901 			cv_wait(&watch_events_cv, &watch_events_lock);
902 		msg = list_head(&watch_events);
903 		ASSERT(msg != NULL);
904 		list_remove(&watch_events, msg);
905 		watch = msg->un.watch.handle;
906 		mutex_exit(&watch_events_lock);
907 
908 		mutex_enter(&xenwatch_mutex);
909 
910 		DTRACE_XPV4(xenbus__fire__watch,
911 		    const char *, watch->node,
912 		    uintptr_t, watch->callback,
913 		    struct xenbus_watch *, watch,
914 		    const char *, msg->un.watch.vec[XS_WATCH_PATH]);
915 
916 		watch->callback(watch, (const char **)msg->un.watch.vec,
917 		    msg->un.watch.vec_size);
918 
919 		free_stored_msg(msg);
920 		mutex_exit(&xenwatch_mutex);
921 	}
922 }
923 
924 static int
925 process_msg(void)
926 {
927 	struct xs_stored_msg *msg;
928 	char *body;
929 	int err, mlen;
930 
931 	msg = kmem_alloc(sizeof (*msg), KM_SLEEP);
932 
933 	err = xb_read(&msg->hdr, sizeof (msg->hdr));
934 	if (err) {
935 		kmem_free(msg, sizeof (*msg));
936 		return (err);
937 	}
938 
939 	mlen = msg->hdr.len + 1;
940 	body = kmem_alloc(mlen, KM_SLEEP);
941 
942 	err = xb_read(body, msg->hdr.len);
943 	if (err) {
944 		kmem_free(body, mlen);
945 		kmem_free(msg, sizeof (*msg));
946 		return (err);
947 	}
948 
949 	body[mlen - 1] = '\0';
950 
951 	if (msg->hdr.type == XS_WATCH_EVENT) {
952 		const char *token;
953 		msg->un.watch.vec = split(body, msg->hdr.len + 1,
954 		    &msg->un.watch.vec_size);
955 		if (msg->un.watch.vec == NULL) {
956 			kmem_free(msg, sizeof (*msg));
957 			return (EIO);
958 		}
959 
960 		mutex_enter(&watches_lock);
961 		token = msg->un.watch.vec[XS_WATCH_TOKEN];
962 		if ((msg->un.watch.handle = find_watch(token)) != NULL) {
963 			mutex_enter(&watch_events_lock);
964 
965 			DTRACE_XPV4(xenbus__enqueue__watch,
966 			    const char *, msg->un.watch.handle->node,
967 			    uintptr_t, msg->un.watch.handle->callback,
968 			    struct xenbus_watch *, msg->un.watch.handle,
969 			    const char *, msg->un.watch.vec[XS_WATCH_PATH]);
970 
971 			list_insert_tail(&watch_events, msg);
972 			cv_broadcast(&watch_events_cv);
973 			mutex_exit(&watch_events_lock);
974 		} else {
975 			free_stored_msg(msg);
976 		}
977 		mutex_exit(&watches_lock);
978 	} else {
979 		msg->un.reply.body = body;
980 		mutex_enter(&xs_state.reply_lock);
981 		list_insert_tail(&xs_state.reply_list, msg);
982 		mutex_exit(&xs_state.reply_lock);
983 		cv_signal(&xs_state.reply_cv);
984 	}
985 
986 	return (0);
987 }
988 
989 static void
990 xenbus_thread(void)
991 {
992 	int err;
993 
994 	for (; interrupts_unleashed != 0; ) {
995 		err = process_msg();
996 		if (err)
997 			cmn_err(CE_WARN, "XENBUS error %d while reading "
998 			    "message", err);
999 	}
1000 }
1001 
1002 /*
1003  * When setting up xenbus, dom0 and domU have to take different paths, which
1004  * makes this code a little confusing. For dom0:
1005  *
1006  * xs_early_init - mutex init only
1007  * xs_dom0_init - called on xenbus dev attach: set up our xenstore page and
1008  * event channel; start xenbus threads for responding to interrupts.
1009  *
1010  * And for domU:
1011  *
1012  * xs_early_init - mutex init; set up our xenstore page and event channel
1013  * xs_domu_init - installation of IRQ handler; start xenbus threads.
1014  *
1015  * We need an early init on domU so we can use xenbus in polled mode to
1016  * discover devices, VCPUs etc.
1017  *
1018  * On resume, we use xb_init() and xb_setup_intr() to restore xenbus to a
1019  * working state.
1020  */
1021 
1022 void
1023 xs_early_init(void)
1024 {
1025 	list_create(&xs_state.reply_list, sizeof (struct xs_stored_msg),
1026 	    offsetof(struct xs_stored_msg, list));
1027 	list_create(&watch_events, sizeof (struct xs_stored_msg),
1028 	    offsetof(struct xs_stored_msg, list));
1029 	list_create(&watches, sizeof (struct xenbus_watch),
1030 	    offsetof(struct xenbus_watch, list));
1031 	list_create(&notify_list, sizeof (struct xenbus_notify),
1032 	    offsetof(struct xenbus_notify, list));
1033 	mutex_init(&xs_state.reply_lock, NULL, MUTEX_DEFAULT, NULL);
1034 	mutex_init(&xs_state.request_mutex, NULL, MUTEX_DEFAULT, NULL);
1035 	mutex_init(&notify_list_lock, NULL, MUTEX_DEFAULT, NULL);
1036 	rw_init(&xs_state.suspend_lock, NULL, RW_DEFAULT, NULL);
1037 	cv_init(&xs_state.reply_cv, NULL, CV_DEFAULT, NULL);
1038 
1039 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1040 		return;
1041 
1042 	xb_init();
1043 	xenstore_up = B_TRUE;
1044 }
1045 
1046 static void
1047 xs_thread_init(void)
1048 {
1049 	(void) thread_create(NULL, 0, xenwatch_thread, NULL, 0, &p0,
1050 	    TS_RUN, minclsyspri);
1051 	(void) thread_create(NULL, 0, xenbus_thread, NULL, 0, &p0,
1052 	    TS_RUN, minclsyspri);
1053 	xenbus_taskq = taskq_create("xenbus_taskq", 1,
1054 	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
1055 	ASSERT(xenbus_taskq != NULL);
1056 }
1057 
1058 void
1059 xs_domu_init(void)
1060 {
1061 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1062 		return;
1063 
1064 	/*
1065 	 * Add interrupt handler for xenbus now, must wait till after
1066 	 * psm module is loaded.  All use of xenbus is in polled mode
1067 	 * until xs_init is called since it is what kicks off the xs
1068 	 * server threads.
1069 	 */
1070 	xs_thread_init();
1071 	xb_setup_intr();
1072 }
1073 
1074 
1075 void
1076 xs_dom0_init(void)
1077 {
1078 	static boolean_t initialized = B_FALSE;
1079 
1080 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1081 
1082 	/*
1083 	 * The xenbus driver might be re-attaching.
1084 	 */
1085 	if (initialized)
1086 		return;
1087 
1088 	xb_init();
1089 	xs_thread_init();
1090 	xb_setup_intr();
1091 
1092 	initialized = B_TRUE;
1093 }
1094