xref: /titanic_44/usr/src/uts/common/xen/io/xenbus_xs.c (revision d0f8ff6ee41946134faff06b3a9f643e21aefa78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *
29  * xenbus_xs.c
30  *
31  * This is the kernel equivalent of the "xs" library.  We don't need everything
32  * and we use xenbus_comms for communication.
33  *
34  * Copyright (C) 2005 Rusty Russell, IBM Corporation
35  *
36  * This file may be distributed separately from the Linux kernel, or
37  * incorporated into other software packages, subject to the following license:
38  *
39  * Permission is hereby granted, free of charge, to any person obtaining a copy
40  * of this source file (the "Software"), to deal in the Software without
41  * restriction, including without limitation the rights to use, copy, modify,
42  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
43  * and to permit persons to whom the Software is furnished to do so, subject to
44  * the following conditions:
45  *
46  * The above copyright notice and this permission notice shall be included in
47  * all copies or substantial portions of the Software.
48  *
49  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55  * IN THE SOFTWARE.
56  */
57 
58 /*
59  * NOTE: To future maintainers of the Solaris version of this file:
60  * I found the Linux version of this code to be very disgusting in
61  * overloading pointers and error codes into void * return values.
62  * The main difference you will find is that all such usage is changed
63  * to pass pointers to void* to be filled in with return values and
64  * the functions return error codes.
65  */
66 
67 #pragma ident	"%Z%%M%	%I%	%E% SMI"
68 
69 #include <sys/errno.h>
70 #include <sys/types.h>
71 #include <sys/sysmacros.h>
72 #include <sys/uio.h>
73 #include <sys/mutex.h>
74 #include <sys/condvar.h>
75 #include <sys/rwlock.h>
76 #include <sys/disp.h>
77 #include <sys/ddi.h>
78 #include <sys/sunddi.h>
79 #include <sys/avintr.h>
80 #include <sys/cmn_err.h>
81 #include <util/sscanf.h>
82 #define	_XSD_ERRORS_DEFINED
83 #include <sys/hypervisor.h>
84 #include <sys/mach_mmu.h>
85 #include <xen/sys/xenbus_impl.h>
86 #include <xen/sys/xenbus_comms.h>
87 #include <xen/sys/xendev.h>
88 #include <sys/taskq.h>
89 #include <xen/public/io/xs_wire.h>
90 
/* Evaluates to non-zero when the NUL-terminated strings a and b are equal. */
#define	streq(a, b) (strcmp((a), (b)) == 0)

/* Evaluates to non-zero when list_head() finds no element on the list. */
#define	list_empty(list) (list_head(list) == NULL)
94 
/*
 * A message read from xenstore by process_msg().  Messages whose header
 * type is XS_WATCH_EVENT use the 'watch' arm of the union and are queued on
 * watch_events; every other message uses the 'reply' arm and is queued on
 * xs_state.reply_list.
 */
struct xs_stored_msg {
	/* Linkage; list_create() is given the offset of this member. */
	list_t list;

	/* Wire header exactly as read from xenstore. */
	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			/* Payload, allocated with hdr.len + 1 bytes. */
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			/* Registered watch the event is delivered to. */
			struct xenbus_watch *handle;
			/* String vector produced by split(). */
			char **vec;
			/* Number of entries in vec. */
			unsigned int vec_size;
		} watch;
	} un;
};
114 
/*
 * State shared by all xenstore requests issued from this file; there is a
 * single static instance, xs_state.
 */
static struct xs_handle {
	/* A list of replies. Currently only one will ever be outstanding. */
	list_t reply_list;
	/* Protects reply_list; reply_cv is waited on with this lock held. */
	kmutex_t reply_lock;
	/* Signalled by process_msg() after it queues a reply. */
	kcondvar_t reply_cv;

	/* One request at a time. */
	kmutex_t request_mutex;

	/* Protect transactions against save/restore. */
	krwlock_t suspend_lock;
} xs_state;
127 
/*
 * Request id stamped into the next outgoing message.  It is incremented
 * with xs_state.request_mutex held.
 */
static int last_req_id;

/*
 * xenstore_up records the last state announced by xs_notify_xenstore_up()
 * or xs_notify_xenstore_down() (it is also set by xs_early_init() on a
 * domU).  notify_list holds the clients wanting a xenstore state
 * notification and notify_list_lock protects it.  xenbus_taskq is the task
 * queue the notifications are dispatched on.
 */
static boolean_t xenstore_up;
static list_t notify_list;
static kmutex_t notify_list_lock;
static taskq_t *xenbus_taskq;

/* List of registered watches, and a lock to protect it. */
static list_t watches;
static kmutex_t watches_lock;

/* List of pending watch callback events, and a lock to protect it. */
static list_t watch_events;
static kmutex_t watch_events_lock;

/*
 * Details of the xenwatch callback kernel thread.  The thread waits on
 * watch_events_cv for work to do (queued on the watch_events list).  It
 * removes an event from the list under watch_events_lock and then holds
 * xenwatch_mutex while it runs the watch callback and frees the event.
 */
static kmutex_t xenwatch_mutex;
static kcondvar_t watch_events_cv;

/* Forward declaration: read_reply() calls process_msg() in polled mode. */
static int process_msg(void);
156 
157 static int
158 get_error(const char *errorstring)
159 {
160 	unsigned int i;
161 
162 	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
163 		if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
164 			cmn_err(CE_WARN,
165 			    "XENBUS xen store gave: unknown error %s",
166 			    errorstring);
167 			return (EINVAL);
168 		}
169 	}
170 	return (xsd_errors[i].errnum);
171 }
172 
173 /*
174  * Read a synchronous reply from xenstore.  Since we can return early before
175  * reading a relevant reply, we discard any messages not matching the request
176  * ID.  Caller must free returned message on success.
177  */
178 static int
179 read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
180 {
181 	extern int do_polled_io;
182 
183 	mutex_enter(&xs_state.reply_lock);
184 
185 	for (;;) {
186 		while (list_empty(&xs_state.reply_list)) {
187 			if (interrupts_unleashed && !do_polled_io) {
188 				if (cv_wait_sig(&xs_state.reply_cv,
189 				    &xs_state.reply_lock) == 0) {
190 					mutex_exit(&xs_state.reply_lock);
191 					*reply = NULL;
192 					return (EINTR);
193 				}
194 			} else { /* polled mode needed for early probes */
195 				mutex_exit(&xs_state.reply_lock);
196 				(void) HYPERVISOR_yield();
197 				(void) process_msg();
198 				mutex_enter(&xs_state.reply_lock);
199 			}
200 		}
201 
202 		*reply = list_head(&xs_state.reply_list);
203 		list_remove(&xs_state.reply_list, *reply);
204 
205 		if ((*reply)->hdr.req_id == req_hdr->req_id)
206 			break;
207 	}
208 
209 	mutex_exit(&xs_state.reply_lock);
210 	return (0);
211 }
212 
213 /* Emergency write. */
214 void
215 xenbus_debug_write(const char *str, unsigned int count)
216 {
217 	struct xsd_sockmsg msg = { 0 };
218 
219 	msg.type = XS_DEBUG;
220 	msg.len = sizeof ("print") + count + 1;
221 
222 	mutex_enter(&xs_state.request_mutex);
223 	(void) xb_write(&msg, sizeof (msg));
224 	(void) xb_write("print", sizeof ("print"));
225 	(void) xb_write(str, count);
226 	(void) xb_write("", 1);
227 	mutex_exit(&xs_state.request_mutex);
228 }
229 
230 /*
231  * This is pretty unpleasant.  First off, there's the horrible logic around
232  * suspend_lock and transactions.  Also, we can be interrupted either before we
233  * write a message, or before we receive a reply.  A client that wants to
234  * survive this can't know which case happened.  Luckily all clients don't care
235  * about signals currently, and the alternative (a hard wait on a userspace
236  * daemon) isn't exactly preferable.  Caller must free 'reply' on success.
237  */
238 int
239 xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
240 {
241 	struct xsd_sockmsg req_msg = *msg;
242 	struct xs_stored_msg *reply_msg = NULL;
243 	int err;
244 
245 	if (req_msg.type == XS_TRANSACTION_START)
246 		rw_enter(&xs_state.suspend_lock, RW_READER);
247 
248 	mutex_enter(&xs_state.request_mutex);
249 
250 	msg->req_id = last_req_id++;
251 
252 	err = xb_write(msg, sizeof (*msg) + msg->len);
253 	if (err) {
254 		if (req_msg.type == XS_TRANSACTION_START)
255 			rw_exit(&xs_state.suspend_lock);
256 		msg->type = XS_ERROR;
257 		*reply = NULL;
258 		goto out;
259 	}
260 
261 	err = read_reply(msg, &reply_msg);
262 
263 	if (err) {
264 		if (msg->type == XS_TRANSACTION_START)
265 			rw_exit(&xs_state.suspend_lock);
266 		*reply = NULL;
267 		goto out;
268 	}
269 
270 	*reply = reply_msg->un.reply.body;
271 	*msg = reply_msg->hdr;
272 
273 	if (reply_msg->hdr.type == XS_TRANSACTION_END)
274 		rw_exit(&xs_state.suspend_lock);
275 
276 out:
277 	if (reply_msg != NULL)
278 		kmem_free(reply_msg, sizeof (*reply_msg));
279 
280 	mutex_exit(&xs_state.request_mutex);
281 	return (err);
282 }
283 
284 /*
285  * Send message to xs, return errcode, rval filled in with pointer
286  * to kmem_alloc'ed reply.
287  */
288 static int
289 xs_talkv(xenbus_transaction_t t,
290 		    enum xsd_sockmsg_type type,
291 		    const iovec_t *iovec,
292 		    unsigned int num_vecs,
293 		    void **rval,
294 		    unsigned int *len)
295 {
296 	struct xsd_sockmsg msg;
297 	struct xs_stored_msg *reply_msg;
298 	char *reply;
299 	unsigned int i;
300 	int err;
301 
302 	msg.tx_id = (uint32_t)(unsigned long)t;
303 	msg.type = type;
304 	msg.len = 0;
305 	for (i = 0; i < num_vecs; i++)
306 		msg.len += iovec[i].iov_len;
307 
308 	mutex_enter(&xs_state.request_mutex);
309 
310 	msg.req_id = last_req_id++;
311 
312 	err = xb_write(&msg, sizeof (msg));
313 	if (err) {
314 		mutex_exit(&xs_state.request_mutex);
315 		return (err);
316 	}
317 
318 	for (i = 0; i < num_vecs; i++) {
319 		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
320 		if (err) {
321 			mutex_exit(&xs_state.request_mutex);
322 			return (err);
323 		}
324 	}
325 
326 	err = read_reply(&msg, &reply_msg);
327 
328 	mutex_exit(&xs_state.request_mutex);
329 
330 	if (err)
331 		return (err);
332 
333 	reply = reply_msg->un.reply.body;
334 
335 	if (reply_msg->hdr.type == XS_ERROR) {
336 		err = get_error(reply);
337 		kmem_free(reply, reply_msg->hdr.len + 1);
338 		goto out;
339 	}
340 
341 	if (len != NULL)
342 		*len = reply_msg->hdr.len + 1;
343 
344 	ASSERT(reply_msg->hdr.type == type);
345 
346 	if (rval != NULL)
347 		*rval = reply;
348 	else
349 		kmem_free(reply, reply_msg->hdr.len + 1);
350 
351 out:
352 	kmem_free(reply_msg, sizeof (*reply_msg));
353 	return (err);
354 }
355 
356 /* Simplified version of xs_talkv: single message. */
357 static int
358 xs_single(xenbus_transaction_t t,
359 			enum xsd_sockmsg_type type,
360 			const char *string, void **ret,
361 			unsigned int *len)
362 {
363 	iovec_t iovec;
364 
365 	iovec.iov_base = (char *)string;
366 	iovec.iov_len = strlen(string) + 1;
367 	return (xs_talkv(t, type, &iovec, 1, ret, len));
368 }
369 
/*
 * Count the NUL-terminated strings that start within the first len bytes
 * of the buffer at strings.
 */
static unsigned int
count_strings(const char *strings, unsigned int len)
{
	const char *end = strings + len;
	const char *cur = strings;
	unsigned int total = 0;

	while (cur < end) {
		total++;
		/* Step over this string and its terminator. */
		cur += strlen(cur) + 1;
	}

	return (total);
}
381 
382 /* Return the path to dir with /name appended. Buffer must be kmem_free()'ed */
383 static char *
384 join(const char *dir, const char *name)
385 {
386 	char *buffer;
387 	size_t slashlen;
388 
389 	slashlen = streq(name, "") ? 0 : 1;
390 	buffer = kmem_alloc(strlen(dir) + slashlen + strlen(name) + 1,
391 	    KM_SLEEP);
392 
393 	(void) strcpy(buffer, dir);
394 	if (slashlen != 0) {
395 		(void) strcat(buffer, "/");
396 		(void) strcat(buffer, name);
397 	}
398 	return (buffer);
399 }
400 
401 static char **
402 split(char *strings, unsigned int len, unsigned int *num)
403 {
404 	char *p, **ret;
405 
406 	/* Count the strings. */
407 	if ((*num = count_strings(strings, len - 1)) == 0)
408 		return (NULL);
409 
410 	/* Transfer to one big alloc for easy freeing. */
411 	ret = kmem_alloc(*num * sizeof (char *) + (len - 1), KM_SLEEP);
412 	(void) memcpy(&ret[*num], strings, len - 1);
413 	kmem_free(strings, len);
414 
415 	strings = (char *)&ret[*num];
416 	for (p = strings, *num = 0; p < strings + (len - 1);
417 	    p += strlen(p) + 1) {
418 		ret[(*num)++] = p;
419 	}
420 
421 	return (ret);
422 }
423 
424 char **
425 xenbus_directory(xenbus_transaction_t t,
426 			const char *dir, const char *node, unsigned int *num)
427 {
428 	char *strings, *path;
429 	unsigned int len;
430 	int err;
431 
432 	path = join(dir, node);
433 	err = xs_single(t, XS_DIRECTORY, path, (void **)&strings, &len);
434 	kmem_free(path, strlen(path) + 1);
435 	if (err != 0 || strings == NULL) {
436 		/* sigh, we lose error code info here */
437 		*num = 0;
438 		return (NULL);
439 	}
440 
441 	return (split(strings, len, num));
442 }
443 
444 /* Check if a path exists. Return 1 if it does. */
445 int
446 xenbus_exists(xenbus_transaction_t t, const char *dir, const char *node)
447 {
448 	char **d;
449 	unsigned int dir_n;
450 	int i, len;
451 
452 	d = xenbus_directory(t, dir, node, &dir_n);
453 	if (d == NULL)
454 		return (0);
455 	for (i = 0, len = 0; i < dir_n; i++)
456 		len += strlen(d[i]) + 1 + sizeof (char *);
457 	kmem_free(d, len);
458 	return (1);
459 }
460 
461 /*
462  * Get the value of a single file.
463  * Returns a kmem_alloced value in retp: call kmem_free() on it after use.
464  * len indicates length in bytes.
465  */
466 int
467 xenbus_read(xenbus_transaction_t t,
468 	    const char *dir, const char *node, void **retp, unsigned int *len)
469 {
470 	char *path;
471 	int err;
472 
473 	path = join(dir, node);
474 	err = xs_single(t, XS_READ, path, retp, len);
475 	kmem_free(path, strlen(path) + 1);
476 	return (err);
477 }
478 
479 /*
480  * Write the value of a single file.
481  * Returns err on failure.
482  */
483 int
484 xenbus_write(xenbus_transaction_t t,
485 		const char *dir, const char *node, const char *string)
486 {
487 	char *path;
488 	iovec_t iovec[2];
489 	int ret;
490 
491 	path = join(dir, node);
492 
493 	iovec[0].iov_base = (void *)path;
494 	iovec[0].iov_len = strlen(path) + 1;
495 	iovec[1].iov_base = (void *)string;
496 	iovec[1].iov_len = strlen(string);
497 
498 	ret = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
499 	kmem_free(path, iovec[0].iov_len);
500 	return (ret);
501 }
502 
503 /* Create a new directory. */
504 int
505 xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node)
506 {
507 	char *path;
508 	int ret;
509 
510 	path = join(dir, node);
511 	ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
512 	kmem_free(path, strlen(path) + 1);
513 	return (ret);
514 }
515 
516 /* Destroy a file or directory (directories must be empty). */
517 int
518 xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
519 {
520 	char *path;
521 	int ret;
522 
523 	path = join(dir, node);
524 	ret = xs_single(t, XS_RM, path, NULL, NULL);
525 	kmem_free(path, strlen(path) + 1);
526 	return (ret);
527 }
528 
529 /*
530  * Start a transaction: changes by others will not be seen during this
531  * transaction, and changes will not be visible to others until end.
532  */
533 int
534 xenbus_transaction_start(xenbus_transaction_t *t)
535 {
536 	void *id_str;
537 	unsigned long id;
538 	int err;
539 	unsigned int len;
540 
541 	rw_enter(&xs_state.suspend_lock, RW_READER);
542 
543 	err = xs_single(XBT_NULL, XS_TRANSACTION_START, "", &id_str, &len);
544 	if (err) {
545 		rw_exit(&xs_state.suspend_lock);
546 		return (err);
547 	}
548 
549 	(void) ddi_strtoul((char *)id_str, NULL, 0, &id);
550 	*t = (xenbus_transaction_t)id;
551 	kmem_free(id_str, len);
552 
553 	return (0);
554 }
555 
556 /*
557  * End a transaction.
558  * If abandon is true, transaction is discarded instead of committed.
559  */
560 int
561 xenbus_transaction_end(xenbus_transaction_t t, int abort)
562 {
563 	char abortstr[2];
564 	int err;
565 
566 	if (abort)
567 		(void) strcpy(abortstr, "F");
568 	else
569 		(void) strcpy(abortstr, "T");
570 
571 	err = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
572 
573 	rw_exit(&xs_state.suspend_lock);
574 
575 	return (err);
576 }
577 
578 /*
579  * Single read and scanf: returns errno or 0.  This can only handle a single
580  * conversion specifier.
581  */
582 /* SCANFLIKE4 */
583 int
584 xenbus_scanf(xenbus_transaction_t t,
585 		const char *dir, const char *node, const char *fmt, ...)
586 {
587 	va_list ap;
588 	int ret;
589 	char *val;
590 	unsigned int len;
591 
592 	ret = xenbus_read(t, dir, node, (void **)&val, &len);
593 	if (ret)
594 		return (ret);
595 
596 	va_start(ap, fmt);
597 	if (vsscanf(val, fmt, ap) != 1)
598 		ret = ERANGE;
599 	va_end(ap);
600 	kmem_free(val, len);
601 	return (ret);
602 }
603 
604 /* Single printf and write: returns errno or 0. */
605 /* PRINTFLIKE4 */
606 int
607 xenbus_printf(xenbus_transaction_t t,
608 		const char *dir, const char *node, const char *fmt, ...)
609 {
610 	va_list ap;
611 	int ret;
612 #define	PRINTF_BUFFER_SIZE 4096
613 	char *printf_buffer;
614 
615 	printf_buffer = kmem_alloc(PRINTF_BUFFER_SIZE, KM_SLEEP);
616 
617 	va_start(ap, fmt);
618 	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
619 	va_end(ap);
620 
621 	ASSERT(ret <= PRINTF_BUFFER_SIZE-1);
622 	ret = xenbus_write(t, dir, node, printf_buffer);
623 
624 	kmem_free(printf_buffer, PRINTF_BUFFER_SIZE);
625 
626 	return (ret);
627 }
628 
629 
630 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
631 int
632 xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
633 {
634 	va_list ap;
635 	const char *name;
636 	int ret = 0;
637 	unsigned int len;
638 
639 	va_start(ap, dir);
640 	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
641 		const char *fmt = va_arg(ap, char *);
642 		void *result = va_arg(ap, void *);
643 		char *p;
644 
645 		ret = xenbus_read(t, dir, name, (void **)&p, &len);
646 		if (ret)
647 			break;
648 		if (fmt) {
649 			ASSERT(result != NULL);
650 			if (sscanf(p, fmt, result) != 1)
651 				ret = EINVAL;
652 			kmem_free(p, len);
653 		} else
654 			*(char **)result = p;
655 	}
656 	va_end(ap);
657 	return (ret);
658 }
659 
660 static int
661 xs_watch(const char *path, const char *token)
662 {
663 	iovec_t iov[2];
664 
665 	iov[0].iov_base = (void *)path;
666 	iov[0].iov_len = strlen(path) + 1;
667 	iov[1].iov_base = (void *)token;
668 	iov[1].iov_len = strlen(token) + 1;
669 
670 	return (xs_talkv(XBT_NULL, XS_WATCH, iov, 2, NULL, NULL));
671 }
672 
673 static int
674 xs_unwatch(const char *path, const char *token)
675 {
676 	iovec_t iov[2];
677 
678 	iov[0].iov_base = (char *)path;
679 	iov[0].iov_len = strlen(path) + 1;
680 	iov[1].iov_base = (char *)token;
681 	iov[1].iov_len = strlen(token) + 1;
682 
683 	return (xs_talkv(XBT_NULL, XS_UNWATCH, iov, 2, NULL, NULL));
684 }
685 
686 static struct xenbus_watch *
687 find_watch(const char *token)
688 {
689 	struct xenbus_watch *i, *cmp;
690 
691 	(void) ddi_strtoul(token, NULL, 16, (unsigned long *)&cmp);
692 
693 	for (i = list_head(&watches); i != NULL; i = list_next(&watches, i))
694 		if (i == cmp)
695 			break;
696 
697 	return (i);
698 }
699 
700 /* Register a xenstore state notify callback */
701 int
702 xs_register_xenbus_callback(void (*callback)(int))
703 {
704 	struct xenbus_notify *xbn, *xnp;
705 
706 	xbn = kmem_alloc(sizeof (struct xenbus_notify), KM_SLEEP);
707 	xbn->notify_func = callback;
708 	mutex_enter(&notify_list_lock);
709 	/*
710 	 * Make sure not already on the list
711 	 */
712 	xnp = list_head(&notify_list);
713 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
714 		if (xnp->notify_func == callback) {
715 			kmem_free(xbn, sizeof (struct xenbus_notify));
716 			mutex_exit(&notify_list_lock);
717 			return (EEXIST);
718 		}
719 	}
720 	xnp = xbn;
721 	list_insert_tail(&notify_list, xbn);
722 done:
723 	if (xenstore_up)
724 		xnp->notify_func(XENSTORE_UP);
725 	mutex_exit(&notify_list_lock);
726 	return (0);
727 }
728 
729 /*
730  * Notify clients of xenstore state
731  */
732 static void
733 do_notify_callbacks(void *arg)
734 {
735 	struct xenbus_notify *xnp;
736 
737 	mutex_enter(&notify_list_lock);
738 	xnp = list_head(&notify_list);
739 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
740 		xnp->notify_func((int)((uintptr_t)arg));
741 	}
742 	mutex_exit(&notify_list_lock);
743 }
744 
745 void
746 xs_notify_xenstore_up(void)
747 {
748 	xenstore_up = B_TRUE;
749 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
750 	    (void *)XENSTORE_UP, 0);
751 }
752 
753 void
754 xs_notify_xenstore_down(void)
755 {
756 	xenstore_up = B_FALSE;
757 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
758 	    (void *)XENSTORE_DOWN, 0);
759 }
760 
761 /* Register callback to watch this node. */
762 int
763 register_xenbus_watch(struct xenbus_watch *watch)
764 {
765 	/* Pointer in ascii is the token. */
766 	char token[sizeof (watch) * 2 + 1];
767 	int err;
768 
769 	ASSERT(xenstore_up);
770 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
771 
772 	rw_enter(&xs_state.suspend_lock, RW_READER);
773 
774 	mutex_enter(&watches_lock);
775 	/*
776 	 * May be re-registering a watch if xenstore daemon was restarted
777 	 */
778 	if (find_watch(token) == NULL)
779 		list_insert_tail(&watches, watch);
780 	mutex_exit(&watches_lock);
781 
782 	err = xs_watch(watch->node, token);
783 
784 	/* Ignore errors due to multiple registration. */
785 	if ((err != 0) && (err != EEXIST)) {
786 		mutex_enter(&watches_lock);
787 		list_remove(&watches, watch);
788 		mutex_exit(&watches_lock);
789 	}
790 
791 	rw_exit(&xs_state.suspend_lock);
792 
793 	return (err);
794 }
795 
796 static void
797 free_stored_msg(struct xs_stored_msg *msg)
798 {
799 	int i, len = 0;
800 
801 	for (i = 0; i < msg->un.watch.vec_size; i++)
802 		len += strlen(msg->un.watch.vec[i]) + 1 + sizeof (char *);
803 	kmem_free(msg->un.watch.vec, len);
804 	kmem_free(msg, sizeof (*msg));
805 }
806 
807 void
808 unregister_xenbus_watch(struct xenbus_watch *watch)
809 {
810 	struct xs_stored_msg *msg;
811 	char token[sizeof (watch) * 2 + 1];
812 	int err;
813 
814 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
815 
816 	rw_enter(&xs_state.suspend_lock, RW_READER);
817 
818 	mutex_enter(&watches_lock);
819 	ASSERT(find_watch(token));
820 	list_remove(&watches, watch);
821 	mutex_exit(&watches_lock);
822 
823 	err = xs_unwatch(watch->node, token);
824 	if (err)
825 		cmn_err(CE_WARN, "XENBUS Failed to release watch %s: %d",
826 		    watch->node, err);
827 
828 	rw_exit(&xs_state.suspend_lock);
829 
830 	/* Cancel pending watch events. */
831 	mutex_enter(&watch_events_lock);
832 	msg = list_head(&watch_events);
833 
834 	while (msg != NULL) {
835 		struct xs_stored_msg *tmp = list_next(&watch_events, msg);
836 		if (msg->un.watch.handle == watch) {
837 			list_remove(&watch_events, msg);
838 			free_stored_msg(msg);
839 		}
840 		msg = tmp;
841 	}
842 
843 	mutex_exit(&watch_events_lock);
844 
845 	/* Flush any currently-executing callback, unless we are it. :-) */
846 	if (mutex_owner(&xenwatch_mutex) != curthread) {
847 		mutex_enter(&xenwatch_mutex);
848 		mutex_exit(&xenwatch_mutex);
849 	}
850 }
851 
852 void
853 xenbus_suspend(void)
854 {
855 	rw_enter(&xs_state.suspend_lock, RW_WRITER);
856 	mutex_enter(&xs_state.request_mutex);
857 
858 	xb_suspend();
859 }
860 
861 void
862 xenbus_resume(void)
863 {
864 	struct xenbus_watch *watch;
865 	char token[sizeof (watch) * 2 + 1];
866 
867 	mutex_exit(&xs_state.request_mutex);
868 
869 	xb_init();
870 	xb_setup_intr();
871 
872 	/* No need for watches_lock: the suspend_lock is sufficient. */
873 	for (watch = list_head(&watches); watch != NULL;
874 	    watch = list_next(&watches, watch)) {
875 		(void) snprintf(token, sizeof (token), "%lX", (long)watch);
876 		(void) xs_watch(watch->node, token);
877 	}
878 
879 	rw_exit(&xs_state.suspend_lock);
880 }
881 
882 static void
883 xenwatch_thread(void)
884 {
885 	struct xs_stored_msg *msg;
886 
887 	for (;;) {
888 		mutex_enter(&watch_events_lock);
889 		while (list_empty(&watch_events))
890 			cv_wait(&watch_events_cv, &watch_events_lock);
891 		msg = list_head(&watch_events);
892 		ASSERT(msg != NULL);
893 		list_remove(&watch_events, msg);
894 		mutex_exit(&watch_events_lock);
895 
896 		mutex_enter(&xenwatch_mutex);
897 		msg->un.watch.handle->callback(msg->un.watch.handle,
898 		    (const char **)msg->un.watch.vec, msg->un.watch.vec_size);
899 		free_stored_msg(msg);
900 		mutex_exit(&xenwatch_mutex);
901 	}
902 }
903 
904 static int
905 process_msg(void)
906 {
907 	struct xs_stored_msg *msg;
908 	char *body;
909 	int err, mlen;
910 
911 	msg = kmem_alloc(sizeof (*msg), KM_SLEEP);
912 
913 	err = xb_read(&msg->hdr, sizeof (msg->hdr));
914 	if (err) {
915 		kmem_free(msg, sizeof (*msg));
916 		return (err);
917 	}
918 
919 	mlen = msg->hdr.len + 1;
920 	body = kmem_alloc(mlen, KM_SLEEP);
921 
922 	err = xb_read(body, msg->hdr.len);
923 	if (err) {
924 		kmem_free(body, mlen);
925 		kmem_free(msg, sizeof (*msg));
926 		return (err);
927 	}
928 
929 	body[mlen - 1] = '\0';
930 
931 	if (msg->hdr.type == XS_WATCH_EVENT) {
932 		msg->un.watch.vec = split(body, msg->hdr.len + 1,
933 		    &msg->un.watch.vec_size);
934 		if (msg->un.watch.vec == NULL) {
935 			kmem_free(msg, sizeof (*msg));
936 			return (EIO);
937 		}
938 
939 		mutex_enter(&watches_lock);
940 		msg->un.watch.handle = find_watch(
941 		    msg->un.watch.vec[XS_WATCH_TOKEN]);
942 		if (msg->un.watch.handle != NULL) {
943 			mutex_enter(&watch_events_lock);
944 			list_insert_tail(&watch_events, msg);
945 			cv_broadcast(&watch_events_cv);
946 			mutex_exit(&watch_events_lock);
947 		} else {
948 			free_stored_msg(msg);
949 		}
950 		mutex_exit(&watches_lock);
951 	} else {
952 		msg->un.reply.body = body;
953 		mutex_enter(&xs_state.reply_lock);
954 		list_insert_tail(&xs_state.reply_list, msg);
955 		mutex_exit(&xs_state.reply_lock);
956 		cv_signal(&xs_state.reply_cv);
957 	}
958 
959 	return (0);
960 }
961 
962 static void
963 xenbus_thread(void)
964 {
965 	int err;
966 
967 	for (; interrupts_unleashed != 0; ) {
968 		err = process_msg();
969 		if (err)
970 			cmn_err(CE_WARN, "XENBUS error %d while reading "
971 			    "message", err);
972 	}
973 }
974 
975 /*
976  * When setting up xenbus, dom0 and domU have to take different paths, which
977  * makes this code a little confusing. For dom0:
978  *
979  * xs_early_init - mutex init only
980  * xs_dom0_init - called on xenbus dev attach: set up our xenstore page and
981  * event channel; start xenbus threads for responding to interrupts.
982  *
983  * And for domU:
984  *
985  * xs_early_init - mutex init; set up our xenstore page and event channel
986  * xs_domu_init - installation of IRQ handler; start xenbus threads.
987  *
988  * We need an early init on domU so we can use xenbus in polled mode to
989  * discover devices, VCPUs etc.
990  *
991  * On resume, we use xb_init() and xb_setup_intr() to restore xenbus to a
992  * working state.
993  */
994 
995 void
996 xs_early_init(void)
997 {
998 	list_create(&xs_state.reply_list, sizeof (struct xs_stored_msg),
999 	    offsetof(struct xs_stored_msg, list));
1000 	list_create(&watch_events, sizeof (struct xs_stored_msg),
1001 	    offsetof(struct xs_stored_msg, list));
1002 	list_create(&watches, sizeof (struct xenbus_watch),
1003 	    offsetof(struct xenbus_watch, list));
1004 	list_create(&notify_list, sizeof (struct xenbus_notify),
1005 	    offsetof(struct xenbus_notify, list));
1006 	mutex_init(&xs_state.reply_lock, NULL, MUTEX_DEFAULT, NULL);
1007 	mutex_init(&xs_state.request_mutex, NULL, MUTEX_DEFAULT, NULL);
1008 	mutex_init(&notify_list_lock, NULL, MUTEX_DEFAULT, NULL);
1009 	rw_init(&xs_state.suspend_lock, NULL, RW_DEFAULT, NULL);
1010 	cv_init(&xs_state.reply_cv, NULL, CV_DEFAULT, NULL);
1011 
1012 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1013 		return;
1014 
1015 	xb_init();
1016 	xenstore_up = B_TRUE;
1017 }
1018 
1019 static void
1020 xs_thread_init(void)
1021 {
1022 	(void) thread_create(NULL, 0, xenwatch_thread, NULL, 0, &p0,
1023 	    TS_RUN, minclsyspri);
1024 	(void) thread_create(NULL, 0, xenbus_thread, NULL, 0, &p0,
1025 	    TS_RUN, minclsyspri);
1026 	xenbus_taskq = taskq_create("xenbus_taskq", 1,
1027 	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
1028 	ASSERT(xenbus_taskq != NULL);
1029 }
1030 
1031 void
1032 xs_domu_init(void)
1033 {
1034 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1035 		return;
1036 
1037 	/*
1038 	 * Add interrupt handler for xenbus now, must wait till after
1039 	 * psm module is loaded.  All use of xenbus is in polled mode
1040 	 * until xs_init is called since it is what kicks off the xs
1041 	 * server threads.
1042 	 */
1043 	xs_thread_init();
1044 	xb_setup_intr();
1045 }
1046 
1047 
1048 void
1049 xs_dom0_init(void)
1050 {
1051 	static boolean_t initialized = B_FALSE;
1052 
1053 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1054 
1055 	/*
1056 	 * The xenbus driver might be re-attaching.
1057 	 */
1058 	if (initialized)
1059 		return;
1060 
1061 	xb_init();
1062 	xs_thread_init();
1063 	xb_setup_intr();
1064 
1065 	initialized = B_TRUE;
1066 }
1067