xref: /titanic_52/usr/src/uts/common/xen/io/xenbus_xs.c (revision 721fffe35d40e548a5a58dc53a2ec9c6762172d9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *
29  * xenbus_xs.c
30  *
31  * This is the kernel equivalent of the "xs" library.  We don't need everything
32  * and we use xenbus_comms for communication.
33  *
34  * Copyright (C) 2005 Rusty Russell, IBM Corporation
35  *
36  * This file may be distributed separately from the Linux kernel, or
37  * incorporated into other software packages, subject to the following license:
38  *
39  * Permission is hereby granted, free of charge, to any person obtaining a copy
40  * of this source file (the "Software"), to deal in the Software without
41  * restriction, including without limitation the rights to use, copy, modify,
42  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
43  * and to permit persons to whom the Software is furnished to do so, subject to
44  * the following conditions:
45  *
46  * The above copyright notice and this permission notice shall be included in
47  * all copies or substantial portions of the Software.
48  *
49  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55  * IN THE SOFTWARE.
56  */
57 
58 /*
59  * NOTE: To future maintainers of the Solaris version of this file:
60  * I found the Linux version of this code to be very disgusting in
61  * overloading pointers and error codes into void * return values.
62  * The main difference you will find is that all such usage is changed
63  * to pass pointers to void* to be filled in with return values and
64  * the functions return error codes.
65  */
66 
67 #include <sys/errno.h>
68 #include <sys/types.h>
69 #include <sys/sysmacros.h>
70 #include <sys/uio.h>
71 #include <sys/mutex.h>
72 #include <sys/condvar.h>
73 #include <sys/rwlock.h>
74 #include <sys/disp.h>
75 #include <sys/ddi.h>
76 #include <sys/sunddi.h>
77 #include <sys/avintr.h>
78 #include <sys/cmn_err.h>
79 #include <sys/mach_mmu.h>
80 #include <util/sscanf.h>
81 #define	_XSD_ERRORS_DEFINED
82 #ifdef XPV_HVM_DRIVER
83 #include <sys/xpv_support.h>
84 #endif
85 #include <sys/hypervisor.h>
86 #include <sys/taskq.h>
87 #include <sys/sdt.h>
88 #include <xen/sys/xenbus_impl.h>
89 #include <xen/sys/xenbus_comms.h>
90 #include <xen/sys/xendev.h>
91 #include <xen/public/io/xs_wire.h>
92 
/* True when the NUL-terminated strings a and b compare equal via strcmp(). */
#define	streq(a, b) (strcmp((a), (b)) == 0)

/* True when list_head() reports no element on the given list. */
#define	list_empty(list) (list_head(list) == NULL)
96 
/*
 * A message read from xenstore by process_msg().  XS_WATCH_EVENT messages
 * are queued on the watch_events list and use un.watch; every other message
 * type is queued on xs_state.reply_list and uses un.reply.
 */
struct xs_stored_msg {
	/* Linkage for whichever list the message is queued on. */
	list_node_t list;

	/* Wire header exactly as read from the ring. */
	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			/* hdr.len + 1 bytes; the final byte is a NUL. */
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			/* Registered watch matching the event's token. */
			struct xenbus_watch *handle;
			/* String vector produced by split(). */
			char **vec;
			/* Number of entries in vec. */
			unsigned int vec_size;
		} watch;
	} un;
};
116 
/*
 * Global state for the single xenstore connection used by this file.
 */
static struct xs_handle {
	/* A list of replies. Currently only one will ever be outstanding. */
	list_t reply_list;
	/* reply_lock guards reply_list; reply_cv is signalled on insertion. */
	kmutex_t reply_lock;
	kcondvar_t reply_cv;

	/* One request at a time. */
	kmutex_t request_mutex;

	/* Protect transactions against save/restore. */
	krwlock_t suspend_lock;
} xs_state;
129 
/*
 * Request ID stamped on the next outgoing message.  It is incremented
 * while xs_state.request_mutex is held.
 */
static int last_req_id;

/*
 * List of clients wanting a xenstore up notification, and a lock to protect it
 */
static boolean_t xenstore_up;
static list_t notify_list;
static kmutex_t notify_list_lock;
static taskq_t *xenbus_taskq;

/* List of registered watches, and a lock to protect it. */
static list_t watches;
static kmutex_t watches_lock;

/* List of pending watch callback events, and a lock to protect it. */
static list_t watch_events;
static kmutex_t watch_events_lock;

/*
 * Details of the xenwatch callback kernel thread. The thread waits on the
 * watch_events_cv for work to do (queued on watch_events list). When it
 * wakes up it dequeues one event under watch_events_lock and then holds
 * the xenwatch_mutex while it runs the watch callback.
 */
static kmutex_t xenwatch_mutex;
static kcondvar_t watch_events_cv;

/* Forward declaration: read_reply() calls this in polled mode. */
static int process_msg(void);
158 
159 static int
160 get_error(const char *errorstring)
161 {
162 	unsigned int i;
163 
164 	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
165 		if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
166 			cmn_err(CE_WARN,
167 			    "XENBUS xen store gave: unknown error %s",
168 			    errorstring);
169 			return (EINVAL);
170 		}
171 	}
172 	return (xsd_errors[i].errnum);
173 }
174 
175 /*
176  * Read a synchronous reply from xenstore.  Since we can return early before
177  * reading a relevant reply, we discard any messages not matching the request
178  * ID.  Caller must free returned message on success.
179  */
180 static int
181 read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
182 {
183 	extern int do_polled_io;
184 
185 	mutex_enter(&xs_state.reply_lock);
186 
187 	for (;;) {
188 		while (list_empty(&xs_state.reply_list)) {
189 			if (interrupts_unleashed && !do_polled_io) {
190 				if (cv_wait_sig(&xs_state.reply_cv,
191 				    &xs_state.reply_lock) == 0) {
192 					mutex_exit(&xs_state.reply_lock);
193 					*reply = NULL;
194 					return (EINTR);
195 				}
196 			} else { /* polled mode needed for early probes */
197 				mutex_exit(&xs_state.reply_lock);
198 				(void) HYPERVISOR_yield();
199 				(void) process_msg();
200 				mutex_enter(&xs_state.reply_lock);
201 			}
202 		}
203 
204 		*reply = list_head(&xs_state.reply_list);
205 		list_remove(&xs_state.reply_list, *reply);
206 
207 		if ((*reply)->hdr.req_id == req_hdr->req_id)
208 			break;
209 	}
210 
211 	mutex_exit(&xs_state.reply_lock);
212 	return (0);
213 }
214 
215 /* Emergency write. */
216 void
217 xenbus_debug_write(const char *str, unsigned int count)
218 {
219 	struct xsd_sockmsg msg = { 0 };
220 
221 	msg.type = XS_DEBUG;
222 	msg.len = sizeof ("print") + count + 1;
223 
224 	mutex_enter(&xs_state.request_mutex);
225 	(void) xb_write(&msg, sizeof (msg));
226 	(void) xb_write("print", sizeof ("print"));
227 	(void) xb_write(str, count);
228 	(void) xb_write("", 1);
229 	mutex_exit(&xs_state.request_mutex);
230 }
231 
232 /*
233  * This is pretty unpleasant.  First off, there's the horrible logic around
234  * suspend_lock and transactions.  Also, we can be interrupted either before we
235  * write a message, or before we receive a reply.  A client that wants to
236  * survive this can't know which case happened.  Luckily all clients don't care
237  * about signals currently, and the alternative (a hard wait on a userspace
238  * daemon) isn't exactly preferable.  Caller must free 'reply' on success.
239  */
240 int
241 xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
242 {
243 	struct xsd_sockmsg req_msg = *msg;
244 	struct xs_stored_msg *reply_msg = NULL;
245 	int err;
246 
247 	if (req_msg.type == XS_TRANSACTION_START)
248 		rw_enter(&xs_state.suspend_lock, RW_READER);
249 
250 	mutex_enter(&xs_state.request_mutex);
251 
252 	msg->req_id = last_req_id++;
253 
254 	err = xb_write(msg, sizeof (*msg) + msg->len);
255 	if (err) {
256 		if (req_msg.type == XS_TRANSACTION_START)
257 			rw_exit(&xs_state.suspend_lock);
258 		msg->type = XS_ERROR;
259 		*reply = NULL;
260 		goto out;
261 	}
262 
263 	err = read_reply(msg, &reply_msg);
264 
265 	if (err) {
266 		if (msg->type == XS_TRANSACTION_START)
267 			rw_exit(&xs_state.suspend_lock);
268 		*reply = NULL;
269 		goto out;
270 	}
271 
272 	*reply = reply_msg->un.reply.body;
273 	*msg = reply_msg->hdr;
274 
275 	if (reply_msg->hdr.type == XS_TRANSACTION_END)
276 		rw_exit(&xs_state.suspend_lock);
277 
278 out:
279 	if (reply_msg != NULL)
280 		kmem_free(reply_msg, sizeof (*reply_msg));
281 
282 	mutex_exit(&xs_state.request_mutex);
283 	return (err);
284 }
285 
286 /*
287  * Send message to xs, return errcode, rval filled in with pointer
288  * to kmem_alloc'ed reply.
289  */
290 static int
291 xs_talkv(xenbus_transaction_t t,
292 		    enum xsd_sockmsg_type type,
293 		    const iovec_t *iovec,
294 		    unsigned int num_vecs,
295 		    void **rval,
296 		    unsigned int *len)
297 {
298 	struct xsd_sockmsg msg;
299 	struct xs_stored_msg *reply_msg;
300 	char *reply;
301 	unsigned int i;
302 	int err;
303 
304 	msg.tx_id = (uint32_t)(unsigned long)t;
305 	msg.type = type;
306 	msg.len = 0;
307 	for (i = 0; i < num_vecs; i++)
308 		msg.len += iovec[i].iov_len;
309 
310 	mutex_enter(&xs_state.request_mutex);
311 
312 	msg.req_id = last_req_id++;
313 
314 	err = xb_write(&msg, sizeof (msg));
315 	if (err) {
316 		mutex_exit(&xs_state.request_mutex);
317 		return (err);
318 	}
319 
320 	for (i = 0; i < num_vecs; i++) {
321 		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
322 		if (err) {
323 			mutex_exit(&xs_state.request_mutex);
324 			return (err);
325 		}
326 	}
327 
328 	err = read_reply(&msg, &reply_msg);
329 
330 	mutex_exit(&xs_state.request_mutex);
331 
332 	if (err)
333 		return (err);
334 
335 	reply = reply_msg->un.reply.body;
336 
337 	if (reply_msg->hdr.type == XS_ERROR) {
338 		err = get_error(reply);
339 		kmem_free(reply, reply_msg->hdr.len + 1);
340 		goto out;
341 	}
342 
343 	if (len != NULL)
344 		*len = reply_msg->hdr.len + 1;
345 
346 	ASSERT(reply_msg->hdr.type == type);
347 
348 	if (rval != NULL)
349 		*rval = reply;
350 	else
351 		kmem_free(reply, reply_msg->hdr.len + 1);
352 
353 out:
354 	kmem_free(reply_msg, sizeof (*reply_msg));
355 	return (err);
356 }
357 
358 /* Simplified version of xs_talkv: single message. */
359 static int
360 xs_single(xenbus_transaction_t t,
361 			enum xsd_sockmsg_type type,
362 			const char *string, void **ret,
363 			unsigned int *len)
364 {
365 	iovec_t iovec;
366 
367 	iovec.iov_base = (char *)string;
368 	iovec.iov_len = strlen(string) + 1;
369 	return (xs_talkv(t, type, &iovec, 1, ret, len));
370 }
371 
/*
 * Count the NUL-terminated strings packed back to back in the first 'len'
 * bytes of 'strings'.  A string that starts inside the range is counted
 * even if its terminator lies beyond it.
 */
static unsigned int
count_strings(const char *strings, unsigned int len)
{
	const char *limit = strings + len;
	const char *cur = strings;
	unsigned int total = 0;

	while (cur < limit) {
		total++;
		/* Step over this string and its terminator. */
		cur += strlen(cur) + 1;
	}

	return (total);
}
383 
384 /* Return the path to dir with /name appended. Buffer must be kmem_free()'ed */
385 static char *
386 join(const char *dir, const char *name)
387 {
388 	char *buffer;
389 	size_t slashlen;
390 
391 	slashlen = streq(name, "") ? 0 : 1;
392 	buffer = kmem_alloc(strlen(dir) + slashlen + strlen(name) + 1,
393 	    KM_SLEEP);
394 
395 	(void) strcpy(buffer, dir);
396 	if (slashlen != 0) {
397 		(void) strcat(buffer, "/");
398 		(void) strcat(buffer, name);
399 	}
400 	return (buffer);
401 }
402 
403 static char **
404 split(char *strings, unsigned int len, unsigned int *num)
405 {
406 	char *p, **ret;
407 
408 	/* Count the strings. */
409 	if ((*num = count_strings(strings, len - 1)) == 0)
410 		return (NULL);
411 
412 	/* Transfer to one big alloc for easy freeing. */
413 	ret = kmem_alloc(*num * sizeof (char *) + (len - 1), KM_SLEEP);
414 	(void) memcpy(&ret[*num], strings, len - 1);
415 	kmem_free(strings, len);
416 
417 	strings = (char *)&ret[*num];
418 	for (p = strings, *num = 0; p < strings + (len - 1);
419 	    p += strlen(p) + 1) {
420 		ret[(*num)++] = p;
421 	}
422 
423 	return (ret);
424 }
425 
426 char **
427 xenbus_directory(xenbus_transaction_t t,
428 			const char *dir, const char *node, unsigned int *num)
429 {
430 	char *strings, *path;
431 	unsigned int len;
432 	int err;
433 
434 	path = join(dir, node);
435 	err = xs_single(t, XS_DIRECTORY, path, (void **)&strings, &len);
436 	kmem_free(path, strlen(path) + 1);
437 	if (err != 0 || strings == NULL) {
438 		/* sigh, we lose error code info here */
439 		*num = 0;
440 		return (NULL);
441 	}
442 
443 	return (split(strings, len, num));
444 }
445 
446 /* Check if a path exists. */
447 boolean_t
448 xenbus_exists(const char *dir, const char *node)
449 {
450 	void	*p;
451 	uint_t	n;
452 
453 	if (xenbus_read(XBT_NULL, dir, node, &p, &n) != 0)
454 		return (B_FALSE);
455 	kmem_free(p, n);
456 	return (B_TRUE);
457 }
458 
459 /* Check if a directory path exists. */
460 boolean_t
461 xenbus_exists_dir(const char *dir, const char *node)
462 {
463 	char **d;
464 	unsigned int dir_n;
465 	int i, len;
466 
467 	d = xenbus_directory(XBT_NULL, dir, node, &dir_n);
468 	if (d == NULL)
469 		return (B_FALSE);
470 	for (i = 0, len = 0; i < dir_n; i++)
471 		len += strlen(d[i]) + 1 + sizeof (char *);
472 	kmem_free(d, len);
473 	return (B_TRUE);
474 }
475 
476 /*
477  * Get the value of a single file.
478  * Returns a kmem_alloced value in retp: call kmem_free() on it after use.
479  * len indicates length in bytes.
480  */
481 int
482 xenbus_read(xenbus_transaction_t t,
483 	    const char *dir, const char *node, void **retp, unsigned int *len)
484 {
485 	char *path;
486 	int err;
487 
488 	path = join(dir, node);
489 	err = xs_single(t, XS_READ, path, retp, len);
490 	kmem_free(path, strlen(path) + 1);
491 	return (err);
492 }
493 
494 int
495 xenbus_read_str(const char *dir, const char *node, char **retp)
496 {
497 	uint_t	n;
498 	int	err;
499 	char	*str;
500 
501 	/*
502 	 * Since we access the xenbus value immediatly we can't be
503 	 * part of a transaction.
504 	 */
505 	if ((err = xenbus_read(XBT_NULL, dir, node, (void **)&str, &n)) != 0)
506 		return (err);
507 	ASSERT((str != NULL) && (n > 0));
508 
509 	/*
510 	 * Why bother with this?  Because xenbus is truly annoying in the
511 	 * fact that when it returns a string, it doesn't guarantee that
512 	 * the memory that holds the string is of size strlen() + 1.
513 	 * This forces callers to keep track of the size of the memory
514 	 * containing the string.  Ugh.  We'll work around this by
515 	 * re-allocate strings to always be of size strlen() + 1.
516 	 */
517 	*retp = strdup(str);
518 	kmem_free(str, n);
519 	return (0);
520 }
521 
522 /*
523  * Write the value of a single file.
524  * Returns err on failure.
525  */
526 int
527 xenbus_write(xenbus_transaction_t t,
528 		const char *dir, const char *node, const char *string)
529 {
530 	char *path;
531 	iovec_t iovec[2];
532 	int ret;
533 
534 	path = join(dir, node);
535 
536 	iovec[0].iov_base = (void *)path;
537 	iovec[0].iov_len = strlen(path) + 1;
538 	iovec[1].iov_base = (void *)string;
539 	iovec[1].iov_len = strlen(string);
540 
541 	ret = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
542 	kmem_free(path, iovec[0].iov_len);
543 	return (ret);
544 }
545 
546 /* Create a new directory. */
547 int
548 xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node)
549 {
550 	char *path;
551 	int ret;
552 
553 	path = join(dir, node);
554 	ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
555 	kmem_free(path, strlen(path) + 1);
556 	return (ret);
557 }
558 
559 /* Destroy a file or directory (directories must be empty). */
560 int
561 xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
562 {
563 	char *path;
564 	int ret;
565 
566 	path = join(dir, node);
567 	ret = xs_single(t, XS_RM, path, NULL, NULL);
568 	kmem_free(path, strlen(path) + 1);
569 	return (ret);
570 }
571 
572 /*
573  * Start a transaction: changes by others will not be seen during this
574  * transaction, and changes will not be visible to others until end.
575  */
576 int
577 xenbus_transaction_start(xenbus_transaction_t *t)
578 {
579 	void *id_str;
580 	unsigned long id;
581 	int err;
582 	unsigned int len;
583 
584 	rw_enter(&xs_state.suspend_lock, RW_READER);
585 
586 	err = xs_single(XBT_NULL, XS_TRANSACTION_START, "", &id_str, &len);
587 	if (err) {
588 		rw_exit(&xs_state.suspend_lock);
589 		return (err);
590 	}
591 
592 	(void) ddi_strtoul((char *)id_str, NULL, 0, &id);
593 	*t = (xenbus_transaction_t)id;
594 	kmem_free(id_str, len);
595 
596 	return (0);
597 }
598 
599 /*
600  * End a transaction.
601  * If abandon is true, transaction is discarded instead of committed.
602  */
603 int
604 xenbus_transaction_end(xenbus_transaction_t t, int abort)
605 {
606 	char abortstr[2];
607 	int err;
608 
609 	if (abort)
610 		(void) strcpy(abortstr, "F");
611 	else
612 		(void) strcpy(abortstr, "T");
613 
614 	err = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
615 
616 	rw_exit(&xs_state.suspend_lock);
617 
618 	return (err);
619 }
620 
621 /*
622  * Single read and scanf: returns errno or 0.  This can only handle a single
623  * conversion specifier.
624  */
625 /* SCANFLIKE4 */
626 int
627 xenbus_scanf(xenbus_transaction_t t,
628 		const char *dir, const char *node, const char *fmt, ...)
629 {
630 	va_list ap;
631 	int ret;
632 	char *val;
633 	unsigned int len;
634 
635 	ret = xenbus_read(t, dir, node, (void **)&val, &len);
636 	if (ret)
637 		return (ret);
638 
639 	va_start(ap, fmt);
640 	if (vsscanf(val, fmt, ap) != 1)
641 		ret = ERANGE;
642 	va_end(ap);
643 	kmem_free(val, len);
644 	return (ret);
645 }
646 
647 /* Single printf and write: returns errno or 0. */
648 /* PRINTFLIKE4 */
649 int
650 xenbus_printf(xenbus_transaction_t t,
651 		const char *dir, const char *node, const char *fmt, ...)
652 {
653 	va_list ap;
654 	int ret;
655 #define	PRINTF_BUFFER_SIZE 4096
656 	char *printf_buffer;
657 
658 	printf_buffer = kmem_alloc(PRINTF_BUFFER_SIZE, KM_SLEEP);
659 
660 	va_start(ap, fmt);
661 	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
662 	va_end(ap);
663 
664 	ASSERT(ret <= PRINTF_BUFFER_SIZE-1);
665 	ret = xenbus_write(t, dir, node, printf_buffer);
666 
667 	kmem_free(printf_buffer, PRINTF_BUFFER_SIZE);
668 
669 	return (ret);
670 }
671 
672 
673 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
674 int
675 xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
676 {
677 	va_list ap;
678 	const char *name;
679 	int ret = 0;
680 	unsigned int len;
681 
682 	va_start(ap, dir);
683 	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
684 		const char *fmt = va_arg(ap, char *);
685 		void *result = va_arg(ap, void *);
686 		char *p;
687 
688 		ret = xenbus_read(t, dir, name, (void **)&p, &len);
689 		if (ret)
690 			break;
691 		if (fmt) {
692 			ASSERT(result != NULL);
693 			if (sscanf(p, fmt, result) != 1)
694 				ret = EINVAL;
695 			kmem_free(p, len);
696 		} else
697 			*(char **)result = p;
698 	}
699 	va_end(ap);
700 	return (ret);
701 }
702 
703 static int
704 xs_watch(const char *path, const char *token)
705 {
706 	iovec_t iov[2];
707 
708 	iov[0].iov_base = (void *)path;
709 	iov[0].iov_len = strlen(path) + 1;
710 	iov[1].iov_base = (void *)token;
711 	iov[1].iov_len = strlen(token) + 1;
712 
713 	return (xs_talkv(XBT_NULL, XS_WATCH, iov, 2, NULL, NULL));
714 }
715 
716 static int
717 xs_unwatch(const char *path, const char *token)
718 {
719 	iovec_t iov[2];
720 
721 	iov[0].iov_base = (char *)path;
722 	iov[0].iov_len = strlen(path) + 1;
723 	iov[1].iov_base = (char *)token;
724 	iov[1].iov_len = strlen(token) + 1;
725 
726 	return (xs_talkv(XBT_NULL, XS_UNWATCH, iov, 2, NULL, NULL));
727 }
728 
729 static struct xenbus_watch *
730 find_watch(const char *token)
731 {
732 	struct xenbus_watch *i, *cmp;
733 
734 	(void) ddi_strtoul(token, NULL, 16, (unsigned long *)&cmp);
735 
736 	for (i = list_head(&watches); i != NULL; i = list_next(&watches, i))
737 		if (i == cmp)
738 			break;
739 
740 	return (i);
741 }
742 
743 /* Register a xenstore state notify callback */
744 int
745 xs_register_xenbus_callback(void (*callback)(int))
746 {
747 	struct xenbus_notify *xbn, *xnp;
748 
749 	xbn = kmem_alloc(sizeof (struct xenbus_notify), KM_SLEEP);
750 	xbn->notify_func = callback;
751 	mutex_enter(&notify_list_lock);
752 	/*
753 	 * Make sure not already on the list
754 	 */
755 	xnp = list_head(&notify_list);
756 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
757 		if (xnp->notify_func == callback) {
758 			kmem_free(xbn, sizeof (struct xenbus_notify));
759 			mutex_exit(&notify_list_lock);
760 			return (EEXIST);
761 		}
762 	}
763 	xnp = xbn;
764 	list_insert_tail(&notify_list, xbn);
765 done:
766 	if (xenstore_up)
767 		xnp->notify_func(XENSTORE_UP);
768 	mutex_exit(&notify_list_lock);
769 	return (0);
770 }
771 
772 /*
773  * Notify clients of xenstore state
774  */
775 static void
776 do_notify_callbacks(void *arg)
777 {
778 	struct xenbus_notify *xnp;
779 
780 	mutex_enter(&notify_list_lock);
781 	xnp = list_head(&notify_list);
782 	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
783 		xnp->notify_func((int)((uintptr_t)arg));
784 	}
785 	mutex_exit(&notify_list_lock);
786 }
787 
788 void
789 xs_notify_xenstore_up(void)
790 {
791 	xenstore_up = B_TRUE;
792 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
793 	    (void *)XENSTORE_UP, 0);
794 }
795 
796 void
797 xs_notify_xenstore_down(void)
798 {
799 	xenstore_up = B_FALSE;
800 	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
801 	    (void *)XENSTORE_DOWN, 0);
802 }
803 
804 /* Register callback to watch this node. */
805 int
806 register_xenbus_watch(struct xenbus_watch *watch)
807 {
808 	/* Pointer in ascii is the token. */
809 	char token[sizeof (watch) * 2 + 1];
810 	int err;
811 
812 	ASSERT(xenstore_up);
813 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
814 
815 	rw_enter(&xs_state.suspend_lock, RW_READER);
816 
817 	mutex_enter(&watches_lock);
818 	/*
819 	 * May be re-registering a watch if xenstore daemon was restarted
820 	 */
821 	if (find_watch(token) == NULL)
822 		list_insert_tail(&watches, watch);
823 	mutex_exit(&watches_lock);
824 
825 	DTRACE_XPV3(xenbus__register__watch, const char *, watch->node,
826 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
827 
828 	err = xs_watch(watch->node, token);
829 
830 	/* Ignore errors due to multiple registration. */
831 	if ((err != 0) && (err != EEXIST)) {
832 		mutex_enter(&watches_lock);
833 		list_remove(&watches, watch);
834 		mutex_exit(&watches_lock);
835 	}
836 
837 	rw_exit(&xs_state.suspend_lock);
838 
839 	return (err);
840 }
841 
842 static void
843 free_stored_msg(struct xs_stored_msg *msg)
844 {
845 	int i, len = 0;
846 
847 	for (i = 0; i < msg->un.watch.vec_size; i++)
848 		len += strlen(msg->un.watch.vec[i]) + 1 + sizeof (char *);
849 	kmem_free(msg->un.watch.vec, len);
850 	kmem_free(msg, sizeof (*msg));
851 }
852 
853 void
854 unregister_xenbus_watch(struct xenbus_watch *watch)
855 {
856 	struct xs_stored_msg *msg;
857 	char token[sizeof (watch) * 2 + 1];
858 	int err;
859 
860 	(void) snprintf(token, sizeof (token), "%lX", (long)watch);
861 
862 	rw_enter(&xs_state.suspend_lock, RW_READER);
863 
864 	mutex_enter(&watches_lock);
865 	ASSERT(find_watch(token));
866 	list_remove(&watches, watch);
867 	mutex_exit(&watches_lock);
868 
869 	DTRACE_XPV3(xenbus__unregister__watch, const char *, watch->node,
870 	    uintptr_t, watch->callback, struct xenbus_watch *, watch);
871 
872 	err = xs_unwatch(watch->node, token);
873 	if (err)
874 		cmn_err(CE_WARN, "XENBUS Failed to release watch %s: %d",
875 		    watch->node, err);
876 
877 	rw_exit(&xs_state.suspend_lock);
878 
879 	/* Cancel pending watch events. */
880 	mutex_enter(&watch_events_lock);
881 	msg = list_head(&watch_events);
882 
883 	while (msg != NULL) {
884 		struct xs_stored_msg *tmp = list_next(&watch_events, msg);
885 		if (msg->un.watch.handle == watch) {
886 			list_remove(&watch_events, msg);
887 			free_stored_msg(msg);
888 		}
889 		msg = tmp;
890 	}
891 
892 	mutex_exit(&watch_events_lock);
893 
894 	/* Flush any currently-executing callback, unless we are it. :-) */
895 	if (mutex_owner(&xenwatch_mutex) != curthread) {
896 		mutex_enter(&xenwatch_mutex);
897 		mutex_exit(&xenwatch_mutex);
898 	}
899 }
900 
901 void
902 xenbus_suspend(void)
903 {
904 	rw_enter(&xs_state.suspend_lock, RW_WRITER);
905 	mutex_enter(&xs_state.request_mutex);
906 
907 	xb_suspend();
908 }
909 
910 void
911 xenbus_resume(void)
912 {
913 	struct xenbus_watch *watch;
914 	char token[sizeof (watch) * 2 + 1];
915 
916 	mutex_exit(&xs_state.request_mutex);
917 
918 	xb_init();
919 	xb_setup_intr();
920 
921 	/* No need for watches_lock: the suspend_lock is sufficient. */
922 	for (watch = list_head(&watches); watch != NULL;
923 	    watch = list_next(&watches, watch)) {
924 		(void) snprintf(token, sizeof (token), "%lX", (long)watch);
925 		(void) xs_watch(watch->node, token);
926 	}
927 
928 	rw_exit(&xs_state.suspend_lock);
929 }
930 
931 static void
932 xenwatch_thread(void)
933 {
934 	struct xs_stored_msg *msg;
935 	struct xenbus_watch *watch;
936 
937 	for (;;) {
938 		mutex_enter(&watch_events_lock);
939 		while (list_empty(&watch_events))
940 			cv_wait(&watch_events_cv, &watch_events_lock);
941 		msg = list_head(&watch_events);
942 		ASSERT(msg != NULL);
943 		list_remove(&watch_events, msg);
944 		watch = msg->un.watch.handle;
945 		mutex_exit(&watch_events_lock);
946 
947 		mutex_enter(&xenwatch_mutex);
948 
949 		DTRACE_XPV4(xenbus__fire__watch,
950 		    const char *, watch->node,
951 		    uintptr_t, watch->callback,
952 		    struct xenbus_watch *, watch,
953 		    const char *, msg->un.watch.vec[XS_WATCH_PATH]);
954 
955 		watch->callback(watch, (const char **)msg->un.watch.vec,
956 		    msg->un.watch.vec_size);
957 
958 		free_stored_msg(msg);
959 		mutex_exit(&xenwatch_mutex);
960 	}
961 }
962 
963 static int
964 process_msg(void)
965 {
966 	struct xs_stored_msg *msg;
967 	char *body;
968 	int err, mlen;
969 
970 	msg = kmem_alloc(sizeof (*msg), KM_SLEEP);
971 
972 	err = xb_read(&msg->hdr, sizeof (msg->hdr));
973 	if (err) {
974 		kmem_free(msg, sizeof (*msg));
975 		return (err);
976 	}
977 
978 	mlen = msg->hdr.len + 1;
979 	body = kmem_alloc(mlen, KM_SLEEP);
980 
981 	err = xb_read(body, msg->hdr.len);
982 	if (err) {
983 		kmem_free(body, mlen);
984 		kmem_free(msg, sizeof (*msg));
985 		return (err);
986 	}
987 
988 	body[mlen - 1] = '\0';
989 
990 	if (msg->hdr.type == XS_WATCH_EVENT) {
991 		const char *token;
992 		msg->un.watch.vec = split(body, msg->hdr.len + 1,
993 		    &msg->un.watch.vec_size);
994 		if (msg->un.watch.vec == NULL) {
995 			kmem_free(msg, sizeof (*msg));
996 			return (EIO);
997 		}
998 
999 		mutex_enter(&watches_lock);
1000 		token = msg->un.watch.vec[XS_WATCH_TOKEN];
1001 		if ((msg->un.watch.handle = find_watch(token)) != NULL) {
1002 			mutex_enter(&watch_events_lock);
1003 
1004 			DTRACE_XPV4(xenbus__enqueue__watch,
1005 			    const char *, msg->un.watch.handle->node,
1006 			    uintptr_t, msg->un.watch.handle->callback,
1007 			    struct xenbus_watch *, msg->un.watch.handle,
1008 			    const char *, msg->un.watch.vec[XS_WATCH_PATH]);
1009 
1010 			list_insert_tail(&watch_events, msg);
1011 			cv_broadcast(&watch_events_cv);
1012 			mutex_exit(&watch_events_lock);
1013 		} else {
1014 			free_stored_msg(msg);
1015 		}
1016 		mutex_exit(&watches_lock);
1017 	} else {
1018 		msg->un.reply.body = body;
1019 		mutex_enter(&xs_state.reply_lock);
1020 		list_insert_tail(&xs_state.reply_list, msg);
1021 		mutex_exit(&xs_state.reply_lock);
1022 		cv_signal(&xs_state.reply_cv);
1023 	}
1024 
1025 	return (0);
1026 }
1027 
1028 static void
1029 xenbus_thread(void)
1030 {
1031 	int err;
1032 
1033 	/*
1034 	 * We have to wait for interrupts to be ready, so we don't clash
1035 	 * with the polled-IO code in read_reply().
1036 	 */
1037 	while (!interrupts_unleashed)
1038 		delay(10);
1039 
1040 	for (;;) {
1041 		err = process_msg();
1042 		if (err)
1043 			cmn_err(CE_WARN, "XENBUS error %d while reading "
1044 			    "message", err);
1045 	}
1046 }
1047 
1048 /*
1049  * When setting up xenbus, dom0 and domU have to take different paths, which
1050  * makes this code a little confusing. For dom0:
1051  *
1052  * xs_early_init - mutex init only
1053  * xs_dom0_init - called on xenbus dev attach: set up our xenstore page and
1054  * event channel; start xenbus threads for responding to interrupts.
1055  *
1056  * And for domU:
1057  *
1058  * xs_early_init - mutex init; set up our xenstore page and event channel
1059  * xs_domu_init - installation of IRQ handler; start xenbus threads.
1060  *
1061  * We need an early init on domU so we can use xenbus in polled mode to
1062  * discover devices, VCPUs etc.
1063  *
1064  * On resume, we use xb_init() and xb_setup_intr() to restore xenbus to a
1065  * working state.
1066  */
1067 
1068 void
1069 xs_early_init(void)
1070 {
1071 	list_create(&xs_state.reply_list, sizeof (struct xs_stored_msg),
1072 	    offsetof(struct xs_stored_msg, list));
1073 	list_create(&watch_events, sizeof (struct xs_stored_msg),
1074 	    offsetof(struct xs_stored_msg, list));
1075 	list_create(&watches, sizeof (struct xenbus_watch),
1076 	    offsetof(struct xenbus_watch, list));
1077 	list_create(&notify_list, sizeof (struct xenbus_notify),
1078 	    offsetof(struct xenbus_notify, list));
1079 	mutex_init(&xs_state.reply_lock, NULL, MUTEX_DEFAULT, NULL);
1080 	mutex_init(&xs_state.request_mutex, NULL, MUTEX_DEFAULT, NULL);
1081 	mutex_init(&notify_list_lock, NULL, MUTEX_DEFAULT, NULL);
1082 	rw_init(&xs_state.suspend_lock, NULL, RW_DEFAULT, NULL);
1083 	cv_init(&xs_state.reply_cv, NULL, CV_DEFAULT, NULL);
1084 
1085 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1086 		return;
1087 
1088 	xb_init();
1089 	xenstore_up = B_TRUE;
1090 }
1091 
1092 static void
1093 xs_thread_init(void)
1094 {
1095 	(void) thread_create(NULL, 0, xenwatch_thread, NULL, 0, &p0,
1096 	    TS_RUN, minclsyspri);
1097 	(void) thread_create(NULL, 0, xenbus_thread, NULL, 0, &p0,
1098 	    TS_RUN, minclsyspri);
1099 	xenbus_taskq = taskq_create("xenbus_taskq", 1,
1100 	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
1101 	ASSERT(xenbus_taskq != NULL);
1102 }
1103 
1104 void
1105 xs_domu_init(void)
1106 {
1107 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1108 		return;
1109 
1110 	/*
1111 	 * Add interrupt handler for xenbus now, must wait till after
1112 	 * psm module is loaded.  All use of xenbus is in polled mode
1113 	 * until xs_init is called since it is what kicks off the xs
1114 	 * server threads.
1115 	 */
1116 	xs_thread_init();
1117 	xb_setup_intr();
1118 }
1119 
1120 
1121 void
1122 xs_dom0_init(void)
1123 {
1124 	static boolean_t initialized = B_FALSE;
1125 
1126 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1127 
1128 	/*
1129 	 * The xenbus driver might be re-attaching.
1130 	 */
1131 	if (initialized)
1132 		return;
1133 
1134 	xb_init();
1135 	xs_thread_init();
1136 	xb_setup_intr();
1137 
1138 	initialized = B_TRUE;
1139 }
1140