/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 *
 * xenbus_xs.c
 *
 * This is the kernel equivalent of the "xs" library. We don't need everything
 * and we use xenbus_comms for communication.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/*
 * NOTE: To future maintainers of the Solaris version of this file:
 * I found the Linux version of this code to be very disgusting in
 * overloading pointers and error codes into void * return values.
 * The main difference you will find is that all such usage has been
 * changed: these functions take a pointer to void * that they fill in
 * with the result, and they return an error code instead.
 */
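
/*
 * A hedged usage sketch of that convention (the "device/vif/0" path and
 * "backend" node below are hypothetical, for illustration only): the result
 * comes back through an out parameter and the return value is a plain errno.
 *
 *	void *val;
 *	unsigned int len;
 *	int err;
 *
 *	err = xenbus_read(XBT_NULL, "device/vif/0", "backend", &val, &len);
 *	if (err != 0)
 *		return (err);
 *	...
 *	kmem_free(val, len);
 */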

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/uio.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/rwlock.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/avintr.h>
#include <sys/cmn_err.h>
#include <sys/mach_mmu.h>
#include <util/sscanf.h>
#define	_XSD_ERRORS_DEFINED
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#endif
#include <sys/hypervisor.h>
#include <sys/taskq.h>
#include <sys/sdt.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xenbus_comms.h>
#include <xen/sys/xendev.h>
#include <xen/public/io/xs_wire.h>

#define	streq(a, b) (strcmp((a), (b)) == 0)

#define	list_empty(list) (list_head(list) == NULL)

struct xs_stored_msg {
	list_node_t list;

	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			struct xenbus_watch *handle;
			char **vec;
			unsigned int vec_size;
		} watch;
	} un;
};

static struct xs_handle {
	/* A list of replies. Currently only one will ever be outstanding. */
	list_t reply_list;
	kmutex_t reply_lock;
	kcondvar_t reply_cv;

	/* One request at a time. */
	kmutex_t request_mutex;

	/* Protect transactions against save/restore. */
	krwlock_t suspend_lock;
} xs_state;

static int last_req_id;

/*
 * List of clients wanting a xenstore up notification, and a lock to protect it
 */
static boolean_t xenstore_up;
static list_t notify_list;
static kmutex_t notify_list_lock;
static taskq_t *xenbus_taskq;

/* List of registered watches, and a lock to protect it. */
static list_t watches;
static kmutex_t watches_lock;

/* List of pending watch callback events, and a lock to protect it. */
static list_t watch_events;
static kmutex_t watch_events_lock;

/*
 * Details of the xenwatch callback kernel thread. The thread waits on the
 * watch_events_cv for work to do (queued on watch_events list). When it
 * wakes up it acquires the xenwatch_mutex before reading the list and
 * carrying out work.
 */
static kmutex_t xenwatch_mutex;
static kcondvar_t watch_events_cv;

static int process_msg(void);

static int
get_error(const char *errorstring)
{
	unsigned int i;

	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
		if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
			cmn_err(CE_WARN,
			    "XENBUS xen store gave: unknown error %s",
			    errorstring);
			return (EINVAL);
		}
	}
	return (xsd_errors[i].errnum);
}

/*
 * Read a synchronous reply from xenstore. Since a caller can return early
 * (e.g. on a signal) before reading its reply, stale replies may be left
 * queued; we therefore discard any messages not matching the request ID.
 * Caller must free the returned message on success.
 */
static int
read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
{
	extern int do_polled_io;

	mutex_enter(&xs_state.reply_lock);

	for (;;) {
		while (list_empty(&xs_state.reply_list)) {
			if (interrupts_unleashed && !do_polled_io) {
				if (cv_wait_sig(&xs_state.reply_cv,
				    &xs_state.reply_lock) == 0) {
					mutex_exit(&xs_state.reply_lock);
					*reply = NULL;
					return (EINTR);
				}
			} else { /* polled mode needed for early probes */
				mutex_exit(&xs_state.reply_lock);
				(void) HYPERVISOR_yield();
				(void) process_msg();
				mutex_enter(&xs_state.reply_lock);
			}
		}

		*reply = list_head(&xs_state.reply_list);
		list_remove(&xs_state.reply_list, *reply);

		if ((*reply)->hdr.req_id == req_hdr->req_id)
			break;
	}

	mutex_exit(&xs_state.reply_lock);
	return (0);
}

/* Emergency write. */
void
xenbus_debug_write(const char *str, unsigned int count)
{
	struct xsd_sockmsg msg = { 0 };

	msg.type = XS_DEBUG;
	msg.len = sizeof ("print") + count + 1;

	mutex_enter(&xs_state.request_mutex);
	(void) xb_write(&msg, sizeof (msg));
	(void) xb_write("print", sizeof ("print"));
	(void) xb_write(str, count);
	(void) xb_write("", 1);
	mutex_exit(&xs_state.request_mutex);
}

/*
 * This is pretty unpleasant. First off, there's the horrible logic around
 * suspend_lock and transactions. Also, we can be interrupted either before we
 * write a message or before we receive a reply, and a client that wants to
 * survive this can't tell which case happened. Luckily, no current client
 * cares about signals, and the alternative (a hard wait on a userspace
 * daemon) isn't exactly preferable. Caller must free 'reply' on success.
 */
int
xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
{
	struct xsd_sockmsg req_msg = *msg;
	struct xs_stored_msg *reply_msg = NULL;
	int err;

	if (req_msg.type == XS_TRANSACTION_START)
		rw_enter(&xs_state.suspend_lock, RW_READER);

	mutex_enter(&xs_state.request_mutex);

	msg->req_id = last_req_id++;

	err = xb_write(msg, sizeof (*msg) + msg->len);
	if (err) {
		if (req_msg.type == XS_TRANSACTION_START)
			rw_exit(&xs_state.suspend_lock);
		msg->type = XS_ERROR;
		*reply = NULL;
		goto out;
	}

	err = read_reply(msg, &reply_msg);

	if (err) {
		if (msg->type == XS_TRANSACTION_START)
			rw_exit(&xs_state.suspend_lock);
		*reply = NULL;
		goto out;
	}

	*reply = reply_msg->un.reply.body;
	*msg = reply_msg->hdr;

	if (reply_msg->hdr.type == XS_TRANSACTION_END)
		rw_exit(&xs_state.suspend_lock);

out:
	if (reply_msg != NULL)
		kmem_free(reply_msg, sizeof (*reply_msg));

	mutex_exit(&xs_state.request_mutex);
	return (err);
}

/*
 * Send a message to xenstore. Returns an error code; on success, *rval
 * (if non-NULL) is filled in with a pointer to the kmem_alloc'ed reply.
 */
static int
xs_talkv(xenbus_transaction_t t,
    enum xsd_sockmsg_type type,
    const iovec_t *iovec,
    unsigned int num_vecs,
    void **rval,
    unsigned int *len)
{
	struct xsd_sockmsg msg;
	struct xs_stored_msg *reply_msg;
	char *reply;
	unsigned int i;
	int err;

	msg.tx_id = (uint32_t)(unsigned long)t;
	msg.type = type;
	msg.len = 0;
	for (i = 0; i < num_vecs; i++)
		msg.len += iovec[i].iov_len;

	mutex_enter(&xs_state.request_mutex);

	msg.req_id = last_req_id++;

	err = xb_write(&msg, sizeof (msg));
	if (err) {
		mutex_exit(&xs_state.request_mutex);
		return (err);
	}

	for (i = 0; i < num_vecs; i++) {
		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
		if (err) {
			mutex_exit(&xs_state.request_mutex);
			return (err);
		}
	}

	err = read_reply(&msg, &reply_msg);

	mutex_exit(&xs_state.request_mutex);

	if (err)
		return (err);

	reply = reply_msg->un.reply.body;

	if (reply_msg->hdr.type == XS_ERROR) {
		err = get_error(reply);
		kmem_free(reply, reply_msg->hdr.len + 1);
		goto out;
	}

	if (len != NULL)
		*len = reply_msg->hdr.len + 1;

	ASSERT(reply_msg->hdr.type == type);

	if (rval != NULL)
		*rval = reply;
	else
		kmem_free(reply, reply_msg->hdr.len + 1);

out:
	kmem_free(reply_msg, sizeof (*reply_msg));
	return (err);
}

/* Simplified version of xs_talkv: single message. */
static int
xs_single(xenbus_transaction_t t,
    enum xsd_sockmsg_type type,
    const char *string, void **ret,
    unsigned int *len)
{
	iovec_t iovec;

	iovec.iov_base = (char *)string;
	iovec.iov_len = strlen(string) + 1;
	return (xs_talkv(t, type, &iovec, 1, ret, len));
}

static unsigned int
count_strings(const char *strings, unsigned int len)
{
	unsigned int num;
	const char *p;

	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
		num++;

	return (num);
}

/* Return the path to dir with /name appended. Buffer must be kmem_free()'ed */
static char *
join(const char *dir, const char *name)
{
	char *buffer;
	size_t slashlen;

	slashlen = streq(name, "") ? 0 : 1;
	buffer = kmem_alloc(strlen(dir) + slashlen + strlen(name) + 1,
	    KM_SLEEP);

	(void) strcpy(buffer, dir);
	if (slashlen != 0) {
		(void) strcat(buffer, "/");
		(void) strcat(buffer, name);
	}
	return (buffer);
}
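
/*
 * For example (illustrative values only): join("device/vif/0", "state")
 * returns a kmem_alloc'ed "device/vif/0/state", while join("device", "")
 * simply returns a copy of "device". The caller frees the result with
 * kmem_free(buffer, strlen(buffer) + 1), as the callers below do.
 */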

static char **
split(char *strings, unsigned int len, unsigned int *num)
{
	char *p, **ret;

	/* Count the strings. */
	if ((*num = count_strings(strings, len - 1)) == 0)
		return (NULL);

	/* Transfer to one big alloc for easy freeing. */
	ret = kmem_alloc(*num * sizeof (char *) + (len - 1), KM_SLEEP);
	(void) memcpy(&ret[*num], strings, len - 1);
	kmem_free(strings, len);

	strings = (char *)&ret[*num];
	for (p = strings, *num = 0; p < strings + (len - 1);
	    p += strlen(p) + 1) {
		ret[(*num)++] = p;
	}

	return (ret);
}
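
/*
 * Layout sketch of what split() produces, with a made-up payload: for a
 * watch event carrying the two NUL-separated strings "device/vif/0/state"
 * (the watched path, hypothetical here) and its watch token, the result is
 * a single allocation:
 *
 *	[ ret[0] | ret[1] | "device/vif/0/state\0<token>\0" ]
 *
 * ret[0] and ret[1] point into the copied string area and *num is set to 2,
 * so the caller can release everything with one kmem_free(); see
 * free_stored_msg() below for the matching size computation.
 */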

char **
xenbus_directory(xenbus_transaction_t t,
    const char *dir, const char *node, unsigned int *num)
{
	char *strings, *path;
	unsigned int len;
	int err;

	path = join(dir, node);
	err = xs_single(t, XS_DIRECTORY, path, (void **)&strings, &len);
	kmem_free(path, strlen(path) + 1);
	if (err != 0 || strings == NULL) {
		/* sigh, we lose error code info here */
		*num = 0;
		return (NULL);
	}

	return (split(strings, len, num));
}

/* Check if a path exists. */
boolean_t
xenbus_exists(const char *dir, const char *node)
{
	void *p;
	uint_t n;

	if (xenbus_read(XBT_NULL, dir, node, &p, &n) != 0)
		return (B_FALSE);
	kmem_free(p, n);
	return (B_TRUE);
}

/* Check if a directory path exists. */
boolean_t
xenbus_exists_dir(const char *dir, const char *node)
{
	char **d;
	unsigned int dir_n;
	int i, len;

	d = xenbus_directory(XBT_NULL, dir, node, &dir_n);
	if (d == NULL)
		return (B_FALSE);
	for (i = 0, len = 0; i < dir_n; i++)
		len += strlen(d[i]) + 1 + sizeof (char *);
	kmem_free(d, len);
	return (B_TRUE);
}

/*
 * Get the value of a single file.
 * Returns a kmem_alloced value in retp: call kmem_free() on it after use.
 * len indicates length in bytes.
 */
int
xenbus_read(xenbus_transaction_t t,
    const char *dir, const char *node, void **retp, unsigned int *len)
{
	char *path;
	int err;

	path = join(dir, node);
	err = xs_single(t, XS_READ, path, retp, len);
	kmem_free(path, strlen(path) + 1);
	return (err);
}

int
xenbus_read_str(const char *dir, const char *node, char **retp)
{
	uint_t n;
	int err;
	char *str;

	/*
	 * Since we access the xenbus value immediately we can't be
	 * part of a transaction.
	 */
	if ((err = xenbus_read(XBT_NULL, dir, node, (void **)&str, &n)) != 0)
		return (err);
	ASSERT((str != NULL) && (n > 0));

	/*
	 * Why bother with this? Because xenbus is truly annoying in the
	 * fact that when it returns a string, it doesn't guarantee that
	 * the memory that holds the string is of size strlen() + 1.
	 * This forces callers to keep track of the size of the memory
	 * containing the string. Ugh. We'll work around this by
	 * re-allocating strings to always be of size strlen() + 1.
	 */
	*retp = strdup(str);
	kmem_free(str, n);
	return (0);
}
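
/*
 * A hedged usage sketch for the above (the "device/vif/0" path and "backend"
 * node are hypothetical): because xenbus_read_str() hands back a strdup()'ed
 * string, the caller no longer needs to track the buffer length and can
 * release it with strfree().
 *
 *	char *be;
 *
 *	if (xenbus_read_str("device/vif/0", "backend", &be) == 0) {
 *		cmn_err(CE_CONT, "backend is %s\n", be);
 *		strfree(be);
 *	}
 */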

/*
 * Write the value of a single file.
 * Returns err on failure.
 */
int
xenbus_write(xenbus_transaction_t t,
    const char *dir, const char *node, const char *string)
{
	char *path;
	iovec_t iovec[2];
	int ret;

	path = join(dir, node);

	iovec[0].iov_base = (void *)path;
	iovec[0].iov_len = strlen(path) + 1;
	iovec[1].iov_base = (void *)string;
	iovec[1].iov_len = strlen(string);

	ret = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
	kmem_free(path, iovec[0].iov_len);
	return (ret);
}

/* Create a new directory. */
int
xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node)
{
	char *path;
	int ret;

	path = join(dir, node);
	ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
	kmem_free(path, strlen(path) + 1);
	return (ret);
}

/* Destroy a file or directory (directories must be empty). */
int
xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
{
	char *path;
	int ret;

	path = join(dir, node);
	ret = xs_single(t, XS_RM, path, NULL, NULL);
	kmem_free(path, strlen(path) + 1);
	return (ret);
}

/*
 * Start a transaction: changes by others will not be seen during this
 * transaction, and changes will not be visible to others until end.
 */
int
xenbus_transaction_start(xenbus_transaction_t *t)
{
	void *id_str;
	unsigned long id;
	int err;
	unsigned int len;

	rw_enter(&xs_state.suspend_lock, RW_READER);

	err = xs_single(XBT_NULL, XS_TRANSACTION_START, "", &id_str, &len);
	if (err) {
		rw_exit(&xs_state.suspend_lock);
		return (err);
	}

	(void) ddi_strtoul((char *)id_str, NULL, 0, &id);
	*t = (xenbus_transaction_t)id;
	kmem_free(id_str, len);

	return (0);
}

/*
 * End a transaction.
 * If abort is non-zero, the transaction is discarded instead of committed.
 */
int
xenbus_transaction_end(xenbus_transaction_t t, int abort)
{
	char abortstr[2];
	int err;

	if (abort)
		(void) strcpy(abortstr, "F");
	else
		(void) strcpy(abortstr, "T");

	err = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);

	rw_exit(&xs_state.suspend_lock);

	return (err);
}
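
/*
 * A hedged sketch of the usual transaction pattern built on the two calls
 * above (the "device/vif/0" path and node name are hypothetical). A
 * conflicting concurrent update typically surfaces as EAGAIN from
 * xenbus_transaction_end(), at which point the whole transaction is retried.
 *
 *	xenbus_transaction_t xbt;
 *	unsigned int ref = ...;
 *	int err;
 *
 *    again:
 *	err = xenbus_transaction_start(&xbt);
 *	if (err)
 *		return (err);
 *	err = xenbus_printf(xbt, "device/vif/0", "tx-ring-ref", "%u", ref);
 *	if (err) {
 *		(void) xenbus_transaction_end(xbt, 1);
 *		return (err);
 *	}
 *	err = xenbus_transaction_end(xbt, 0);
 *	if (err == EAGAIN)
 *		goto again;
 */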

/*
 * Single read and scanf: returns errno or 0. This can only handle a single
 * conversion specifier.
 */
/* SCANFLIKE4 */
int
xenbus_scanf(xenbus_transaction_t t,
    const char *dir, const char *node, const char *fmt, ...)
{
	va_list ap;
	int ret;
	char *val;
	unsigned int len;

	ret = xenbus_read(t, dir, node, (void **)&val, &len);
	if (ret)
		return (ret);

	va_start(ap, fmt);
	if (vsscanf(val, fmt, ap) != 1)
		ret = ERANGE;
	va_end(ap);
	kmem_free(val, len);
	return (ret);
}
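
/*
 * Typical (illustrative) use of xenbus_scanf() and its counterpart
 * xenbus_printf() below; the "device/vif/0" path is an example only.
 *
 *	int state;
 *
 *	if (xenbus_scanf(XBT_NULL, "device/vif/0", "state", "%d", &state) == 0)
 *		(void) xenbus_printf(XBT_NULL, "device/vif/0", "state",
 *		    "%d", XenbusStateConnected);
 */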

/* Single printf and write: returns errno or 0. */
/* PRINTFLIKE4 */
int
xenbus_printf(xenbus_transaction_t t,
    const char *dir, const char *node, const char *fmt, ...)
{
	va_list ap;
	int ret;
#define	PRINTF_BUFFER_SIZE 4096
	char *printf_buffer;

	printf_buffer = kmem_alloc(PRINTF_BUFFER_SIZE, KM_SLEEP);

	va_start(ap, fmt);
	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
	va_end(ap);

	ASSERT(ret <= PRINTF_BUFFER_SIZE - 1);
	ret = xenbus_write(t, dir, node, printf_buffer);

	kmem_free(printf_buffer, PRINTF_BUFFER_SIZE);

	return (ret);
}


/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
int
xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
{
	va_list ap;
	const char *name;
	int ret = 0;
	unsigned int len;

	va_start(ap, dir);
	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
		const char *fmt = va_arg(ap, char *);
		void *result = va_arg(ap, void *);
		char *p;

		ret = xenbus_read(t, dir, name, (void **)&p, &len);
		if (ret)
			break;
		if (fmt) {
			ASSERT(result != NULL);
			if (sscanf(p, fmt, result) != 1)
				ret = EINVAL;
			kmem_free(p, len);
		} else
			*(char **)result = p;
	}
	va_end(ap);
	return (ret);
}
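
/*
 * A hedged example of xenbus_gather() (the path and node names are
 * hypothetical). Each tuple is (node, scanf format, result pointer); a NULL
 * format stores the raw kmem_alloc'ed string instead, and a single NULL
 * terminates the list.
 *
 *	unsigned int ring_ref, evtchn;
 *	char *mac;
 *	int err;
 *
 *	err = xenbus_gather(XBT_NULL, "device/vif/0",
 *	    "ring-ref", "%u", &ring_ref,
 *	    "event-channel", "%u", &evtchn,
 *	    "mac", NULL, &mac,
 *	    NULL);
 */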

static int
xs_watch(const char *path, const char *token)
{
	iovec_t iov[2];

	iov[0].iov_base = (void *)path;
	iov[0].iov_len = strlen(path) + 1;
	iov[1].iov_base = (void *)token;
	iov[1].iov_len = strlen(token) + 1;

	return (xs_talkv(XBT_NULL, XS_WATCH, iov, 2, NULL, NULL));
}

static int
xs_unwatch(const char *path, const char *token)
{
	iovec_t iov[2];

	iov[0].iov_base = (char *)path;
	iov[0].iov_len = strlen(path) + 1;
	iov[1].iov_base = (char *)token;
	iov[1].iov_len = strlen(token) + 1;

	return (xs_talkv(XBT_NULL, XS_UNWATCH, iov, 2, NULL, NULL));
}

static struct xenbus_watch *
find_watch(const char *token)
{
	struct xenbus_watch *i, *cmp;

	(void) ddi_strtoul(token, NULL, 16, (unsigned long *)&cmp);

	for (i = list_head(&watches); i != NULL; i = list_next(&watches, i))
		if (i == cmp)
			break;

	return (i);
}

/* Register a xenstore state notify callback */
int
xs_register_xenbus_callback(void (*callback)(int))
{
	struct xenbus_notify *xbn, *xnp;

	xbn = kmem_alloc(sizeof (struct xenbus_notify), KM_SLEEP);
	xbn->notify_func = callback;
	mutex_enter(&notify_list_lock);
	/*
	 * Make sure the callback is not already on the list.
	 */
	xnp = list_head(&notify_list);
	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
		if (xnp->notify_func == callback) {
			kmem_free(xbn, sizeof (struct xenbus_notify));
			mutex_exit(&notify_list_lock);
			return (EEXIST);
		}
	}
	xnp = xbn;
	list_insert_tail(&notify_list, xbn);
	if (xenstore_up)
		xnp->notify_func(XENSTORE_UP);
	mutex_exit(&notify_list_lock);
	return (0);
}
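
/*
 * Illustrative registration of a notify callback (the function name
 * my_xenstore_notify is hypothetical); XENSTORE_UP/XENSTORE_DOWN are the
 * values passed by the taskq dispatches below.
 *
 *	static void
 *	my_xenstore_notify(int state)
 *	{
 *		if (state == XENSTORE_UP)
 *			cmn_err(CE_CONT, "xenstore is up\n");
 *	}
 *
 *	(void) xs_register_xenbus_callback(my_xenstore_notify);
 */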

/*
 * Notify clients of xenstore state
 */
static void
do_notify_callbacks(void *arg)
{
	struct xenbus_notify *xnp;

	mutex_enter(&notify_list_lock);
	xnp = list_head(&notify_list);
	for (; xnp != NULL; xnp = list_next(&notify_list, xnp)) {
		xnp->notify_func((int)((uintptr_t)arg));
	}
	mutex_exit(&notify_list_lock);
}

void
xs_notify_xenstore_up(void)
{
	xenstore_up = B_TRUE;
	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
	    (void *)XENSTORE_UP, 0);
}

void
xs_notify_xenstore_down(void)
{
	xenstore_up = B_FALSE;
	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
	    (void *)XENSTORE_DOWN, 0);
}

/* Register callback to watch this node. */
int
register_xenbus_watch(struct xenbus_watch *watch)
{
	/* Pointer in ascii is the token. */
	char token[sizeof (watch) * 2 + 1];
	int err;

	ASSERT(xenstore_up);
	(void) snprintf(token, sizeof (token), "%lX", (long)watch);

	rw_enter(&xs_state.suspend_lock, RW_READER);

	mutex_enter(&watches_lock);
	/*
	 * We may be re-registering a watch if the xenstore daemon was
	 * restarted.
	 */
	if (find_watch(token) == NULL)
		list_insert_tail(&watches, watch);
	mutex_exit(&watches_lock);

	DTRACE_XPV3(xenbus__register__watch, const char *, watch->node,
	    uintptr_t, watch->callback, struct xenbus_watch *, watch);

	err = xs_watch(watch->node, token);

	/* Ignore errors due to multiple registration. */
	if ((err != 0) && (err != EEXIST)) {
		mutex_enter(&watches_lock);
		list_remove(&watches, watch);
		mutex_exit(&watches_lock);
	}

	rw_exit(&xs_state.suspend_lock);

	return (err);
}
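
/*
 * A hedged sketch of how a client typically sets up a watch using the
 * fields referenced above (my_watch_cb and the watched path are
 * hypothetical; struct xenbus_watch comes from xenbus_impl.h and must stay
 * allocated while the watch is registered).
 *
 *	static void
 *	my_watch_cb(struct xenbus_watch *w, const char **vec, unsigned int len)
 *	{
 *		cmn_err(CE_CONT, "node %s changed\n", vec[XS_WATCH_PATH]);
 *	}
 *
 *	static struct xenbus_watch my_watch;
 *
 *	my_watch.node = "device/vif/0/state";
 *	my_watch.callback = my_watch_cb;
 *	if (register_xenbus_watch(&my_watch) != 0)
 *		cmn_err(CE_WARN, "watch registration failed");
 */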

static void
free_stored_msg(struct xs_stored_msg *msg)
{
	int i, len = 0;

	for (i = 0; i < msg->un.watch.vec_size; i++)
		len += strlen(msg->un.watch.vec[i]) + 1 + sizeof (char *);
	kmem_free(msg->un.watch.vec, len);
	kmem_free(msg, sizeof (*msg));
}

void
unregister_xenbus_watch(struct xenbus_watch *watch)
{
	struct xs_stored_msg *msg;
	char token[sizeof (watch) * 2 + 1];
	int err;

	(void) snprintf(token, sizeof (token), "%lX", (long)watch);

	rw_enter(&xs_state.suspend_lock, RW_READER);

	mutex_enter(&watches_lock);
	ASSERT(find_watch(token));
	list_remove(&watches, watch);
	mutex_exit(&watches_lock);

	DTRACE_XPV3(xenbus__unregister__watch, const char *, watch->node,
	    uintptr_t, watch->callback, struct xenbus_watch *, watch);

	err = xs_unwatch(watch->node, token);
	if (err)
		cmn_err(CE_WARN, "XENBUS Failed to release watch %s: %d",
		    watch->node, err);

	rw_exit(&xs_state.suspend_lock);

	/* Cancel pending watch events. */
	mutex_enter(&watch_events_lock);
	msg = list_head(&watch_events);

	while (msg != NULL) {
		struct xs_stored_msg *tmp = list_next(&watch_events, msg);
		if (msg->un.watch.handle == watch) {
			list_remove(&watch_events, msg);
			free_stored_msg(msg);
		}
		msg = tmp;
	}

	mutex_exit(&watch_events_lock);

	/* Flush any currently-executing callback, unless we are it. :-) */
	if (mutex_owner(&xenwatch_mutex) != curthread) {
		mutex_enter(&xenwatch_mutex);
		mutex_exit(&xenwatch_mutex);
	}
}

void
xenbus_suspend(void)
{
	rw_enter(&xs_state.suspend_lock, RW_WRITER);
	mutex_enter(&xs_state.request_mutex);

	xb_suspend();
}

void
xenbus_resume(void)
{
	struct xenbus_watch *watch;
	char token[sizeof (watch) * 2 + 1];

	mutex_exit(&xs_state.request_mutex);

	xb_init();
	xb_setup_intr();

	/* No need for watches_lock: the suspend_lock is sufficient. */
	for (watch = list_head(&watches); watch != NULL;
	    watch = list_next(&watches, watch)) {
		(void) snprintf(token, sizeof (token), "%lX", (long)watch);
		(void) xs_watch(watch->node, token);
	}

	rw_exit(&xs_state.suspend_lock);
}

static void
xenwatch_thread(void)
{
	struct xs_stored_msg *msg;
	struct xenbus_watch *watch;

	for (;;) {
		mutex_enter(&watch_events_lock);
		while (list_empty(&watch_events))
			cv_wait(&watch_events_cv, &watch_events_lock);
		msg = list_head(&watch_events);
		ASSERT(msg != NULL);
		list_remove(&watch_events, msg);
		watch = msg->un.watch.handle;
		mutex_exit(&watch_events_lock);

		mutex_enter(&xenwatch_mutex);

		DTRACE_XPV4(xenbus__fire__watch,
		    const char *, watch->node,
		    uintptr_t, watch->callback,
		    struct xenbus_watch *, watch,
		    const char *, msg->un.watch.vec[XS_WATCH_PATH]);

		watch->callback(watch, (const char **)msg->un.watch.vec,
		    msg->un.watch.vec_size);

		free_stored_msg(msg);
		mutex_exit(&xenwatch_mutex);
	}
}

static int
process_msg(void)
{
	struct xs_stored_msg *msg;
	char *body;
	int err, mlen;

	msg = kmem_alloc(sizeof (*msg), KM_SLEEP);

	err = xb_read(&msg->hdr, sizeof (msg->hdr));
	if (err) {
		kmem_free(msg, sizeof (*msg));
		return (err);
	}

	mlen = msg->hdr.len + 1;
	body = kmem_alloc(mlen, KM_SLEEP);

	err = xb_read(body, msg->hdr.len);
	if (err) {
		kmem_free(body, mlen);
		kmem_free(msg, sizeof (*msg));
		return (err);
	}

	body[mlen - 1] = '\0';

	if (msg->hdr.type == XS_WATCH_EVENT) {
		const char *token;
		msg->un.watch.vec = split(body, msg->hdr.len + 1,
		    &msg->un.watch.vec_size);
		if (msg->un.watch.vec == NULL) {
			kmem_free(msg, sizeof (*msg));
			return (EIO);
		}

		mutex_enter(&watches_lock);
		token = msg->un.watch.vec[XS_WATCH_TOKEN];
		if ((msg->un.watch.handle = find_watch(token)) != NULL) {
			mutex_enter(&watch_events_lock);

			DTRACE_XPV4(xenbus__enqueue__watch,
			    const char *, msg->un.watch.handle->node,
			    uintptr_t, msg->un.watch.handle->callback,
			    struct xenbus_watch *, msg->un.watch.handle,
			    const char *, msg->un.watch.vec[XS_WATCH_PATH]);

			list_insert_tail(&watch_events, msg);
			cv_broadcast(&watch_events_cv);
			mutex_exit(&watch_events_lock);
		} else {
			free_stored_msg(msg);
		}
		mutex_exit(&watches_lock);
	} else {
		msg->un.reply.body = body;
		mutex_enter(&xs_state.reply_lock);
		list_insert_tail(&xs_state.reply_list, msg);
		mutex_exit(&xs_state.reply_lock);
		cv_signal(&xs_state.reply_cv);
	}

	return (0);
}

static void
xenbus_thread(void)
{
	int err;

	/*
	 * We have to wait for interrupts to be ready, so we don't clash
	 * with the polled-IO code in read_reply().
	 */
	while (!interrupts_unleashed)
		delay(10);

	for (;;) {
		err = process_msg();
		if (err)
			cmn_err(CE_WARN, "XENBUS error %d while reading "
			    "message", err);
	}
}

/*
 * When setting up xenbus, dom0 and domU have to take different paths, which
 * makes this code a little confusing. For dom0:
 *
 * xs_early_init - mutex init only
 * xs_dom0_init - called on xenbus dev attach: set up our xenstore page and
 *	event channel; start xenbus threads for responding to interrupts.
 *
 * And for domU:
 *
 * xs_early_init - mutex init; set up our xenstore page and event channel
 * xs_domu_init - installation of IRQ handler; start xenbus threads.
 *
 * We need an early init on domU so we can use xenbus in polled mode to
 * discover devices, VCPUs etc.
 *
 * On resume, we use xb_init() and xb_setup_intr() to restore xenbus to a
 * working state.
 */

void
xs_early_init(void)
{
	list_create(&xs_state.reply_list, sizeof (struct xs_stored_msg),
	    offsetof(struct xs_stored_msg, list));
	list_create(&watch_events, sizeof (struct xs_stored_msg),
	    offsetof(struct xs_stored_msg, list));
	list_create(&watches, sizeof (struct xenbus_watch),
	    offsetof(struct xenbus_watch, list));
	list_create(&notify_list, sizeof (struct xenbus_notify),
	    offsetof(struct xenbus_notify, list));
	mutex_init(&xs_state.reply_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&xs_state.request_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&notify_list_lock, NULL, MUTEX_DEFAULT, NULL);
	rw_init(&xs_state.suspend_lock, NULL, RW_DEFAULT, NULL);
	cv_init(&xs_state.reply_cv, NULL, CV_DEFAULT, NULL);

	if (DOMAIN_IS_INITDOMAIN(xen_info))
		return;

	xb_init();
	xenstore_up = B_TRUE;
}

static void
xs_thread_init(void)
{
	(void) thread_create(NULL, 0, xenwatch_thread, NULL, 0, &p0,
	    TS_RUN, minclsyspri);
	(void) thread_create(NULL, 0, xenbus_thread, NULL, 0, &p0,
	    TS_RUN, minclsyspri);
	xenbus_taskq = taskq_create("xenbus_taskq", 1,
	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
	ASSERT(xenbus_taskq != NULL);
}

void
xs_domu_init(void)
{
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		return;

	/*
	 * Add the interrupt handler for xenbus now; we must wait until after
	 * the psm module is loaded. All use of xenbus is in polled mode
	 * until this init function runs, since it is what kicks off the
	 * xenbus server threads.
	 */
	xs_thread_init();
	xb_setup_intr();
}


void
xs_dom0_init(void)
{
	static boolean_t initialized = B_FALSE;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	/*
	 * The xenbus driver might be re-attaching.
	 */
	if (initialized)
		return;

	xb_init();
	xs_thread_init();
	xb_setup_intr();

	initialized = B_TRUE;
}