1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 *
29 * xenbus_xs.c
30 *
31 * This is the kernel equivalent of the "xs" library. We don't need everything
32 * and we use xenbus_comms for communication.
33 *
34 * Copyright (C) 2005 Rusty Russell, IBM Corporation
35 *
36 * This file may be distributed separately from the Linux kernel, or
37 * incorporated into other software packages, subject to the following license:
38 *
39 * Permission is hereby granted, free of charge, to any person obtaining a copy
40 * of this source file (the "Software"), to deal in the Software without
41 * restriction, including without limitation the rights to use, copy, modify,
42 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
43 * and to permit persons to whom the Software is furnished to do so, subject to
44 * the following conditions:
45 *
46 * The above copyright notice and this permission notice shall be included in
47 * all copies or substantial portions of the Software.
48 *
49 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55 * IN THE SOFTWARE.
56 */
57
58 /*
59 * NOTE: To future maintainers of the Solaris version of this file:
60 * I found the Linux version of this code to be very disgusting in
61 * overloading pointers and error codes into void * return values.
62 * The main difference you will find is that all such usage is changed
63 * to pass pointers to void* to be filled in with return values and
64 * the functions return error codes.
65 */
66
67 #include <sys/errno.h>
68 #include <sys/types.h>
69 #include <sys/sysmacros.h>
70 #include <sys/uio.h>
71 #include <sys/mutex.h>
72 #include <sys/condvar.h>
73 #include <sys/rwlock.h>
74 #include <sys/disp.h>
75 #include <sys/ddi.h>
76 #include <sys/sunddi.h>
77 #include <sys/avintr.h>
78 #include <sys/cmn_err.h>
79 #include <sys/mach_mmu.h>
80 #include <util/sscanf.h>
81 #define _XSD_ERRORS_DEFINED
82 #ifdef XPV_HVM_DRIVER
83 #include <sys/xpv_support.h>
84 #endif
85 #include <sys/hypervisor.h>
86 #include <sys/taskq.h>
87 #include <sys/sdt.h>
88 #include <xen/sys/xenbus_impl.h>
89 #include <xen/sys/xenbus_comms.h>
90 #include <xen/sys/xendev.h>
91 #include <xen/public/io/xs_wire.h>
92
/* True if two NUL-terminated strings compare equal. */
#define streq(a, b) (strcmp((a), (b)) == 0)

/* True if the given list_t currently holds no elements. */
#define list_empty(list) (list_head(list) == NULL)
96
/*
 * One message received from xenstore, queued either on
 * xs_state.reply_list (synchronous replies) or on watch_events
 * (asynchronous watch firings), depending on hdr.type.
 */
struct xs_stored_msg {
	list_node_t list;	/* linkage for whichever queue holds us */

	struct xsd_sockmsg hdr;	/* wire header; hdr.len is the body size */

	union {
		/* Queued replies. */
		struct {
			char *body;	/* kmem_alloc'ed, hdr.len + 1 bytes */
		} reply;

		/* Queued watch events. */
		struct {
			struct xenbus_watch *handle;	/* matched watch */
			char **vec;		/* path/token strings, see split() */
			unsigned int vec_size;	/* number of entries in vec */
		} watch;
	} un;
};
116
/* Global connection state for talking to the xenstore daemon. */
static struct xs_handle {
	/* A list of replies. Currently only one will ever be outstanding. */
	list_t reply_list;
	kmutex_t reply_lock;	/* protects reply_list */
	kcondvar_t reply_cv;	/* signalled when a reply is queued */

	/* One request at a time. */
	kmutex_t request_mutex;

	/* Protect transactions against save/restore. */
	krwlock_t suspend_lock;	/* readers: transactions; writer: suspend */
} xs_state;
129
/* Monotonically increasing request id; incremented under request_mutex. */
static int last_req_id;

/*
 * List of clients wanting a xenstore up notification, and a lock to protect it
 */
static boolean_t xenstore_up;	/* B_TRUE once the xenstore daemon is usable */
static list_t notify_list;	/* registered xenbus_notify entries */
static kmutex_t notify_list_lock;
static taskq_t *xenbus_taskq;	/* runs do_notify_callbacks() asynchronously */

/* List of registered watches, and a lock to protect it. */
static list_t watches;
static kmutex_t watches_lock;

/* List of pending watch callback events, and a lock to protect it. */
static list_t watch_events;
static kmutex_t watch_events_lock;

/*
 * Details of the xenwatch callback kernel thread. The thread waits on the
 * watch_events_cv for work to do (queued on watch_events list). When it
 * wakes up it acquires the xenwatch_mutex before reading the list and
 * carrying out work.
 */
static kmutex_t xenwatch_mutex;
static kcondvar_t watch_events_cv;

/* Forward declaration: pulls one message off the xenbus ring. */
static int process_msg(void);
158
159 static int
get_error(const char * errorstring)160 get_error(const char *errorstring)
161 {
162 unsigned int i;
163
164 for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) {
165 if (i == (sizeof (xsd_errors) / sizeof (xsd_errors[0])) - 1) {
166 cmn_err(CE_WARN,
167 "XENBUS xen store gave: unknown error %s",
168 errorstring);
169 return (EINVAL);
170 }
171 }
172 return (xsd_errors[i].errnum);
173 }
174
175 /*
176 * Read a synchronous reply from xenstore. Since we can return early before
177 * reading a relevant reply, we discard any messages not matching the request
178 * ID. Caller must free returned message on success.
179 */
180 static int
read_reply(struct xsd_sockmsg * req_hdr,struct xs_stored_msg ** reply)181 read_reply(struct xsd_sockmsg *req_hdr, struct xs_stored_msg **reply)
182 {
183 extern int do_polled_io;
184
185 mutex_enter(&xs_state.reply_lock);
186
187 for (;;) {
188 while (list_empty(&xs_state.reply_list)) {
189 if (interrupts_unleashed && !do_polled_io) {
190 if (cv_wait_sig(&xs_state.reply_cv,
191 &xs_state.reply_lock) == 0) {
192 mutex_exit(&xs_state.reply_lock);
193 *reply = NULL;
194 return (EINTR);
195 }
196 } else { /* polled mode needed for early probes */
197 mutex_exit(&xs_state.reply_lock);
198 (void) HYPERVISOR_yield();
199 (void) process_msg();
200 mutex_enter(&xs_state.reply_lock);
201 }
202 }
203
204 *reply = list_head(&xs_state.reply_list);
205 list_remove(&xs_state.reply_list, *reply);
206
207 if ((*reply)->hdr.req_id == req_hdr->req_id)
208 break;
209 }
210
211 mutex_exit(&xs_state.reply_lock);
212 return (0);
213 }
214
/* Emergency write. */
void
xenbus_debug_write(const char *str, unsigned int count)
{
	struct xsd_sockmsg msg = { 0 };

	msg.type = XS_DEBUG;
	/* payload = "print" (incl. NUL) + count bytes of str + trailing NUL */
	msg.len = sizeof ("print") + count + 1;

	/* Serialize with normal requests; errors are deliberately ignored. */
	mutex_enter(&xs_state.request_mutex);
	(void) xb_write(&msg, sizeof (msg));
	(void) xb_write("print", sizeof ("print"));
	(void) xb_write(str, count);
	(void) xb_write("", 1);
	mutex_exit(&xs_state.request_mutex);
}
231
232 /*
233 * This is pretty unpleasant. First off, there's the horrible logic around
234 * suspend_lock and transactions. Also, we can be interrupted either before we
235 * write a message, or before we receive a reply. A client that wants to
236 * survive this can't know which case happened. Luckily all clients don't care
237 * about signals currently, and the alternative (a hard wait on a userspace
238 * daemon) isn't exactly preferable. Caller must free 'reply' on success.
239 */
240 int
xenbus_dev_request_and_reply(struct xsd_sockmsg * msg,void ** reply)241 xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **reply)
242 {
243 struct xsd_sockmsg req_msg = *msg;
244 struct xs_stored_msg *reply_msg = NULL;
245 int err;
246
247 if (req_msg.type == XS_TRANSACTION_START)
248 rw_enter(&xs_state.suspend_lock, RW_READER);
249
250 mutex_enter(&xs_state.request_mutex);
251
252 msg->req_id = last_req_id++;
253
254 err = xb_write(msg, sizeof (*msg) + msg->len);
255 if (err) {
256 if (req_msg.type == XS_TRANSACTION_START)
257 rw_exit(&xs_state.suspend_lock);
258 msg->type = XS_ERROR;
259 *reply = NULL;
260 goto out;
261 }
262
263 err = read_reply(msg, &reply_msg);
264
265 if (err) {
266 if (msg->type == XS_TRANSACTION_START)
267 rw_exit(&xs_state.suspend_lock);
268 *reply = NULL;
269 goto out;
270 }
271
272 *reply = reply_msg->un.reply.body;
273 *msg = reply_msg->hdr;
274
275 if (reply_msg->hdr.type == XS_TRANSACTION_END)
276 rw_exit(&xs_state.suspend_lock);
277
278 out:
279 if (reply_msg != NULL)
280 kmem_free(reply_msg, sizeof (*reply_msg));
281
282 mutex_exit(&xs_state.request_mutex);
283 return (err);
284 }
285
286 /*
287 * Send message to xs, return errcode, rval filled in with pointer
288 * to kmem_alloc'ed reply.
289 */
290 static int
xs_talkv(xenbus_transaction_t t,enum xsd_sockmsg_type type,const iovec_t * iovec,unsigned int num_vecs,void ** rval,unsigned int * len)291 xs_talkv(xenbus_transaction_t t,
292 enum xsd_sockmsg_type type,
293 const iovec_t *iovec,
294 unsigned int num_vecs,
295 void **rval,
296 unsigned int *len)
297 {
298 struct xsd_sockmsg msg;
299 struct xs_stored_msg *reply_msg;
300 char *reply;
301 unsigned int i;
302 int err;
303
304 msg.tx_id = (uint32_t)(unsigned long)t;
305 msg.type = type;
306 msg.len = 0;
307 for (i = 0; i < num_vecs; i++)
308 msg.len += iovec[i].iov_len;
309
310 mutex_enter(&xs_state.request_mutex);
311
312 msg.req_id = last_req_id++;
313
314 err = xb_write(&msg, sizeof (msg));
315 if (err) {
316 mutex_exit(&xs_state.request_mutex);
317 return (err);
318 }
319
320 for (i = 0; i < num_vecs; i++) {
321 err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
322 if (err) {
323 mutex_exit(&xs_state.request_mutex);
324 return (err);
325 }
326 }
327
328 err = read_reply(&msg, &reply_msg);
329
330 mutex_exit(&xs_state.request_mutex);
331
332 if (err)
333 return (err);
334
335 reply = reply_msg->un.reply.body;
336
337 if (reply_msg->hdr.type == XS_ERROR) {
338 err = get_error(reply);
339 kmem_free(reply, reply_msg->hdr.len + 1);
340 goto out;
341 }
342
343 if (len != NULL)
344 *len = reply_msg->hdr.len + 1;
345
346 ASSERT(reply_msg->hdr.type == type);
347
348 if (rval != NULL)
349 *rval = reply;
350 else
351 kmem_free(reply, reply_msg->hdr.len + 1);
352
353 out:
354 kmem_free(reply_msg, sizeof (*reply_msg));
355 return (err);
356 }
357
358 /* Simplified version of xs_talkv: single message. */
359 static int
xs_single(xenbus_transaction_t t,enum xsd_sockmsg_type type,const char * string,void ** ret,unsigned int * len)360 xs_single(xenbus_transaction_t t,
361 enum xsd_sockmsg_type type,
362 const char *string, void **ret,
363 unsigned int *len)
364 {
365 iovec_t iovec;
366
367 iovec.iov_base = (char *)string;
368 iovec.iov_len = strlen(string) + 1;
369 return (xs_talkv(t, type, &iovec, 1, ret, len));
370 }
371
/* Count the NUL-terminated strings packed into the first 'len' bytes. */
static unsigned int
count_strings(const char *strings, unsigned int len)
{
	const char *cur = strings;
	const char *end = strings + len;
	unsigned int count = 0;

	while (cur < end) {
		count++;
		cur += strlen(cur) + 1;	/* step past string and its NUL */
	}

	return (count);
}
383
384 /* Return the path to dir with /name appended. Buffer must be kmem_free()'ed */
385 static char *
join(const char * dir,const char * name)386 join(const char *dir, const char *name)
387 {
388 char *buffer;
389 size_t slashlen;
390
391 slashlen = streq(name, "") ? 0 : 1;
392 buffer = kmem_alloc(strlen(dir) + slashlen + strlen(name) + 1,
393 KM_SLEEP);
394
395 (void) strcpy(buffer, dir);
396 if (slashlen != 0) {
397 (void) strcat(buffer, "/");
398 (void) strcat(buffer, name);
399 }
400 return (buffer);
401 }
402
403 static char **
split(char * strings,unsigned int len,unsigned int * num)404 split(char *strings, unsigned int len, unsigned int *num)
405 {
406 char *p, **ret;
407
408 /* Count the strings. */
409 if ((*num = count_strings(strings, len - 1)) == 0)
410 return (NULL);
411
412 /* Transfer to one big alloc for easy freeing. */
413 ret = kmem_alloc(*num * sizeof (char *) + (len - 1), KM_SLEEP);
414 (void) memcpy(&ret[*num], strings, len - 1);
415 kmem_free(strings, len);
416
417 strings = (char *)&ret[*num];
418 for (p = strings, *num = 0; p < strings + (len - 1);
419 p += strlen(p) + 1) {
420 ret[(*num)++] = p;
421 }
422
423 return (ret);
424 }
425
/*
 * List the children of <dir>/<node>.  On success returns a vector of
 * *num entry-name pointers backed by a single allocation (see split()
 * for the layout); xenbus_exists_dir() shows how to free it.  Returns
 * NULL with *num = 0 on any failure; the error code is lost here.
 */
char **
xenbus_directory(xenbus_transaction_t t,
    const char *dir, const char *node, unsigned int *num)
{
	char *strings, *path;
	unsigned int len;
	int err;

	path = join(dir, node);
	err = xs_single(t, XS_DIRECTORY, path, (void **)&strings, &len);
	kmem_free(path, strlen(path) + 1);
	/* short-circuit: 'strings' is only set (and read) when err == 0 */
	if (err != 0 || strings == NULL) {
		/* sigh, we lose error code info here */
		*num = 0;
		return (NULL);
	}

	return (split(strings, len, num));
}
445
446 /* Check if a path exists. */
447 boolean_t
xenbus_exists(const char * dir,const char * node)448 xenbus_exists(const char *dir, const char *node)
449 {
450 void *p;
451 uint_t n;
452
453 if (xenbus_read(XBT_NULL, dir, node, &p, &n) != 0)
454 return (B_FALSE);
455 kmem_free(p, n);
456 return (B_TRUE);
457 }
458
/* Check if a directory path exists. */
boolean_t
xenbus_exists_dir(const char *dir, const char *node)
{
	char **d;
	unsigned int dir_n;
	int i, len;

	d = xenbus_directory(XBT_NULL, dir, node, &dir_n);
	if (d == NULL)
		return (B_FALSE);
	/*
	 * Reconstruct the size split() allocated: one pointer plus the
	 * NUL-terminated string bytes per entry, then free in one go.
	 */
	for (i = 0, len = 0; i < dir_n; i++)
		len += strlen(d[i]) + 1 + sizeof (char *);
	kmem_free(d, len);
	return (B_TRUE);
}
475
476 /*
477 * Get the value of a single file.
478 * Returns a kmem_alloced value in retp: call kmem_free() on it after use.
479 * len indicates length in bytes.
480 */
481 int
xenbus_read(xenbus_transaction_t t,const char * dir,const char * node,void ** retp,unsigned int * len)482 xenbus_read(xenbus_transaction_t t,
483 const char *dir, const char *node, void **retp, unsigned int *len)
484 {
485 char *path;
486 int err;
487
488 path = join(dir, node);
489 err = xs_single(t, XS_READ, path, retp, len);
490 kmem_free(path, strlen(path) + 1);
491 return (err);
492 }
493
494 int
xenbus_read_str(const char * dir,const char * node,char ** retp)495 xenbus_read_str(const char *dir, const char *node, char **retp)
496 {
497 uint_t n;
498 int err;
499 char *str;
500
501 /*
502 * Since we access the xenbus value immediatly we can't be
503 * part of a transaction.
504 */
505 if ((err = xenbus_read(XBT_NULL, dir, node, (void **)&str, &n)) != 0)
506 return (err);
507 ASSERT((str != NULL) && (n > 0));
508
509 /*
510 * Why bother with this? Because xenbus is truly annoying in the
511 * fact that when it returns a string, it doesn't guarantee that
512 * the memory that holds the string is of size strlen() + 1.
513 * This forces callers to keep track of the size of the memory
514 * containing the string. Ugh. We'll work around this by
515 * re-allocate strings to always be of size strlen() + 1.
516 */
517 *retp = strdup(str);
518 kmem_free(str, n);
519 return (0);
520 }
521
522 /*
523 * Write the value of a single file.
524 * Returns err on failure.
525 */
526 int
xenbus_write(xenbus_transaction_t t,const char * dir,const char * node,const char * string)527 xenbus_write(xenbus_transaction_t t,
528 const char *dir, const char *node, const char *string)
529 {
530 char *path;
531 iovec_t iovec[2];
532 int ret;
533
534 path = join(dir, node);
535
536 iovec[0].iov_base = (void *)path;
537 iovec[0].iov_len = strlen(path) + 1;
538 iovec[1].iov_base = (void *)string;
539 iovec[1].iov_len = strlen(string);
540
541 ret = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
542 kmem_free(path, iovec[0].iov_len);
543 return (ret);
544 }
545
546 /* Create a new directory. */
547 int
xenbus_mkdir(xenbus_transaction_t t,const char * dir,const char * node)548 xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node)
549 {
550 char *path;
551 int ret;
552
553 path = join(dir, node);
554 ret = xs_single(t, XS_MKDIR, path, NULL, NULL);
555 kmem_free(path, strlen(path) + 1);
556 return (ret);
557 }
558
559 /* Destroy a file or directory (directories must be empty). */
560 int
xenbus_rm(xenbus_transaction_t t,const char * dir,const char * node)561 xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node)
562 {
563 char *path;
564 int ret;
565
566 path = join(dir, node);
567 ret = xs_single(t, XS_RM, path, NULL, NULL);
568 kmem_free(path, strlen(path) + 1);
569 return (ret);
570 }
571
572 /*
573 * Start a transaction: changes by others will not be seen during this
574 * transaction, and changes will not be visible to others until end.
575 */
576 int
xenbus_transaction_start(xenbus_transaction_t * t)577 xenbus_transaction_start(xenbus_transaction_t *t)
578 {
579 void *id_str;
580 unsigned long id;
581 int err;
582 unsigned int len;
583
584 rw_enter(&xs_state.suspend_lock, RW_READER);
585
586 err = xs_single(XBT_NULL, XS_TRANSACTION_START, "", &id_str, &len);
587 if (err) {
588 rw_exit(&xs_state.suspend_lock);
589 return (err);
590 }
591
592 (void) ddi_strtoul((char *)id_str, NULL, 0, &id);
593 *t = (xenbus_transaction_t)id;
594 kmem_free(id_str, len);
595
596 return (0);
597 }
598
599 /*
600 * End a transaction.
601 * If abandon is true, transaction is discarded instead of committed.
602 */
603 int
xenbus_transaction_end(xenbus_transaction_t t,int abort)604 xenbus_transaction_end(xenbus_transaction_t t, int abort)
605 {
606 char abortstr[2];
607 int err;
608
609 if (abort)
610 (void) strcpy(abortstr, "F");
611 else
612 (void) strcpy(abortstr, "T");
613
614 err = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL);
615
616 rw_exit(&xs_state.suspend_lock);
617
618 return (err);
619 }
620
621 /*
622 * Single read and scanf: returns errno or 0. This can only handle a single
623 * conversion specifier.
624 */
625 /* SCANFLIKE4 */
626 int
xenbus_scanf(xenbus_transaction_t t,const char * dir,const char * node,const char * fmt,...)627 xenbus_scanf(xenbus_transaction_t t,
628 const char *dir, const char *node, const char *fmt, ...)
629 {
630 va_list ap;
631 int ret;
632 char *val;
633 unsigned int len;
634
635 ret = xenbus_read(t, dir, node, (void **)&val, &len);
636 if (ret)
637 return (ret);
638
639 va_start(ap, fmt);
640 if (vsscanf(val, fmt, ap) != 1)
641 ret = ERANGE;
642 va_end(ap);
643 kmem_free(val, len);
644 return (ret);
645 }
646
/* Single printf and write: returns errno or 0. */
/* PRINTFLIKE4 */
int
xenbus_printf(xenbus_transaction_t t,
    const char *dir, const char *node, const char *fmt, ...)
{
	va_list ap;
	int ret;
#define PRINTF_BUFFER_SIZE 4096
	char *printf_buffer;

	printf_buffer = kmem_alloc(PRINTF_BUFFER_SIZE, KM_SLEEP);

	va_start(ap, fmt);
	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
	va_end(ap);

	/*
	 * NOTE(review): this ASSERT only fires on DEBUG builds; on a
	 * non-DEBUG kernel a formatted value longer than the buffer
	 * would be silently truncated before being written.
	 */
	ASSERT(ret <= PRINTF_BUFFER_SIZE-1);
	ret = xenbus_write(t, dir, node, printf_buffer);

	kmem_free(printf_buffer, PRINTF_BUFFER_SIZE);

	return (ret);
}
671
672
673 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
674 int
xenbus_gather(xenbus_transaction_t t,const char * dir,...)675 xenbus_gather(xenbus_transaction_t t, const char *dir, ...)
676 {
677 va_list ap;
678 const char *name;
679 int ret = 0;
680 unsigned int len;
681
682 va_start(ap, dir);
683 while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
684 const char *fmt = va_arg(ap, char *);
685 void *result = va_arg(ap, void *);
686 char *p;
687
688 ret = xenbus_read(t, dir, name, (void **)&p, &len);
689 if (ret)
690 break;
691 if (fmt) {
692 ASSERT(result != NULL);
693 if (sscanf(p, fmt, result) != 1)
694 ret = EINVAL;
695 kmem_free(p, len);
696 } else
697 *(char **)result = p;
698 }
699 va_end(ap);
700 return (ret);
701 }
702
703 static int
xs_watch(const char * path,const char * token)704 xs_watch(const char *path, const char *token)
705 {
706 iovec_t iov[2];
707
708 iov[0].iov_base = (void *)path;
709 iov[0].iov_len = strlen(path) + 1;
710 iov[1].iov_base = (void *)token;
711 iov[1].iov_len = strlen(token) + 1;
712
713 return (xs_talkv(XBT_NULL, XS_WATCH, iov, 2, NULL, NULL));
714 }
715
716 static int
xs_unwatch(const char * path,const char * token)717 xs_unwatch(const char *path, const char *token)
718 {
719 iovec_t iov[2];
720
721 iov[0].iov_base = (char *)path;
722 iov[0].iov_len = strlen(path) + 1;
723 iov[1].iov_base = (char *)token;
724 iov[1].iov_len = strlen(token) + 1;
725
726 return (xs_talkv(XBT_NULL, XS_UNWATCH, iov, 2, NULL, NULL));
727 }
728
/*
 * The watch token written to xenstore is the watch structure's address
 * rendered in hex (see register_xenbus_watch()).  Parse the token back
 * into a pointer and return it only if it is still on the watches
 * list, else NULL.  Callers hold watches_lock.
 */
static struct xenbus_watch *
find_watch(const char *token)
{
	struct xenbus_watch *i, *cmp;

	(void) ddi_strtoul(token, NULL, 16, (unsigned long *)&cmp);

	for (i = list_head(&watches); i != NULL; i = list_next(&watches, i))
		if (i == cmp)
			break;

	return (i);
}
742
743 /* Register a xenstore state notify callback */
744 int
xs_register_xenbus_callback(void (* callback)(int))745 xs_register_xenbus_callback(void (*callback)(int))
746 {
747 struct xenbus_notify *xbn, *xnp;
748
749 xbn = kmem_alloc(sizeof (struct xenbus_notify), KM_SLEEP);
750 xbn->notify_func = callback;
751 mutex_enter(¬ify_list_lock);
752 /*
753 * Make sure not already on the list
754 */
755 xnp = list_head(¬ify_list);
756 for (; xnp != NULL; xnp = list_next(¬ify_list, xnp)) {
757 if (xnp->notify_func == callback) {
758 kmem_free(xbn, sizeof (struct xenbus_notify));
759 mutex_exit(¬ify_list_lock);
760 return (EEXIST);
761 }
762 }
763 xnp = xbn;
764 list_insert_tail(¬ify_list, xbn);
765 done:
766 if (xenstore_up)
767 xnp->notify_func(XENSTORE_UP);
768 mutex_exit(¬ify_list_lock);
769 return (0);
770 }
771
772 /*
773 * Notify clients of xenstore state
774 */
775 static void
do_notify_callbacks(void * arg)776 do_notify_callbacks(void *arg)
777 {
778 struct xenbus_notify *xnp;
779
780 mutex_enter(¬ify_list_lock);
781 xnp = list_head(¬ify_list);
782 for (; xnp != NULL; xnp = list_next(¬ify_list, xnp)) {
783 xnp->notify_func((int)((uintptr_t)arg));
784 }
785 mutex_exit(¬ify_list_lock);
786 }
787
/* Mark xenstore usable and notify clients asynchronously via taskq. */
void
xs_notify_xenstore_up(void)
{
	xenstore_up = B_TRUE;
	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
	    (void *)XENSTORE_UP, 0);
}
795
/* Mark xenstore unusable and notify clients asynchronously via taskq. */
void
xs_notify_xenstore_down(void)
{
	xenstore_up = B_FALSE;
	(void) taskq_dispatch(xenbus_taskq, do_notify_callbacks,
	    (void *)XENSTORE_DOWN, 0);
}
803
/* Register callback to watch this node. */
int
register_xenbus_watch(struct xenbus_watch *watch)
{
	/* Pointer in ascii is the token. */
	char token[sizeof (watch) * 2 + 1];
	int err;

	ASSERT(xenstore_up);
	(void) snprintf(token, sizeof (token), "%lX", (long)watch);

	/* Reader hold keeps suspend/resume from racing the registration. */
	rw_enter(&xs_state.suspend_lock, RW_READER);

	mutex_enter(&watches_lock);
	/*
	 * May be re-registering a watch if xenstore daemon was restarted
	 */
	if (find_watch(token) == NULL)
		list_insert_tail(&watches, watch);
	mutex_exit(&watches_lock);

	DTRACE_XPV3(xenbus__register__watch, const char *, watch->node,
	    uintptr_t, watch->callback, struct xenbus_watch *, watch);

	err = xs_watch(watch->node, token);

	/* Ignore errors due to multiple registration. */
	if ((err != 0) && (err != EEXIST)) {
		/* Roll back the list insertion on real failure. */
		mutex_enter(&watches_lock);
		list_remove(&watches, watch);
		mutex_exit(&watches_lock);
	}

	rw_exit(&xs_state.suspend_lock);

	return (err);
}
841
842 static void
free_stored_msg(struct xs_stored_msg * msg)843 free_stored_msg(struct xs_stored_msg *msg)
844 {
845 int i, len = 0;
846
847 for (i = 0; i < msg->un.watch.vec_size; i++)
848 len += strlen(msg->un.watch.vec[i]) + 1 + sizeof (char *);
849 kmem_free(msg->un.watch.vec, len);
850 kmem_free(msg, sizeof (*msg));
851 }
852
/*
 * Remove a watch: tell the daemon to stop it, discard any events
 * already queued for it, and wait out any callback currently running
 * (unless we are that callback).
 */
void
unregister_xenbus_watch(struct xenbus_watch *watch)
{
	struct xs_stored_msg *msg;
	/* Token is the watch pointer in hex, as in register_xenbus_watch(). */
	char token[sizeof (watch) * 2 + 1];
	int err;

	(void) snprintf(token, sizeof (token), "%lX", (long)watch);

	rw_enter(&xs_state.suspend_lock, RW_READER);

	mutex_enter(&watches_lock);
	ASSERT(find_watch(token));
	list_remove(&watches, watch);
	mutex_exit(&watches_lock);

	DTRACE_XPV3(xenbus__unregister__watch, const char *, watch->node,
	    uintptr_t, watch->callback, struct xenbus_watch *, watch);

	err = xs_unwatch(watch->node, token);
	if (err)
		cmn_err(CE_WARN, "XENBUS Failed to release watch %s: %d",
		    watch->node, err);

	rw_exit(&xs_state.suspend_lock);

	/* Cancel pending watch events. */
	mutex_enter(&watch_events_lock);
	msg = list_head(&watch_events);

	while (msg != NULL) {
		/* Grab the successor before possibly freeing msg. */
		struct xs_stored_msg *tmp = list_next(&watch_events, msg);
		if (msg->un.watch.handle == watch) {
			list_remove(&watch_events, msg);
			free_stored_msg(msg);
		}
		msg = tmp;
	}

	mutex_exit(&watch_events_lock);

	/* Flush any currently-executing callback, unless we are it. :-) */
	if (mutex_owner(&xenwatch_mutex) != curthread) {
		mutex_enter(&xenwatch_mutex);
		mutex_exit(&xenwatch_mutex);
	}
}
900
/*
 * Quiesce xenbus for suspend: take suspend_lock as writer (excluding
 * transactions and watch (un)registration) and request_mutex (excluding
 * new requests), then suspend the ring.  Both locks are deliberately
 * left held; xenbus_resume() releases them.
 */
void
xenbus_suspend(void)
{
	rw_enter(&xs_state.suspend_lock, RW_WRITER);
	mutex_enter(&xs_state.request_mutex);

	xb_suspend();
}
909
/*
 * Counterpart to xenbus_suspend(): re-initialize the ring and interrupt,
 * re-arm every registered watch with the daemon, and release the locks
 * xenbus_suspend() left held.
 */
void
xenbus_resume(void)
{
	struct xenbus_watch *watch;
	char token[sizeof (watch) * 2 + 1];

	/* Held since xenbus_suspend(). */
	mutex_exit(&xs_state.request_mutex);

	xb_init();
	xb_setup_intr();

	/* No need for watches_lock: the suspend_lock is sufficient. */
	for (watch = list_head(&watches); watch != NULL;
	    watch = list_next(&watches, watch)) {
		(void) snprintf(token, sizeof (token), "%lX", (long)watch);
		(void) xs_watch(watch->node, token);
	}

	rw_exit(&xs_state.suspend_lock);
}
930
/*
 * Kernel thread that delivers queued watch events to their callbacks.
 * Events are queued on watch_events by process_msg(); xenwatch_mutex
 * serializes callback execution so unregister_xenbus_watch() can flush
 * an in-flight callback.  Never returns.
 */
static void
xenwatch_thread(void)
{
	struct xs_stored_msg *msg;
	struct xenbus_watch *watch;

	for (;;) {
		/* Sleep until process_msg() queues an event. */
		mutex_enter(&watch_events_lock);
		while (list_empty(&watch_events))
			cv_wait(&watch_events_cv, &watch_events_lock);
		msg = list_head(&watch_events);
		ASSERT(msg != NULL);
		list_remove(&watch_events, msg);
		watch = msg->un.watch.handle;
		mutex_exit(&watch_events_lock);

		/* Callback runs under xenwatch_mutex, without the list lock. */
		mutex_enter(&xenwatch_mutex);

		DTRACE_XPV4(xenbus__fire__watch,
		    const char *, watch->node,
		    uintptr_t, watch->callback,
		    struct xenbus_watch *, watch,
		    const char *, msg->un.watch.vec[XS_WATCH_PATH]);

		watch->callback(watch, (const char **)msg->un.watch.vec,
		    msg->un.watch.vec_size);

		free_stored_msg(msg);
		mutex_exit(&xenwatch_mutex);
	}
}
962
/*
 * Read one message from the xenbus ring.  Watch events are queued on
 * watch_events (waking xenwatch_thread); every other message is treated
 * as a reply and queued on xs_state.reply_list for read_reply().
 * Returns 0 on success or an errno from xb_read()/split().
 */
static int
process_msg(void)
{
	struct xs_stored_msg *msg;
	char *body;
	int err, mlen;

	msg = kmem_alloc(sizeof (*msg), KM_SLEEP);

	err = xb_read(&msg->hdr, sizeof (msg->hdr));
	if (err) {
		kmem_free(msg, sizeof (*msg));
		return (err);
	}

	/* One extra byte so the body can always be NUL-terminated. */
	mlen = msg->hdr.len + 1;
	body = kmem_alloc(mlen, KM_SLEEP);

	err = xb_read(body, msg->hdr.len);
	if (err) {
		kmem_free(body, mlen);
		kmem_free(msg, sizeof (*msg));
		return (err);
	}

	body[mlen - 1] = '\0';

	if (msg->hdr.type == XS_WATCH_EVENT) {
		const char *token;
		/* split() copies the strings and frees 'body' on success. */
		msg->un.watch.vec = split(body, msg->hdr.len + 1,
		    &msg->un.watch.vec_size);
		if (msg->un.watch.vec == NULL) {
			kmem_free(msg, sizeof (*msg));
			return (EIO);
		}

		/* Only deliver events for watches still registered. */
		mutex_enter(&watches_lock);
		token = msg->un.watch.vec[XS_WATCH_TOKEN];
		if ((msg->un.watch.handle = find_watch(token)) != NULL) {
			mutex_enter(&watch_events_lock);

			DTRACE_XPV4(xenbus__enqueue__watch,
			    const char *, msg->un.watch.handle->node,
			    uintptr_t, msg->un.watch.handle->callback,
			    struct xenbus_watch *, msg->un.watch.handle,
			    const char *, msg->un.watch.vec[XS_WATCH_PATH]);

			list_insert_tail(&watch_events, msg);
			cv_broadcast(&watch_events_cv);
			mutex_exit(&watch_events_lock);
		} else {
			/* Watch was unregistered in the meantime. */
			free_stored_msg(msg);
		}
		mutex_exit(&watches_lock);
	} else {
		/* Synchronous reply: hand it to the waiter in read_reply(). */
		msg->un.reply.body = body;
		mutex_enter(&xs_state.reply_lock);
		list_insert_tail(&xs_state.reply_list, msg);
		mutex_exit(&xs_state.reply_lock);
		cv_signal(&xs_state.reply_cv);
	}

	return (0);
}
1027
/*
 * Kernel thread that services the xenbus ring once interrupts are
 * available, pulling messages off with process_msg().  Never returns.
 */
static void
xenbus_thread(void)
{
	int err;

	/*
	 * We have to wait for interrupts to be ready, so we don't clash
	 * with the polled-IO code in read_reply().
	 */
	while (!interrupts_unleashed)
		delay(10);

	for (;;) {
		err = process_msg();
		if (err)
			cmn_err(CE_WARN, "XENBUS error %d while reading "
			    "message", err);
	}
}
1047
1048 /*
1049 * When setting up xenbus, dom0 and domU have to take different paths, which
1050 * makes this code a little confusing. For dom0:
1051 *
1052 * xs_early_init - mutex init only
1053 * xs_dom0_init - called on xenbus dev attach: set up our xenstore page and
1054 * event channel; start xenbus threads for responding to interrupts.
1055 *
1056 * And for domU:
1057 *
1058 * xs_early_init - mutex init; set up our xenstore page and event channel
1059 * xs_domu_init - installation of IRQ handler; start xenbus threads.
1060 *
1061 * We need an early init on domU so we can use xenbus in polled mode to
1062 * discover devices, VCPUs etc.
1063 *
1064 * On resume, we use xb_init() and xb_setup_intr() to restore xenbus to a
1065 * working state.
1066 */
1067
1068 void
xs_early_init(void)1069 xs_early_init(void)
1070 {
1071 list_create(&xs_state.reply_list, sizeof (struct xs_stored_msg),
1072 offsetof(struct xs_stored_msg, list));
1073 list_create(&watch_events, sizeof (struct xs_stored_msg),
1074 offsetof(struct xs_stored_msg, list));
1075 list_create(&watches, sizeof (struct xenbus_watch),
1076 offsetof(struct xenbus_watch, list));
1077 list_create(¬ify_list, sizeof (struct xenbus_notify),
1078 offsetof(struct xenbus_notify, list));
1079 mutex_init(&xs_state.reply_lock, NULL, MUTEX_DEFAULT, NULL);
1080 mutex_init(&xs_state.request_mutex, NULL, MUTEX_DEFAULT, NULL);
1081 mutex_init(¬ify_list_lock, NULL, MUTEX_DEFAULT, NULL);
1082 rw_init(&xs_state.suspend_lock, NULL, RW_DEFAULT, NULL);
1083 cv_init(&xs_state.reply_cv, NULL, CV_DEFAULT, NULL);
1084
1085 if (DOMAIN_IS_INITDOMAIN(xen_info))
1086 return;
1087
1088 xb_init();
1089 xenstore_up = B_TRUE;
1090 }
1091
/* Start the watch-delivery and ring-service threads plus the notify taskq. */
static void
xs_thread_init(void)
{
	(void) thread_create(NULL, 0, xenwatch_thread, NULL, 0, &p0,
	    TS_RUN, minclsyspri);
	(void) thread_create(NULL, 0, xenbus_thread, NULL, 0, &p0,
	    TS_RUN, minclsyspri);
	/* Single-threaded taskq keeps notify callbacks serialized. */
	xenbus_taskq = taskq_create("xenbus_taskq", 1,
	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
	ASSERT(xenbus_taskq != NULL);
}
1103
/* domU second-stage init: start threads and hook up the interrupt. */
void
xs_domu_init(void)
{
	/* dom0 takes the xs_dom0_init() path instead. */
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		return;

	/*
	 * Add interrupt handler for xenbus now, must wait till after
	 * psm module is loaded. All use of xenbus is in polled mode
	 * until xs_init is called since it is what kicks off the xs
	 * server threads.
	 */
	xs_thread_init();
	xb_setup_intr();
}
1119
1120
1121 void
xs_dom0_init(void)1122 xs_dom0_init(void)
1123 {
1124 static boolean_t initialized = B_FALSE;
1125
1126 ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
1127
1128 /*
1129 * The xenbus driver might be re-attaching.
1130 */
1131 if (initialized)
1132 return;
1133
1134 xb_init();
1135 xs_thread_init();
1136 xb_setup_intr();
1137
1138 initialized = B_TRUE;
1139 }
1140