xref: /freebsd/sys/dev/xen/xenstore/xenstore.c (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 /******************************************************************************
2  * xenstore.c
3  *
4  * Low-level kernel interface to the XenStore.
5  *
6  * Copyright (C) 2005 Rusty Russell, IBM Corporation
7  * Copyright (C) 2009,2010 Spectra Logic Corporation
8  *
9  * This file may be distributed separately from the Linux kernel, or
10  * incorporated into other software packages, subject to the following license:
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this source file (the "Software"), to deal in the Software without
14  * restriction, including without limitation the rights to use, copy, modify,
15  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16  * and to permit persons to whom the Software is furnished to do so, subject to
17  * the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/bus.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/module.h>
39 #include <sys/mutex.h>
40 #include <sys/sx.h>
41 #include <sys/syslog.h>
42 #include <sys/malloc.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/kthread.h>
46 #include <sys/sbuf.h>
47 #include <sys/sysctl.h>
48 #include <sys/uio.h>
49 #include <sys/unistd.h>
50 #include <sys/queue.h>
51 #include <sys/taskqueue.h>
52 
53 #include <machine/stdarg.h>
54 
55 #include <xen/xen-os.h>
56 #include <xen/hypervisor.h>
57 #include <xen/xen_intr.h>
58 
59 #include <contrib/xen/hvm/params.h>
60 #include <xen/hvm.h>
61 
62 #include <xen/xenstore/xenstorevar.h>
63 #include <xen/xenstore/xenstore_internal.h>
64 
65 #include <vm/vm.h>
66 #include <vm/pmap.h>
67 
68 /**
69  * \file xenstore.c
70  * \brief XenStore interface
71  *
72  * The XenStore interface is a simple storage system that is a means of
73  * communicating state and configuration data between the Xen Domain 0
74  * and the various guest domains.  All configuration data other than
75  * a small amount of essential information required during the early
76  * boot process of launching a Xen aware guest, is managed using the
77  * XenStore.
78  *
79  * The XenStore is ASCII string based, and has a structure and semantics
80  * similar to a filesystem.  There are files and directories, the directories
81  * able to contain files or other directories.  The depth of the hierarchy
82  * is only limited by the XenStore's maximum path length.
83  *
84  * The communication channel between the XenStore service and other
85  * domains is via two, guest specific, ring buffers in a shared memory
86  * area.  One ring buffer is used for communicating in each direction.
87  * The grant table references for this shared memory are given to the
88  * guest either via the xen_start_info structure for a fully para-
89  * virtualized guest, or via HVM hypercalls for a hardware virtualized
90  * guest.
91  *
92  * The XenStore communication relies on an event channel and thus
93  * interrupts.  For this reason, the attachment of the XenStore
94  * relies on an interrupt driven configuration hook to hold off
95  * boot processing until communication with the XenStore service
96  * can be established.
97  *
98  * Several Xen services depend on the XenStore, most notably the
99  * XenBus used to discover and manage Xen devices.  These services
100  * are implemented as NewBus child attachments to a bus exported
101  * by this XenStore driver.
102  */
103 
/* Forward declaration: xs_process_msg() calls find_watch() before its definition. */
static struct xs_watch *find_watch(const char *token);

/* Malloc type used for the XenStore allocations made in this file. */
MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");

/**
 * Pointer to shared memory communication structures allowing us
 * to communicate with the XenStore service.
 *
 * When operating in full PV mode, this pointer is set early in kernel
 * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
 * to get the guest frame number for the shared page and then map it
 * into kva.  See xs_init() for details.
 *
 * The request and response rings and their producer/consumer indexes
 * are reached through this pointer.  Its address is also the
 * sleep/wakeup channel used for ring events (see xs_intr()).
 */
static struct xenstore_domain_interface *xen_store;
118 
119 /*-------------------------- Private Data Structures ------------------------*/
120 
/**
 * Structure capturing messages received from the XenStore service.
 *
 * xs_process_msg() fills in u.watch for XS_WATCH_EVENT messages and
 * u.reply for every other message type.
 */
struct xs_stored_msg {
	/** Linkage on either the reply list or the watch event list. */
	TAILQ_ENTRY(xs_stored_msg) list;

	/** Message header exactly as read from the response ring. */
	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			/** NUL terminated copy of the message body. */
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			/** The registered watch this event was matched to. */
			struct xs_watch *handle;
			/** Pointers to the strings making up the event body. */
			const char **vec;
			/** Number of entries in vec. */
			u_int vec_size;
		} watch;
	} u;
};
TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
144 
/**
 * Container for all XenStore related state.
 */
struct xs_softc {
	/** Newbus device for the XenStore. */
	device_t xs_dev;

	/**
	 * Lock serializing access to ring producer/consumer
	 * indexes.  Use of this lock guarantees that wakeups
	 * of blocking readers/writers are not missed due to
	 * races with the XenStore service.
	 */
	struct mtx ring_lock;

	/*
	 * Mutex used to ensure exclusive access to the outgoing
	 * communication ring.  We use a lock type that can be
	 * held while sleeping so that xs_write_store() can block
	 * waiting for space in the ring to free up, without allowing
	 * another writer to come in and corrupt a partial message write.
	 */
	struct sx request_mutex;

	/**
	 * A list of replies to our requests.
	 *
	 * The reply list is filled by xs_rcv_thread().  It
	 * is consumed by the context that issued the request
	 * to which a reply is made.  The requester blocks in
	 * xs_read_reply().
	 *
	 * \note Only one requesting context can be active at a time.
	 *       This is guaranteed by the request_mutex and ensures
	 *	 that the requester sees replies matching the order
	 *	 of its requests.
	 */
	struct xs_stored_msg_list reply_list;

	/** Lock protecting the reply list. */
	struct mtx reply_lock;

	/**
	 * List of registered watches.
	 */
	struct xs_watch_list  registered_watches;

	/** Lock protecting the registered watches list. */
	struct mtx registered_watches_lock;

	/**
	 * List of pending watch callback events.
	 */
	struct xs_stored_msg_list watch_events;

	/**
	 * Lock protecting the watch callback list.  The per-watch
	 * "pending" counters are also updated while it is held.
	 */
	struct mtx watch_events_lock;

	/**
	 * The process id of the xenwatch thread.
	 */
	pid_t xenwatch_pid;

	/**
	 * Sleepable mutex used to gate the execution of XenStore
	 * watch event callbacks.
	 *
	 * xenwatch_thread holds an exclusive lock on this mutex
	 * while delivering event callbacks, and xenstore_unregister_watch()
	 * uses an exclusive lock of this mutex to guarantee that no
	 * callbacks of the just unregistered watch are pending
	 * before returning to its caller.
	 */
	struct sx xenwatch_mutex;

	/**
	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
	 * of the true machine frame number into our "physical address space".
	 */
	unsigned long gpfn;

	/**
	 * The event channel for communicating with the
	 * XenStore service.
	 */
	int evtchn;

	/** Handle for XenStore interrupts. */
	xen_intr_handle_t xen_intr_handle;

	/**
	 * Interrupt driven config hook allowing us to defer
	 * attaching children until interrupts (and thus communication
	 * with the XenStore service) are available.
	 */
	struct intr_config_hook xs_attachcb;

	/**
	 * Xenstore is a user-space process that usually runs in Dom0,
	 * so if this domain is booting as Dom0, xenstore won't be accessible,
	 * and we have to defer the initialization of xenstore related
	 * devices to later (when xenstore is started).
	 *
	 * xs_intr() sets this flag on the first event it sees while the
	 * flag is still clear.
	 */
	bool initialized;

	/**
	 * Task to run when xenstore is initialized (Dom0 only), will
	 * take care of attaching xenstore related devices.
	 */
	struct task xs_late_init;
};
256 
/*-------------------------------- Global Data ------------------------------*/
/** The single instance of XenStore driver state used throughout this file. */
static struct xs_softc xs;
259 
260 /*------------------------- Private Utility Functions -----------------------*/
261 
/**
 * Walk a buffer of back-to-back NUL terminated strings, counting them
 * and, when requested, recording where each one starts.
 *
 * The walk advances by strlen() of each string, so the final string in
 * the buffer must be NUL terminated within len bytes.
 *
 * \param strings  Start of the contiguous buffer of NUL terminated strings.
 * \param dest	   Optional array receiving a pointer to the start of each
 *		   string.  May be NULL to only count.  When non-NULL it
 *		   must have room for every string found.
 * \param len	   The length, in bytes, of the buffer at strings.
 *
 * \return  The number of strings found (0 when len is 0).
 */
static u_int
extract_strings(const char *strings, const char **dest, u_int len)
{
	const char *const end = strings + len;
	const char *cur = strings;
	u_int count = 0;

	while (cur < end) {
		if (dest != NULL)
			dest[count] = cur;
		count++;
		/* Step over this string and its terminator. */
		cur += strlen(cur) + 1;
	}

	return (count);
}
286 
287 /**
288  * Convert a contiguous buffer containing a series of NUL terminated
289  * strings into an array of pointers to strings.
290  *
291  * The returned pointer references the array of string pointers which
292  * is followed by the storage for the string data.  It is the client's
293  * responsibility to free this storage.
294  *
295  * The storage addressed by strings is free'd prior to split returning.
296  *
297  * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
298  * \param len	   The length of the buffer pointed to by strings.
299  * \param num	   The number of strings found and returned in the strings
300  *                 array.
301  *
302  * \return  An array of pointers to the strings found in the input buffer.
303  */
304 static const char **
305 split(char *strings, u_int len, u_int *num)
306 {
307 	const char **ret;
308 
309 	/* Protect against unterminated buffers. */
310 	if (len > 0)
311 		strings[len - 1] = '\0';
312 
313 	/* Count the strings. */
314 	*num = extract_strings(strings, /*dest*/NULL, len);
315 
316 	/* Transfer to one big alloc for easy freeing by the caller. */
317 	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
318 	memcpy(&ret[*num], strings, len);
319 	free(strings, M_XENSTORE);
320 
321 	/* Extract pointers to newly allocated array. */
322 	strings = (char *)&ret[*num];
323 	(void)extract_strings(strings, /*dest*/ret, len);
324 
325 	return (ret);
326 }
327 
328 /*------------------------- Public Utility Functions -------------------------*/
329 /*------- API comments for these methods can be found in xenstorevar.h -------*/
/*
 * Build "dir/name" in a newly created auto-extending sbuf.  The '/'
 * separator and name are omitted when name is the empty string.  The
 * sbuf is finished before being returned.
 */
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *path = sbuf_new_auto();

	sbuf_cat(path, dir);
	if (*name != '\0') {
		sbuf_putc(path, '/');
		sbuf_cat(path, name);
	}
	sbuf_finish(path);

	return (path);
}
345 
346 /*-------------------- Low Level Communication Management --------------------*/
347 /**
348  * Interrupt handler for the XenStore event channel.
349  *
350  * XenStore reads and writes block on "xen_store" for buffer
351  * space.  Wakeup any blocking operations when the XenStore
352  * service has modified the queues.
353  */
354 static void
355 xs_intr(void * arg __unused /*__attribute__((unused))*/)
356 {
357 
358 	/* If xenstore has not been initialized, initialize it now */
359 	if (!xs.initialized) {
360 		xs.initialized = true;
361 		/*
362 		 * Since this task is probing and attaching devices we
363 		 * have to hold the Giant lock.
364 		 */
365 		taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
366 	}
367 
368 	/*
369 	 * Hold ring lock across wakeup so that clients
370 	 * cannot miss a wakeup.
371 	 */
372 	mtx_lock(&xs.ring_lock);
373 	wakeup(xen_store);
374 	mtx_unlock(&xs.ring_lock);
375 }
376 
377 /**
378  * Verify that the indexes for a ring are valid.
379  *
380  * The difference between the producer and consumer cannot
381  * exceed the size of the ring.
382  *
383  * \param cons  The consumer index for the ring to test.
384  * \param prod  The producer index for the ring to test.
385  *
386  * \retval 1  If indexes are in range.
387  * \retval 0  If the indexes are out of range.
388  */
389 static int
390 xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
391 {
392 
393 	return ((prod - cons) <= XENSTORE_RING_SIZE);
394 }
395 
396 /**
397  * Return a pointer to, and the length of, the contiguous
398  * free region available for output in a ring buffer.
399  *
400  * \param cons  The consumer index for the ring.
401  * \param prod  The producer index for the ring.
402  * \param buf   The base address of the ring's storage.
403  * \param len   The amount of contiguous storage available.
404  *
405  * \return  A pointer to the start location of the free region.
406  */
407 static void *
408 xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
409     char *buf, uint32_t *len)
410 {
411 
412 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
413 	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
414 		*len = XENSTORE_RING_SIZE - (prod - cons);
415 	return (buf + MASK_XENSTORE_IDX(prod));
416 }
417 
418 /**
419  * Return a pointer to, and the length of, the contiguous
420  * data available to read from a ring buffer.
421  *
422  * \param cons  The consumer index for the ring.
423  * \param prod  The producer index for the ring.
424  * \param buf   The base address of the ring's storage.
425  * \param len   The amount of contiguous data available to read.
426  *
427  * \return  A pointer to the start location of the available data.
428  */
429 static const void *
430 xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
431     const char *buf, uint32_t *len)
432 {
433 
434 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
435 	if ((prod - cons) < *len)
436 		*len = prod - cons;
437 	return (buf + MASK_XENSTORE_IDX(cons));
438 }
439 
440 /**
441  * Transmit data to the XenStore service.
442  *
443  * \param tdata  A pointer to the contiguous data to send.
444  * \param len    The amount of data to send.
445  *
446  * \return  On success 0, otherwise an errno value indicating the
447  *          cause of failure.
448  *
449  * \invariant  Called from thread context.
450  * \invariant  The buffer pointed to by tdata is at least len bytes
451  *             in length.
452  * \invariant  xs.request_mutex exclusively locked.
453  */
454 static int
455 xs_write_store(const void *tdata, unsigned len)
456 {
457 	XENSTORE_RING_IDX cons, prod;
458 	const char *data = (const char *)tdata;
459 	int error;
460 
461 	sx_assert(&xs.request_mutex, SX_XLOCKED);
462 	while (len != 0) {
463 		void *dst;
464 		u_int avail;
465 
466 		/* Hold lock so we can't miss wakeups should we block. */
467 		mtx_lock(&xs.ring_lock);
468 		cons = xen_store->req_cons;
469 		prod = xen_store->req_prod;
470 		if ((prod - cons) == XENSTORE_RING_SIZE) {
471 			/*
472 			 * Output ring is full. Wait for a ring event.
473 			 *
474 			 * Note that the events from both queues
475 			 * are combined, so being woken does not
476 			 * guarantee that data exist in the read
477 			 * ring.
478 			 *
479 			 * To simplify error recovery and the retry,
480 			 * we specify PDROP so our lock is *not* held
481 			 * when msleep returns.
482 			 */
483 			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
484 			     "xbwrite", /*timeout*/0);
485 			if (error && error != EWOULDBLOCK)
486 				return (error);
487 
488 			/* Try again. */
489 			continue;
490 		}
491 		mtx_unlock(&xs.ring_lock);
492 
493 		/* Verify queue sanity. */
494 		if (!xs_check_indexes(cons, prod)) {
495 			xen_store->req_cons = xen_store->req_prod = 0;
496 			return (EIO);
497 		}
498 
499 		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
500 		if (avail > len)
501 			avail = len;
502 
503 		memcpy(dst, data, avail);
504 		data += avail;
505 		len -= avail;
506 
507 		/*
508 		 * The store to the producer index, which indicates
509 		 * to the other side that new data has arrived, must
510 		 * be visible only after our copy of the data into the
511 		 * ring has completed.
512 		 */
513 		wmb();
514 		xen_store->req_prod += avail;
515 
516 		/*
517 		 * xen_intr_signal() implies mb(). The other side will see
518 		 * the change to req_prod at the time of the interrupt.
519 		 */
520 		xen_intr_signal(xs.xen_intr_handle);
521 	}
522 
523 	return (0);
524 }
525 
526 /**
527  * Receive data from the XenStore service.
528  *
529  * \param tdata  A pointer to the contiguous buffer to receive the data.
530  * \param len    The amount of data to receive.
531  *
532  * \return  On success 0, otherwise an errno value indicating the
533  *          cause of failure.
534  *
535  * \invariant  Called from thread context.
536  * \invariant  The buffer pointed to by tdata is at least len bytes
537  *             in length.
538  *
539  * \note xs_read does not perform any internal locking to guarantee
540  *       serial access to the incoming ring buffer.  However, there
541  *	 is only one context processing reads: xs_rcv_thread().
542  */
543 static int
544 xs_read_store(void *tdata, unsigned len)
545 {
546 	XENSTORE_RING_IDX cons, prod;
547 	char *data = (char *)tdata;
548 	int error;
549 
550 	while (len != 0) {
551 		u_int avail;
552 		const char *src;
553 
554 		/* Hold lock so we can't miss wakeups should we block. */
555 		mtx_lock(&xs.ring_lock);
556 		cons = xen_store->rsp_cons;
557 		prod = xen_store->rsp_prod;
558 		if (cons == prod) {
559 			/*
560 			 * Nothing to read. Wait for a ring event.
561 			 *
562 			 * Note that the events from both queues
563 			 * are combined, so being woken does not
564 			 * guarantee that data exist in the read
565 			 * ring.
566 			 *
567 			 * To simplify error recovery and the retry,
568 			 * we specify PDROP so our lock is *not* held
569 			 * when msleep returns.
570 			 */
571 			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
572 			    "xbread", /*timeout*/0);
573 			if (error && error != EWOULDBLOCK)
574 				return (error);
575 			continue;
576 		}
577 		mtx_unlock(&xs.ring_lock);
578 
579 		/* Verify queue sanity. */
580 		if (!xs_check_indexes(cons, prod)) {
581 			xen_store->rsp_cons = xen_store->rsp_prod = 0;
582 			return (EIO);
583 		}
584 
585 		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
586 		if (avail > len)
587 			avail = len;
588 
589 		/*
590 		 * Insure the data we read is related to the indexes
591 		 * we read above.
592 		 */
593 		rmb();
594 
595 		memcpy(data, src, avail);
596 		data += avail;
597 		len -= avail;
598 
599 		/*
600 		 * Insure that the producer of this ring does not see
601 		 * the ring space as free until after we have copied it
602 		 * out.
603 		 */
604 		mb();
605 		xen_store->rsp_cons += avail;
606 
607 		/*
608 		 * xen_intr_signal() implies mb(). The producer will see
609 		 * the updated consumer index when the event is delivered.
610 		 */
611 		xen_intr_signal(xs.xen_intr_handle);
612 	}
613 
614 	return (0);
615 }
616 
617 /*----------------------- Received Message Processing ------------------------*/
618 /**
619  * Block reading the next message from the XenStore service and
620  * process the result.
621  *
622  * \param type  The returned type of the XenStore message received.
623  *
624  * \return  0 on success.  Otherwise an errno value indicating the
625  *          type of failure encountered.
626  */
627 static int
628 xs_process_msg(enum xsd_sockmsg_type *type)
629 {
630 	struct xs_stored_msg *msg;
631 	char *body;
632 	int error;
633 
634 	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
635 	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
636 	if (error) {
637 		free(msg, M_XENSTORE);
638 		return (error);
639 	}
640 
641 	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
642 	error = xs_read_store(body, msg->hdr.len);
643 	if (error) {
644 		free(body, M_XENSTORE);
645 		free(msg, M_XENSTORE);
646 		return (error);
647 	}
648 	body[msg->hdr.len] = '\0';
649 
650 	*type = msg->hdr.type;
651 	if (msg->hdr.type == XS_WATCH_EVENT) {
652 		msg->u.watch.vec = split(body, msg->hdr.len,
653 		    &msg->u.watch.vec_size);
654 
655 		mtx_lock(&xs.registered_watches_lock);
656 		msg->u.watch.handle = find_watch(
657 		    msg->u.watch.vec[XS_WATCH_TOKEN]);
658 		mtx_lock(&xs.watch_events_lock);
659 		if (msg->u.watch.handle != NULL &&
660 		    (!msg->u.watch.handle->max_pending ||
661 		    msg->u.watch.handle->pending <
662 		    msg->u.watch.handle->max_pending)) {
663 			msg->u.watch.handle->pending++;
664 			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
665 			wakeup(&xs.watch_events);
666 			mtx_unlock(&xs.watch_events_lock);
667 		} else {
668 			mtx_unlock(&xs.watch_events_lock);
669 			free(msg->u.watch.vec, M_XENSTORE);
670 			free(msg, M_XENSTORE);
671 		}
672 		mtx_unlock(&xs.registered_watches_lock);
673 	} else {
674 		msg->u.reply.body = body;
675 		mtx_lock(&xs.reply_lock);
676 		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
677 		wakeup(&xs.reply_list);
678 		mtx_unlock(&xs.reply_lock);
679 	}
680 
681 	return (0);
682 }
683 
684 /**
685  * Thread body of the XenStore receive thread.
686  *
687  * This thread blocks waiting for data from the XenStore service
688  * and processes and received messages.
689  */
690 static void
691 xs_rcv_thread(void *arg __unused)
692 {
693 	int error;
694 	enum xsd_sockmsg_type type;
695 
696 	for (;;) {
697 		error = xs_process_msg(&type);
698 		if (error)
699 			printf("XENSTORE error %d while reading message\n",
700 			    error);
701 	}
702 }
703 
704 /*---------------- XenStore Message Request/Reply Processing -----------------*/
705 #define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
706 
707 /**
708  * Convert a XenStore error string into an errno number.
709  *
710  * \param errorstring  The error string to convert.
711  *
712  * \return  The errno best matching the input string.
713  *
714  * \note Unknown error strings are converted to EINVAL.
715  */
716 static int
717 xs_get_error(const char *errorstring)
718 {
719 	u_int i;
720 
721 	for (i = 0; i < xsd_error_count; i++) {
722 		if (!strcmp(errorstring, xsd_errors[i].errstring))
723 			return (xsd_errors[i].errnum);
724 	}
725 	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
726 	    errorstring);
727 	return (EINVAL);
728 }
729 
730 /**
731  * Block waiting for a reply to a message request.
732  *
733  * \param type	  The returned type of the reply.
734  * \param len	  The returned body length of the reply.
735  * \param result  The returned body of the reply.
736  *
737  * \return  0 on success.  Otherwise an errno indicating the
738  *          cause of failure.
739  */
740 static int
741 xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
742 {
743 	struct xs_stored_msg *msg;
744 	char *body;
745 	int error;
746 
747 	mtx_lock(&xs.reply_lock);
748 	while (TAILQ_EMPTY(&xs.reply_list)) {
749 		error = mtx_sleep(&xs.reply_list, &xs.reply_lock, 0, "xswait",
750 		    hz/10);
751 		if (error && error != EWOULDBLOCK) {
752 			mtx_unlock(&xs.reply_lock);
753 			return (error);
754 		}
755 	}
756 	msg = TAILQ_FIRST(&xs.reply_list);
757 	TAILQ_REMOVE(&xs.reply_list, msg, list);
758 	mtx_unlock(&xs.reply_lock);
759 
760 	*type = msg->hdr.type;
761 	if (len)
762 		*len = msg->hdr.len;
763 	body = msg->u.reply.body;
764 
765 	free(msg, M_XENSTORE);
766 	*result = body;
767 	return (0);
768 }
769 
770 /**
771  * Pass-thru interface for XenStore access by userland processes
772  * via the XenStore device.
773  *
774  * Reply type and length data are returned by overwriting these
775  * fields in the passed in request message.
776  *
777  * \param msg	  A properly formatted message to transmit to
778  *		  the XenStore service.
779  * \param result  The returned body of the reply.
780  *
781  * \return  0 on success.  Otherwise an errno indicating the cause
782  *          of failure.
783  *
784  * \note The returned result is provided in malloced storage and thus
785  *       must be free'd by the caller with 'free(result, M_XENSTORE);
786  */
787 int
788 xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
789 {
790 	int error;
791 
792 	sx_xlock(&xs.request_mutex);
793 	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
794 		error = xs_read_reply(&msg->type, &msg->len, result);
795 	sx_xunlock(&xs.request_mutex);
796 
797 	return (error);
798 }
799 
800 /**
801  * Send a message with an optionally muti-part body to the XenStore service.
802  *
803  * \param t              The transaction to use for this request.
804  * \param request_type   The type of message to send.
805  * \param iovec          Pointers to the body sections of the request.
806  * \param num_vecs       The number of body sections in the request.
807  * \param len            The returned length of the reply.
808  * \param result         The returned body of the reply.
809  *
810  * \return  0 on success.  Otherwise an errno indicating
811  *          the cause of failure.
812  *
813  * \note The returned result is provided in malloced storage and thus
814  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
815  */
816 static int
817 xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
818     const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
819 {
820 	struct xsd_sockmsg msg;
821 	void *ret = NULL;
822 	u_int i;
823 	int error;
824 
825 	msg.tx_id = t.id;
826 	msg.req_id = 0;
827 	msg.type = request_type;
828 	msg.len = 0;
829 	for (i = 0; i < num_vecs; i++)
830 		msg.len += iovec[i].iov_len;
831 
832 	sx_xlock(&xs.request_mutex);
833 	error = xs_write_store(&msg, sizeof(msg));
834 	if (error) {
835 		printf("xs_talkv failed %d\n", error);
836 		goto error_lock_held;
837 	}
838 
839 	for (i = 0; i < num_vecs; i++) {
840 		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
841 		if (error) {
842 			printf("xs_talkv failed %d\n", error);
843 			goto error_lock_held;
844 		}
845 	}
846 
847 	error = xs_read_reply(&msg.type, len, &ret);
848 
849 error_lock_held:
850 	sx_xunlock(&xs.request_mutex);
851 	if (error)
852 		return (error);
853 
854 	if (msg.type == XS_ERROR) {
855 		error = xs_get_error(ret);
856 		free(ret, M_XENSTORE);
857 		return (error);
858 	}
859 
860 	/* Reply is either error or an echo of our request message type. */
861 	KASSERT(msg.type == request_type, ("bad xenstore message type"));
862 
863 	if (result)
864 		*result = ret;
865 	else
866 		free(ret, M_XENSTORE);
867 
868 	return (0);
869 }
870 
871 /**
872  * Wrapper for xs_talkv allowing easy transmission of a message with
873  * a single, contiguous, message body.
874  *
875  * \param t              The transaction to use for this request.
876  * \param request_type   The type of message to send.
877  * \param body           The body of the request.
878  * \param len            The returned length of the reply.
879  * \param result         The returned body of the reply.
880  *
881  * \return  0 on success.  Otherwise an errno indicating
882  *          the cause of failure.
883  *
884  * \note The returned result is provided in malloced storage and thus
885  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
886  */
887 static int
888 xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
889     const char *body, u_int *len, void **result)
890 {
891 	struct iovec iovec;
892 
893 	iovec.iov_base = (void *)(uintptr_t)body;
894 	iovec.iov_len = strlen(body) + 1;
895 
896 	return (xs_talkv(t, request_type, &iovec, 1, len, result));
897 }
898 
899 /*------------------------- XenStore Watch Support ---------------------------*/
900 /**
901  * Transmit a watch request to the XenStore service.
902  *
903  * \param path    The path in the XenStore to watch.
904  * \param tocken  A unique identifier for this watch.
905  *
906  * \return  0 on success.  Otherwise an errno indicating the
907  *          cause of failure.
908  */
909 static int
910 xs_watch(const char *path, const char *token)
911 {
912 	struct iovec iov[2];
913 
914 	iov[0].iov_base = (void *)(uintptr_t) path;
915 	iov[0].iov_len = strlen(path) + 1;
916 	iov[1].iov_base = (void *)(uintptr_t) token;
917 	iov[1].iov_len = strlen(token) + 1;
918 
919 	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
920 }
921 
922 /**
923  * Transmit an uwatch request to the XenStore service.
924  *
925  * \param path    The path in the XenStore to watch.
926  * \param tocken  A unique identifier for this watch.
927  *
928  * \return  0 on success.  Otherwise an errno indicating the
929  *          cause of failure.
930  */
931 static int
932 xs_unwatch(const char *path, const char *token)
933 {
934 	struct iovec iov[2];
935 
936 	iov[0].iov_base = (void *)(uintptr_t) path;
937 	iov[0].iov_len = strlen(path) + 1;
938 	iov[1].iov_base = (void *)(uintptr_t) token;
939 	iov[1].iov_len = strlen(token) + 1;
940 
941 	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
942 }
943 
944 /**
945  * Convert from watch token (unique identifier) to the associated
946  * internal tracking structure for this watch.
947  *
948  * \param tocken  The unique identifier for the watch to find.
949  *
950  * \return  A pointer to the found watch structure or NULL.
951  */
952 static struct xs_watch *
953 find_watch(const char *token)
954 {
955 	struct xs_watch *i, *cmp;
956 
957 	cmp = (void *)strtoul(token, NULL, 16);
958 
959 	LIST_FOREACH(i, &xs.registered_watches, list)
960 		if (i == cmp)
961 			return (i);
962 
963 	return (NULL);
964 }
965 
966 /**
967  * Thread body of the XenStore watch event dispatch thread.
968  */
969 static void
970 xenwatch_thread(void *unused)
971 {
972 	struct xs_stored_msg *msg;
973 
974 	for (;;) {
975 		mtx_lock(&xs.watch_events_lock);
976 		while (TAILQ_EMPTY(&xs.watch_events))
977 			mtx_sleep(&xs.watch_events,
978 			    &xs.watch_events_lock,
979 			    PWAIT | PCATCH, "waitev", hz/10);
980 
981 		mtx_unlock(&xs.watch_events_lock);
982 		sx_xlock(&xs.xenwatch_mutex);
983 
984 		mtx_lock(&xs.watch_events_lock);
985 		msg = TAILQ_FIRST(&xs.watch_events);
986 		if (msg) {
987 			TAILQ_REMOVE(&xs.watch_events, msg, list);
988 			msg->u.watch.handle->pending--;
989 		}
990 		mtx_unlock(&xs.watch_events_lock);
991 
992 		if (msg != NULL) {
993 			/*
994 			 * XXX There are messages coming in with a NULL
995 			 * XXX callback.  This deserves further investigation;
996 			 * XXX the workaround here simply prevents the kernel
997 			 * XXX from panic'ing on startup.
998 			 */
999 			if (msg->u.watch.handle->callback != NULL)
1000 				msg->u.watch.handle->callback(
1001 					msg->u.watch.handle,
1002 					(const char **)msg->u.watch.vec,
1003 					msg->u.watch.vec_size);
1004 			free(msg->u.watch.vec, M_XENSTORE);
1005 			free(msg, M_XENSTORE);
1006 		}
1007 
1008 		sx_xunlock(&xs.xenwatch_mutex);
1009 	}
1010 }
1011 
1012 /*----------- XenStore Configuration, Initialization, and Control ------------*/
1013 /**
1014  * Setup communication channels with the XenStore service.
1015  *
1016  * \return  On success, 0. Otherwise an errno value indicating the
1017  *          type of failure.
1018  */
1019 static int
1020 xs_init_comms(void)
1021 {
1022 	int error;
1023 
1024 	if (xen_store->rsp_prod != xen_store->rsp_cons) {
1025 		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1026 		    "(%08x:%08x): fixing up\n",
1027 		    xen_store->rsp_cons, xen_store->rsp_prod);
1028 		xen_store->rsp_cons = xen_store->rsp_prod;
1029 	}
1030 
1031 	xen_intr_unbind(&xs.xen_intr_handle);
1032 
1033 	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
1034 	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
1035 	    &xs.xen_intr_handle);
1036 	if (error) {
1037 		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1038 		return (error);
1039 	}
1040 
1041 	return (0);
1042 }
1043 
1044 /*------------------ Private Device Attachment Functions  --------------------*/
1045 static void
1046 xs_identify(driver_t *driver, device_t parent)
1047 {
1048 
1049 	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
1050 }
1051 
1052 /**
1053  * Probe for the existence of the XenStore.
1054  *
1055  * \param dev
1056  */
1057 static int
1058 xs_probe(device_t dev)
1059 {
1060 	/*
1061 	 * We are either operating within a PV kernel or being probed
1062 	 * as the child of the successfully attached xenpci device.
1063 	 * Thus we are in a Xen environment and there will be a XenStore.
1064 	 * Unconditionally return success.
1065 	 */
1066 	device_set_desc(dev, "XenStore");
1067 	return (BUS_PROBE_NOWILDCARD);
1068 }
1069 
1070 static void
1071 xs_attach_deferred(void *arg)
1072 {
1073 
1074 	bus_generic_probe(xs.xs_dev);
1075 	bus_generic_attach(xs.xs_dev);
1076 
1077 	config_intrhook_disestablish(&xs.xs_attachcb);
1078 }
1079 
1080 static void
1081 xs_attach_late(void *arg, int pending)
1082 {
1083 
1084 	KASSERT((pending == 1), ("xs late attach queued several times"));
1085 	bus_generic_probe(xs.xs_dev);
1086 	bus_generic_attach(xs.xs_dev);
1087 }
1088 
1089 /**
1090  * Attach to the XenStore.
1091  *
1092  * This routine also prepares for the probe/attach of drivers that rely
1093  * on the XenStore.
1094  */
1095 static int
1096 xs_attach(device_t dev)
1097 {
1098 	int error;
1099 
1100 	/* Allow us to get device_t from softc and vice-versa. */
1101 	xs.xs_dev = dev;
1102 	device_set_softc(dev, &xs);
1103 
1104 	/* Initialize the interface to xenstore. */
1105 	struct proc *p;
1106 
1107 	xs.initialized = false;
1108 	xs.evtchn = xen_get_xenstore_evtchn();
1109 	if (xs.evtchn == 0) {
1110 		struct evtchn_alloc_unbound alloc_unbound;
1111 
1112 		/* Allocate a local event channel for xenstore */
1113 		alloc_unbound.dom = DOMID_SELF;
1114 		alloc_unbound.remote_dom = DOMID_SELF;
1115 		error = HYPERVISOR_event_channel_op(
1116 		    EVTCHNOP_alloc_unbound, &alloc_unbound);
1117 		if (error != 0)
1118 			panic(
1119 			   "unable to alloc event channel for Dom0: %d",
1120 			    error);
1121 
1122 		xs.evtchn = alloc_unbound.port;
1123 
1124 		/* Allocate memory for the xs shared ring */
1125 		xen_store = malloc(PAGE_SIZE, M_XENSTORE, M_WAITOK | M_ZERO);
1126 		xs.gpfn = atop(pmap_kextract((vm_offset_t)xen_store));
1127 	} else {
1128 		xs.gpfn = xen_get_xenstore_mfn();
1129 		xen_store = pmap_mapdev_attr(ptoa(xs.gpfn), PAGE_SIZE,
1130 		    VM_MEMATTR_XEN);
1131 		xs.initialized = true;
1132 	}
1133 
1134 	TAILQ_INIT(&xs.reply_list);
1135 	TAILQ_INIT(&xs.watch_events);
1136 
1137 	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1138 	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1139 	sx_init(&xs.xenwatch_mutex, "xenwatch");
1140 	sx_init(&xs.request_mutex, "xenstore request");
1141 	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1142 	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1143 
1144 	/* Initialize the shared memory rings to talk to xenstored */
1145 	error = xs_init_comms();
1146 	if (error)
1147 		return (error);
1148 
1149 	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1150 	    0, "xenwatch");
1151 	if (error)
1152 		return (error);
1153 	xs.xenwatch_pid = p->p_pid;
1154 
1155 	error = kproc_create(xs_rcv_thread, NULL, NULL,
1156 	    RFHIGHPID, 0, "xenstore_rcv");
1157 
1158 	xs.xs_attachcb.ich_func = xs_attach_deferred;
1159 	xs.xs_attachcb.ich_arg = NULL;
1160 	if (xs.initialized) {
1161 		config_intrhook_establish(&xs.xs_attachcb);
1162 	} else {
1163 		TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
1164 	}
1165 
1166 	return (error);
1167 }
1168 
1169 /**
1170  * Prepare for suspension of this VM by halting XenStore access after
1171  * all transactions and individual requests have completed.
1172  */
1173 static int
1174 xs_suspend(device_t dev)
1175 {
1176 	int error;
1177 
1178 	/* Suspend child Xen devices. */
1179 	error = bus_generic_suspend(dev);
1180 	if (error != 0)
1181 		return (error);
1182 
1183 	sx_xlock(&xs.request_mutex);
1184 
1185 	return (0);
1186 }
1187 
1188 /**
1189  * Resume XenStore operations after this VM is resumed.
1190  */
1191 static int
1192 xs_resume(device_t dev __unused)
1193 {
1194 	struct xs_watch *watch;
1195 	char token[sizeof(watch) * 2 + 1];
1196 
1197 	xs_init_comms();
1198 
1199 	sx_xunlock(&xs.request_mutex);
1200 
1201 	/*
1202 	 * NB: since xenstore childs have not been resumed yet, there's
1203 	 * no need to hold any watch mutex. Having clients try to add or
1204 	 * remove watches at this point (before xenstore is resumed) is
1205 	 * clearly a violantion of the resume order.
1206 	 */
1207 	LIST_FOREACH(watch, &xs.registered_watches, list) {
1208 		sprintf(token, "%lX", (long)watch);
1209 		xs_watch(watch->node, token);
1210 	}
1211 
1212 	/* Resume child Xen devices. */
1213 	bus_generic_resume(dev);
1214 
1215 	return (0);
1216 }
1217 
1218 /*-------------------- Private Device Attachment Data  -----------------------*/
1219 static device_method_t xenstore_methods[] = {
1220 	/* Device interface */
1221 	DEVMETHOD(device_identify,	xs_identify),
1222 	DEVMETHOD(device_probe,         xs_probe),
1223 	DEVMETHOD(device_attach,        xs_attach),
1224 	DEVMETHOD(device_detach,        bus_generic_detach),
1225 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
1226 	DEVMETHOD(device_suspend,       xs_suspend),
1227 	DEVMETHOD(device_resume,        xs_resume),
1228 
1229 	/* Bus interface */
1230 	DEVMETHOD(bus_add_child,        bus_generic_add_child),
1231 	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
1232 	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
1233 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
1234 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
1235 
1236 	DEVMETHOD_END
1237 };
1238 
1239 DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
1240 
1241 DRIVER_MODULE(xenstore, xenpv, xenstore_driver, 0, 0);
1242 
1243 /*------------------------------- Sysctl Data --------------------------------*/
1244 /* XXX Shouldn't the node be somewhere else? */
1245 SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1246     "Xen");
1247 SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
1248 SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1249 
1250 /*-------------------------------- Public API --------------------------------*/
1251 /*------- API comments for these methods can be found in xenstorevar.h -------*/
1252 bool
1253 xs_initialized(void)
1254 {
1255 
1256 	return (xs.initialized);
1257 }
1258 
1259 evtchn_port_t
1260 xs_evtchn(void)
1261 {
1262 
1263     return (xs.evtchn);
1264 }
1265 
1266 vm_paddr_t
1267 xs_address(void)
1268 {
1269 
1270     return (ptoa(xs.gpfn));
1271 }
1272 
1273 int
1274 xs_directory(struct xs_transaction t, const char *dir, const char *node,
1275     u_int *num, const char ***result)
1276 {
1277 	struct sbuf *path;
1278 	char *strings;
1279 	u_int len = 0;
1280 	int error;
1281 
1282 	path = xs_join(dir, node);
1283 	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1284 	    (void **)&strings);
1285 	sbuf_delete(path);
1286 	if (error)
1287 		return (error);
1288 
1289 	*result = split(strings, len, num);
1290 
1291 	return (0);
1292 }
1293 
1294 int
1295 xs_exists(struct xs_transaction t, const char *dir, const char *node)
1296 {
1297 	const char **d;
1298 	int error, dir_n;
1299 
1300 	error = xs_directory(t, dir, node, &dir_n, &d);
1301 	if (error)
1302 		return (0);
1303 	free(d, M_XENSTORE);
1304 	return (1);
1305 }
1306 
1307 int
1308 xs_read(struct xs_transaction t, const char *dir, const char *node,
1309     u_int *len, void **result)
1310 {
1311 	struct sbuf *path;
1312 	void *ret;
1313 	int error;
1314 
1315 	path = xs_join(dir, node);
1316 	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
1317 	sbuf_delete(path);
1318 	if (error)
1319 		return (error);
1320 	*result = ret;
1321 	return (0);
1322 }
1323 
1324 int
1325 xs_write(struct xs_transaction t, const char *dir, const char *node,
1326     const char *string)
1327 {
1328 	struct sbuf *path;
1329 	struct iovec iovec[2];
1330 	int error;
1331 
1332 	path = xs_join(dir, node);
1333 
1334 	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1335 	iovec[0].iov_len = sbuf_len(path) + 1;
1336 	iovec[1].iov_base = (void *)(uintptr_t) string;
1337 	iovec[1].iov_len = strlen(string);
1338 
1339 	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
1340 	sbuf_delete(path);
1341 
1342 	return (error);
1343 }
1344 
1345 int
1346 xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1347 {
1348 	struct sbuf *path;
1349 	int ret;
1350 
1351 	path = xs_join(dir, node);
1352 	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
1353 	sbuf_delete(path);
1354 
1355 	return (ret);
1356 }
1357 
1358 int
1359 xs_rm(struct xs_transaction t, const char *dir, const char *node)
1360 {
1361 	struct sbuf *path;
1362 	int ret;
1363 
1364 	path = xs_join(dir, node);
1365 	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
1366 	sbuf_delete(path);
1367 
1368 	return (ret);
1369 }
1370 
1371 int
1372 xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
1373 {
1374 	struct xs_transaction local_xbt;
1375 	struct sbuf *root_path_sbuf;
1376 	struct sbuf *cur_path_sbuf;
1377 	char *root_path;
1378 	char *cur_path;
1379 	const char **dir;
1380 	int error;
1381 
1382 retry:
1383 	root_path_sbuf = xs_join(base, node);
1384 	cur_path_sbuf  = xs_join(base, node);
1385 	root_path      = sbuf_data(root_path_sbuf);
1386 	cur_path       = sbuf_data(cur_path_sbuf);
1387 	dir            = NULL;
1388 	local_xbt.id   = 0;
1389 
1390 	if (xbt.id == 0) {
1391 		error = xs_transaction_start(&local_xbt);
1392 		if (error != 0)
1393 			goto out;
1394 		xbt = local_xbt;
1395 	}
1396 
1397 	while (1) {
1398 		u_int count;
1399 		u_int i;
1400 
1401 		error = xs_directory(xbt, cur_path, "", &count, &dir);
1402 		if (error)
1403 			goto out;
1404 
1405 		for (i = 0; i < count; i++) {
1406 			error = xs_rm(xbt, cur_path, dir[i]);
1407 			if (error == ENOTEMPTY) {
1408 				struct sbuf *push_dir;
1409 
1410 				/*
1411 				 * Descend to clear out this sub directory.
1412 				 * We'll return to cur_dir once push_dir
1413 				 * is empty.
1414 				 */
1415 				push_dir = xs_join(cur_path, dir[i]);
1416 				sbuf_delete(cur_path_sbuf);
1417 				cur_path_sbuf = push_dir;
1418 				cur_path = sbuf_data(cur_path_sbuf);
1419 				break;
1420 			} else if (error != 0) {
1421 				goto out;
1422 			}
1423 		}
1424 
1425 		free(dir, M_XENSTORE);
1426 		dir = NULL;
1427 
1428 		if (i == count) {
1429 			char *last_slash;
1430 
1431 			/* Directory is empty.  It is now safe to remove. */
1432 			error = xs_rm(xbt, cur_path, "");
1433 			if (error != 0)
1434 				goto out;
1435 
1436 			if (!strcmp(cur_path, root_path))
1437 				break;
1438 
1439 			/* Return to processing the parent directory. */
1440 			last_slash = strrchr(cur_path, '/');
1441 			KASSERT(last_slash != NULL,
1442 				("xs_rm_tree: mangled path %s", cur_path));
1443 			*last_slash = '\0';
1444 		}
1445 	}
1446 
1447 out:
1448 	sbuf_delete(cur_path_sbuf);
1449 	sbuf_delete(root_path_sbuf);
1450 	if (dir != NULL)
1451 		free(dir, M_XENSTORE);
1452 
1453 	if (local_xbt.id != 0) {
1454 		int terror;
1455 
1456 		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
1457 		xbt.id = 0;
1458 		if (terror == EAGAIN && error == 0)
1459 			goto retry;
1460 	}
1461 	return (error);
1462 }
1463 
1464 int
1465 xs_transaction_start(struct xs_transaction *t)
1466 {
1467 	char *id_str;
1468 	int error;
1469 
1470 	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
1471 	    (void **)&id_str);
1472 	if (error == 0) {
1473 		t->id = strtoul(id_str, NULL, 0);
1474 		free(id_str, M_XENSTORE);
1475 	}
1476 	return (error);
1477 }
1478 
1479 int
1480 xs_transaction_end(struct xs_transaction t, int abort)
1481 {
1482 	char abortstr[2];
1483 
1484 	if (abort)
1485 		strcpy(abortstr, "F");
1486 	else
1487 		strcpy(abortstr, "T");
1488 
1489 	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
1490 }
1491 
1492 int
1493 xs_scanf(struct xs_transaction t, const char *dir, const char *node,
1494      int *scancountp, const char *fmt, ...)
1495 {
1496 	va_list ap;
1497 	int error, ns;
1498 	char *val;
1499 
1500 	error = xs_read(t, dir, node, NULL, (void **) &val);
1501 	if (error)
1502 		return (error);
1503 
1504 	va_start(ap, fmt);
1505 	ns = vsscanf(val, fmt, ap);
1506 	va_end(ap);
1507 	free(val, M_XENSTORE);
1508 	/* Distinctive errno. */
1509 	if (ns == 0)
1510 		return (ERANGE);
1511 	if (scancountp)
1512 		*scancountp = ns;
1513 	return (0);
1514 }
1515 
1516 int
1517 xs_vprintf(struct xs_transaction t,
1518     const char *dir, const char *node, const char *fmt, va_list ap)
1519 {
1520 	struct sbuf *sb;
1521 	int error;
1522 
1523 	sb = sbuf_new_auto();
1524 	sbuf_vprintf(sb, fmt, ap);
1525 	sbuf_finish(sb);
1526 	error = xs_write(t, dir, node, sbuf_data(sb));
1527 	sbuf_delete(sb);
1528 
1529 	return (error);
1530 }
1531 
1532 int
1533 xs_printf(struct xs_transaction t, const char *dir, const char *node,
1534      const char *fmt, ...)
1535 {
1536 	va_list ap;
1537 	int error;
1538 
1539 	va_start(ap, fmt);
1540 	error = xs_vprintf(t, dir, node, fmt, ap);
1541 	va_end(ap);
1542 
1543 	return (error);
1544 }
1545 
1546 int
1547 xs_gather(struct xs_transaction t, const char *dir, ...)
1548 {
1549 	va_list ap;
1550 	const char *name;
1551 	int error;
1552 
1553 	va_start(ap, dir);
1554 	error = 0;
1555 	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
1556 		const char *fmt = va_arg(ap, char *);
1557 		void *result = va_arg(ap, void *);
1558 		char *p;
1559 
1560 		error = xs_read(t, dir, name, NULL, (void **) &p);
1561 		if (error)
1562 			break;
1563 
1564 		if (fmt) {
1565 			if (sscanf(p, fmt, result) == 0)
1566 				error = EINVAL;
1567 			free(p, M_XENSTORE);
1568 		} else
1569 			*(char **)result = p;
1570 	}
1571 	va_end(ap);
1572 
1573 	return (error);
1574 }
1575 
1576 int
1577 xs_register_watch(struct xs_watch *watch)
1578 {
1579 	/* Pointer in ascii is the token. */
1580 	char token[sizeof(watch) * 2 + 1];
1581 	int error;
1582 
1583 	watch->pending = 0;
1584 	sprintf(token, "%lX", (long)watch);
1585 
1586 	mtx_lock(&xs.registered_watches_lock);
1587 	KASSERT(find_watch(token) == NULL, ("watch already registered"));
1588 	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
1589 	mtx_unlock(&xs.registered_watches_lock);
1590 
1591 	error = xs_watch(watch->node, token);
1592 
1593 	/* Ignore errors due to multiple registration. */
1594 	if (error == EEXIST)
1595 		error = 0;
1596 
1597 	if (error != 0) {
1598 		mtx_lock(&xs.registered_watches_lock);
1599 		LIST_REMOVE(watch, list);
1600 		mtx_unlock(&xs.registered_watches_lock);
1601 	}
1602 
1603 	return (error);
1604 }
1605 
1606 void
1607 xs_unregister_watch(struct xs_watch *watch)
1608 {
1609 	struct xs_stored_msg *msg, *tmp;
1610 	char token[sizeof(watch) * 2 + 1];
1611 	int error;
1612 
1613 	sprintf(token, "%lX", (long)watch);
1614 
1615 	mtx_lock(&xs.registered_watches_lock);
1616 	if (find_watch(token) == NULL) {
1617 		mtx_unlock(&xs.registered_watches_lock);
1618 		return;
1619 	}
1620 	LIST_REMOVE(watch, list);
1621 	mtx_unlock(&xs.registered_watches_lock);
1622 
1623 	error = xs_unwatch(watch->node, token);
1624 	if (error)
1625 		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
1626 		    watch->node, error);
1627 
1628 	/* Cancel pending watch events. */
1629 	mtx_lock(&xs.watch_events_lock);
1630 	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
1631 		if (msg->u.watch.handle != watch)
1632 			continue;
1633 		TAILQ_REMOVE(&xs.watch_events, msg, list);
1634 		free(msg->u.watch.vec, M_XENSTORE);
1635 		free(msg, M_XENSTORE);
1636 	}
1637 	mtx_unlock(&xs.watch_events_lock);
1638 
1639 	/* Flush any currently-executing callback, unless we are it. :-) */
1640 	if (curproc->p_pid != xs.xenwatch_pid) {
1641 		sx_xlock(&xs.xenwatch_mutex);
1642 		sx_xunlock(&xs.xenwatch_mutex);
1643 	}
1644 }
1645 
1646 void
1647 xs_lock(void)
1648 {
1649 
1650 	sx_xlock(&xs.request_mutex);
1651 	return;
1652 }
1653 
1654 void
1655 xs_unlock(void)
1656 {
1657 
1658 	sx_xunlock(&xs.request_mutex);
1659 	return;
1660 }
1661