xref: /freebsd/sys/dev/xen/xenstore/xenstore.c (revision 6b96125afdf245ae61dd82b59891ad0d6aab0066)
1 /******************************************************************************
2  * xenstore.c
3  *
4  * Low-level kernel interface to the XenStore.
5  *
6  * Copyright (C) 2005 Rusty Russell, IBM Corporation
7  * Copyright (C) 2009,2010 Spectra Logic Corporation
8  *
9  * This file may be distributed separately from the Linux kernel, or
10  * incorporated into other software packages, subject to the following license:
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this source file (the "Software"), to deal in the Software without
14  * restriction, including without limitation the rights to use, copy, modify,
15  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16  * and to permit persons to whom the Software is furnished to do so, subject to
17  * the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/module.h>
36 #include <sys/mutex.h>
37 #include <sys/sx.h>
38 #include <sys/syslog.h>
39 #include <sys/malloc.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/kthread.h>
43 #include <sys/sbuf.h>
44 #include <sys/sysctl.h>
45 #include <sys/uio.h>
46 #include <sys/unistd.h>
47 #include <sys/queue.h>
48 #include <sys/taskqueue.h>
49 
50 #include <machine/stdarg.h>
51 
52 #include <xen/xen-os.h>
53 #include <xen/hypervisor.h>
54 #include <xen/xen_intr.h>
55 
56 #include <contrib/xen/hvm/params.h>
57 #include <xen/hvm.h>
58 
59 #include <xen/xenstore/xenstorevar.h>
60 #include <xen/xenstore/xenstore_internal.h>
61 
62 #include <vm/vm.h>
63 #include <vm/pmap.h>
64 
65 /**
66  * \file xenstore.c
67  * \brief XenStore interface
68  *
69  * The XenStore interface is a simple storage system that is a means of
70  * communicating state and configuration data between the Xen Domain 0
71  * and the various guest domains.  All configuration data other than
72  * a small amount of essential information required during the early
73  * boot process of launching a Xen aware guest, is managed using the
74  * XenStore.
75  *
76  * The XenStore is ASCII string based, and has a structure and semantics
77  * similar to a filesystem.  There are files and directories, the directories
78  * able to contain files or other directories.  The depth of the hierarchy
79  * is only limited by the XenStore's maximum path length.
80  *
81  * The communication channel between the XenStore service and other
82  * domains is via two, guest specific, ring buffers in a shared memory
83  * area.  One ring buffer is used for communicating in each direction.
84  * The grant table references for this shared memory are given to the
85  * guest either via the xen_start_info structure for a fully para-
86  * virtualized guest, or via HVM hypercalls for a hardware virtualized
87  * guest.
88  *
89  * The XenStore communication relies on an event channel and thus
90  * interrupts.  For this reason, the attachment of the XenStore
91  * relies on an interrupt driven configuration hook to hold off
92  * boot processing until communication with the XenStore service
93  * can be established.
94  *
95  * Several Xen services depend on the XenStore, most notably the
96  * XenBus used to discover and manage Xen devices.  These services
97  * are implemented as NewBus child attachments to a bus exported
98  * by this XenStore driver.
99  */
100 
101 static struct xs_watch *find_watch(const char *token);
102 
103 MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");
104 
105 /**
106  * Pointer to shared memory communication structures allowing us
107  * to communicate with the XenStore service.
108  *
109  * When operating in full PV mode, this pointer is set early in kernel
110  * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
111  * to get the guest frame number for the shared page and then map it
112  * into kva.  See xs_init() for details.
113  */
114 static struct xenstore_domain_interface *xen_store;
115 
116 /*-------------------------- Private Data Structures ------------------------*/
117 
118 /**
119  * Structure capturing messages received from the XenStore service.
120  */
121 struct xs_stored_msg {
122 	TAILQ_ENTRY(xs_stored_msg) list;
123 
124 	struct xsd_sockmsg hdr;
125 
126 	union {
127 		/* Queued replies. */
128 		struct {
129 			char *body;
130 		} reply;
131 
132 		/* Queued watch events. */
133 		struct {
134 			struct xs_watch *handle;
135 			const char **vec;
136 			u_int vec_size;
137 		} watch;
138 	} u;
139 };
140 TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
141 
142 /**
143  * Container for all XenStore related state.
144  */
145 struct xs_softc {
146 	/** Newbus device for the XenStore. */
147 	device_t xs_dev;
148 
149 	/**
150 	 * Lock serializing access to ring producer/consumer
151 	 * indexes.  Use of this lock guarantees that wakeups
152 	 * of blocking readers/writers are not missed due to
153 	 * races with the XenStore service.
154 	 */
155 	struct mtx ring_lock;
156 
157 	/*
158 	 * Mutex used to insure exclusive access to the outgoing
159 	 * communication ring.  We use a lock type that can be
160 	 * held while sleeping so that xs_write() can block waiting
161 	 * for space in the ring to free up, without allowing another
162 	 * writer to come in and corrupt a partial message write.
163 	 */
164 	struct sx request_mutex;
165 
166 	/**
167 	 * A list of replies to our requests.
168 	 *
169 	 * The reply list is filled by xs_rcv_thread().  It
170 	 * is consumed by the context that issued the request
171 	 * to which a reply is made.  The requester blocks in
172 	 * xs_read_reply().
173 	 *
174 	 * /note Only one requesting context can be active at a time.
175 	 *       This is guaranteed by the request_mutex and insures
176 	 *	 that the requester sees replies matching the order
177 	 *	 of its requests.
178 	 */
179 	struct xs_stored_msg_list reply_list;
180 
181 	/** Lock protecting the reply list. */
182 	struct mtx reply_lock;
183 
184 	/**
185 	 * List of registered watches.
186 	 */
187 	struct xs_watch_list  registered_watches;
188 
189 	/** Lock protecting the registered watches list. */
190 	struct mtx registered_watches_lock;
191 
192 	/**
193 	 * List of pending watch callback events.
194 	 */
195 	struct xs_stored_msg_list watch_events;
196 
197 	/** Lock protecting the watch calback list. */
198 	struct mtx watch_events_lock;
199 
200 	/**
201 	 * The processid of the xenwatch thread.
202 	 */
203 	pid_t xenwatch_pid;
204 
205 	/**
206 	 * Sleepable mutex used to gate the execution of XenStore
207 	 * watch event callbacks.
208 	 *
209 	 * xenwatch_thread holds an exclusive lock on this mutex
210 	 * while delivering event callbacks, and xenstore_unregister_watch()
211 	 * uses an exclusive lock of this mutex to guarantee that no
212 	 * callbacks of the just unregistered watch are pending
213 	 * before returning to its caller.
214 	 */
215 	struct sx xenwatch_mutex;
216 
217 	/**
218 	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
219 	 * of the true machine frame number into our "physical address space".
220 	 */
221 	unsigned long gpfn;
222 
223 	/**
224 	 * The event channel for communicating with the
225 	 * XenStore service.
226 	 */
227 	int evtchn;
228 
229 	/** Handle for XenStore interrupts. */
230 	xen_intr_handle_t xen_intr_handle;
231 
232 	/**
233 	 * Interrupt driven config hook allowing us to defer
234 	 * attaching children until interrupts (and thus communication
235 	 * with the XenStore service) are available.
236 	 */
237 	struct intr_config_hook xs_attachcb;
238 
239 	/**
240 	 * Xenstore is a user-space process that usually runs in Dom0,
241 	 * so if this domain is booting as Dom0, xenstore wont we accessible,
242 	 * and we have to defer the initialization of xenstore related
243 	 * devices to later (when xenstore is started).
244 	 */
245 	bool initialized;
246 
247 	/**
248 	 * Task to run when xenstore is initialized (Dom0 only), will
249 	 * take care of attaching xenstore related devices.
250 	 */
251 	struct task xs_late_init;
252 };
253 
254 /*-------------------------------- Global Data ------------------------------*/
255 static struct xs_softc xs;
256 
257 /*------------------------- Private Utility Functions -----------------------*/
258 
/**
 * Walk a buffer of back-to-back NUL terminated strings, counting them
 * and, when requested, recording where each one starts.
 *
 * The walk relies on strlen(), so the final string in the buffer must
 * itself be NUL terminated within the first len bytes.
 *
 * \param strings  Start of the buffer holding the strings.
 * \param dest	   Array receiving a pointer to each string, or NULL to
 *		   only count.  It must have room for every string found.
 * \param len	   Size, in bytes, of the buffer addressed by strings.
 *
 * \return  The number of strings found.
 */
static u_int
extract_strings(const char *strings, const char **dest, u_int len)
{
	const char *cur, *end;
	u_int count;

	count = 0;
	end = strings + len;
	cur = strings;
	while (cur < end) {
		if (dest != NULL)
			dest[count] = cur;
		count++;
		/* Step over this string and its terminator. */
		cur += strlen(cur) + 1;
	}

	return (count);
}
283 
284 /**
285  * Convert a contiguous buffer containing a series of NUL terminated
286  * strings into an array of pointers to strings.
287  *
288  * The returned pointer references the array of string pointers which
289  * is followed by the storage for the string data.  It is the client's
290  * responsibility to free this storage.
291  *
292  * The storage addressed by strings is free'd prior to split returning.
293  *
294  * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
295  * \param len	   The length of the buffer pointed to by strings.
296  * \param num	   The number of strings found and returned in the strings
297  *                 array.
298  *
299  * \return  An array of pointers to the strings found in the input buffer.
300  */
301 static const char **
302 split(char *strings, u_int len, u_int *num)
303 {
304 	const char **ret;
305 
306 	/* Protect against unterminated buffers. */
307 	if (len > 0)
308 		strings[len - 1] = '\0';
309 
310 	/* Count the strings. */
311 	*num = extract_strings(strings, /*dest*/NULL, len);
312 
313 	/* Transfer to one big alloc for easy freeing by the caller. */
314 	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
315 	memcpy(&ret[*num], strings, len);
316 	free(strings, M_XENSTORE);
317 
318 	/* Extract pointers to newly allocated array. */
319 	strings = (char *)&ret[*num];
320 	(void)extract_strings(strings, /*dest*/ret, len);
321 
322 	return (ret);
323 }
324 
325 /*------------------------- Public Utility Functions -------------------------*/
326 /*------- API comments for these methods can be found in xenstorevar.h -------*/
/*
 * Build "dir/name" in a freshly allocated, finished sbuf.  When name
 * is the empty string the result is just dir, with no trailing '/'.
 */
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *path = sbuf_new_auto();

	sbuf_cat(path, dir);
	if (*name != '\0') {
		sbuf_putc(path, '/');
		sbuf_cat(path, name);
	}
	sbuf_finish(path);

	return (path);
}
342 
343 /*-------------------- Low Level Communication Management --------------------*/
344 /**
345  * Interrupt handler for the XenStore event channel.
346  *
347  * XenStore reads and writes block on "xen_store" for buffer
348  * space.  Wakeup any blocking operations when the XenStore
349  * service has modified the queues.
350  */
351 static void
352 xs_intr(void * arg __unused /*__attribute__((unused))*/)
353 {
354 
355 	/* If xenstore has not been initialized, initialize it now */
356 	if (!xs.initialized) {
357 		xs.initialized = true;
358 		/*
359 		 * Since this task is probing and attaching devices we
360 		 * have to hold the Giant lock.
361 		 */
362 		taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
363 	}
364 
365 	/*
366 	 * Hold ring lock across wakeup so that clients
367 	 * cannot miss a wakeup.
368 	 */
369 	mtx_lock(&xs.ring_lock);
370 	wakeup(xen_store);
371 	mtx_unlock(&xs.ring_lock);
372 }
373 
374 /**
375  * Verify that the indexes for a ring are valid.
376  *
377  * The difference between the producer and consumer cannot
378  * exceed the size of the ring.
379  *
380  * \param cons  The consumer index for the ring to test.
381  * \param prod  The producer index for the ring to test.
382  *
383  * \retval 1  If indexes are in range.
384  * \retval 0  If the indexes are out of range.
385  */
386 static int
387 xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
388 {
389 
390 	return ((prod - cons) <= XENSTORE_RING_SIZE);
391 }
392 
393 /**
394  * Return a pointer to, and the length of, the contiguous
395  * free region available for output in a ring buffer.
396  *
397  * \param cons  The consumer index for the ring.
398  * \param prod  The producer index for the ring.
399  * \param buf   The base address of the ring's storage.
400  * \param len   The amount of contiguous storage available.
401  *
402  * \return  A pointer to the start location of the free region.
403  */
404 static void *
405 xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
406     char *buf, uint32_t *len)
407 {
408 
409 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
410 	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
411 		*len = XENSTORE_RING_SIZE - (prod - cons);
412 	return (buf + MASK_XENSTORE_IDX(prod));
413 }
414 
415 /**
416  * Return a pointer to, and the length of, the contiguous
417  * data available to read from a ring buffer.
418  *
419  * \param cons  The consumer index for the ring.
420  * \param prod  The producer index for the ring.
421  * \param buf   The base address of the ring's storage.
422  * \param len   The amount of contiguous data available to read.
423  *
424  * \return  A pointer to the start location of the available data.
425  */
426 static const void *
427 xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
428     const char *buf, uint32_t *len)
429 {
430 
431 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
432 	if ((prod - cons) < *len)
433 		*len = prod - cons;
434 	return (buf + MASK_XENSTORE_IDX(cons));
435 }
436 
437 /**
438  * Transmit data to the XenStore service.
439  *
440  * \param tdata  A pointer to the contiguous data to send.
441  * \param len    The amount of data to send.
442  *
443  * \return  On success 0, otherwise an errno value indicating the
444  *          cause of failure.
445  *
446  * \invariant  Called from thread context.
447  * \invariant  The buffer pointed to by tdata is at least len bytes
448  *             in length.
449  * \invariant  xs.request_mutex exclusively locked.
450  */
451 static int
452 xs_write_store(const void *tdata, unsigned len)
453 {
454 	XENSTORE_RING_IDX cons, prod;
455 	const char *data = (const char *)tdata;
456 	int error;
457 
458 	sx_assert(&xs.request_mutex, SX_XLOCKED);
459 	while (len != 0) {
460 		void *dst;
461 		u_int avail;
462 
463 		/* Hold lock so we can't miss wakeups should we block. */
464 		mtx_lock(&xs.ring_lock);
465 		cons = xen_store->req_cons;
466 		prod = xen_store->req_prod;
467 		if ((prod - cons) == XENSTORE_RING_SIZE) {
468 			/*
469 			 * Output ring is full. Wait for a ring event.
470 			 *
471 			 * Note that the events from both queues
472 			 * are combined, so being woken does not
473 			 * guarantee that data exist in the read
474 			 * ring.
475 			 *
476 			 * To simplify error recovery and the retry,
477 			 * we specify PDROP so our lock is *not* held
478 			 * when msleep returns.
479 			 */
480 			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
481 			     "xbwrite", /*timeout*/0);
482 			if (error && error != EWOULDBLOCK)
483 				return (error);
484 
485 			/* Try again. */
486 			continue;
487 		}
488 		mtx_unlock(&xs.ring_lock);
489 
490 		/* Verify queue sanity. */
491 		if (!xs_check_indexes(cons, prod)) {
492 			xen_store->req_cons = xen_store->req_prod = 0;
493 			return (EIO);
494 		}
495 
496 		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
497 		if (avail > len)
498 			avail = len;
499 
500 		memcpy(dst, data, avail);
501 		data += avail;
502 		len -= avail;
503 
504 		/*
505 		 * The store to the producer index, which indicates
506 		 * to the other side that new data has arrived, must
507 		 * be visible only after our copy of the data into the
508 		 * ring has completed.
509 		 */
510 		wmb();
511 		xen_store->req_prod += avail;
512 
513 		/*
514 		 * xen_intr_signal() implies mb(). The other side will see
515 		 * the change to req_prod at the time of the interrupt.
516 		 */
517 		xen_intr_signal(xs.xen_intr_handle);
518 	}
519 
520 	return (0);
521 }
522 
523 /**
524  * Receive data from the XenStore service.
525  *
526  * \param tdata  A pointer to the contiguous buffer to receive the data.
527  * \param len    The amount of data to receive.
528  *
529  * \return  On success 0, otherwise an errno value indicating the
530  *          cause of failure.
531  *
532  * \invariant  Called from thread context.
533  * \invariant  The buffer pointed to by tdata is at least len bytes
534  *             in length.
535  *
536  * \note xs_read does not perform any internal locking to guarantee
537  *       serial access to the incoming ring buffer.  However, there
538  *	 is only one context processing reads: xs_rcv_thread().
539  */
540 static int
541 xs_read_store(void *tdata, unsigned len)
542 {
543 	XENSTORE_RING_IDX cons, prod;
544 	char *data = (char *)tdata;
545 	int error;
546 
547 	while (len != 0) {
548 		u_int avail;
549 		const char *src;
550 
551 		/* Hold lock so we can't miss wakeups should we block. */
552 		mtx_lock(&xs.ring_lock);
553 		cons = xen_store->rsp_cons;
554 		prod = xen_store->rsp_prod;
555 		if (cons == prod) {
556 			/*
557 			 * Nothing to read. Wait for a ring event.
558 			 *
559 			 * Note that the events from both queues
560 			 * are combined, so being woken does not
561 			 * guarantee that data exist in the read
562 			 * ring.
563 			 *
564 			 * To simplify error recovery and the retry,
565 			 * we specify PDROP so our lock is *not* held
566 			 * when msleep returns.
567 			 */
568 			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
569 			    "xbread", /*timeout*/0);
570 			if (error && error != EWOULDBLOCK)
571 				return (error);
572 			continue;
573 		}
574 		mtx_unlock(&xs.ring_lock);
575 
576 		/* Verify queue sanity. */
577 		if (!xs_check_indexes(cons, prod)) {
578 			xen_store->rsp_cons = xen_store->rsp_prod = 0;
579 			return (EIO);
580 		}
581 
582 		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
583 		if (avail > len)
584 			avail = len;
585 
586 		/*
587 		 * Insure the data we read is related to the indexes
588 		 * we read above.
589 		 */
590 		rmb();
591 
592 		memcpy(data, src, avail);
593 		data += avail;
594 		len -= avail;
595 
596 		/*
597 		 * Insure that the producer of this ring does not see
598 		 * the ring space as free until after we have copied it
599 		 * out.
600 		 */
601 		mb();
602 		xen_store->rsp_cons += avail;
603 
604 		/*
605 		 * xen_intr_signal() implies mb(). The producer will see
606 		 * the updated consumer index when the event is delivered.
607 		 */
608 		xen_intr_signal(xs.xen_intr_handle);
609 	}
610 
611 	return (0);
612 }
613 
614 /*----------------------- Received Message Processing ------------------------*/
615 /**
616  * Block reading the next message from the XenStore service and
617  * process the result.
618  *
619  * \param type  The returned type of the XenStore message received.
620  *
621  * \return  0 on success.  Otherwise an errno value indicating the
622  *          type of failure encountered.
623  */
624 static int
625 xs_process_msg(enum xsd_sockmsg_type *type)
626 {
627 	struct xs_stored_msg *msg;
628 	char *body;
629 	int error;
630 
631 	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
632 	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
633 	if (error) {
634 		free(msg, M_XENSTORE);
635 		return (error);
636 	}
637 
638 	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
639 	error = xs_read_store(body, msg->hdr.len);
640 	if (error) {
641 		free(body, M_XENSTORE);
642 		free(msg, M_XENSTORE);
643 		return (error);
644 	}
645 	body[msg->hdr.len] = '\0';
646 
647 	*type = msg->hdr.type;
648 	if (msg->hdr.type == XS_WATCH_EVENT) {
649 		msg->u.watch.vec = split(body, msg->hdr.len,
650 		    &msg->u.watch.vec_size);
651 
652 		mtx_lock(&xs.registered_watches_lock);
653 		msg->u.watch.handle = find_watch(
654 		    msg->u.watch.vec[XS_WATCH_TOKEN]);
655 		mtx_lock(&xs.watch_events_lock);
656 		if (msg->u.watch.handle != NULL &&
657 		    (!msg->u.watch.handle->max_pending ||
658 		    msg->u.watch.handle->pending <
659 		    msg->u.watch.handle->max_pending)) {
660 			msg->u.watch.handle->pending++;
661 			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
662 			wakeup(&xs.watch_events);
663 			mtx_unlock(&xs.watch_events_lock);
664 		} else {
665 			mtx_unlock(&xs.watch_events_lock);
666 			free(msg->u.watch.vec, M_XENSTORE);
667 			free(msg, M_XENSTORE);
668 		}
669 		mtx_unlock(&xs.registered_watches_lock);
670 	} else {
671 		msg->u.reply.body = body;
672 		mtx_lock(&xs.reply_lock);
673 		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
674 		wakeup(&xs.reply_list);
675 		mtx_unlock(&xs.reply_lock);
676 	}
677 
678 	return (0);
679 }
680 
681 /**
682  * Thread body of the XenStore receive thread.
683  *
684  * This thread blocks waiting for data from the XenStore service
685  * and processes and received messages.
686  */
687 static void
688 xs_rcv_thread(void *arg __unused)
689 {
690 	int error;
691 	enum xsd_sockmsg_type type;
692 
693 	for (;;) {
694 		error = xs_process_msg(&type);
695 		if (error)
696 			printf("XENSTORE error %d while reading message\n",
697 			    error);
698 	}
699 }
700 
701 /*---------------- XenStore Message Request/Reply Processing -----------------*/
702 #define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
703 
704 /**
705  * Convert a XenStore error string into an errno number.
706  *
707  * \param errorstring  The error string to convert.
708  *
709  * \return  The errno best matching the input string.
710  *
711  * \note Unknown error strings are converted to EINVAL.
712  */
713 static int
714 xs_get_error(const char *errorstring)
715 {
716 	u_int i;
717 
718 	for (i = 0; i < xsd_error_count; i++) {
719 		if (!strcmp(errorstring, xsd_errors[i].errstring))
720 			return (xsd_errors[i].errnum);
721 	}
722 	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
723 	    errorstring);
724 	return (EINVAL);
725 }
726 
727 /**
728  * Block waiting for a reply to a message request.
729  *
730  * \param type	  The returned type of the reply.
731  * \param len	  The returned body length of the reply.
732  * \param result  The returned body of the reply.
733  *
734  * \return  0 on success.  Otherwise an errno indicating the
735  *          cause of failure.
736  */
737 static int
738 xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
739 {
740 	struct xs_stored_msg *msg;
741 	char *body;
742 	int error;
743 
744 	mtx_lock(&xs.reply_lock);
745 	while (TAILQ_EMPTY(&xs.reply_list)) {
746 		error = mtx_sleep(&xs.reply_list, &xs.reply_lock, 0, "xswait",
747 		    hz/10);
748 		if (error && error != EWOULDBLOCK) {
749 			mtx_unlock(&xs.reply_lock);
750 			return (error);
751 		}
752 	}
753 	msg = TAILQ_FIRST(&xs.reply_list);
754 	TAILQ_REMOVE(&xs.reply_list, msg, list);
755 	mtx_unlock(&xs.reply_lock);
756 
757 	*type = msg->hdr.type;
758 	if (len)
759 		*len = msg->hdr.len;
760 	body = msg->u.reply.body;
761 
762 	free(msg, M_XENSTORE);
763 	*result = body;
764 	return (0);
765 }
766 
767 /**
768  * Pass-thru interface for XenStore access by userland processes
769  * via the XenStore device.
770  *
771  * Reply type and length data are returned by overwriting these
772  * fields in the passed in request message.
773  *
774  * \param msg	  A properly formatted message to transmit to
775  *		  the XenStore service.
776  * \param result  The returned body of the reply.
777  *
778  * \return  0 on success.  Otherwise an errno indicating the cause
779  *          of failure.
780  *
781  * \note The returned result is provided in malloced storage and thus
782  *       must be free'd by the caller with 'free(result, M_XENSTORE);
783  */
784 int
785 xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
786 {
787 	int error;
788 
789 	sx_xlock(&xs.request_mutex);
790 	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
791 		error = xs_read_reply(&msg->type, &msg->len, result);
792 	sx_xunlock(&xs.request_mutex);
793 
794 	return (error);
795 }
796 
797 /**
798  * Send a message with an optionally muti-part body to the XenStore service.
799  *
800  * \param t              The transaction to use for this request.
801  * \param request_type   The type of message to send.
802  * \param iovec          Pointers to the body sections of the request.
803  * \param num_vecs       The number of body sections in the request.
804  * \param len            The returned length of the reply.
805  * \param result         The returned body of the reply.
806  *
807  * \return  0 on success.  Otherwise an errno indicating
808  *          the cause of failure.
809  *
810  * \note The returned result is provided in malloced storage and thus
811  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
812  */
813 static int
814 xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
815     const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
816 {
817 	struct xsd_sockmsg msg;
818 	void *ret = NULL;
819 	u_int i;
820 	int error;
821 
822 	msg.tx_id = t.id;
823 	msg.req_id = 0;
824 	msg.type = request_type;
825 	msg.len = 0;
826 	for (i = 0; i < num_vecs; i++)
827 		msg.len += iovec[i].iov_len;
828 
829 	sx_xlock(&xs.request_mutex);
830 	error = xs_write_store(&msg, sizeof(msg));
831 	if (error) {
832 		printf("xs_talkv failed %d\n", error);
833 		goto error_lock_held;
834 	}
835 
836 	for (i = 0; i < num_vecs; i++) {
837 		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
838 		if (error) {
839 			printf("xs_talkv failed %d\n", error);
840 			goto error_lock_held;
841 		}
842 	}
843 
844 	error = xs_read_reply(&msg.type, len, &ret);
845 
846 error_lock_held:
847 	sx_xunlock(&xs.request_mutex);
848 	if (error)
849 		return (error);
850 
851 	if (msg.type == XS_ERROR) {
852 		error = xs_get_error(ret);
853 		free(ret, M_XENSTORE);
854 		return (error);
855 	}
856 
857 	/* Reply is either error or an echo of our request message type. */
858 	KASSERT(msg.type == request_type, ("bad xenstore message type"));
859 
860 	if (result)
861 		*result = ret;
862 	else
863 		free(ret, M_XENSTORE);
864 
865 	return (0);
866 }
867 
868 /**
869  * Wrapper for xs_talkv allowing easy transmission of a message with
870  * a single, contiguous, message body.
871  *
872  * \param t              The transaction to use for this request.
873  * \param request_type   The type of message to send.
874  * \param body           The body of the request.
875  * \param len            The returned length of the reply.
876  * \param result         The returned body of the reply.
877  *
878  * \return  0 on success.  Otherwise an errno indicating
879  *          the cause of failure.
880  *
881  * \note The returned result is provided in malloced storage and thus
882  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
883  */
884 static int
885 xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
886     const char *body, u_int *len, void **result)
887 {
888 	struct iovec iovec;
889 
890 	iovec.iov_base = (void *)(uintptr_t)body;
891 	iovec.iov_len = strlen(body) + 1;
892 
893 	return (xs_talkv(t, request_type, &iovec, 1, len, result));
894 }
895 
896 /*------------------------- XenStore Watch Support ---------------------------*/
897 /**
898  * Transmit a watch request to the XenStore service.
899  *
900  * \param path    The path in the XenStore to watch.
901  * \param tocken  A unique identifier for this watch.
902  *
903  * \return  0 on success.  Otherwise an errno indicating the
904  *          cause of failure.
905  */
906 static int
907 xs_watch(const char *path, const char *token)
908 {
909 	struct iovec iov[2];
910 
911 	iov[0].iov_base = (void *)(uintptr_t) path;
912 	iov[0].iov_len = strlen(path) + 1;
913 	iov[1].iov_base = (void *)(uintptr_t) token;
914 	iov[1].iov_len = strlen(token) + 1;
915 
916 	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
917 }
918 
919 /**
920  * Transmit an uwatch request to the XenStore service.
921  *
922  * \param path    The path in the XenStore to watch.
923  * \param tocken  A unique identifier for this watch.
924  *
925  * \return  0 on success.  Otherwise an errno indicating the
926  *          cause of failure.
927  */
928 static int
929 xs_unwatch(const char *path, const char *token)
930 {
931 	struct iovec iov[2];
932 
933 	iov[0].iov_base = (void *)(uintptr_t) path;
934 	iov[0].iov_len = strlen(path) + 1;
935 	iov[1].iov_base = (void *)(uintptr_t) token;
936 	iov[1].iov_len = strlen(token) + 1;
937 
938 	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
939 }
940 
941 /**
942  * Convert from watch token (unique identifier) to the associated
943  * internal tracking structure for this watch.
944  *
945  * \param tocken  The unique identifier for the watch to find.
946  *
947  * \return  A pointer to the found watch structure or NULL.
948  */
949 static struct xs_watch *
950 find_watch(const char *token)
951 {
952 	struct xs_watch *i, *cmp;
953 
954 	cmp = (void *)strtoul(token, NULL, 16);
955 
956 	LIST_FOREACH(i, &xs.registered_watches, list)
957 		if (i == cmp)
958 			return (i);
959 
960 	return (NULL);
961 }
962 
963 /**
964  * Thread body of the XenStore watch event dispatch thread.
965  */
966 static void
967 xenwatch_thread(void *unused)
968 {
969 	struct xs_stored_msg *msg;
970 
971 	for (;;) {
972 		mtx_lock(&xs.watch_events_lock);
973 		while (TAILQ_EMPTY(&xs.watch_events))
974 			mtx_sleep(&xs.watch_events,
975 			    &xs.watch_events_lock,
976 			    PWAIT | PCATCH, "waitev", hz/10);
977 
978 		mtx_unlock(&xs.watch_events_lock);
979 		sx_xlock(&xs.xenwatch_mutex);
980 
981 		mtx_lock(&xs.watch_events_lock);
982 		msg = TAILQ_FIRST(&xs.watch_events);
983 		if (msg) {
984 			TAILQ_REMOVE(&xs.watch_events, msg, list);
985 			msg->u.watch.handle->pending--;
986 		}
987 		mtx_unlock(&xs.watch_events_lock);
988 
989 		if (msg != NULL) {
990 			/*
991 			 * XXX There are messages coming in with a NULL
992 			 * XXX callback.  This deserves further investigation;
993 			 * XXX the workaround here simply prevents the kernel
994 			 * XXX from panic'ing on startup.
995 			 */
996 			if (msg->u.watch.handle->callback != NULL)
997 				msg->u.watch.handle->callback(
998 					msg->u.watch.handle,
999 					(const char **)msg->u.watch.vec,
1000 					msg->u.watch.vec_size);
1001 			free(msg->u.watch.vec, M_XENSTORE);
1002 			free(msg, M_XENSTORE);
1003 		}
1004 
1005 		sx_xunlock(&xs.xenwatch_mutex);
1006 	}
1007 }
1008 
1009 /*----------- XenStore Configuration, Initialization, and Control ------------*/
1010 /**
1011  * Setup communication channels with the XenStore service.
1012  *
1013  * \return  On success, 0. Otherwise an errno value indicating the
1014  *          type of failure.
1015  */
1016 static int
1017 xs_init_comms(void)
1018 {
1019 	int error;
1020 
1021 	if (xen_store->rsp_prod != xen_store->rsp_cons) {
1022 		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1023 		    "(%08x:%08x): fixing up\n",
1024 		    xen_store->rsp_cons, xen_store->rsp_prod);
1025 		xen_store->rsp_cons = xen_store->rsp_prod;
1026 	}
1027 
1028 	xen_intr_unbind(&xs.xen_intr_handle);
1029 
1030 	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
1031 	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
1032 	    &xs.xen_intr_handle);
1033 	if (error) {
1034 		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1035 		return (error);
1036 	}
1037 
1038 	return (0);
1039 }
1040 
1041 /*------------------ Private Device Attachment Functions  --------------------*/
1042 static void
1043 xs_identify(driver_t *driver, device_t parent)
1044 {
1045 
1046 	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
1047 }
1048 
1049 /**
1050  * Probe for the existence of the XenStore.
1051  *
1052  * \param dev
1053  */
1054 static int
1055 xs_probe(device_t dev)
1056 {
1057 	/*
1058 	 * We are either operating within a PV kernel or being probed
1059 	 * as the child of the successfully attached xenpci device.
1060 	 * Thus we are in a Xen environment and there will be a XenStore.
1061 	 * Unconditionally return success.
1062 	 */
1063 	device_set_desc(dev, "XenStore");
1064 	return (BUS_PROBE_NOWILDCARD);
1065 }
1066 
1067 static void
1068 xs_attach_deferred(void *arg)
1069 {
1070 
1071 	bus_generic_probe(xs.xs_dev);
1072 	bus_generic_attach(xs.xs_dev);
1073 
1074 	config_intrhook_disestablish(&xs.xs_attachcb);
1075 }
1076 
1077 static void
1078 xs_attach_late(void *arg, int pending)
1079 {
1080 
1081 	KASSERT((pending == 1), ("xs late attach queued several times"));
1082 	bus_generic_probe(xs.xs_dev);
1083 	bus_generic_attach(xs.xs_dev);
1084 }
1085 
1086 /**
1087  * Attach to the XenStore.
1088  *
1089  * This routine also prepares for the probe/attach of drivers that rely
1090  * on the XenStore.
1091  */
1092 static int
1093 xs_attach(device_t dev)
1094 {
1095 	int error;
1096 
1097 	/* Allow us to get device_t from softc and vice-versa. */
1098 	xs.xs_dev = dev;
1099 	device_set_softc(dev, &xs);
1100 
1101 	/* Initialize the interface to xenstore. */
1102 	struct proc *p;
1103 
1104 	xs.initialized = false;
1105 	xs.evtchn = xen_get_xenstore_evtchn();
1106 	if (xs.evtchn == 0) {
1107 		struct evtchn_alloc_unbound alloc_unbound;
1108 
1109 		/* Allocate a local event channel for xenstore */
1110 		alloc_unbound.dom = DOMID_SELF;
1111 		alloc_unbound.remote_dom = DOMID_SELF;
1112 		error = HYPERVISOR_event_channel_op(
1113 		    EVTCHNOP_alloc_unbound, &alloc_unbound);
1114 		if (error != 0)
1115 			panic(
1116 			   "unable to alloc event channel for Dom0: %d",
1117 			    error);
1118 
1119 		xs.evtchn = alloc_unbound.port;
1120 
1121 		/* Allocate memory for the xs shared ring */
1122 		xen_store = malloc(PAGE_SIZE, M_XENSTORE, M_WAITOK | M_ZERO);
1123 		xs.gpfn = atop(pmap_kextract((vm_offset_t)xen_store));
1124 	} else {
1125 		xs.gpfn = xen_get_xenstore_mfn();
1126 		xen_store = pmap_mapdev_attr(ptoa(xs.gpfn), PAGE_SIZE,
1127 		    VM_MEMATTR_XEN);
1128 		xs.initialized = true;
1129 	}
1130 
1131 	TAILQ_INIT(&xs.reply_list);
1132 	TAILQ_INIT(&xs.watch_events);
1133 
1134 	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1135 	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1136 	sx_init(&xs.xenwatch_mutex, "xenwatch");
1137 	sx_init(&xs.request_mutex, "xenstore request");
1138 	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1139 	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1140 
1141 	/* Initialize the shared memory rings to talk to xenstored */
1142 	error = xs_init_comms();
1143 	if (error)
1144 		return (error);
1145 
1146 	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1147 	    0, "xenwatch");
1148 	if (error)
1149 		return (error);
1150 	xs.xenwatch_pid = p->p_pid;
1151 
1152 	error = kproc_create(xs_rcv_thread, NULL, NULL,
1153 	    RFHIGHPID, 0, "xenstore_rcv");
1154 
1155 	xs.xs_attachcb.ich_func = xs_attach_deferred;
1156 	xs.xs_attachcb.ich_arg = NULL;
1157 	if (xs.initialized) {
1158 		config_intrhook_establish(&xs.xs_attachcb);
1159 	} else {
1160 		TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
1161 	}
1162 
1163 	return (error);
1164 }
1165 
1166 /**
1167  * Prepare for suspension of this VM by halting XenStore access after
1168  * all transactions and individual requests have completed.
1169  */
1170 static int
1171 xs_suspend(device_t dev)
1172 {
1173 	int error;
1174 
1175 	/* Suspend child Xen devices. */
1176 	error = bus_generic_suspend(dev);
1177 	if (error != 0)
1178 		return (error);
1179 
1180 	sx_xlock(&xs.request_mutex);
1181 
1182 	return (0);
1183 }
1184 
1185 /**
1186  * Resume XenStore operations after this VM is resumed.
1187  */
1188 static int
1189 xs_resume(device_t dev __unused)
1190 {
1191 	struct xs_watch *watch;
1192 	char token[sizeof(watch) * 2 + 1];
1193 
1194 	xs_init_comms();
1195 
1196 	sx_xunlock(&xs.request_mutex);
1197 
1198 	/*
1199 	 * NB: since xenstore childs have not been resumed yet, there's
1200 	 * no need to hold any watch mutex. Having clients try to add or
1201 	 * remove watches at this point (before xenstore is resumed) is
1202 	 * clearly a violantion of the resume order.
1203 	 */
1204 	LIST_FOREACH(watch, &xs.registered_watches, list) {
1205 		sprintf(token, "%lX", (long)watch);
1206 		xs_watch(watch->node, token);
1207 	}
1208 
1209 	/* Resume child Xen devices. */
1210 	bus_generic_resume(dev);
1211 
1212 	return (0);
1213 }
1214 
1215 /*-------------------- Private Device Attachment Data  -----------------------*/
1216 static device_method_t xenstore_methods[] = {
1217 	/* Device interface */
1218 	DEVMETHOD(device_identify,	xs_identify),
1219 	DEVMETHOD(device_probe,         xs_probe),
1220 	DEVMETHOD(device_attach,        xs_attach),
1221 	DEVMETHOD(device_detach,        bus_generic_detach),
1222 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
1223 	DEVMETHOD(device_suspend,       xs_suspend),
1224 	DEVMETHOD(device_resume,        xs_resume),
1225 
1226 	/* Bus interface */
1227 	DEVMETHOD(bus_add_child,        bus_generic_add_child),
1228 	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
1229 	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
1230 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
1231 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
1232 
1233 	DEVMETHOD_END
1234 };
1235 
1236 DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
1237 
1238 DRIVER_MODULE(xenstore, xenpv, xenstore_driver, 0, 0);
1239 
1240 /*------------------------------- Sysctl Data --------------------------------*/
1241 /* XXX Shouldn't the node be somewhere else? */
1242 SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1243     "Xen");
1244 SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
1245 SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1246 
1247 /*-------------------------------- Public API --------------------------------*/
1248 /*------- API comments for these methods can be found in xenstorevar.h -------*/
1249 bool
1250 xs_initialized(void)
1251 {
1252 
1253 	return (xs.initialized);
1254 }
1255 
1256 evtchn_port_t
1257 xs_evtchn(void)
1258 {
1259 
1260     return (xs.evtchn);
1261 }
1262 
1263 vm_paddr_t
1264 xs_address(void)
1265 {
1266 
1267     return (ptoa(xs.gpfn));
1268 }
1269 
1270 int
1271 xs_directory(struct xs_transaction t, const char *dir, const char *node,
1272     u_int *num, const char ***result)
1273 {
1274 	struct sbuf *path;
1275 	char *strings;
1276 	u_int len = 0;
1277 	int error;
1278 
1279 	path = xs_join(dir, node);
1280 	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1281 	    (void **)&strings);
1282 	sbuf_delete(path);
1283 	if (error)
1284 		return (error);
1285 
1286 	*result = split(strings, len, num);
1287 
1288 	return (0);
1289 }
1290 
1291 int
1292 xs_exists(struct xs_transaction t, const char *dir, const char *node)
1293 {
1294 	const char **d;
1295 	int error, dir_n;
1296 
1297 	error = xs_directory(t, dir, node, &dir_n, &d);
1298 	if (error)
1299 		return (0);
1300 	free(d, M_XENSTORE);
1301 	return (1);
1302 }
1303 
1304 int
1305 xs_read(struct xs_transaction t, const char *dir, const char *node,
1306     u_int *len, void **result)
1307 {
1308 	struct sbuf *path;
1309 	void *ret;
1310 	int error;
1311 
1312 	path = xs_join(dir, node);
1313 	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
1314 	sbuf_delete(path);
1315 	if (error)
1316 		return (error);
1317 	*result = ret;
1318 	return (0);
1319 }
1320 
1321 int
1322 xs_write(struct xs_transaction t, const char *dir, const char *node,
1323     const char *string)
1324 {
1325 	struct sbuf *path;
1326 	struct iovec iovec[2];
1327 	int error;
1328 
1329 	path = xs_join(dir, node);
1330 
1331 	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1332 	iovec[0].iov_len = sbuf_len(path) + 1;
1333 	iovec[1].iov_base = (void *)(uintptr_t) string;
1334 	iovec[1].iov_len = strlen(string);
1335 
1336 	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
1337 	sbuf_delete(path);
1338 
1339 	return (error);
1340 }
1341 
1342 int
1343 xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1344 {
1345 	struct sbuf *path;
1346 	int ret;
1347 
1348 	path = xs_join(dir, node);
1349 	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
1350 	sbuf_delete(path);
1351 
1352 	return (ret);
1353 }
1354 
1355 int
1356 xs_rm(struct xs_transaction t, const char *dir, const char *node)
1357 {
1358 	struct sbuf *path;
1359 	int ret;
1360 
1361 	path = xs_join(dir, node);
1362 	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
1363 	sbuf_delete(path);
1364 
1365 	return (ret);
1366 }
1367 
1368 int
1369 xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
1370 {
1371 	struct xs_transaction local_xbt;
1372 	struct sbuf *root_path_sbuf;
1373 	struct sbuf *cur_path_sbuf;
1374 	char *root_path;
1375 	char *cur_path;
1376 	const char **dir;
1377 	int error;
1378 
1379 retry:
1380 	root_path_sbuf = xs_join(base, node);
1381 	cur_path_sbuf  = xs_join(base, node);
1382 	root_path      = sbuf_data(root_path_sbuf);
1383 	cur_path       = sbuf_data(cur_path_sbuf);
1384 	dir            = NULL;
1385 	local_xbt.id   = 0;
1386 
1387 	if (xbt.id == 0) {
1388 		error = xs_transaction_start(&local_xbt);
1389 		if (error != 0)
1390 			goto out;
1391 		xbt = local_xbt;
1392 	}
1393 
1394 	while (1) {
1395 		u_int count;
1396 		u_int i;
1397 
1398 		error = xs_directory(xbt, cur_path, "", &count, &dir);
1399 		if (error)
1400 			goto out;
1401 
1402 		for (i = 0; i < count; i++) {
1403 			error = xs_rm(xbt, cur_path, dir[i]);
1404 			if (error == ENOTEMPTY) {
1405 				struct sbuf *push_dir;
1406 
1407 				/*
1408 				 * Descend to clear out this sub directory.
1409 				 * We'll return to cur_dir once push_dir
1410 				 * is empty.
1411 				 */
1412 				push_dir = xs_join(cur_path, dir[i]);
1413 				sbuf_delete(cur_path_sbuf);
1414 				cur_path_sbuf = push_dir;
1415 				cur_path = sbuf_data(cur_path_sbuf);
1416 				break;
1417 			} else if (error != 0) {
1418 				goto out;
1419 			}
1420 		}
1421 
1422 		free(dir, M_XENSTORE);
1423 		dir = NULL;
1424 
1425 		if (i == count) {
1426 			char *last_slash;
1427 
1428 			/* Directory is empty.  It is now safe to remove. */
1429 			error = xs_rm(xbt, cur_path, "");
1430 			if (error != 0)
1431 				goto out;
1432 
1433 			if (!strcmp(cur_path, root_path))
1434 				break;
1435 
1436 			/* Return to processing the parent directory. */
1437 			last_slash = strrchr(cur_path, '/');
1438 			KASSERT(last_slash != NULL,
1439 				("xs_rm_tree: mangled path %s", cur_path));
1440 			*last_slash = '\0';
1441 		}
1442 	}
1443 
1444 out:
1445 	sbuf_delete(cur_path_sbuf);
1446 	sbuf_delete(root_path_sbuf);
1447 	if (dir != NULL)
1448 		free(dir, M_XENSTORE);
1449 
1450 	if (local_xbt.id != 0) {
1451 		int terror;
1452 
1453 		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
1454 		xbt.id = 0;
1455 		if (terror == EAGAIN && error == 0)
1456 			goto retry;
1457 	}
1458 	return (error);
1459 }
1460 
1461 int
1462 xs_transaction_start(struct xs_transaction *t)
1463 {
1464 	char *id_str;
1465 	int error;
1466 
1467 	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
1468 	    (void **)&id_str);
1469 	if (error == 0) {
1470 		t->id = strtoul(id_str, NULL, 0);
1471 		free(id_str, M_XENSTORE);
1472 	}
1473 	return (error);
1474 }
1475 
1476 int
1477 xs_transaction_end(struct xs_transaction t, int abort)
1478 {
1479 	char abortstr[2];
1480 
1481 	if (abort)
1482 		strcpy(abortstr, "F");
1483 	else
1484 		strcpy(abortstr, "T");
1485 
1486 	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
1487 }
1488 
1489 int
1490 xs_scanf(struct xs_transaction t, const char *dir, const char *node,
1491      int *scancountp, const char *fmt, ...)
1492 {
1493 	va_list ap;
1494 	int error, ns;
1495 	char *val;
1496 
1497 	error = xs_read(t, dir, node, NULL, (void **) &val);
1498 	if (error)
1499 		return (error);
1500 
1501 	va_start(ap, fmt);
1502 	ns = vsscanf(val, fmt, ap);
1503 	va_end(ap);
1504 	free(val, M_XENSTORE);
1505 	/* Distinctive errno. */
1506 	if (ns == 0)
1507 		return (ERANGE);
1508 	if (scancountp)
1509 		*scancountp = ns;
1510 	return (0);
1511 }
1512 
1513 int
1514 xs_vprintf(struct xs_transaction t,
1515     const char *dir, const char *node, const char *fmt, va_list ap)
1516 {
1517 	struct sbuf *sb;
1518 	int error;
1519 
1520 	sb = sbuf_new_auto();
1521 	sbuf_vprintf(sb, fmt, ap);
1522 	sbuf_finish(sb);
1523 	error = xs_write(t, dir, node, sbuf_data(sb));
1524 	sbuf_delete(sb);
1525 
1526 	return (error);
1527 }
1528 
1529 int
1530 xs_printf(struct xs_transaction t, const char *dir, const char *node,
1531      const char *fmt, ...)
1532 {
1533 	va_list ap;
1534 	int error;
1535 
1536 	va_start(ap, fmt);
1537 	error = xs_vprintf(t, dir, node, fmt, ap);
1538 	va_end(ap);
1539 
1540 	return (error);
1541 }
1542 
1543 int
1544 xs_gather(struct xs_transaction t, const char *dir, ...)
1545 {
1546 	va_list ap;
1547 	const char *name;
1548 	int error;
1549 
1550 	va_start(ap, dir);
1551 	error = 0;
1552 	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
1553 		const char *fmt = va_arg(ap, char *);
1554 		void *result = va_arg(ap, void *);
1555 		char *p;
1556 
1557 		error = xs_read(t, dir, name, NULL, (void **) &p);
1558 		if (error)
1559 			break;
1560 
1561 		if (fmt) {
1562 			if (sscanf(p, fmt, result) == 0)
1563 				error = EINVAL;
1564 			free(p, M_XENSTORE);
1565 		} else
1566 			*(char **)result = p;
1567 	}
1568 	va_end(ap);
1569 
1570 	return (error);
1571 }
1572 
1573 int
1574 xs_register_watch(struct xs_watch *watch)
1575 {
1576 	/* Pointer in ascii is the token. */
1577 	char token[sizeof(watch) * 2 + 1];
1578 	int error;
1579 
1580 	watch->pending = 0;
1581 	sprintf(token, "%lX", (long)watch);
1582 
1583 	mtx_lock(&xs.registered_watches_lock);
1584 	KASSERT(find_watch(token) == NULL, ("watch already registered"));
1585 	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
1586 	mtx_unlock(&xs.registered_watches_lock);
1587 
1588 	error = xs_watch(watch->node, token);
1589 
1590 	/* Ignore errors due to multiple registration. */
1591 	if (error == EEXIST)
1592 		error = 0;
1593 
1594 	if (error != 0) {
1595 		mtx_lock(&xs.registered_watches_lock);
1596 		LIST_REMOVE(watch, list);
1597 		mtx_unlock(&xs.registered_watches_lock);
1598 	}
1599 
1600 	return (error);
1601 }
1602 
1603 void
1604 xs_unregister_watch(struct xs_watch *watch)
1605 {
1606 	struct xs_stored_msg *msg, *tmp;
1607 	char token[sizeof(watch) * 2 + 1];
1608 	int error;
1609 
1610 	sprintf(token, "%lX", (long)watch);
1611 
1612 	mtx_lock(&xs.registered_watches_lock);
1613 	if (find_watch(token) == NULL) {
1614 		mtx_unlock(&xs.registered_watches_lock);
1615 		return;
1616 	}
1617 	LIST_REMOVE(watch, list);
1618 	mtx_unlock(&xs.registered_watches_lock);
1619 
1620 	error = xs_unwatch(watch->node, token);
1621 	if (error)
1622 		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
1623 		    watch->node, error);
1624 
1625 	/* Cancel pending watch events. */
1626 	mtx_lock(&xs.watch_events_lock);
1627 	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
1628 		if (msg->u.watch.handle != watch)
1629 			continue;
1630 		TAILQ_REMOVE(&xs.watch_events, msg, list);
1631 		free(msg->u.watch.vec, M_XENSTORE);
1632 		free(msg, M_XENSTORE);
1633 	}
1634 	mtx_unlock(&xs.watch_events_lock);
1635 
1636 	/* Flush any currently-executing callback, unless we are it. :-) */
1637 	if (curproc->p_pid != xs.xenwatch_pid) {
1638 		sx_xlock(&xs.xenwatch_mutex);
1639 		sx_xunlock(&xs.xenwatch_mutex);
1640 	}
1641 }
1642 
1643 void
1644 xs_lock(void)
1645 {
1646 
1647 	sx_xlock(&xs.request_mutex);
1648 	return;
1649 }
1650 
1651 void
1652 xs_unlock(void)
1653 {
1654 
1655 	sx_xunlock(&xs.request_mutex);
1656 	return;
1657 }
1658