xref: /freebsd/sys/dev/xen/xenstore/xenstore.c (revision 5f8f664619ecc3afcbc3573a64a814646b3584ba)
1 /******************************************************************************
2  * xenstore.c
3  *
4  * Low-level kernel interface to the XenStore.
5  *
6  * Copyright (C) 2005 Rusty Russell, IBM Corporation
7  * Copyright (C) 2009,2010 Spectra Logic Corporation
8  *
9  * This file may be distributed separately from the Linux kernel, or
10  * incorporated into other software packages, subject to the following license:
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this source file (the "Software"), to deal in the Software without
14  * restriction, including without limitation the rights to use, copy, modify,
15  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16  * and to permit persons to whom the Software is furnished to do so, subject to
17  * the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/module.h>
40 #include <sys/mutex.h>
41 #include <sys/sx.h>
42 #include <sys/syslog.h>
43 #include <sys/malloc.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/kthread.h>
47 #include <sys/sbuf.h>
48 #include <sys/sysctl.h>
49 #include <sys/uio.h>
50 #include <sys/unistd.h>
51 #include <sys/queue.h>
52 #include <sys/taskqueue.h>
53 
54 #include <machine/stdarg.h>
55 
56 #include <xen/xen-os.h>
57 #include <xen/hypervisor.h>
58 #include <xen/xen_intr.h>
59 
60 #include <xen/interface/hvm/params.h>
61 #include <xen/hvm.h>
62 
63 #include <xen/xenstore/xenstorevar.h>
64 #include <xen/xenstore/xenstore_internal.h>
65 
66 #include <vm/vm.h>
67 #include <vm/pmap.h>
68 
69 /**
70  * \file xenstore.c
71  * \brief XenStore interface
72  *
73  * The XenStore interface is a simple storage system that is a means of
74  * communicating state and configuration data between the Xen Domain 0
75  * and the various guest domains.  All configuration data other than
76  * a small amount of essential information required during the early
77  * boot process of launching a Xen aware guest, is managed using the
78  * XenStore.
79  *
80  * The XenStore is ASCII string based, and has a structure and semantics
81  * similar to a filesystem.  There are files and directories, the directories
82  * able to contain files or other directories.  The depth of the hierarchy
83  * is only limited by the XenStore's maximum path length.
84  *
85  * The communication channel between the XenStore service and other
86  * domains is via two, guest specific, ring buffers in a shared memory
87  * area.  One ring buffer is used for communicating in each direction.
88  * The grant table references for this shared memory are given to the
89  * guest either via the xen_start_info structure for a fully para-
90  * virtualized guest, or via HVM hypercalls for a hardware virtualized
91  * guest.
92  *
93  * The XenStore communication relies on an event channel and thus
94  * interrupts.  For this reason, the attachment of the XenStore
95  * relies on an interrupt driven configuration hook to hold off
96  * boot processing until communication with the XenStore service
97  * can be established.
98  *
99  * Several Xen services depend on the XenStore, most notably the
100  * XenBus used to discover and manage Xen devices.  These services
101  * are implemented as NewBus child attachments to a bus exported
102  * by this XenStore driver.
103  */
104 
105 static struct xs_watch *find_watch(const char *token);	/* Forward declaration: xs_process_msg() calls find_watch() ahead of its definition. */
106 
107 MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");	/* malloc type used for the message buffers and results allocated below. */
108 
109 /**
110  * Pointer to shared memory communication structures allowing us
111  * to communicate with the XenStore service.
112  *
113  * When operating in full PV mode, this pointer is set early in kernel
114  * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
115  * to get the guest frame number for the shared page and then map it
116  * into kva.  See xs_init() for details.
 *
 * The address stored here is also the sleep/wakeup channel used by
 * xs_intr(), xs_read_store() and xs_write_store().
117  */
118 struct xenstore_domain_interface *xen_store;
119 
120 /*-------------------------- Private Data Structures ------------------------*/
121 
122 /**
123  * Structure capturing messages received from the XenStore service.
 *
 * xs_process_msg() allocates one of these per incoming message and queues
 * it on xs.watch_events (for XS_WATCH_EVENT) or xs.reply_list (for every
 * other type).  The context that dequeues the message frees it.
124  */
125 struct xs_stored_msg {
126 	TAILQ_ENTRY(xs_stored_msg) list;	/* Linkage on reply_list or watch_events. */
127 
128 	struct xsd_sockmsg hdr;	/* Message header as read from the response ring. */
129 
130 	union {
131 		/* Queued replies. */
132 		struct {
133 			char *body;	/* Payload of hdr.len bytes followed by a NUL. */
134 		} reply;
135 
136 		/* Queued watch events. */
137 		struct {
138 			struct xs_watch *handle;	/* Watch located by find_watch(). */
139 			const char **vec;	/* String array produced by split(). */
140 			u_int vec_size;	/* Number of strings in vec. */
141 		} watch;
142 	} u;
143 };
144 TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
145 
146 /**
147  * Container for all XenStore related state.
148  */
149 struct xs_softc {
150 	/** Newbus device for the XenStore. */
151 	device_t xs_dev;
152 
153 	/**
154 	 * Lock serializing access to ring producer/consumer
155 	 * indexes.  Use of this lock guarantees that wakeups
156 	 * of blocking readers/writers are not missed due to
157 	 * races with the XenStore service.
158 	 */
159 	struct mtx ring_lock;
160 
161 	/*
162 	 * Mutex used to ensure exclusive access to the outgoing
163 	 * communication ring.  We use a lock type that can be
164 	 * held while sleeping so that xs_write() can block waiting
165 	 * for space in the ring to free up, without allowing another
166 	 * writer to come in and corrupt a partial message write.
167 	 */
168 	struct sx request_mutex;
169 
170 	/**
171 	 * A list of replies to our requests.
172 	 *
173 	 * The reply list is filled by xs_rcv_thread().  It
174 	 * is consumed by the context that issued the request
175 	 * to which a reply is made.  The requester blocks in
176 	 * xs_read_reply().
177 	 *
178 	 * \note Only one requesting context can be active at a time.
179 	 *       This is guaranteed by the request_mutex and ensures
180 	 *	 that the requester sees replies matching the order
181 	 *	 of its requests.
182 	 */
183 	struct xs_stored_msg_list reply_list;
184 
185 	/** Lock protecting the reply list. */
186 	struct mtx reply_lock;
187 
188 	/**
189 	 * List of registered watches.
190 	 */
191 	struct xs_watch_list  registered_watches;
192 
193 	/** Lock protecting the registered watches list. */
194 	struct mtx registered_watches_lock;
195 
196 	/**
197 	 * List of pending watch callback events.
198 	 */
199 	struct xs_stored_msg_list watch_events;
200 
201 	/** Lock protecting the watch callback list. */
202 	struct mtx watch_events_lock;
203 
204 	/**
205 	 * The process ID of the xenwatch thread.
206 	 */
207 	pid_t xenwatch_pid;
208 
209 	/**
210 	 * Sleepable mutex used to gate the execution of XenStore
211 	 * watch event callbacks.
212 	 *
213 	 * xenwatch_thread holds an exclusive lock on this mutex
214 	 * while delivering event callbacks, and xenstore_unregister_watch()
215 	 * uses an exclusive lock of this mutex to guarantee that no
216 	 * callbacks of the just unregistered watch are pending
217 	 * before returning to its caller.
218 	 */
219 	struct sx xenwatch_mutex;
220 
221 	/**
222 	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
223 	 * of the true machine frame number into our "physical address space".
224 	 */
225 	unsigned long gpfn;
226 
227 	/**
228 	 * The event channel for communicating with the
229 	 * XenStore service.
230 	 */
231 	int evtchn;
232 
233 	/** Handle for XenStore interrupts. */
234 	xen_intr_handle_t xen_intr_handle;
235 
236 	/**
237 	 * Interrupt driven config hook allowing us to defer
238 	 * attaching children until interrupts (and thus communication
239 	 * with the XenStore service) are available.
240 	 */
241 	struct intr_config_hook xs_attachcb;
242 
243 	/**
244 	 * Xenstore is a user-space process that usually runs in Dom0,
245 	 * so if this domain is booting as Dom0, xenstore won't be accessible,
246 	 * and we have to defer the initialization of xenstore related
247 	 * devices to later (when xenstore is started).  xs_intr() sets
	 * this flag the first time it runs with the flag still clear.
248 	 */
249 	bool initialized;
250 
251 	/**
252 	 * Task to run when xenstore is initialized (Dom0 only), will
253 	 * take care of attaching xenstore related devices.
254 	 */
255 	struct task xs_late_init;
256 };
257 
258 /*-------------------------------- Global Data ------------------------------*/
259 static struct xs_softc xs;	/* The single XenStore state instance used throughout this file. */
260 
261 /*------------------------- Private Utility Functions -----------------------*/
262 
/**
 * Count, and optionally record pointers to, the NUL terminated strings
 * packed back to back in a buffer.
 *
 * The scan never reads at or beyond strings + len.  A trailing run of
 * bytes that has no NUL terminator inside the buffer is not a complete
 * string: it is neither counted nor recorded.
 *
 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
 * \param dest	   An array to store pointers to each string found in strings,
 *		   or NULL to only count.  When non-NULL it must have room
 *		   for every string in the buffer.
 * \param len	   The length, in bytes, of the buffer pointed to by strings.
 *
 * \return  A count of the number of strings found.
 */
static u_int
extract_strings(const char *strings, const char **dest, u_int len)
{
	const char *cur, *end, *nul;
	u_int num;

	num = 0;
	end = strings + len;
	for (cur = strings; cur < end; cur = nul + 1) {
		/*
		 * Look for the terminator within the remaining bytes only,
		 * so an unterminated tail cannot drag the scan past the
		 * end of the buffer.
		 */
		nul = memchr(cur, '\0', (size_t)(end - cur));
		if (nul == NULL)
			break;

		if (dest != NULL)
			*dest++ = cur;
		num++;
	}

	return (num);
}
287 
288 /**
289  * Convert a contiguous buffer containing a series of NUL terminated
290  * strings into an array of pointers to strings.
291  *
292  * The returned pointer references the array of string pointers which
293  * is followed by the storage for the string data.  It is the client's
294  * responsibility to free this storage.
295  *
296  * The storage addressed by strings is free'd prior to split returning.
297  *
298  * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
299  * \param len	   The length of the buffer pointed to by strings.
300  * \param num	   The number of strings found and returned in the strings
301  *                 array.
302  *
303  * \return  An array of pointers to the strings found in the input buffer.
304  */
305 static const char **
306 split(char *strings, u_int len, u_int *num)
307 {
308 	const char **ret;
309 
310 	/* Protect against unterminated buffers. */
311 	if (len > 0)
312 		strings[len - 1] = '\0';
313 
314 	/* Count the strings. */
315 	*num = extract_strings(strings, /*dest*/NULL, len);
316 
317 	/* Transfer to one big alloc for easy freeing by the caller. */
318 	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
319 	memcpy(&ret[*num], strings, len);
320 	free(strings, M_XENSTORE);
321 
322 	/* Extract pointers to newly allocated array. */
323 	strings = (char *)&ret[*num];
324 	(void)extract_strings(strings, /*dest*/ret, len);
325 
326 	return (ret);
327 }
328 
329 /*------------------------- Public Utility Functions -------------------------*/
330 /*------- API comments for these methods can be found in xenstorevar.h -------*/
/*
 * Build the path "dir/name" in a newly created, finished sbuf.  When
 * name is the empty string the result is just dir, with no trailing
 * separator.  The sbuf is handed to the caller, which is presumably
 * expected to release it with sbuf_delete() (see xenstorevar.h).
 */
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *path;

	path = sbuf_new_auto();
	sbuf_cat(path, dir);
	if (*name != '\0') {
		sbuf_putc(path, '/');
		sbuf_cat(path, name);
	}
	sbuf_finish(path);

	return (path);
}
346 
347 /*-------------------- Low Level Communication Management --------------------*/
348 /**
349  * Interrupt handler for the XenStore event channel.
350  *
351  * XenStore reads and writes block on "xen_store" for buffer
352  * space.  Wakeup any blocking operations when the XenStore
353  * service has modified the queues.
354  */
355 static void
356 xs_intr(void * arg __unused /*__attribute__((unused))*/)
357 {
358 
359 	/* If xenstore has not been initialized, initialize it now */
360 	if (!xs.initialized) {
361 		xs.initialized = true;
362 		/*
363 		 * Since this task is probing and attaching devices we
364 		 * have to hold the Giant lock.
365 		 */
366 		taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
367 	}
368 
369 	/*
370 	 * Hold ring lock across wakeup so that clients
371 	 * cannot miss a wakeup.
372 	 */
373 	mtx_lock(&xs.ring_lock);
374 	wakeup(xen_store);
375 	mtx_unlock(&xs.ring_lock);
376 }
377 
378 /**
379  * Verify that the indexes for a ring are valid.
380  *
381  * The difference between the producer and consumer cannot
382  * exceed the size of the ring.
383  *
384  * \param cons  The consumer index for the ring to test.
385  * \param prod  The producer index for the ring to test.
386  *
387  * \retval 1  If indexes are in range.
388  * \retval 0  If the indexes are out of range.
389  */
390 static int
391 xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
392 {
393 
394 	return ((prod - cons) <= XENSTORE_RING_SIZE);
395 }
396 
397 /**
398  * Return a pointer to, and the length of, the contiguous
399  * free region available for output in a ring buffer.
400  *
401  * \param cons  The consumer index for the ring.
402  * \param prod  The producer index for the ring.
403  * \param buf   The base address of the ring's storage.
404  * \param len   The amount of contiguous storage available.
405  *
406  * \return  A pointer to the start location of the free region.
407  */
408 static void *
409 xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
410     char *buf, uint32_t *len)
411 {
412 
413 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
414 	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
415 		*len = XENSTORE_RING_SIZE - (prod - cons);
416 	return (buf + MASK_XENSTORE_IDX(prod));
417 }
418 
419 /**
420  * Return a pointer to, and the length of, the contiguous
421  * data available to read from a ring buffer.
422  *
423  * \param cons  The consumer index for the ring.
424  * \param prod  The producer index for the ring.
425  * \param buf   The base address of the ring's storage.
426  * \param len   The amount of contiguous data available to read.
427  *
428  * \return  A pointer to the start location of the available data.
429  */
430 static const void *
431 xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
432     const char *buf, uint32_t *len)
433 {
434 
435 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
436 	if ((prod - cons) < *len)
437 		*len = prod - cons;
438 	return (buf + MASK_XENSTORE_IDX(cons));
439 }
440 
441 /**
442  * Transmit data to the XenStore service.
443  *
444  * \param tdata  A pointer to the contiguous data to send.
445  * \param len    The amount of data to send.
446  *
447  * \return  On success 0, otherwise an errno value indicating the
448  *          cause of failure.
449  *
450  * \invariant  Called from thread context.
451  * \invariant  The buffer pointed to by tdata is at least len bytes
452  *             in length.
453  * \invariant  xs.request_mutex exclusively locked.
454  */
455 static int
456 xs_write_store(const void *tdata, unsigned len)
457 {
458 	XENSTORE_RING_IDX cons, prod;
459 	const char *data = (const char *)tdata;
460 	int error;
461 
462 	sx_assert(&xs.request_mutex, SX_XLOCKED);
463 	while (len != 0) {
464 		void *dst;
465 		u_int avail;
466 
467 		/* Hold lock so we can't miss wakeups should we block. */
468 		mtx_lock(&xs.ring_lock);
469 		cons = xen_store->req_cons;
470 		prod = xen_store->req_prod;
471 		if ((prod - cons) == XENSTORE_RING_SIZE) {
472 			/*
473 			 * Output ring is full. Wait for a ring event.
474 			 *
475 			 * Note that the events from both queues
476 			 * are combined, so being woken does not
477 			 * guarantee that data exist in the read
478 			 * ring.
479 			 *
480 			 * To simplify error recovery and the retry,
481 			 * we specify PDROP so our lock is *not* held
482 			 * when msleep returns.
483 			 */
484 			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
485 			     "xbwrite", /*timeout*/0);
486 			if (error && error != EWOULDBLOCK)
487 				return (error);
488 
489 			/* Try again. */
490 			continue;
491 		}
492 		mtx_unlock(&xs.ring_lock);
493 
494 		/* Verify queue sanity. */
495 		if (!xs_check_indexes(cons, prod)) {
496 			xen_store->req_cons = xen_store->req_prod = 0;
497 			return (EIO);
498 		}
499 
500 		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
501 		if (avail > len)
502 			avail = len;
503 
504 		memcpy(dst, data, avail);
505 		data += avail;
506 		len -= avail;
507 
508 		/*
509 		 * The store to the producer index, which indicates
510 		 * to the other side that new data has arrived, must
511 		 * be visible only after our copy of the data into the
512 		 * ring has completed.
513 		 */
514 		wmb();
515 		xen_store->req_prod += avail;
516 
517 		/*
518 		 * xen_intr_signal() implies mb(). The other side will see
519 		 * the change to req_prod at the time of the interrupt.
520 		 */
521 		xen_intr_signal(xs.xen_intr_handle);
522 	}
523 
524 	return (0);
525 }
526 
527 /**
528  * Receive data from the XenStore service.
529  *
530  * \param tdata  A pointer to the contiguous buffer to receive the data.
531  * \param len    The amount of data to receive.
532  *
533  * \return  On success 0, otherwise an errno value indicating the
534  *          cause of failure.
535  *
536  * \invariant  Called from thread context.
537  * \invariant  The buffer pointed to by tdata is at least len bytes
538  *             in length.
539  *
540  * \note xs_read does not perform any internal locking to guarantee
541  *       serial access to the incoming ring buffer.  However, there
542  *	 is only one context processing reads: xs_rcv_thread().
543  */
544 static int
545 xs_read_store(void *tdata, unsigned len)
546 {
547 	XENSTORE_RING_IDX cons, prod;
548 	char *data = (char *)tdata;
549 	int error;
550 
551 	while (len != 0) {
552 		u_int avail;
553 		const char *src;
554 
555 		/* Hold lock so we can't miss wakeups should we block. */
556 		mtx_lock(&xs.ring_lock);
557 		cons = xen_store->rsp_cons;
558 		prod = xen_store->rsp_prod;
559 		if (cons == prod) {
560 			/*
561 			 * Nothing to read. Wait for a ring event.
562 			 *
563 			 * Note that the events from both queues
564 			 * are combined, so being woken does not
565 			 * guarantee that data exist in the read
566 			 * ring.
567 			 *
568 			 * To simplify error recovery and the retry,
569 			 * we specify PDROP so our lock is *not* held
570 			 * when msleep returns.
571 			 */
572 			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
573 			    "xbread", /*timeout*/0);
574 			if (error && error != EWOULDBLOCK)
575 				return (error);
576 			continue;
577 		}
578 		mtx_unlock(&xs.ring_lock);
579 
580 		/* Verify queue sanity. */
581 		if (!xs_check_indexes(cons, prod)) {
582 			xen_store->rsp_cons = xen_store->rsp_prod = 0;
583 			return (EIO);
584 		}
585 
586 		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
587 		if (avail > len)
588 			avail = len;
589 
590 		/*
591 		 * Insure the data we read is related to the indexes
592 		 * we read above.
593 		 */
594 		rmb();
595 
596 		memcpy(data, src, avail);
597 		data += avail;
598 		len -= avail;
599 
600 		/*
601 		 * Insure that the producer of this ring does not see
602 		 * the ring space as free until after we have copied it
603 		 * out.
604 		 */
605 		mb();
606 		xen_store->rsp_cons += avail;
607 
608 		/*
609 		 * xen_intr_signal() implies mb(). The producer will see
610 		 * the updated consumer index when the event is delivered.
611 		 */
612 		xen_intr_signal(xs.xen_intr_handle);
613 	}
614 
615 	return (0);
616 }
617 
618 /*----------------------- Received Message Processing ------------------------*/
619 /**
620  * Block reading the next message from the XenStore service and
621  * process the result.
622  *
623  * \param type  The returned type of the XenStore message received.
624  *
625  * \return  0 on success.  Otherwise an errno value indicating the
626  *          type of failure encountered.
627  */
628 static int
629 xs_process_msg(enum xsd_sockmsg_type *type)
630 {
631 	struct xs_stored_msg *msg;
632 	char *body;
633 	int error;
634 
635 	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
636 	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
637 	if (error) {
638 		free(msg, M_XENSTORE);
639 		return (error);
640 	}
641 
642 	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
643 	error = xs_read_store(body, msg->hdr.len);
644 	if (error) {
645 		free(body, M_XENSTORE);
646 		free(msg, M_XENSTORE);
647 		return (error);
648 	}
649 	body[msg->hdr.len] = '\0';
650 
651 	*type = msg->hdr.type;
652 	if (msg->hdr.type == XS_WATCH_EVENT) {
653 		msg->u.watch.vec = split(body, msg->hdr.len,
654 		    &msg->u.watch.vec_size);
655 
656 		mtx_lock(&xs.registered_watches_lock);
657 		msg->u.watch.handle = find_watch(
658 		    msg->u.watch.vec[XS_WATCH_TOKEN]);
659 		if (msg->u.watch.handle != NULL) {
660 			mtx_lock(&xs.watch_events_lock);
661 			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
662 			wakeup(&xs.watch_events);
663 			mtx_unlock(&xs.watch_events_lock);
664 		} else {
665 			free(msg->u.watch.vec, M_XENSTORE);
666 			free(msg, M_XENSTORE);
667 		}
668 		mtx_unlock(&xs.registered_watches_lock);
669 	} else {
670 		msg->u.reply.body = body;
671 		mtx_lock(&xs.reply_lock);
672 		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
673 		wakeup(&xs.reply_list);
674 		mtx_unlock(&xs.reply_lock);
675 	}
676 
677 	return (0);
678 }
679 
680 /**
681  * Thread body of the XenStore receive thread.
682  *
683  * This thread blocks waiting for data from the XenStore service
684  * and processes and received messages.
685  */
686 static void
687 xs_rcv_thread(void *arg __unused)
688 {
689 	int error;
690 	enum xsd_sockmsg_type type;
691 
692 	for (;;) {
693 		error = xs_process_msg(&type);
694 		if (error)
695 			printf("XENSTORE error %d while reading message\n",
696 			    error);
697 	}
698 }
699 
700 /*---------------- XenStore Message Request/Reply Processing -----------------*/
701 #define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
702 
703 /**
704  * Convert a XenStore error string into an errno number.
705  *
706  * \param errorstring  The error string to convert.
707  *
708  * \return  The errno best matching the input string.
709  *
710  * \note Unknown error strings are converted to EINVAL.
711  */
712 static int
713 xs_get_error(const char *errorstring)
714 {
715 	u_int i;
716 
717 	for (i = 0; i < xsd_error_count; i++) {
718 		if (!strcmp(errorstring, xsd_errors[i].errstring))
719 			return (xsd_errors[i].errnum);
720 	}
721 	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
722 	    errorstring);
723 	return (EINVAL);
724 }
725 
726 /**
727  * Block waiting for a reply to a message request.
728  *
729  * \param type	  The returned type of the reply.
730  * \param len	  The returned body length of the reply.
731  * \param result  The returned body of the reply.
732  *
733  * \return  0 on success.  Otherwise an errno indicating the
734  *          cause of failure.
735  */
736 static int
737 xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
738 {
739 	struct xs_stored_msg *msg;
740 	char *body;
741 	int error;
742 
743 	mtx_lock(&xs.reply_lock);
744 	while (TAILQ_EMPTY(&xs.reply_list)) {
745 		error = mtx_sleep(&xs.reply_list, &xs.reply_lock,
746 		    PCATCH, "xswait", hz/10);
747 		if (error && error != EWOULDBLOCK) {
748 			mtx_unlock(&xs.reply_lock);
749 			return (error);
750 		}
751 	}
752 	msg = TAILQ_FIRST(&xs.reply_list);
753 	TAILQ_REMOVE(&xs.reply_list, msg, list);
754 	mtx_unlock(&xs.reply_lock);
755 
756 	*type = msg->hdr.type;
757 	if (len)
758 		*len = msg->hdr.len;
759 	body = msg->u.reply.body;
760 
761 	free(msg, M_XENSTORE);
762 	*result = body;
763 	return (0);
764 }
765 
766 /**
767  * Pass-thru interface for XenStore access by userland processes
768  * via the XenStore device.
769  *
770  * Reply type and length data are returned by overwriting these
771  * fields in the passed in request message.
772  *
773  * \param msg	  A properly formatted message to transmit to
774  *		  the XenStore service.
775  * \param result  The returned body of the reply.
776  *
777  * \return  0 on success.  Otherwise an errno indicating the cause
778  *          of failure.
779  *
780  * \note The returned result is provided in malloced storage and thus
781  *       must be free'd by the caller with 'free(result, M_XENSTORE);
782  */
783 int
784 xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
785 {
786 	uint32_t request_type;
787 	int error;
788 
789 	request_type = msg->type;
790 
791 	sx_xlock(&xs.request_mutex);
792 	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
793 		error = xs_read_reply(&msg->type, &msg->len, result);
794 	sx_xunlock(&xs.request_mutex);
795 
796 	return (error);
797 }
798 
799 /**
800  * Send a message with an optionally muti-part body to the XenStore service.
801  *
802  * \param t              The transaction to use for this request.
803  * \param request_type   The type of message to send.
804  * \param iovec          Pointers to the body sections of the request.
805  * \param num_vecs       The number of body sections in the request.
806  * \param len            The returned length of the reply.
807  * \param result         The returned body of the reply.
808  *
809  * \return  0 on success.  Otherwise an errno indicating
810  *          the cause of failure.
811  *
812  * \note The returned result is provided in malloced storage and thus
813  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
814  */
815 static int
816 xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
817     const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
818 {
819 	struct xsd_sockmsg msg;
820 	void *ret = NULL;
821 	u_int i;
822 	int error;
823 
824 	msg.tx_id = t.id;
825 	msg.req_id = 0;
826 	msg.type = request_type;
827 	msg.len = 0;
828 	for (i = 0; i < num_vecs; i++)
829 		msg.len += iovec[i].iov_len;
830 
831 	sx_xlock(&xs.request_mutex);
832 	error = xs_write_store(&msg, sizeof(msg));
833 	if (error) {
834 		printf("xs_talkv failed %d\n", error);
835 		goto error_lock_held;
836 	}
837 
838 	for (i = 0; i < num_vecs; i++) {
839 		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
840 		if (error) {
841 			printf("xs_talkv failed %d\n", error);
842 			goto error_lock_held;
843 		}
844 	}
845 
846 	error = xs_read_reply(&msg.type, len, &ret);
847 
848 error_lock_held:
849 	sx_xunlock(&xs.request_mutex);
850 	if (error)
851 		return (error);
852 
853 	if (msg.type == XS_ERROR) {
854 		error = xs_get_error(ret);
855 		free(ret, M_XENSTORE);
856 		return (error);
857 	}
858 
859 	/* Reply is either error or an echo of our request message type. */
860 	KASSERT(msg.type == request_type, ("bad xenstore message type"));
861 
862 	if (result)
863 		*result = ret;
864 	else
865 		free(ret, M_XENSTORE);
866 
867 	return (0);
868 }
869 
870 /**
871  * Wrapper for xs_talkv allowing easy transmission of a message with
872  * a single, contiguous, message body.
873  *
874  * \param t              The transaction to use for this request.
875  * \param request_type   The type of message to send.
876  * \param body           The body of the request.
877  * \param len            The returned length of the reply.
878  * \param result         The returned body of the reply.
879  *
880  * \return  0 on success.  Otherwise an errno indicating
881  *          the cause of failure.
882  *
883  * \note The returned result is provided in malloced storage and thus
884  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
885  */
886 static int
887 xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
888     const char *body, u_int *len, void **result)
889 {
890 	struct iovec iovec;
891 
892 	iovec.iov_base = (void *)(uintptr_t)body;
893 	iovec.iov_len = strlen(body) + 1;
894 
895 	return (xs_talkv(t, request_type, &iovec, 1, len, result));
896 }
897 
898 /*------------------------- XenStore Watch Support ---------------------------*/
899 /**
900  * Transmit a watch request to the XenStore service.
901  *
902  * \param path    The path in the XenStore to watch.
903  * \param tocken  A unique identifier for this watch.
904  *
905  * \return  0 on success.  Otherwise an errno indicating the
906  *          cause of failure.
907  */
908 static int
909 xs_watch(const char *path, const char *token)
910 {
911 	struct iovec iov[2];
912 
913 	iov[0].iov_base = (void *)(uintptr_t) path;
914 	iov[0].iov_len = strlen(path) + 1;
915 	iov[1].iov_base = (void *)(uintptr_t) token;
916 	iov[1].iov_len = strlen(token) + 1;
917 
918 	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
919 }
920 
921 /**
922  * Transmit an uwatch request to the XenStore service.
923  *
924  * \param path    The path in the XenStore to watch.
925  * \param tocken  A unique identifier for this watch.
926  *
927  * \return  0 on success.  Otherwise an errno indicating the
928  *          cause of failure.
929  */
930 static int
931 xs_unwatch(const char *path, const char *token)
932 {
933 	struct iovec iov[2];
934 
935 	iov[0].iov_base = (void *)(uintptr_t) path;
936 	iov[0].iov_len = strlen(path) + 1;
937 	iov[1].iov_base = (void *)(uintptr_t) token;
938 	iov[1].iov_len = strlen(token) + 1;
939 
940 	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
941 }
942 
943 /**
944  * Convert from watch token (unique identifier) to the associated
945  * internal tracking structure for this watch.
946  *
947  * \param tocken  The unique identifier for the watch to find.
948  *
949  * \return  A pointer to the found watch structure or NULL.
950  */
951 static struct xs_watch *
952 find_watch(const char *token)
953 {
954 	struct xs_watch *i, *cmp;
955 
956 	cmp = (void *)strtoul(token, NULL, 16);
957 
958 	LIST_FOREACH(i, &xs.registered_watches, list)
959 		if (i == cmp)
960 			return (i);
961 
962 	return (NULL);
963 }
964 
965 /**
966  * Thread body of the XenStore watch event dispatch thread.
967  */
968 static void
969 xenwatch_thread(void *unused)
970 {
971 	struct xs_stored_msg *msg;
972 
973 	for (;;) {
974 
975 		mtx_lock(&xs.watch_events_lock);
976 		while (TAILQ_EMPTY(&xs.watch_events))
977 			mtx_sleep(&xs.watch_events,
978 			    &xs.watch_events_lock,
979 			    PWAIT | PCATCH, "waitev", hz/10);
980 
981 		mtx_unlock(&xs.watch_events_lock);
982 		sx_xlock(&xs.xenwatch_mutex);
983 
984 		mtx_lock(&xs.watch_events_lock);
985 		msg = TAILQ_FIRST(&xs.watch_events);
986 		if (msg)
987 			TAILQ_REMOVE(&xs.watch_events, msg, list);
988 		mtx_unlock(&xs.watch_events_lock);
989 
990 		if (msg != NULL) {
991 			/*
992 			 * XXX There are messages coming in with a NULL
993 			 * XXX callback.  This deserves further investigation;
994 			 * XXX the workaround here simply prevents the kernel
995 			 * XXX from panic'ing on startup.
996 			 */
997 			if (msg->u.watch.handle->callback != NULL)
998 				msg->u.watch.handle->callback(
999 					msg->u.watch.handle,
1000 					(const char **)msg->u.watch.vec,
1001 					msg->u.watch.vec_size);
1002 			free(msg->u.watch.vec, M_XENSTORE);
1003 			free(msg, M_XENSTORE);
1004 		}
1005 
1006 		sx_xunlock(&xs.xenwatch_mutex);
1007 	}
1008 }
1009 
1010 /*----------- XenStore Configuration, Initialization, and Control ------------*/
1011 /**
1012  * Setup communication channels with the XenStore service.
1013  *
1014  * \return  On success, 0. Otherwise an errno value indicating the
1015  *          type of failure.
1016  */
1017 static int
1018 xs_init_comms(void)
1019 {
1020 	int error;
1021 
1022 	if (xen_store->rsp_prod != xen_store->rsp_cons) {
1023 		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1024 		    "(%08x:%08x): fixing up\n",
1025 		    xen_store->rsp_cons, xen_store->rsp_prod);
1026 		xen_store->rsp_cons = xen_store->rsp_prod;
1027 	}
1028 
1029 	xen_intr_unbind(&xs.xen_intr_handle);
1030 
1031 	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
1032 	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
1033 	    &xs.xen_intr_handle);
1034 	if (error) {
1035 		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1036 		return (error);
1037 	}
1038 
1039 	return (0);
1040 }
1041 
1042 /*------------------ Private Device Attachment Functions  --------------------*/
1043 static void
1044 xs_identify(driver_t *driver, device_t parent)
1045 {
1046 
1047 	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
1048 }
1049 
1050 /**
1051  * Probe for the existence of the XenStore.
1052  *
1053  * \param dev
1054  */
1055 static int
1056 xs_probe(device_t dev)
1057 {
1058 	/*
1059 	 * We are either operating within a PV kernel or being probed
1060 	 * as the child of the successfully attached xenpci device.
1061 	 * Thus we are in a Xen environment and there will be a XenStore.
1062 	 * Unconditionally return success.
1063 	 */
1064 	device_set_desc(dev, "XenStore");
1065 	return (BUS_PROBE_NOWILDCARD);
1066 }
1067 
1068 static void
1069 xs_attach_deferred(void *arg)
1070 {
1071 
1072 	bus_generic_probe(xs.xs_dev);
1073 	bus_generic_attach(xs.xs_dev);
1074 
1075 	config_intrhook_disestablish(&xs.xs_attachcb);
1076 }
1077 
1078 static void
1079 xs_attach_late(void *arg, int pending)
1080 {
1081 
1082 	KASSERT((pending == 1), ("xs late attach queued several times"));
1083 	bus_generic_probe(xs.xs_dev);
1084 	bus_generic_attach(xs.xs_dev);
1085 }
1086 
1087 /**
1088  * Attach to the XenStore.
1089  *
1090  * This routine also prepares for the probe/attach of drivers that rely
1091  * on the XenStore.
1092  */
1093 static int
1094 xs_attach(device_t dev)
1095 {
1096 	int error;
1097 
1098 	/* Allow us to get device_t from softc and vice-versa. */
1099 	xs.xs_dev = dev;
1100 	device_set_softc(dev, &xs);
1101 
1102 	/* Initialize the interface to xenstore. */
1103 	struct proc *p;
1104 
1105 	xs.initialized = false;
1106 	if (xen_hvm_domain()) {
1107 		xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
1108 		xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
1109 		xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE);
1110 		xs.initialized = true;
1111 	} else if (xen_pv_domain()) {
1112 		if (HYPERVISOR_start_info->store_evtchn == 0) {
1113 			struct evtchn_alloc_unbound alloc_unbound;
1114 
1115 			/* Allocate a local event channel for xenstore */
1116 			alloc_unbound.dom = DOMID_SELF;
1117 			alloc_unbound.remote_dom = DOMID_SELF;
1118 			error = HYPERVISOR_event_channel_op(
1119 			    EVTCHNOP_alloc_unbound, &alloc_unbound);
1120 			if (error != 0)
1121 				panic(
1122 				   "unable to alloc event channel for Dom0: %d",
1123 				    error);
1124 
1125 			HYPERVISOR_start_info->store_evtchn =
1126 			    alloc_unbound.port;
1127 			xs.evtchn = alloc_unbound.port;
1128 
1129 			/* Allocate memory for the xs shared ring */
1130 			xen_store = malloc(PAGE_SIZE, M_XENSTORE,
1131 			    M_WAITOK | M_ZERO);
1132 		} else {
1133 			xs.evtchn = HYPERVISOR_start_info->store_evtchn;
1134 			xs.initialized = true;
1135 		}
1136 	} else {
1137 		panic("Unknown domain type, cannot initialize xenstore.");
1138 	}
1139 
1140 	TAILQ_INIT(&xs.reply_list);
1141 	TAILQ_INIT(&xs.watch_events);
1142 
1143 	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1144 	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1145 	sx_init(&xs.xenwatch_mutex, "xenwatch");
1146 	sx_init(&xs.request_mutex, "xenstore request");
1147 	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1148 	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1149 
1150 	/* Initialize the shared memory rings to talk to xenstored */
1151 	error = xs_init_comms();
1152 	if (error)
1153 		return (error);
1154 
1155 	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1156 	    0, "xenwatch");
1157 	if (error)
1158 		return (error);
1159 	xs.xenwatch_pid = p->p_pid;
1160 
1161 	error = kproc_create(xs_rcv_thread, NULL, NULL,
1162 	    RFHIGHPID, 0, "xenstore_rcv");
1163 
1164 	xs.xs_attachcb.ich_func = xs_attach_deferred;
1165 	xs.xs_attachcb.ich_arg = NULL;
1166 	if (xs.initialized) {
1167 		config_intrhook_establish(&xs.xs_attachcb);
1168 	} else {
1169 		TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
1170 	}
1171 
1172 	return (error);
1173 }
1174 
1175 /**
1176  * Prepare for suspension of this VM by halting XenStore access after
1177  * all transactions and individual requests have completed.
1178  */
1179 static int
1180 xs_suspend(device_t dev)
1181 {
1182 	int error;
1183 
1184 	/* Suspend child Xen devices. */
1185 	error = bus_generic_suspend(dev);
1186 	if (error != 0)
1187 		return (error);
1188 
1189 	sx_xlock(&xs.request_mutex);
1190 
1191 	return (0);
1192 }
1193 
1194 /**
1195  * Resume XenStore operations after this VM is resumed.
1196  */
1197 static int
1198 xs_resume(device_t dev __unused)
1199 {
1200 	struct xs_watch *watch;
1201 	char token[sizeof(watch) * 2 + 1];
1202 
1203 	xs_init_comms();
1204 
1205 	sx_xunlock(&xs.request_mutex);
1206 
1207 	/*
1208 	 * NB: since xenstore childs have not been resumed yet, there's
1209 	 * no need to hold any watch mutex. Having clients try to add or
1210 	 * remove watches at this point (before xenstore is resumed) is
1211 	 * clearly a violantion of the resume order.
1212 	 */
1213 	LIST_FOREACH(watch, &xs.registered_watches, list) {
1214 		sprintf(token, "%lX", (long)watch);
1215 		xs_watch(watch->node, token);
1216 	}
1217 
1218 	/* Resume child Xen devices. */
1219 	bus_generic_resume(dev);
1220 
1221 	return (0);
1222 }
1223 
1224 /*-------------------- Private Device Attachment Data  -----------------------*/
1225 static device_method_t xenstore_methods[] = {
1226 	/* Device interface */
1227 	DEVMETHOD(device_identify,	xs_identify),
1228 	DEVMETHOD(device_probe,         xs_probe),
1229 	DEVMETHOD(device_attach,        xs_attach),
1230 	DEVMETHOD(device_detach,        bus_generic_detach),
1231 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
1232 	DEVMETHOD(device_suspend,       xs_suspend),
1233 	DEVMETHOD(device_resume,        xs_resume),
1234 
1235 	/* Bus interface */
1236 	DEVMETHOD(bus_add_child,        bus_generic_add_child),
1237 	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
1238 	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
1239 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
1240 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
1241 
1242 	DEVMETHOD_END
1243 };
1244 
1245 DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
1246 static devclass_t xenstore_devclass;
1247 
1248 DRIVER_MODULE(xenstore, xenpv, xenstore_driver, xenstore_devclass, 0, 0);
1249 
1250 /*------------------------------- Sysctl Data --------------------------------*/
1251 /* XXX Shouldn't the node be somewhere else? */
1252 SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
1253 SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
1254 SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1255 
1256 /*-------------------------------- Public API --------------------------------*/
1257 /*------- API comments for these methods can be found in xenstorevar.h -------*/
1258 int
1259 xs_directory(struct xs_transaction t, const char *dir, const char *node,
1260     u_int *num, const char ***result)
1261 {
1262 	struct sbuf *path;
1263 	char *strings;
1264 	u_int len = 0;
1265 	int error;
1266 
1267 	path = xs_join(dir, node);
1268 	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1269 	    (void **)&strings);
1270 	sbuf_delete(path);
1271 	if (error)
1272 		return (error);
1273 
1274 	*result = split(strings, len, num);
1275 
1276 	return (0);
1277 }
1278 
1279 int
1280 xs_exists(struct xs_transaction t, const char *dir, const char *node)
1281 {
1282 	const char **d;
1283 	int error, dir_n;
1284 
1285 	error = xs_directory(t, dir, node, &dir_n, &d);
1286 	if (error)
1287 		return (0);
1288 	free(d, M_XENSTORE);
1289 	return (1);
1290 }
1291 
1292 int
1293 xs_read(struct xs_transaction t, const char *dir, const char *node,
1294     u_int *len, void **result)
1295 {
1296 	struct sbuf *path;
1297 	void *ret;
1298 	int error;
1299 
1300 	path = xs_join(dir, node);
1301 	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
1302 	sbuf_delete(path);
1303 	if (error)
1304 		return (error);
1305 	*result = ret;
1306 	return (0);
1307 }
1308 
1309 int
1310 xs_write(struct xs_transaction t, const char *dir, const char *node,
1311     const char *string)
1312 {
1313 	struct sbuf *path;
1314 	struct iovec iovec[2];
1315 	int error;
1316 
1317 	path = xs_join(dir, node);
1318 
1319 	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1320 	iovec[0].iov_len = sbuf_len(path) + 1;
1321 	iovec[1].iov_base = (void *)(uintptr_t) string;
1322 	iovec[1].iov_len = strlen(string);
1323 
1324 	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
1325 	sbuf_delete(path);
1326 
1327 	return (error);
1328 }
1329 
1330 int
1331 xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1332 {
1333 	struct sbuf *path;
1334 	int ret;
1335 
1336 	path = xs_join(dir, node);
1337 	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
1338 	sbuf_delete(path);
1339 
1340 	return (ret);
1341 }
1342 
1343 int
1344 xs_rm(struct xs_transaction t, const char *dir, const char *node)
1345 {
1346 	struct sbuf *path;
1347 	int ret;
1348 
1349 	path = xs_join(dir, node);
1350 	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
1351 	sbuf_delete(path);
1352 
1353 	return (ret);
1354 }
1355 
1356 int
1357 xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
1358 {
1359 	struct xs_transaction local_xbt;
1360 	struct sbuf *root_path_sbuf;
1361 	struct sbuf *cur_path_sbuf;
1362 	char *root_path;
1363 	char *cur_path;
1364 	const char **dir;
1365 	int error;
1366 
1367 retry:
1368 	root_path_sbuf = xs_join(base, node);
1369 	cur_path_sbuf  = xs_join(base, node);
1370 	root_path      = sbuf_data(root_path_sbuf);
1371 	cur_path       = sbuf_data(cur_path_sbuf);
1372 	dir            = NULL;
1373 	local_xbt.id   = 0;
1374 
1375 	if (xbt.id == 0) {
1376 		error = xs_transaction_start(&local_xbt);
1377 		if (error != 0)
1378 			goto out;
1379 		xbt = local_xbt;
1380 	}
1381 
1382 	while (1) {
1383 		u_int count;
1384 		u_int i;
1385 
1386 		error = xs_directory(xbt, cur_path, "", &count, &dir);
1387 		if (error)
1388 			goto out;
1389 
1390 		for (i = 0; i < count; i++) {
1391 			error = xs_rm(xbt, cur_path, dir[i]);
1392 			if (error == ENOTEMPTY) {
1393 				struct sbuf *push_dir;
1394 
1395 				/*
1396 				 * Descend to clear out this sub directory.
1397 				 * We'll return to cur_dir once push_dir
1398 				 * is empty.
1399 				 */
1400 				push_dir = xs_join(cur_path, dir[i]);
1401 				sbuf_delete(cur_path_sbuf);
1402 				cur_path_sbuf = push_dir;
1403 				cur_path = sbuf_data(cur_path_sbuf);
1404 				break;
1405 			} else if (error != 0) {
1406 				goto out;
1407 			}
1408 		}
1409 
1410 		free(dir, M_XENSTORE);
1411 		dir = NULL;
1412 
1413 		if (i == count) {
1414 			char *last_slash;
1415 
1416 			/* Directory is empty.  It is now safe to remove. */
1417 			error = xs_rm(xbt, cur_path, "");
1418 			if (error != 0)
1419 				goto out;
1420 
1421 			if (!strcmp(cur_path, root_path))
1422 				break;
1423 
1424 			/* Return to processing the parent directory. */
1425 			last_slash = strrchr(cur_path, '/');
1426 			KASSERT(last_slash != NULL,
1427 				("xs_rm_tree: mangled path %s", cur_path));
1428 			*last_slash = '\0';
1429 		}
1430 	}
1431 
1432 out:
1433 	sbuf_delete(cur_path_sbuf);
1434 	sbuf_delete(root_path_sbuf);
1435 	if (dir != NULL)
1436 		free(dir, M_XENSTORE);
1437 
1438 	if (local_xbt.id != 0) {
1439 		int terror;
1440 
1441 		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
1442 		xbt.id = 0;
1443 		if (terror == EAGAIN && error == 0)
1444 			goto retry;
1445 	}
1446 	return (error);
1447 }
1448 
1449 int
1450 xs_transaction_start(struct xs_transaction *t)
1451 {
1452 	char *id_str;
1453 	int error;
1454 
1455 	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
1456 	    (void **)&id_str);
1457 	if (error == 0) {
1458 		t->id = strtoul(id_str, NULL, 0);
1459 		free(id_str, M_XENSTORE);
1460 	}
1461 	return (error);
1462 }
1463 
1464 int
1465 xs_transaction_end(struct xs_transaction t, int abort)
1466 {
1467 	char abortstr[2];
1468 
1469 	if (abort)
1470 		strcpy(abortstr, "F");
1471 	else
1472 		strcpy(abortstr, "T");
1473 
1474 	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
1475 }
1476 
1477 int
1478 xs_scanf(struct xs_transaction t, const char *dir, const char *node,
1479      int *scancountp, const char *fmt, ...)
1480 {
1481 	va_list ap;
1482 	int error, ns;
1483 	char *val;
1484 
1485 	error = xs_read(t, dir, node, NULL, (void **) &val);
1486 	if (error)
1487 		return (error);
1488 
1489 	va_start(ap, fmt);
1490 	ns = vsscanf(val, fmt, ap);
1491 	va_end(ap);
1492 	free(val, M_XENSTORE);
1493 	/* Distinctive errno. */
1494 	if (ns == 0)
1495 		return (ERANGE);
1496 	if (scancountp)
1497 		*scancountp = ns;
1498 	return (0);
1499 }
1500 
1501 int
1502 xs_vprintf(struct xs_transaction t,
1503     const char *dir, const char *node, const char *fmt, va_list ap)
1504 {
1505 	struct sbuf *sb;
1506 	int error;
1507 
1508 	sb = sbuf_new_auto();
1509 	sbuf_vprintf(sb, fmt, ap);
1510 	sbuf_finish(sb);
1511 	error = xs_write(t, dir, node, sbuf_data(sb));
1512 	sbuf_delete(sb);
1513 
1514 	return (error);
1515 }
1516 
1517 int
1518 xs_printf(struct xs_transaction t, const char *dir, const char *node,
1519      const char *fmt, ...)
1520 {
1521 	va_list ap;
1522 	int error;
1523 
1524 	va_start(ap, fmt);
1525 	error = xs_vprintf(t, dir, node, fmt, ap);
1526 	va_end(ap);
1527 
1528 	return (error);
1529 }
1530 
1531 int
1532 xs_gather(struct xs_transaction t, const char *dir, ...)
1533 {
1534 	va_list ap;
1535 	const char *name;
1536 	int error;
1537 
1538 	va_start(ap, dir);
1539 	error = 0;
1540 	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
1541 		const char *fmt = va_arg(ap, char *);
1542 		void *result = va_arg(ap, void *);
1543 		char *p;
1544 
1545 		error = xs_read(t, dir, name, NULL, (void **) &p);
1546 		if (error)
1547 			break;
1548 
1549 		if (fmt) {
1550 			if (sscanf(p, fmt, result) == 0)
1551 				error = EINVAL;
1552 			free(p, M_XENSTORE);
1553 		} else
1554 			*(char **)result = p;
1555 	}
1556 	va_end(ap);
1557 
1558 	return (error);
1559 }
1560 
1561 int
1562 xs_register_watch(struct xs_watch *watch)
1563 {
1564 	/* Pointer in ascii is the token. */
1565 	char token[sizeof(watch) * 2 + 1];
1566 	int error;
1567 
1568 	sprintf(token, "%lX", (long)watch);
1569 
1570 	mtx_lock(&xs.registered_watches_lock);
1571 	KASSERT(find_watch(token) == NULL, ("watch already registered"));
1572 	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
1573 	mtx_unlock(&xs.registered_watches_lock);
1574 
1575 	error = xs_watch(watch->node, token);
1576 
1577 	/* Ignore errors due to multiple registration. */
1578 	if (error == EEXIST)
1579 		error = 0;
1580 
1581 	if (error != 0) {
1582 		mtx_lock(&xs.registered_watches_lock);
1583 		LIST_REMOVE(watch, list);
1584 		mtx_unlock(&xs.registered_watches_lock);
1585 	}
1586 
1587 	return (error);
1588 }
1589 
1590 void
1591 xs_unregister_watch(struct xs_watch *watch)
1592 {
1593 	struct xs_stored_msg *msg, *tmp;
1594 	char token[sizeof(watch) * 2 + 1];
1595 	int error;
1596 
1597 	sprintf(token, "%lX", (long)watch);
1598 
1599 	mtx_lock(&xs.registered_watches_lock);
1600 	if (find_watch(token) == NULL) {
1601 		mtx_unlock(&xs.registered_watches_lock);
1602 		return;
1603 	}
1604 	LIST_REMOVE(watch, list);
1605 	mtx_unlock(&xs.registered_watches_lock);
1606 
1607 	error = xs_unwatch(watch->node, token);
1608 	if (error)
1609 		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
1610 		    watch->node, error);
1611 
1612 	/* Cancel pending watch events. */
1613 	mtx_lock(&xs.watch_events_lock);
1614 	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
1615 		if (msg->u.watch.handle != watch)
1616 			continue;
1617 		TAILQ_REMOVE(&xs.watch_events, msg, list);
1618 		free(msg->u.watch.vec, M_XENSTORE);
1619 		free(msg, M_XENSTORE);
1620 	}
1621 	mtx_unlock(&xs.watch_events_lock);
1622 
1623 	/* Flush any currently-executing callback, unless we are it. :-) */
1624 	if (curproc->p_pid != xs.xenwatch_pid) {
1625 		sx_xlock(&xs.xenwatch_mutex);
1626 		sx_xunlock(&xs.xenwatch_mutex);
1627 	}
1628 }
1629 
1630 void
1631 xs_lock(void)
1632 {
1633 
1634 	sx_xlock(&xs.request_mutex);
1635 	return;
1636 }
1637 
1638 void
1639 xs_unlock(void)
1640 {
1641 
1642 	sx_xunlock(&xs.request_mutex);
1643 	return;
1644 }
1645 
1646