xref: /linux/drivers/xen/xenbus/xenbus_client.c (revision 24bce201d79807b668bf9d9e0aca801c5c0d5f78)
1 /******************************************************************************
2  * Client-facing interface for the Xenbus driver.  In other words, the
3  * interface between the Xenbus and the device-specific code, be it the
4  * frontend or the backend of that driver.
5  *
6  * Copyright (C) 2005 XenSource Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License version 2
10  * as published by the Free Software Foundation; or, when distributed
11  * separately from the Linux kernel or incorporated into other
12  * software packages, subject to the following license:
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this source file (the "Software"), to deal in the Software without
16  * restriction, including without limitation the rights to use, copy, modify,
17  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18  * and to permit persons to whom the Software is furnished to do so, subject to
19  * the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30  * IN THE SOFTWARE.
31  */
32 
33 #include <linux/mm.h>
34 #include <linux/slab.h>
35 #include <linux/types.h>
36 #include <linux/spinlock.h>
37 #include <linux/vmalloc.h>
38 #include <linux/export.h>
39 #include <asm/xen/hypervisor.h>
40 #include <xen/page.h>
41 #include <xen/interface/xen.h>
42 #include <xen/interface/event_channel.h>
43 #include <xen/balloon.h>
44 #include <xen/events.h>
45 #include <xen/grant_table.h>
46 #include <xen/xenbus.h>
47 #include <xen/xen.h>
48 #include <xen/features.h>
49 
50 #include "xenbus.h"
51 
/* Number of pages needed for @_grants grants: @_grants / XEN_PFN_PER_PAGE, rounded up. */
#define XENBUS_PAGES(_grants)	(DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE))

/* Number of pages needed for the largest supported ring (XENBUS_MAX_RING_GRANTS grants). */
#define XENBUS_MAX_RING_PAGES	(XENBUS_PAGES(XENBUS_MAX_RING_GRANTS))
55 
/*
 * Bookkeeping for one mapped ring.  A node is linked into
 * xenbus_valloc_pages by the map operations and looked up again by its
 * virtual address when the ring is unmapped.
 */
struct xenbus_map_node {
	struct list_head next;		/* entry in xenbus_valloc_pages */
	union {
		/* Used by xenbus_map_ring_pv() / xenbus_unmap_ring_pv(). */
		struct {
			struct vm_struct *area;
		} pv;
		/* Used by xenbus_map_ring_hvm() / xenbus_unmap_ring_hvm(). */
		struct {
			struct page *pages[XENBUS_MAX_RING_PAGES];
			unsigned long addrs[XENBUS_MAX_RING_GRANTS];
			void *addr;	/* address returned by vmap() */
		} hvm;
	};
	grant_handle_t handles[XENBUS_MAX_RING_GRANTS];
	unsigned int   nr_handles;	/* number of grants in this mapping */
};
71 
/*
 * Scratch state for a single xenbus_map_ring_valloc() call.  It is
 * allocated and freed by xenbus_map_ring_valloc() and handed to the
 * ring_ops->map implementation.
 */
struct map_ring_valloc {
	/* Set to NULL by ->map on success, once the node is on the list. */
	struct xenbus_map_node *node;

	/* Why do we need two arrays? See comment of __xenbus_map_ring */
	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];

	/* Hypercall argument arrays used by __xenbus_map_ring(). */
	struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];

	/* Next free slot in addrs[]/phys_addrs[] while they are filled in. */
	unsigned int idx;
};
84 
/*
 * List of all currently mapped rings (struct xenbus_map_node).  Additions
 * and removals are done with xenbus_valloc_lock held.
 */
static DEFINE_SPINLOCK(xenbus_valloc_lock);
static LIST_HEAD(xenbus_valloc_pages);
87 
/*
 * Ring map/unmap implementation.  xenbus_map_ring_valloc() and
 * xenbus_unmap_ring_vfree() dispatch through these callbacks.
 */
struct xenbus_ring_ops {
	int (*map)(struct xenbus_device *dev, struct map_ring_valloc *info,
		   grant_ref_t *gnt_refs, unsigned int nr_grefs,
		   void **vaddr);
	int (*unmap)(struct xenbus_device *dev, void *vaddr);
};

/* Active implementation, selected by xenbus_ring_ops_init(). */
static const struct xenbus_ring_ops *ring_ops __read_mostly;
96 
97 const char *xenbus_strstate(enum xenbus_state state)
98 {
99 	static const char *const name[] = {
100 		[ XenbusStateUnknown      ] = "Unknown",
101 		[ XenbusStateInitialising ] = "Initialising",
102 		[ XenbusStateInitWait     ] = "InitWait",
103 		[ XenbusStateInitialised  ] = "Initialised",
104 		[ XenbusStateConnected    ] = "Connected",
105 		[ XenbusStateClosing      ] = "Closing",
106 		[ XenbusStateClosed	  ] = "Closed",
107 		[XenbusStateReconfiguring] = "Reconfiguring",
108 		[XenbusStateReconfigured] = "Reconfigured",
109 	};
110 	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
111 }
112 EXPORT_SYMBOL_GPL(xenbus_strstate);
113 
114 /**
115  * xenbus_watch_path - register a watch
116  * @dev: xenbus device
117  * @path: path to watch
118  * @watch: watch to register
119  * @callback: callback to register
120  *
121  * Register a @watch on the given path, using the given xenbus_watch structure
122  * for storage, and the given @callback function as the callback.  Return 0 on
123  * success, or -errno on error.  On success, the given @path will be saved as
124  * @watch->node, and remains the caller's to free.  On error, @watch->node will
125  * be NULL, the device will switch to %XenbusStateClosing, and the error will
126  * be saved in the store.
127  */
128 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
129 		      struct xenbus_watch *watch,
130 		      bool (*will_handle)(struct xenbus_watch *,
131 					  const char *, const char *),
132 		      void (*callback)(struct xenbus_watch *,
133 				       const char *, const char *))
134 {
135 	int err;
136 
137 	watch->node = path;
138 	watch->will_handle = will_handle;
139 	watch->callback = callback;
140 
141 	err = register_xenbus_watch(watch);
142 
143 	if (err) {
144 		watch->node = NULL;
145 		watch->will_handle = NULL;
146 		watch->callback = NULL;
147 		xenbus_dev_fatal(dev, err, "adding watch on %s", path);
148 	}
149 
150 	return err;
151 }
152 EXPORT_SYMBOL_GPL(xenbus_watch_path);
153 
154 
155 /**
156  * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
157  * @dev: xenbus device
158  * @watch: watch to register
159  * @callback: callback to register
160  * @pathfmt: format of path to watch
161  *
162  * Register a watch on the given @path, using the given xenbus_watch
163  * structure for storage, and the given @callback function as the callback.
164  * Return 0 on success, or -errno on error.  On success, the watched path
165  * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
166  * kfree().  On error, watch->node will be NULL, so the caller has nothing to
167  * free, the device will switch to %XenbusStateClosing, and the error will be
168  * saved in the store.
169  */
170 int xenbus_watch_pathfmt(struct xenbus_device *dev,
171 			 struct xenbus_watch *watch,
172 			 bool (*will_handle)(struct xenbus_watch *,
173 					const char *, const char *),
174 			 void (*callback)(struct xenbus_watch *,
175 					  const char *, const char *),
176 			 const char *pathfmt, ...)
177 {
178 	int err;
179 	va_list ap;
180 	char *path;
181 
182 	va_start(ap, pathfmt);
183 	path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
184 	va_end(ap);
185 
186 	if (!path) {
187 		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
188 		return -ENOMEM;
189 	}
190 	err = xenbus_watch_path(dev, path, watch, will_handle, callback);
191 
192 	if (err)
193 		kfree(path);
194 	return err;
195 }
196 EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
197 
198 static void xenbus_switch_fatal(struct xenbus_device *, int, int,
199 				const char *, ...);
200 
201 static int
202 __xenbus_switch_state(struct xenbus_device *dev,
203 		      enum xenbus_state state, int depth)
204 {
205 	/* We check whether the state is currently set to the given value, and
206 	   if not, then the state is set.  We don't want to unconditionally
207 	   write the given state, because we don't want to fire watches
208 	   unnecessarily.  Furthermore, if the node has gone, we don't write
209 	   to it, as the device will be tearing down, and we don't want to
210 	   resurrect that directory.
211 
212 	   Note that, because of this cached value of our state, this
213 	   function will not take a caller's Xenstore transaction
214 	   (something it was trying to in the past) because dev->state
215 	   would not get reset if the transaction was aborted.
216 	 */
217 
218 	struct xenbus_transaction xbt;
219 	int current_state;
220 	int err, abort;
221 
222 	if (state == dev->state)
223 		return 0;
224 
225 again:
226 	abort = 1;
227 
228 	err = xenbus_transaction_start(&xbt);
229 	if (err) {
230 		xenbus_switch_fatal(dev, depth, err, "starting transaction");
231 		return 0;
232 	}
233 
234 	err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
235 	if (err != 1)
236 		goto abort;
237 
238 	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
239 	if (err) {
240 		xenbus_switch_fatal(dev, depth, err, "writing new state");
241 		goto abort;
242 	}
243 
244 	abort = 0;
245 abort:
246 	err = xenbus_transaction_end(xbt, abort);
247 	if (err) {
248 		if (err == -EAGAIN && !abort)
249 			goto again;
250 		xenbus_switch_fatal(dev, depth, err, "ending transaction");
251 	} else
252 		dev->state = state;
253 
254 	return 0;
255 }
256 
/**
 * xenbus_switch_state
 * @dev: xenbus device
 * @state: new state
 *
 * Advertise in the store a change of the given driver to the given new_state.
 * This is a thin wrapper around __xenbus_switch_state() with a recursion
 * depth of 0.  Note that the helper returns 0 on every path: failures are
 * not propagated to the caller but reported into the store, after which a
 * switch to XenbusStateClosing is attempted.
 */
int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
{
	return __xenbus_switch_state(dev, state, 0);
}

EXPORT_SYMBOL_GPL(xenbus_switch_state);
272 
/**
 * xenbus_frontend_closed
 * @dev: xenbus device
 *
 * Switch @dev to XenbusStateClosed and signal the @dev->down completion.
 * Always returns 0.
 */
int xenbus_frontend_closed(struct xenbus_device *dev)
{
	xenbus_switch_state(dev, XenbusStateClosed);
	complete(&dev->down);
	return 0;
}
EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
280 
281 static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
282 				const char *fmt, va_list ap)
283 {
284 	unsigned int len;
285 	char *printf_buffer;
286 	char *path_buffer;
287 
288 #define PRINTF_BUFFER_SIZE 4096
289 
290 	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
291 	if (!printf_buffer)
292 		return;
293 
294 	len = sprintf(printf_buffer, "%i ", -err);
295 	vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap);
296 
297 	dev_err(&dev->dev, "%s\n", printf_buffer);
298 
299 	path_buffer = kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
300 	if (path_buffer)
301 		xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer);
302 
303 	kfree(printf_buffer);
304 	kfree(path_buffer);
305 }
306 
/**
 * xenbus_dev_error
 * @dev: xenbus device
 * @err: error to report
 * @fmt: error message format
 *
 * Report the given negative errno into the store, along with the given
 * formatted message.  The device state is left unchanged; see
 * xenbus_dev_fatal() for the variant that also closes the device.
 */
void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	xenbus_va_dev_error(dev, err, fmt, ap);
	va_end(ap);
}
EXPORT_SYMBOL_GPL(xenbus_dev_error);
325 
/**
 * xenbus_dev_fatal
 * @dev: xenbus device
 * @err: error to report
 * @fmt: error message format
 *
 * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
 * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
 * closedown of this driver and its peer.
 */

void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	xenbus_va_dev_error(dev, err, fmt, ap);
	va_end(ap);

	/* The result of the state switch is not checked. */
	xenbus_switch_state(dev, XenbusStateClosing);
}
EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
348 
/**
 * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
 * avoiding recursion within xenbus_switch_state.
 *
 * The error is always reported.  The switch to XenbusStateClosing is only
 * attempted when @depth is 0, and that nested call is made with a depth of
 * 1 so that a failure inside it cannot trigger yet another switch.
 */
static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
				const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	xenbus_va_dev_error(dev, err, fmt, ap);
	va_end(ap);

	if (!depth)
		__xenbus_switch_state(dev, XenbusStateClosing, 1);
}
365 
366 /*
367  * xenbus_setup_ring
368  * @dev: xenbus device
369  * @vaddr: pointer to starting virtual address of the ring
370  * @nr_pages: number of pages to be granted
371  * @grefs: grant reference array to be filled in
372  *
373  * Allocate physically contiguous pages for a shared ring buffer and grant it
374  * to the peer of the given device. The ring buffer is initially filled with
375  * zeroes. The virtual address of the ring is stored at @vaddr and the
376  * grant references are stored in the @grefs array. In case of error @vaddr
377  * will be set to NULL and @grefs will be filled with INVALID_GRANT_REF.
378  */
379 int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr,
380 		      unsigned int nr_pages, grant_ref_t *grefs)
381 {
382 	unsigned long ring_size = nr_pages * XEN_PAGE_SIZE;
383 	grant_ref_t gref_head;
384 	unsigned int i;
385 	int ret;
386 
387 	*vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO);
388 	if (!*vaddr) {
389 		ret = -ENOMEM;
390 		goto err;
391 	}
392 
393 	ret = gnttab_alloc_grant_references(nr_pages, &gref_head);
394 	if (ret) {
395 		xenbus_dev_fatal(dev, ret, "granting access to %u ring pages",
396 				 nr_pages);
397 		goto err;
398 	}
399 
400 	for (i = 0; i < nr_pages; i++) {
401 		unsigned long gfn;
402 
403 		if (is_vmalloc_addr(*vaddr))
404 			gfn = pfn_to_gfn(vmalloc_to_pfn(vaddr[i]));
405 		else
406 			gfn = virt_to_gfn(vaddr[i]);
407 
408 		grefs[i] = gnttab_claim_grant_reference(&gref_head);
409 		gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
410 						gfn, 0);
411 	}
412 
413 	return 0;
414 
415  err:
416 	if (*vaddr)
417 		free_pages_exact(*vaddr, ring_size);
418 	for (i = 0; i < nr_pages; i++)
419 		grefs[i] = INVALID_GRANT_REF;
420 	*vaddr = NULL;
421 
422 	return ret;
423 }
424 EXPORT_SYMBOL_GPL(xenbus_setup_ring);
425 
426 /*
427  * xenbus_teardown_ring
428  * @vaddr: starting virtual address of the ring
429  * @nr_pages: number of pages
430  * @grefs: grant reference array
431  *
432  * Remove grants for the shared ring buffer and free the associated memory.
433  * On return the grant reference array is filled with INVALID_GRANT_REF.
434  */
435 void xenbus_teardown_ring(void **vaddr, unsigned int nr_pages,
436 			  grant_ref_t *grefs)
437 {
438 	unsigned int i;
439 
440 	for (i = 0; i < nr_pages; i++) {
441 		if (grefs[i] != INVALID_GRANT_REF) {
442 			gnttab_end_foreign_access(grefs[i], NULL);
443 			grefs[i] = INVALID_GRANT_REF;
444 		}
445 	}
446 
447 	if (*vaddr)
448 		free_pages_exact(*vaddr, nr_pages * XEN_PAGE_SIZE);
449 	*vaddr = NULL;
450 }
451 EXPORT_SYMBOL_GPL(xenbus_teardown_ring);
452 
453 /**
454  * Allocate an event channel for the given xenbus_device, assigning the newly
455  * created local port to *port.  Return 0 on success, or -errno on error.  On
456  * error, the device will switch to XenbusStateClosing, and the error will be
457  * saved in the store.
458  */
459 int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port)
460 {
461 	struct evtchn_alloc_unbound alloc_unbound;
462 	int err;
463 
464 	alloc_unbound.dom = DOMID_SELF;
465 	alloc_unbound.remote_dom = dev->otherend_id;
466 
467 	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
468 					  &alloc_unbound);
469 	if (err)
470 		xenbus_dev_fatal(dev, err, "allocating event channel");
471 	else
472 		*port = alloc_unbound.port;
473 
474 	return err;
475 }
476 EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
477 
478 
479 /**
480  * Free an existing event channel. Returns 0 on success or -errno on error.
481  */
482 int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port)
483 {
484 	struct evtchn_close close;
485 	int err;
486 
487 	close.port = port;
488 
489 	err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
490 	if (err)
491 		xenbus_dev_error(dev, err, "freeing event channel %u", port);
492 
493 	return err;
494 }
495 EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
496 
497 
498 /**
499  * xenbus_map_ring_valloc
500  * @dev: xenbus device
501  * @gnt_refs: grant reference array
502  * @nr_grefs: number of grant references
503  * @vaddr: pointer to address to be filled out by mapping
504  *
505  * Map @nr_grefs pages of memory into this domain from another
506  * domain's grant table.  xenbus_map_ring_valloc allocates @nr_grefs
507  * pages of virtual address space, maps the pages to that address, and
508  * sets *vaddr to that address.  Returns 0 on success, and -errno on
509  * error. If an error is returned, device will switch to
510  * XenbusStateClosing and the error message will be saved in XenStore.
511  */
512 int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
513 			   unsigned int nr_grefs, void **vaddr)
514 {
515 	int err;
516 	struct map_ring_valloc *info;
517 
518 	*vaddr = NULL;
519 
520 	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
521 		return -EINVAL;
522 
523 	info = kzalloc(sizeof(*info), GFP_KERNEL);
524 	if (!info)
525 		return -ENOMEM;
526 
527 	info->node = kzalloc(sizeof(*info->node), GFP_KERNEL);
528 	if (!info->node)
529 		err = -ENOMEM;
530 	else
531 		err = ring_ops->map(dev, info, gnt_refs, nr_grefs, vaddr);
532 
533 	kfree(info->node);
534 	kfree(info);
535 	return err;
536 }
537 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
538 
539 /* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned
540  * long), e.g. 32-on-64.  Caller is responsible for preparing the
541  * right array to feed into this function */
542 static int __xenbus_map_ring(struct xenbus_device *dev,
543 			     grant_ref_t *gnt_refs,
544 			     unsigned int nr_grefs,
545 			     grant_handle_t *handles,
546 			     struct map_ring_valloc *info,
547 			     unsigned int flags,
548 			     bool *leaked)
549 {
550 	int i, j;
551 
552 	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
553 		return -EINVAL;
554 
555 	for (i = 0; i < nr_grefs; i++) {
556 		gnttab_set_map_op(&info->map[i], info->phys_addrs[i], flags,
557 				  gnt_refs[i], dev->otherend_id);
558 		handles[i] = INVALID_GRANT_HANDLE;
559 	}
560 
561 	gnttab_batch_map(info->map, i);
562 
563 	for (i = 0; i < nr_grefs; i++) {
564 		if (info->map[i].status != GNTST_okay) {
565 			xenbus_dev_fatal(dev, info->map[i].status,
566 					 "mapping in shared page %d from domain %d",
567 					 gnt_refs[i], dev->otherend_id);
568 			goto fail;
569 		} else
570 			handles[i] = info->map[i].handle;
571 	}
572 
573 	return 0;
574 
575  fail:
576 	for (i = j = 0; i < nr_grefs; i++) {
577 		if (handles[i] != INVALID_GRANT_HANDLE) {
578 			gnttab_set_unmap_op(&info->unmap[j],
579 					    info->phys_addrs[i],
580 					    GNTMAP_host_map, handles[i]);
581 			j++;
582 		}
583 	}
584 
585 	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j));
586 
587 	*leaked = false;
588 	for (i = 0; i < j; i++) {
589 		if (info->unmap[i].status != GNTST_okay) {
590 			*leaked = true;
591 			break;
592 		}
593 	}
594 
595 	return -ENOENT;
596 }
597 
598 /**
599  * xenbus_unmap_ring
600  * @dev: xenbus device
601  * @handles: grant handle array
602  * @nr_handles: number of handles in the array
603  * @vaddrs: addresses to unmap
604  *
605  * Unmap memory in this domain that was imported from another domain.
606  * Returns 0 on success and returns GNTST_* on error
607  * (see xen/include/interface/grant_table.h).
608  */
609 static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles,
610 			     unsigned int nr_handles, unsigned long *vaddrs)
611 {
612 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
613 	int i;
614 	int err;
615 
616 	if (nr_handles > XENBUS_MAX_RING_GRANTS)
617 		return -EINVAL;
618 
619 	for (i = 0; i < nr_handles; i++)
620 		gnttab_set_unmap_op(&unmap[i], vaddrs[i],
621 				    GNTMAP_host_map, handles[i]);
622 
623 	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
624 
625 	err = GNTST_okay;
626 	for (i = 0; i < nr_handles; i++) {
627 		if (unmap[i].status != GNTST_okay) {
628 			xenbus_dev_error(dev, unmap[i].status,
629 					 "unmapping page at handle %d error %d",
630 					 handles[i], unmap[i].status);
631 			err = unmap[i].status;
632 			break;
633 		}
634 	}
635 
636 	return err;
637 }
638 
639 static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
640 					    unsigned int goffset,
641 					    unsigned int len,
642 					    void *data)
643 {
644 	struct map_ring_valloc *info = data;
645 	unsigned long vaddr = (unsigned long)gfn_to_virt(gfn);
646 
647 	info->phys_addrs[info->idx] = vaddr;
648 	info->addrs[info->idx] = vaddr;
649 
650 	info->idx++;
651 }
652 
653 static int xenbus_map_ring_hvm(struct xenbus_device *dev,
654 			       struct map_ring_valloc *info,
655 			       grant_ref_t *gnt_ref,
656 			       unsigned int nr_grefs,
657 			       void **vaddr)
658 {
659 	struct xenbus_map_node *node = info->node;
660 	int err;
661 	void *addr;
662 	bool leaked = false;
663 	unsigned int nr_pages = XENBUS_PAGES(nr_grefs);
664 
665 	err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages);
666 	if (err)
667 		goto out_err;
668 
669 	gnttab_foreach_grant(node->hvm.pages, nr_grefs,
670 			     xenbus_map_ring_setup_grant_hvm,
671 			     info);
672 
673 	err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
674 				info, GNTMAP_host_map, &leaked);
675 	node->nr_handles = nr_grefs;
676 
677 	if (err)
678 		goto out_free_ballooned_pages;
679 
680 	addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP,
681 		    PAGE_KERNEL);
682 	if (!addr) {
683 		err = -ENOMEM;
684 		goto out_xenbus_unmap_ring;
685 	}
686 
687 	node->hvm.addr = addr;
688 
689 	spin_lock(&xenbus_valloc_lock);
690 	list_add(&node->next, &xenbus_valloc_pages);
691 	spin_unlock(&xenbus_valloc_lock);
692 
693 	*vaddr = addr;
694 	info->node = NULL;
695 
696 	return 0;
697 
698  out_xenbus_unmap_ring:
699 	if (!leaked)
700 		xenbus_unmap_ring(dev, node->handles, nr_grefs, info->addrs);
701 	else
702 		pr_alert("leaking %p size %u page(s)",
703 			 addr, nr_pages);
704  out_free_ballooned_pages:
705 	if (!leaked)
706 		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
707  out_err:
708 	return err;
709 }
710 
/**
 * xenbus_unmap_ring_vfree
 * @dev: xenbus device
 * @vaddr: addr to unmap
 *
 * Based on Rusty Russell's skeleton driver's unmap_page.
 * Unmap memory in this domain that was imported from another domain.
 * Use xenbus_unmap_ring_vfree if you mapped in your memory with
 * xenbus_map_ring_valloc (it will free the virtual address space).
 * The work is delegated to the active ring_ops->unmap implementation.
 * Returns 0 on success and returns GNTST_* on error
 * (see xen/include/interface/grant_table.h).
 */
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
{
	return ring_ops->unmap(dev, vaddr);
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
728 
729 #ifdef CONFIG_XEN_PV
/*
 * apply_to_page_range() callback used by xenbus_map_ring_pv(): record the
 * machine address of @pte itself in the next free slot of
 * info->phys_addrs[].  @addr is not used.  Always returns 0.
 */
static int map_ring_apply(pte_t *pte, unsigned long addr, void *data)
{
	struct map_ring_valloc *info = data;

	info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr;
	return 0;
}
737 
738 static int xenbus_map_ring_pv(struct xenbus_device *dev,
739 			      struct map_ring_valloc *info,
740 			      grant_ref_t *gnt_refs,
741 			      unsigned int nr_grefs,
742 			      void **vaddr)
743 {
744 	struct xenbus_map_node *node = info->node;
745 	struct vm_struct *area;
746 	bool leaked = false;
747 	int err = -ENOMEM;
748 
749 	area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
750 	if (!area)
751 		return -ENOMEM;
752 	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
753 				XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info))
754 		goto failed;
755 	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
756 				info, GNTMAP_host_map | GNTMAP_contains_pte,
757 				&leaked);
758 	if (err)
759 		goto failed;
760 
761 	node->nr_handles = nr_grefs;
762 	node->pv.area = area;
763 
764 	spin_lock(&xenbus_valloc_lock);
765 	list_add(&node->next, &xenbus_valloc_pages);
766 	spin_unlock(&xenbus_valloc_lock);
767 
768 	*vaddr = area->addr;
769 	info->node = NULL;
770 
771 	return 0;
772 
773 failed:
774 	if (!leaked)
775 		free_vm_area(area);
776 	else
777 		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
778 
779 	return err;
780 }
781 
782 static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr)
783 {
784 	struct xenbus_map_node *node;
785 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
786 	unsigned int level;
787 	int i;
788 	bool leaked = false;
789 	int err;
790 
791 	spin_lock(&xenbus_valloc_lock);
792 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
793 		if (node->pv.area->addr == vaddr) {
794 			list_del(&node->next);
795 			goto found;
796 		}
797 	}
798 	node = NULL;
799  found:
800 	spin_unlock(&xenbus_valloc_lock);
801 
802 	if (!node) {
803 		xenbus_dev_error(dev, -ENOENT,
804 				 "can't find mapped virtual address %p", vaddr);
805 		return GNTST_bad_virt_addr;
806 	}
807 
808 	for (i = 0; i < node->nr_handles; i++) {
809 		unsigned long addr;
810 
811 		memset(&unmap[i], 0, sizeof(unmap[i]));
812 		addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i);
813 		unmap[i].host_addr = arbitrary_virt_to_machine(
814 			lookup_address(addr, &level)).maddr;
815 		unmap[i].dev_bus_addr = 0;
816 		unmap[i].handle = node->handles[i];
817 	}
818 
819 	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
820 
821 	err = GNTST_okay;
822 	leaked = false;
823 	for (i = 0; i < node->nr_handles; i++) {
824 		if (unmap[i].status != GNTST_okay) {
825 			leaked = true;
826 			xenbus_dev_error(dev, unmap[i].status,
827 					 "unmapping page at handle %d error %d",
828 					 node->handles[i], unmap[i].status);
829 			err = unmap[i].status;
830 			break;
831 		}
832 	}
833 
834 	if (!leaked)
835 		free_vm_area(node->pv.area);
836 	else
837 		pr_alert("leaking VM area %p size %u page(s)",
838 			 node->pv.area, node->nr_handles);
839 
840 	kfree(node);
841 	return err;
842 }
843 
/* Ring operations selected when XENFEAT_auto_translated_physmap is not set. */
static const struct xenbus_ring_ops ring_ops_pv = {
	.map = xenbus_map_ring_pv,
	.unmap = xenbus_unmap_ring_pv,
};
848 #endif
849 
/*
 * Scratch state for xenbus_unmap_ring_hvm(): the per-grant virtual
 * addresses collected by xenbus_unmap_ring_setup_grant_hvm().
 */
struct unmap_ring_hvm
{
	unsigned int idx;	/* next free slot in addrs[] */
	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
};
855 
856 static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn,
857 					      unsigned int goffset,
858 					      unsigned int len,
859 					      void *data)
860 {
861 	struct unmap_ring_hvm *info = data;
862 
863 	info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn);
864 
865 	info->idx++;
866 }
867 
868 static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr)
869 {
870 	int rv;
871 	struct xenbus_map_node *node;
872 	void *addr;
873 	struct unmap_ring_hvm info = {
874 		.idx = 0,
875 	};
876 	unsigned int nr_pages;
877 
878 	spin_lock(&xenbus_valloc_lock);
879 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
880 		addr = node->hvm.addr;
881 		if (addr == vaddr) {
882 			list_del(&node->next);
883 			goto found;
884 		}
885 	}
886 	node = addr = NULL;
887  found:
888 	spin_unlock(&xenbus_valloc_lock);
889 
890 	if (!node) {
891 		xenbus_dev_error(dev, -ENOENT,
892 				 "can't find mapped virtual address %p", vaddr);
893 		return GNTST_bad_virt_addr;
894 	}
895 
896 	nr_pages = XENBUS_PAGES(node->nr_handles);
897 
898 	gnttab_foreach_grant(node->hvm.pages, node->nr_handles,
899 			     xenbus_unmap_ring_setup_grant_hvm,
900 			     &info);
901 
902 	rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
903 			       info.addrs);
904 	if (!rv) {
905 		vunmap(vaddr);
906 		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
907 	}
908 	else
909 		WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);
910 
911 	kfree(node);
912 	return rv;
913 }
914 
915 /**
916  * xenbus_read_driver_state
917  * @path: path for driver
918  *
919  * Return the state of the driver rooted at the given store path, or
920  * XenbusStateUnknown if no state can be read.
921  */
922 enum xenbus_state xenbus_read_driver_state(const char *path)
923 {
924 	enum xenbus_state result;
925 	int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
926 	if (err)
927 		result = XenbusStateUnknown;
928 
929 	return result;
930 }
931 EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
932 
/*
 * Ring operations used when XENFEAT_auto_translated_physmap is set, or
 * when CONFIG_XEN_PV is not enabled.
 */
static const struct xenbus_ring_ops ring_ops_hvm = {
	.map = xenbus_map_ring_hvm,
	.unmap = xenbus_unmap_ring_hvm,
};
937 
/*
 * Select the ring map/unmap implementation used by
 * xenbus_map_ring_valloc() and xenbus_unmap_ring_vfree().
 *
 * With CONFIG_XEN_PV the PV operations are used unless the
 * XENFEAT_auto_translated_physmap feature is present; in every other case
 * (including kernels built without CONFIG_XEN_PV, where the "if/else" is
 * compiled out) the HVM operations are used.
 */
void __init xenbus_ring_ops_init(void)
{
#ifdef CONFIG_XEN_PV
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		ring_ops = &ring_ops_pv;
	else
#endif
		ring_ops = &ring_ops_hvm;
}
947