xref: /linux/drivers/xen/grant-table.c (revision 056a5087d87ead77dedbe9cf5bde53b7cd4b4651)
1 /******************************************************************************
2  * grant_table.c
3  *
4  * Granting foreign access to our memory reservation.
5  *
6  * Copyright (c) 2005-2006, Christopher Clark
7  * Copyright (c) 2004-2005, K A Fraser
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License version 2
11  * as published by the Free Software Foundation; or, when distributed
12  * separately from the Linux kernel or incorporated into other
13  * software packages, subject to the following license:
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a copy
16  * of this source file (the "Software"), to deal in the Software without
17  * restriction, including without limitation the rights to use, copy, modify,
18  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19  * and to permit persons to whom the Software is furnished to do so, subject to
20  * the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included in
23  * all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31  * IN THE SOFTWARE.
32  */
33 
34 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
35 
36 #include <linux/bitmap.h>
37 #include <linux/memblock.h>
38 #include <linux/sched.h>
39 #include <linux/mm.h>
40 #include <linux/slab.h>
41 #include <linux/vmalloc.h>
42 #include <linux/uaccess.h>
43 #include <linux/io.h>
44 #include <linux/delay.h>
45 #include <linux/hardirq.h>
46 #include <linux/workqueue.h>
47 #include <linux/ratelimit.h>
48 #include <linux/moduleparam.h>
49 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC
50 #include <linux/dma-mapping.h>
51 #endif
52 
53 #include <xen/xen.h>
54 #include <xen/interface/xen.h>
55 #include <xen/page.h>
56 #include <xen/grant_table.h>
57 #include <xen/interface/memory.h>
58 #include <xen/hvc-console.h>
59 #include <xen/swiotlb-xen.h>
60 #include <xen/balloon.h>
61 #ifdef CONFIG_X86
62 #include <asm/cpuid/api.h>
63 #include <asm/xen/cpuid.h>
64 #endif
65 #include <xen/mem-reservation.h>
66 #include <asm/xen/hypercall.h>
67 #include <asm/xen/interface.h>
68 
69 #include <asm/sync_bitops.h>
70 
/* Marker stored in a link slot to terminate a list of grant references. */
#define GNTTAB_LIST_END 0xffffffff

/*
 * Pages holding one link slot per grant reference; slot lookup is done by
 * __gnttab_entry().
 */
static grant_ref_t **gnttab_list;
/* Number of grant frames accounted for so far (advanced in grow_gnttab_list()). */
static unsigned int nr_grant_frames;
75 
76 /*
77  * Handling of free grants:
78  *
79  * Free grants are in a simple list anchored in gnttab_free_head. They are
80  * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
81  * of free entries is stored in gnttab_free_count.
82  * Additionally there is a bitmap of free entries anchored in
83  * gnttab_free_bitmap. This is being used for simplifying allocation of
84  * multiple consecutive grants, which is needed e.g. for support of virtio.
85  * gnttab_last_free is used to add free entries of new frames at the end of
86  * the free list.
87  * gnttab_free_tail_ptr specifies the variable which references the start
88  * of consecutive free grants ending with gnttab_last_free. This pointer is
89  * updated in a rather defensive way, in order to avoid performance hits in
90  * hot paths.
91  * All those variables are protected by gnttab_list_lock.
92  */
/* Number of entries currently on the free list. */
static int gnttab_free_count;
/* Total number of grant entries managed; also the size of the free bitmap. */
static unsigned int gnttab_size;
/* First free grant reference, GNTTAB_LIST_END if the list is empty. */
static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
/* Last element of the free list, GNTTAB_LIST_END if there is none. */
static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
/* Link slot referencing the start of the consecutive free tail, or NULL. */
static grant_ref_t *gnttab_free_tail_ptr;
/* One bit per grant reference; a set bit means the reference is free. */
static unsigned long *gnttab_free_bitmap;
/* Protects all of the free list state above. */
static DEFINE_SPINLOCK(gnttab_list_lock);
100 
/* Filled in by gnttab_setup_auto_xlat_frames(), reset by gnttab_free_auto_xlat_frames(). */
struct grant_frames xen_auto_xlat_grant_frames;
/* Exposed as module parameter "version" (permissions 0: not visible in sysfs). */
static unsigned int xen_gnttab_version;
module_param_named(version, xen_gnttab_version, uint, 0);

/*
 * The shared grant table, viewed either as an array of v1 entries, an array
 * of v2 entries, or as an untyped address.
 */
static union {
	struct grant_entry_v1 *v1;
	union grant_entry_v2 *v2;
	void *addr;
} gnttab_shared;
110 
/*
 * Function pointers (plus a few constants) describing one version of the
 * grant table interface. The active set is referenced by gnttab_interface.
 */
struct gnttab_ops {
	/*
	 * Version of the grant interface.
	 */
	unsigned int version;
	/*
	 * Grant refs per grant frame.
	 */
	unsigned int grefs_per_grant_frame;
	/*
	 * Mapping a list of frames for storing grant entries. Frames parameter
	 * is used to store grant table address when grant table being setup,
	 * nr_gframes is the number of frames to map grant table. Returning
	 * GNTST_okay means success and negative value means failure.
	 */
	int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
	/*
	 * Release a list of frames which are mapped in map_frames for grant
	 * entry status.
	 */
	void (*unmap_frames)(void);
	/*
	 * Introducing a valid entry into the grant table, granting the frame of
	 * this grant entry to domain for accessing. Ref
	 * parameter is reference of this introduced grant entry, domid is id of
	 * granted domain, frame is the page frame to be granted, and flags is
	 * status of the grant entry to be updated.
	 */
	void (*update_entry)(grant_ref_t ref, domid_t domid,
			     unsigned long frame, unsigned flags);
	/*
	 * Stop granting a grant entry to domain for accessing. Ref parameter is
	 * reference of a grant entry whose grant access will be stopped.
	 * If the grant entry is currently mapped for reading or writing, just
	 * return failure(==0) directly and don't tear down the grant access.
	 * Otherwise, stop grant access for this entry and return success(==1).
	 */
	int (*end_foreign_access_ref)(grant_ref_t ref);
	/*
	 * Read the frame number related to a given grant reference.
	 */
	unsigned long (*read_frame)(grant_ref_t ref);
};
155 
/*
 * Pairs a completion with an integer result.
 * NOTE(review): the users are not in the visible part of the file; judging by
 * the name this presumably carries the outcome of an asynchronous unmap back
 * to a waiter - confirm against the code using it.
 */
struct unmap_refs_callback_data {
	struct completion completion;
	int result;
};
160 
/* Operations of the grant table interface version in use. */
static const struct gnttab_ops *gnttab_interface;

/* This reflects status of grant entries, so act as a global value. */
static grant_status_t *grstatus;

/* Singly linked list of callbacks waiting for enough free grant references. */
static struct gnttab_free_callback *gnttab_free_callback_list;

static int gnttab_expand(unsigned int req_entries);

/* Number of grant_ref_t values fitting into one page. */
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
/* Number of grant_status_t values fitting into one page. */
#define SPP (PAGE_SIZE / sizeof(grant_status_t))
172 
173 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
174 {
175 	return &gnttab_list[(entry) / RPP][(entry) % RPP];
176 }
177 /* This can be used as an l-value */
178 #define gnttab_entry(entry) (*__gnttab_entry(entry))
179 
/*
 * Remove @count entries from the head of the free list and hand them out as
 * a chain linked through gnttab_entry() and terminated by GNTTAB_LIST_END.
 *
 * If fewer than @count entries are free, gnttab_expand() is asked for the
 * missing amount first; its negative return value is passed back on failure.
 * On success the first grant reference of the chain is returned.
 *
 * NOTE(review): the unlinking after the loop also runs when @count is 0 -
 * callers presumably always pass a non-zero count; confirm.
 */
static int get_free_entries(unsigned count)
{
	unsigned long flags;
	int ref, rc = 0;
	grant_ref_t head;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if ((gnttab_free_count < count) &&
	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		return rc;
	}

	ref = head = gnttab_free_head;
	gnttab_free_count -= count;
	/* Walk over @count entries; head ends up on the last one handed out. */
	while (count--) {
		bitmap_clear(gnttab_free_bitmap, head, 1);
		/* The tail pointer must not reference a slot being handed out. */
		if (gnttab_free_tail_ptr == __gnttab_entry(head))
			gnttab_free_tail_ptr = &gnttab_free_head;
		if (count)
			head = gnttab_entry(head);
	}
	/* Cut the chain off the free list and terminate it. */
	gnttab_free_head = gnttab_entry(head);
	gnttab_entry(head) = GNTTAB_LIST_END;

	/* Nothing left: there is no last free entry and no free tail. */
	if (!gnttab_free_count) {
		gnttab_last_free = GNTTAB_LIST_END;
		gnttab_free_tail_ptr = NULL;
	}

	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ref;
}
215 
216 static int get_seq_entry_count(void)
217 {
218 	if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
219 	    *gnttab_free_tail_ptr == GNTTAB_LIST_END)
220 		return 0;
221 
222 	return gnttab_last_free - *gnttab_free_tail_ptr + 1;
223 }
224 
/*
 * Rebuilds the free grant list and tries to find count consecutive entries.
 *
 * The free bitmap is scanned for runs of set bits. The first run holding at
 * least @count entries supplies the result: its first @count bits are
 * cleared and gnttab_free_count is reduced accordingly. All remaining free
 * entries are relinked in ascending order.
 *
 * Returns the first grant reference of the allocated run, or -ENOSPC if no
 * run was large enough (the list is rebuilt in that case as well).
 * Called from get_free_entries_seq() with gnttab_list_lock held.
 */
static int get_free_seq(unsigned int count)
{
	int ret = -ENOSPC;
	unsigned int from, to;
	grant_ref_t *last;

	gnttab_free_tail_ptr = &gnttab_free_head;
	/* last is the link slot the next free entry gets written to. */
	last = &gnttab_free_head;

	/* [from, to) is the current run of free entries. */
	for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
	     from < gnttab_size;
	     from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
		to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
					from + 1);
		/* Only the first sufficiently large run is allocated from. */
		if (ret < 0 && to - from >= count) {
			ret = from;
			bitmap_clear(gnttab_free_bitmap, ret, count);
			from += count;
			gnttab_free_count -= count;
			/* Run consumed completely: nothing to relink. */
			if (from == to)
				continue;
		}

		/*
		 * Recreate the free list in order to have it properly sorted.
		 * This is needed to make sure that the free tail has the maximum
		 * possible size.
		 */
		while (from < to) {
			*last = from;
			last = __gnttab_entry(from);
			gnttab_last_free = from;
			from++;
		}
		/* More entries may follow: a later run would start after this slot. */
		if (to < gnttab_size)
			gnttab_free_tail_ptr = __gnttab_entry(to - 1);
	}

	*last = GNTTAB_LIST_END;
	/* No free tail unless the last free entry is the very last grant. */
	if (gnttab_last_free != gnttab_size - 1)
		gnttab_free_tail_ptr = NULL;

	return ret;
}
270 
/*
 * Allocate @count consecutive grant references.
 *
 * The table is expanded via gnttab_expand() if fewer than @count entries are
 * free. If the consecutive free tail is too short, get_free_seq() is tried;
 * if that finds nothing either, the table is expanded by the number of
 * entries still missing in the tail and the allocation is served from the
 * tail.
 *
 * Returns the first grant reference of the sequence, or the negative value
 * returned by gnttab_expand() on failure.
 */
static int get_free_entries_seq(unsigned int count)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if (gnttab_free_count < count) {
		ret = gnttab_expand(count - gnttab_free_count);
		if (ret < 0)
			goto out;
	}

	if (get_seq_entry_count() < count) {
		/* get_free_seq() has done all the bookkeeping on success. */
		ret = get_free_seq(count);
		if (ret >= 0)
			goto out;
		ret = gnttab_expand(count - get_seq_entry_count());
		if (ret < 0)
			goto out;
	}

	/* Take the first @count entries of the free tail. */
	ret = *gnttab_free_tail_ptr;
	*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
	gnttab_free_count -= count;
	if (!gnttab_free_count)
		gnttab_free_tail_ptr = NULL;
	bitmap_clear(gnttab_free_bitmap, ret, count);

 out:
	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ret;
}
305 
306 static void do_free_callbacks(void)
307 {
308 	struct gnttab_free_callback *callback, *next;
309 
310 	callback = gnttab_free_callback_list;
311 	gnttab_free_callback_list = NULL;
312 
313 	while (callback != NULL) {
314 		next = callback->next;
315 		if (gnttab_free_count >= callback->count) {
316 			callback->next = NULL;
317 			callback->fn(callback->arg);
318 		} else {
319 			callback->next = gnttab_free_callback_list;
320 			gnttab_free_callback_list = callback;
321 		}
322 		callback = next;
323 	}
324 }
325 
326 static inline void check_free_callbacks(void)
327 {
328 	if (unlikely(gnttab_free_callback_list))
329 		do_free_callbacks();
330 }
331 
332 static void put_free_entry_locked(grant_ref_t ref)
333 {
334 	if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
335 		return;
336 
337 	gnttab_entry(ref) = gnttab_free_head;
338 	gnttab_free_head = ref;
339 	if (!gnttab_free_count)
340 		gnttab_last_free = ref;
341 	if (gnttab_free_tail_ptr == &gnttab_free_head)
342 		gnttab_free_tail_ptr = __gnttab_entry(ref);
343 	gnttab_free_count++;
344 	bitmap_set(gnttab_free_bitmap, ref, 1);
345 }
346 
347 static void put_free_entry(grant_ref_t ref)
348 {
349 	unsigned long flags;
350 
351 	spin_lock_irqsave(&gnttab_list_lock, flags);
352 	put_free_entry_locked(ref);
353 	check_free_callbacks();
354 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
355 }
356 
357 static void gnttab_set_free(unsigned int start, unsigned int n)
358 {
359 	unsigned int i;
360 
361 	for (i = start; i < start + n - 1; i++)
362 		gnttab_entry(i) = i + 1;
363 
364 	gnttab_entry(i) = GNTTAB_LIST_END;
365 	if (!gnttab_free_count) {
366 		gnttab_free_head = start;
367 		gnttab_free_tail_ptr = &gnttab_free_head;
368 	} else {
369 		gnttab_entry(gnttab_last_free) = start;
370 	}
371 	gnttab_free_count += n;
372 	gnttab_last_free = i;
373 
374 	bitmap_set(gnttab_free_bitmap, start, n);
375 }
376 
377 /*
378  * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
379  * Introducing a valid entry into the grant table:
380  *  1. Write ent->domid.
381  *  2. Write ent->frame: Frame to which access is permitted.
382  *  3. Write memory barrier (WMB).
383  *  4. Write ent->flags, inc. valid type.
384  */
385 static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
386 				   unsigned long frame, unsigned flags)
387 {
388 	gnttab_shared.v1[ref].domid = domid;
389 	gnttab_shared.v1[ref].frame = frame;
390 	wmb();
391 	gnttab_shared.v1[ref].flags = flags;
392 }
393 
394 static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
395 				   unsigned long frame, unsigned int flags)
396 {
397 	gnttab_shared.v2[ref].hdr.domid = domid;
398 	gnttab_shared.v2[ref].full_page.frame = frame;
399 	wmb();	/* Hypervisor concurrent accesses. */
400 	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
401 }
402 
403 /*
404  * Public grant-issuing interface functions
405  */
406 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
407 				     unsigned long frame, int readonly)
408 {
409 	gnttab_interface->update_entry(ref, domid, frame,
410 			   GTF_permit_access | (readonly ? GTF_readonly : 0));
411 }
412 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
413 
414 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
415 				int readonly)
416 {
417 	int ref;
418 
419 	ref = get_free_entries(1);
420 	if (unlikely(ref < 0))
421 		return -ENOSPC;
422 
423 	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
424 
425 	return ref;
426 }
427 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
428 
429 static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref)
430 {
431 	u16 *pflags = &gnttab_shared.v1[ref].flags;
432 	u16 flags;
433 
434 	flags = *pflags;
435 	do {
436 		if (flags & (GTF_reading|GTF_writing))
437 			return 0;
438 	} while (!sync_try_cmpxchg(pflags, &flags, 0));
439 
440 	return 1;
441 }
442 
443 static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref)
444 {
445 	gnttab_shared.v2[ref].hdr.flags = 0;
446 	mb();	/* Concurrent access by hypervisor. */
447 	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
448 		return 0;
449 	} else {
450 		/*
451 		 * The read of grstatus needs to have acquire semantics.
452 		 *  On x86, reads already have that, and we just need to
453 		 * protect against compiler reorderings.
454 		 * On other architectures we may need a full barrier.
455 		 */
456 #ifdef CONFIG_X86
457 		barrier();
458 #else
459 		mb();
460 #endif
461 	}
462 
463 	return 1;
464 }
465 
466 static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref)
467 {
468 	return gnttab_interface->end_foreign_access_ref(ref);
469 }
470 
471 int gnttab_end_foreign_access_ref(grant_ref_t ref)
472 {
473 	if (_gnttab_end_foreign_access_ref(ref))
474 		return 1;
475 	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
476 	return 0;
477 }
478 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
479 
480 static unsigned long gnttab_read_frame_v1(grant_ref_t ref)
481 {
482 	return gnttab_shared.v1[ref].frame;
483 }
484 
485 static unsigned long gnttab_read_frame_v2(grant_ref_t ref)
486 {
487 	return gnttab_shared.v2[ref].full_page.frame;
488 }
489 
/*
 * A grant reference whose foreign access could not be ended right away.
 * Entries are queued on deferred_list and retried by gnttab_handle_deferred().
 */
struct deferred_entry {
	struct list_head list;	/* link in deferred_list */
	grant_ref_t ref;	/* the grant reference still in use */
	uint16_t warn_delay;	/* retries left until "still pending" is logged */
	struct page *page;	/* page whose reference is dropped once freed */
};
/* Protected by gnttab_list_lock. */
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);

/* Statistics: entries currently deferred / entries that could not be queued. */
static atomic64_t deferred_count;
static atomic64_t leaked_count;
/* Maximum number of deferred entries tried per timer run; 0 means no limit. */
static unsigned int free_per_iteration = 10;
module_param(free_per_iteration, uint, 0600);
504 
505 static void gnttab_handle_deferred(struct timer_list *unused)
506 {
507 	unsigned int nr = READ_ONCE(free_per_iteration);
508 	const bool ignore_limit = nr == 0;
509 	struct deferred_entry *first = NULL;
510 	unsigned long flags;
511 	size_t freed = 0;
512 
513 	spin_lock_irqsave(&gnttab_list_lock, flags);
514 	while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
515 		struct deferred_entry *entry
516 			= list_first_entry(&deferred_list,
517 					   struct deferred_entry, list);
518 
519 		if (entry == first)
520 			break;
521 		list_del(&entry->list);
522 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
523 		if (_gnttab_end_foreign_access_ref(entry->ref)) {
524 			uint64_t ret = atomic64_dec_return(&deferred_count);
525 
526 			put_free_entry(entry->ref);
527 			pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
528 				 entry->ref, page_to_pfn(entry->page),
529 				 (unsigned long long)ret);
530 			put_page(entry->page);
531 			freed++;
532 			kfree(entry);
533 			entry = NULL;
534 		} else {
535 			if (!--entry->warn_delay)
536 				pr_info("g.e. %#x still pending\n", entry->ref);
537 			if (!first)
538 				first = entry;
539 		}
540 		spin_lock_irqsave(&gnttab_list_lock, flags);
541 		if (entry)
542 			list_add_tail(&entry->list, &deferred_list);
543 	}
544 	if (list_empty(&deferred_list))
545 		WARN_ON(atomic64_read(&deferred_count));
546 	else if (!timer_pending(&deferred_timer)) {
547 		deferred_timer.expires = jiffies + HZ;
548 		add_timer(&deferred_timer);
549 	}
550 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
551 	pr_debug("Freed %zu references", freed);
552 }
553 
554 static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
555 {
556 	struct deferred_entry *entry;
557 	gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
558 	uint64_t leaked, deferred;
559 
560 	entry = kmalloc_obj(*entry, gfp);
561 	if (!page) {
562 		unsigned long gfn = gnttab_interface->read_frame(ref);
563 
564 		page = pfn_to_page(gfn_to_pfn(gfn));
565 		get_page(page);
566 	}
567 
568 	if (entry) {
569 		unsigned long flags;
570 
571 		entry->ref = ref;
572 		entry->page = page;
573 		entry->warn_delay = 60;
574 		spin_lock_irqsave(&gnttab_list_lock, flags);
575 		list_add_tail(&entry->list, &deferred_list);
576 		if (!timer_pending(&deferred_timer)) {
577 			deferred_timer.expires = jiffies + HZ;
578 			add_timer(&deferred_timer);
579 		}
580 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
581 		deferred = atomic64_inc_return(&deferred_count);
582 		leaked = atomic64_read(&leaked_count);
583 		pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
584 			 ref, page ? page_to_pfn(page) : -1, deferred, leaked);
585 	} else {
586 		deferred = atomic64_read(&deferred_count);
587 		leaked = atomic64_inc_return(&leaked_count);
588 		pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
589 			ref, page ? page_to_pfn(page) : -1, deferred, leaked);
590 	}
591 }
592 
593 int gnttab_try_end_foreign_access(grant_ref_t ref)
594 {
595 	int ret = _gnttab_end_foreign_access_ref(ref);
596 
597 	if (ret)
598 		put_free_entry(ref);
599 
600 	return ret;
601 }
602 EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access);
603 
604 void gnttab_end_foreign_access(grant_ref_t ref, struct page *page)
605 {
606 	if (gnttab_try_end_foreign_access(ref)) {
607 		if (page)
608 			put_page(page);
609 	} else
610 		gnttab_add_deferred(ref, page);
611 }
612 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
613 
614 void gnttab_free_grant_reference(grant_ref_t ref)
615 {
616 	put_free_entry(ref);
617 }
618 EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
619 
620 void gnttab_free_grant_references(grant_ref_t head)
621 {
622 	grant_ref_t ref;
623 	unsigned long flags;
624 
625 	spin_lock_irqsave(&gnttab_list_lock, flags);
626 	while (head != GNTTAB_LIST_END) {
627 		ref = gnttab_entry(head);
628 		put_free_entry_locked(head);
629 		head = ref;
630 	}
631 	check_free_callbacks();
632 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
633 }
634 EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
635 
636 void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
637 {
638 	unsigned long flags;
639 	unsigned int i;
640 
641 	spin_lock_irqsave(&gnttab_list_lock, flags);
642 	for (i = count; i > 0; i--)
643 		put_free_entry_locked(head + i - 1);
644 	check_free_callbacks();
645 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
646 }
647 EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
648 
649 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
650 {
651 	int h = get_free_entries(count);
652 
653 	if (h < 0)
654 		return -ENOSPC;
655 
656 	*head = h;
657 
658 	return 0;
659 }
660 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
661 
662 int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
663 {
664 	int h;
665 
666 	if (count == 1)
667 		h = get_free_entries(1);
668 	else
669 		h = get_free_entries_seq(count);
670 
671 	if (h < 0)
672 		return -ENOSPC;
673 
674 	*first = h;
675 
676 	return 0;
677 }
678 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
679 
680 int gnttab_empty_grant_references(const grant_ref_t *private_head)
681 {
682 	return (*private_head == GNTTAB_LIST_END);
683 }
684 EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
685 
686 int gnttab_claim_grant_reference(grant_ref_t *private_head)
687 {
688 	grant_ref_t g = *private_head;
689 	if (unlikely(g == GNTTAB_LIST_END))
690 		return -ENOSPC;
691 	*private_head = gnttab_entry(g);
692 	return g;
693 }
694 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
695 
696 void gnttab_release_grant_reference(grant_ref_t *private_head,
697 				    grant_ref_t release)
698 {
699 	gnttab_entry(release) = *private_head;
700 	*private_head = release;
701 }
702 EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
703 
704 void gnttab_request_free_callback(struct gnttab_free_callback *callback,
705 				  void (*fn)(void *), void *arg, u16 count)
706 {
707 	unsigned long flags;
708 	struct gnttab_free_callback *cb;
709 
710 	spin_lock_irqsave(&gnttab_list_lock, flags);
711 
712 	/* Check if the callback is already on the list */
713 	cb = gnttab_free_callback_list;
714 	while (cb) {
715 		if (cb == callback)
716 			goto out;
717 		cb = cb->next;
718 	}
719 
720 	callback->fn = fn;
721 	callback->arg = arg;
722 	callback->count = count;
723 	callback->next = gnttab_free_callback_list;
724 	gnttab_free_callback_list = callback;
725 	check_free_callbacks();
726 out:
727 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
728 }
729 EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
730 
731 void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
732 {
733 	struct gnttab_free_callback **pcb;
734 	unsigned long flags;
735 
736 	spin_lock_irqsave(&gnttab_list_lock, flags);
737 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
738 		if (*pcb == callback) {
739 			*pcb = callback->next;
740 			break;
741 		}
742 	}
743 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
744 }
745 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
746 
747 static unsigned int gnttab_frames(unsigned int frames, unsigned int align)
748 {
749 	return (frames * gnttab_interface->grefs_per_grant_frame + align - 1) /
750 	       align;
751 }
752 
753 static int grow_gnttab_list(unsigned int more_frames)
754 {
755 	unsigned int new_nr_grant_frames, extra_entries, i;
756 	unsigned int nr_glist_frames, new_nr_glist_frames;
757 	unsigned int grefs_per_frame;
758 
759 	grefs_per_frame = gnttab_interface->grefs_per_grant_frame;
760 
761 	new_nr_grant_frames = nr_grant_frames + more_frames;
762 	extra_entries = more_frames * grefs_per_frame;
763 
764 	nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
765 	new_nr_glist_frames = gnttab_frames(new_nr_grant_frames, RPP);
766 	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
767 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
768 		if (!gnttab_list[i])
769 			goto grow_nomem;
770 	}
771 
772 	gnttab_set_free(gnttab_size, extra_entries);
773 
774 	if (!gnttab_free_tail_ptr)
775 		gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
776 
777 	nr_grant_frames = new_nr_grant_frames;
778 	gnttab_size += extra_entries;
779 
780 	check_free_callbacks();
781 
782 	return 0;
783 
784 grow_nomem:
785 	while (i-- > nr_glist_frames)
786 		free_page((unsigned long) gnttab_list[i]);
787 	return -ENOMEM;
788 }
789 
790 static unsigned int __max_nr_grant_frames(void)
791 {
792 	struct gnttab_query_size query;
793 	int rc;
794 
795 	query.dom = DOMID_SELF;
796 
797 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
798 	if ((rc < 0) || (query.status != GNTST_okay))
799 		return 4; /* Legacy max supported number of frames */
800 
801 	return query.max_nr_frames;
802 }
803 
/*
 * Maximum number of grant frames usable by this domain: the value currently
 * reported by __max_nr_grant_frames(), capped by the value seen on the first
 * call of this function.
 */
unsigned int gnttab_max_grant_frames(void)
{
	unsigned int xen_max = __max_nr_grant_frames();
	static unsigned int boot_max_nr_grant_frames;

	/*
	 * First time, initialize it properly. The value queried just above is
	 * reused, so the size query is not issued a second time.
	 */
	if (!boot_max_nr_grant_frames)
		boot_max_nr_grant_frames = xen_max;

	if (xen_max > boot_max_nr_grant_frames)
		return boot_max_nr_grant_frames;
	return xen_max;
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
818 
819 int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
820 {
821 	xen_pfn_t *pfn;
822 	unsigned int max_nr_gframes = __max_nr_grant_frames();
823 	unsigned int i;
824 	void *vaddr;
825 
826 	if (xen_auto_xlat_grant_frames.count)
827 		return -EINVAL;
828 
829 	vaddr = memremap(addr, XEN_PAGE_SIZE * max_nr_gframes, MEMREMAP_WB);
830 	if (vaddr == NULL) {
831 		pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n",
832 			&addr);
833 		return -ENOMEM;
834 	}
835 	pfn = kzalloc_objs(pfn[0], max_nr_gframes);
836 	if (!pfn) {
837 		memunmap(vaddr);
838 		return -ENOMEM;
839 	}
840 	for (i = 0; i < max_nr_gframes; i++)
841 		pfn[i] = XEN_PFN_DOWN(addr) + i;
842 
843 	xen_auto_xlat_grant_frames.vaddr = vaddr;
844 	xen_auto_xlat_grant_frames.pfn = pfn;
845 	xen_auto_xlat_grant_frames.count = max_nr_gframes;
846 
847 	return 0;
848 }
849 EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);
850 
851 void gnttab_free_auto_xlat_frames(void)
852 {
853 	if (!xen_auto_xlat_grant_frames.count)
854 		return;
855 	kfree(xen_auto_xlat_grant_frames.pfn);
856 	memunmap(xen_auto_xlat_grant_frames.vaddr);
857 
858 	xen_auto_xlat_grant_frames.pfn = NULL;
859 	xen_auto_xlat_grant_frames.count = 0;
860 	xen_auto_xlat_grant_frames.vaddr = NULL;
861 }
862 EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
863 
864 int gnttab_pages_set_private(int nr_pages, struct page **pages)
865 {
866 	int i;
867 
868 	for (i = 0; i < nr_pages; i++) {
869 #if BITS_PER_LONG < 64
870 		struct xen_page_foreign *foreign;
871 
872 		foreign = kzalloc_obj(*foreign);
873 		if (!foreign)
874 			return -ENOMEM;
875 
876 		set_page_private(pages[i], (unsigned long)foreign);
877 #endif
878 		SetPagePrivate(pages[i]);
879 	}
880 
881 	return 0;
882 }
883 EXPORT_SYMBOL_GPL(gnttab_pages_set_private);
884 
/**
 * gnttab_alloc_pages - alloc pages suitable for grant mapping into
 * @nr_pages: number of pages to alloc
 * @pages: returns the pages
 *
 * The pages are obtained from xen_alloc_unpopulated_pages() and marked via
 * gnttab_pages_set_private(). If the marking fails, the pages are freed
 * again.
 *
 * Return: the negative value of the failing step, otherwise the result of
 * gnttab_pages_set_private().
 */
int gnttab_alloc_pages(int nr_pages, struct page **pages)
{
	int err;

	err = xen_alloc_unpopulated_pages(nr_pages, pages);
	if (err >= 0) {
		err = gnttab_pages_set_private(nr_pages, pages);
		if (err < 0)
			gnttab_free_pages(nr_pages, pages);
	}

	return err;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
905 
906 #ifdef CONFIG_XEN_UNPOPULATED_ALLOC
907 static inline void cache_init(struct gnttab_page_cache *cache)
908 {
909 	cache->pages = NULL;
910 }
911 
912 static inline bool cache_empty(struct gnttab_page_cache *cache)
913 {
914 	return !cache->pages;
915 }
916 
917 static inline struct page *cache_deq(struct gnttab_page_cache *cache)
918 {
919 	struct page *page;
920 
921 	page = cache->pages;
922 	cache->pages = page->zone_device_data;
923 
924 	return page;
925 }
926 
927 static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
928 {
929 	page->zone_device_data = cache->pages;
930 	cache->pages = page;
931 }
932 #else
933 static inline void cache_init(struct gnttab_page_cache *cache)
934 {
935 	INIT_LIST_HEAD(&cache->pages);
936 }
937 
938 static inline bool cache_empty(struct gnttab_page_cache *cache)
939 {
940 	return list_empty(&cache->pages);
941 }
942 
943 static inline struct page *cache_deq(struct gnttab_page_cache *cache)
944 {
945 	struct page *page;
946 
947 	page = list_first_entry(&cache->pages, struct page, lru);
948 	list_del(&page->lru);
949 
950 	return page;
951 }
952 
953 static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
954 {
955 	list_add(&page->lru, &cache->pages);
956 }
957 #endif
958 
959 void gnttab_page_cache_init(struct gnttab_page_cache *cache)
960 {
961 	spin_lock_init(&cache->lock);
962 	cache_init(cache);
963 	cache->num_pages = 0;
964 }
965 EXPORT_SYMBOL_GPL(gnttab_page_cache_init);
966 
967 int gnttab_page_cache_get(struct gnttab_page_cache *cache, struct page **page)
968 {
969 	unsigned long flags;
970 
971 	spin_lock_irqsave(&cache->lock, flags);
972 
973 	if (cache_empty(cache)) {
974 		spin_unlock_irqrestore(&cache->lock, flags);
975 		return gnttab_alloc_pages(1, page);
976 	}
977 
978 	page[0] = cache_deq(cache);
979 	cache->num_pages--;
980 
981 	spin_unlock_irqrestore(&cache->lock, flags);
982 
983 	return 0;
984 }
985 EXPORT_SYMBOL_GPL(gnttab_page_cache_get);
986 
987 void gnttab_page_cache_put(struct gnttab_page_cache *cache, struct page **page,
988 			   unsigned int num)
989 {
990 	unsigned long flags;
991 	unsigned int i;
992 
993 	spin_lock_irqsave(&cache->lock, flags);
994 
995 	for (i = 0; i < num; i++)
996 		cache_enq(cache, page[i]);
997 	cache->num_pages += num;
998 
999 	spin_unlock_irqrestore(&cache->lock, flags);
1000 }
1001 EXPORT_SYMBOL_GPL(gnttab_page_cache_put);
1002 
1003 void gnttab_page_cache_shrink(struct gnttab_page_cache *cache, unsigned int num)
1004 {
1005 	struct page *page[10];
1006 	unsigned int i = 0;
1007 	unsigned long flags;
1008 
1009 	spin_lock_irqsave(&cache->lock, flags);
1010 
1011 	while (cache->num_pages > num) {
1012 		page[i] = cache_deq(cache);
1013 		cache->num_pages--;
1014 		if (++i == ARRAY_SIZE(page)) {
1015 			spin_unlock_irqrestore(&cache->lock, flags);
1016 			gnttab_free_pages(i, page);
1017 			i = 0;
1018 			spin_lock_irqsave(&cache->lock, flags);
1019 		}
1020 	}
1021 
1022 	spin_unlock_irqrestore(&cache->lock, flags);
1023 
1024 	if (i != 0)
1025 		gnttab_free_pages(i, page);
1026 }
1027 EXPORT_SYMBOL_GPL(gnttab_page_cache_shrink);
1028 
/*
 * Clear the Private flag on those of the @nr_pages pages in @pages that have
 * it set. If BITS_PER_LONG is below 64, the data referenced by the page
 * private field is freed first.
 */
void gnttab_pages_clear_private(int nr_pages, struct page **pages)
{
	int idx;

	for (idx = 0; idx < nr_pages; idx++) {
		struct page *pg = pages[idx];

		if (!PagePrivate(pg))
			continue;

#if BITS_PER_LONG < 64
		kfree((void *)page_private(pg));
#endif
		ClearPagePrivate(pg);
	}
}
EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);
1043 
/**
 * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
 * @nr_pages: number of pages to free
 * @pages: the pages
 *
 * The private marking set up by gnttab_pages_set_private() is removed before
 * the pages are handed to xen_free_unpopulated_pages().
 */
void gnttab_free_pages(int nr_pages, struct page **pages)
{
	gnttab_pages_clear_private(nr_pages, pages);
	xen_free_unpopulated_pages(nr_pages, pages);
}
EXPORT_SYMBOL_GPL(gnttab_free_pages);
1055 
1056 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC
1057 /**
1058  * gnttab_dma_alloc_pages - alloc DMAable pages suitable for grant mapping into
1059  * @args: arguments to the function
1060  */
1061 int gnttab_dma_alloc_pages(struct gnttab_dma_alloc_args *args)
1062 {
1063 	unsigned long pfn, start_pfn;
1064 	size_t size;
1065 	int i, ret;
1066 
1067 	if (args->nr_pages < 0 || args->nr_pages > (INT_MAX >> PAGE_SHIFT))
1068 		return -ENOMEM;
1069 
1070 	size = args->nr_pages << PAGE_SHIFT;
1071 	if (args->coherent)
1072 		args->vaddr = dma_alloc_coherent(args->dev, size,
1073 						 &args->dev_bus_addr,
1074 						 GFP_KERNEL | __GFP_NOWARN);
1075 	else
1076 		args->vaddr = dma_alloc_wc(args->dev, size,
1077 					   &args->dev_bus_addr,
1078 					   GFP_KERNEL | __GFP_NOWARN);
1079 	if (!args->vaddr) {
1080 		pr_debug("Failed to allocate DMA buffer of size %zu\n", size);
1081 		return -ENOMEM;
1082 	}
1083 
1084 	start_pfn = __phys_to_pfn(args->dev_bus_addr);
1085 	for (pfn = start_pfn, i = 0; pfn < start_pfn + args->nr_pages;
1086 			pfn++, i++) {
1087 		struct page *page = pfn_to_page(pfn);
1088 
1089 		args->pages[i] = page;
1090 		args->frames[i] = xen_page_to_gfn(page);
1091 		xenmem_reservation_scrub_page(page);
1092 	}
1093 
1094 	xenmem_reservation_va_mapping_reset(args->nr_pages, args->pages);
1095 
1096 	ret = xenmem_reservation_decrease(args->nr_pages, args->frames);
1097 	if (ret != args->nr_pages) {
1098 		pr_debug("Failed to decrease reservation for DMA buffer\n");
1099 		ret = -EFAULT;
1100 		goto fail;
1101 	}
1102 
1103 	ret = gnttab_pages_set_private(args->nr_pages, args->pages);
1104 	if (ret < 0)
1105 		goto fail;
1106 
1107 	return 0;
1108 
1109 fail:
1110 	gnttab_dma_free_pages(args);
1111 	return ret;
1112 }
1113 EXPORT_SYMBOL_GPL(gnttab_dma_alloc_pages);
1114 
1115 /**
1116  * gnttab_dma_free_pages - free DMAable pages
1117  * @args: arguments to the function
1118  */
1119 int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args)
1120 {
1121 	size_t size;
1122 	int i, ret;
1123 
1124 	gnttab_pages_clear_private(args->nr_pages, args->pages);
1125 
1126 	for (i = 0; i < args->nr_pages; i++)
1127 		args->frames[i] = page_to_xen_pfn(args->pages[i]);
1128 
1129 	ret = xenmem_reservation_increase(args->nr_pages, args->frames);
1130 	if (ret != args->nr_pages) {
1131 		pr_debug("Failed to increase reservation for DMA buffer\n");
1132 		ret = -EFAULT;
1133 	} else {
1134 		ret = 0;
1135 	}
1136 
1137 	xenmem_reservation_va_mapping_update(args->nr_pages, args->pages,
1138 					     args->frames);
1139 
1140 	size = args->nr_pages << PAGE_SHIFT;
1141 	if (args->coherent)
1142 		dma_free_coherent(args->dev, size,
1143 				  args->vaddr, args->dev_bus_addr);
1144 	else
1145 		dma_free_wc(args->dev, size,
1146 			    args->vaddr, args->dev_bus_addr);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(gnttab_dma_free_pages);
1150 #endif
1151 
1152 /* Handling of paged out grant targets (GNTST_eagain) */
1153 #define MAX_DELAY 256
1154 static inline void
1155 gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
1156 						const char *func)
1157 {
1158 	unsigned delay = 1;
1159 
1160 	do {
1161 		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
1162 		if (*status == GNTST_eagain)
1163 			msleep(delay++);
1164 	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
1165 
1166 	if (delay >= MAX_DELAY) {
1167 		pr_err("%s: %s eagain grant\n", func, current->comm);
1168 		*status = GNTST_bad_page;
1169 	}
1170 }
1171 
1172 void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
1173 {
1174 	struct gnttab_map_grant_ref *op;
1175 
1176 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
1177 		BUG();
1178 	for (op = batch; op < batch + count; op++)
1179 		if (op->status == GNTST_eagain)
1180 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
1181 						&op->status, __func__);
1182 }
1183 EXPORT_SYMBOL_GPL(gnttab_batch_map);
1184 
1185 void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
1186 {
1187 	struct gnttab_copy *op;
1188 
1189 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
1190 		BUG();
1191 	for (op = batch; op < batch + count; op++)
1192 		if (op->status == GNTST_eagain)
1193 			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
1194 						&op->status, __func__);
1195 }
1196 EXPORT_SYMBOL_GPL(gnttab_batch_copy);
1197 
1198 void gnttab_foreach_grant_in_range(struct page *page,
1199 				   unsigned int offset,
1200 				   unsigned int len,
1201 				   xen_grant_fn_t fn,
1202 				   void *data)
1203 {
1204 	unsigned int goffset;
1205 	unsigned int glen;
1206 	unsigned long xen_pfn;
1207 
1208 	len = min(PAGE_SIZE - offset, len);
1209 	goffset = xen_offset_in_page(offset);
1210 
1211 	xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(offset);
1212 
1213 	while (len) {
1214 		glen = min_t(unsigned int, XEN_PAGE_SIZE - goffset, len);
1215 		fn(pfn_to_gfn(xen_pfn), goffset, glen, data);
1216 
1217 		goffset = 0;
1218 		xen_pfn++;
1219 		len -= glen;
1220 	}
1221 }
1222 EXPORT_SYMBOL_GPL(gnttab_foreach_grant_in_range);
1223 
1224 void gnttab_foreach_grant(struct page **pages,
1225 			  unsigned int nr_grefs,
1226 			  xen_grant_fn_t fn,
1227 			  void *data)
1228 {
1229 	unsigned int goffset = 0;
1230 	unsigned long xen_pfn = 0;
1231 	unsigned int i;
1232 
1233 	for (i = 0; i < nr_grefs; i++) {
1234 		if ((i % XEN_PFN_PER_PAGE) == 0) {
1235 			xen_pfn = page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);
1236 			goffset = 0;
1237 		}
1238 
1239 		fn(pfn_to_gfn(xen_pfn), goffset, XEN_PAGE_SIZE, data);
1240 
1241 		goffset += XEN_PAGE_SIZE;
1242 		xen_pfn++;
1243 	}
1244 }
1245 
1246 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
1247 		    struct gnttab_map_grant_ref *kmap_ops,
1248 		    struct page **pages, unsigned int count)
1249 {
1250 	int i, ret;
1251 
1252 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
1253 	if (ret)
1254 		return ret;
1255 
1256 	for (i = 0; i < count; i++) {
1257 		switch (map_ops[i].status) {
1258 		case GNTST_okay:
1259 		{
1260 			struct xen_page_foreign *foreign;
1261 
1262 			SetPageForeign(pages[i]);
1263 			foreign = xen_page_foreign(pages[i]);
1264 			foreign->domid = map_ops[i].dom;
1265 			foreign->gref = map_ops[i].ref;
1266 			break;
1267 		}
1268 
1269 		case GNTST_no_device_space:
1270 			pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n");
1271 			break;
1272 
1273 		case GNTST_eagain:
1274 			/* Retry eagain maps */
1275 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
1276 						map_ops + i,
1277 						&map_ops[i].status, __func__);
1278 			/* Test status in next loop iteration. */
1279 			i--;
1280 			break;
1281 
1282 		default:
1283 			break;
1284 		}
1285 	}
1286 
1287 	return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);
1288 }
1289 EXPORT_SYMBOL_GPL(gnttab_map_refs);
1290 
1291 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
1292 		      struct gnttab_unmap_grant_ref *kunmap_ops,
1293 		      struct page **pages, unsigned int count)
1294 {
1295 	unsigned int i;
1296 	int ret;
1297 
1298 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
1299 	if (ret)
1300 		return ret;
1301 
1302 	for (i = 0; i < count; i++)
1303 		ClearPageForeign(pages[i]);
1304 
1305 	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
1306 }
1307 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
1308 
1309 #define GNTTAB_UNMAP_REFS_DELAY 5
1310 
1311 static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item);
1312 
1313 static void gnttab_unmap_work(struct work_struct *work)
1314 {
1315 	struct gntab_unmap_queue_data
1316 		*unmap_data = container_of(work,
1317 					   struct gntab_unmap_queue_data,
1318 					   gnttab_work.work);
1319 	if (unmap_data->age != UINT_MAX)
1320 		unmap_data->age++;
1321 	__gnttab_unmap_refs_async(unmap_data);
1322 }
1323 
1324 static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
1325 {
1326 	int ret;
1327 	int pc;
1328 
1329 	for (pc = 0; pc < item->count; pc++) {
1330 		if (page_count(item->pages[pc]) > 1) {
1331 			unsigned long delay = GNTTAB_UNMAP_REFS_DELAY * (item->age + 1);
1332 			schedule_delayed_work(&item->gnttab_work,
1333 					      msecs_to_jiffies(delay));
1334 			return;
1335 		}
1336 	}
1337 
1338 	ret = gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops,
1339 				item->pages, item->count);
1340 	item->done(ret, item);
1341 }
1342 
1343 void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
1344 {
1345 	INIT_DELAYED_WORK(&item->gnttab_work, gnttab_unmap_work);
1346 	item->age = 0;
1347 
1348 	__gnttab_unmap_refs_async(item);
1349 }
1350 EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async);
1351 
1352 static void unmap_refs_callback(int result,
1353 		struct gntab_unmap_queue_data *data)
1354 {
1355 	struct unmap_refs_callback_data *d = data->data;
1356 
1357 	d->result = result;
1358 	complete(&d->completion);
1359 }
1360 
1361 int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item)
1362 {
1363 	struct unmap_refs_callback_data data;
1364 
1365 	init_completion(&data.completion);
1366 	item->data = &data;
1367 	item->done = &unmap_refs_callback;
1368 	gnttab_unmap_refs_async(item);
1369 	wait_for_completion(&data.completion);
1370 
1371 	return data.result;
1372 }
1373 EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
1374 
/*
 * Number of status frames for @nr_grant_frames grant frames, as computed by
 * gnttab_frames() with SPP.
 */
static unsigned int nr_status_frames(unsigned int nr_grant_frames)
{
	return gnttab_frames(nr_grant_frames, SPP);
}
1379 
1380 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
1381 {
1382 	int rc;
1383 
1384 	rc = arch_gnttab_map_shared(frames, nr_gframes,
1385 				    gnttab_max_grant_frames(),
1386 				    &gnttab_shared.addr);
1387 	BUG_ON(rc);
1388 
1389 	return 0;
1390 }
1391 
/*
 * unmap_frames hook for the version 1 layout: unmap nr_grant_frames frames
 * starting at gnttab_shared.addr.
 */
static void gnttab_unmap_frames_v1(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
}
1396 
1397 static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
1398 {
1399 	uint64_t *sframes;
1400 	unsigned int nr_sframes;
1401 	struct gnttab_get_status_frames getframes;
1402 	int rc;
1403 
1404 	nr_sframes = nr_status_frames(nr_gframes);
1405 
1406 	/* No need for kzalloc as it is initialized in following hypercall
1407 	 * GNTTABOP_get_status_frames.
1408 	 */
1409 	sframes = kmalloc_array(nr_sframes, sizeof(uint64_t), GFP_ATOMIC);
1410 	if (!sframes)
1411 		return -ENOMEM;
1412 
1413 	getframes.dom        = DOMID_SELF;
1414 	getframes.nr_frames  = nr_sframes;
1415 	set_xen_guest_handle(getframes.frame_list, sframes);
1416 
1417 	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
1418 				       &getframes, 1);
1419 	if (rc == -ENOSYS) {
1420 		kfree(sframes);
1421 		return -ENOSYS;
1422 	}
1423 
1424 	BUG_ON(rc || getframes.status);
1425 
1426 	rc = arch_gnttab_map_status(sframes, nr_sframes,
1427 				    nr_status_frames(gnttab_max_grant_frames()),
1428 				    &grstatus);
1429 	BUG_ON(rc);
1430 	kfree(sframes);
1431 
1432 	rc = arch_gnttab_map_shared(frames, nr_gframes,
1433 				    gnttab_max_grant_frames(),
1434 				    &gnttab_shared.addr);
1435 	BUG_ON(rc);
1436 
1437 	return 0;
1438 }
1439 
/*
 * unmap_frames hook for the version 2 layout: unmap the shared grant frames
 * at gnttab_shared.addr, then the status frames at grstatus.
 */
static void gnttab_unmap_frames_v2(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
}
1445 
1446 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1447 {
1448 	struct gnttab_setup_table setup;
1449 	xen_pfn_t *frames;
1450 	unsigned int nr_gframes = end_idx + 1;
1451 	int rc;
1452 
1453 	if (!xen_pv_domain()) {
1454 		struct xen_add_to_physmap xatp;
1455 		unsigned int i = end_idx;
1456 		rc = 0;
1457 		BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
1458 		/*
1459 		 * Loop backwards, so that the first hypercall has the largest
1460 		 * index, ensuring that the table will grow only once.
1461 		 */
1462 		do {
1463 			xatp.domid = DOMID_SELF;
1464 			xatp.idx = i;
1465 			xatp.space = XENMAPSPACE_grant_table;
1466 			xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
1467 			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
1468 			if (rc != 0) {
1469 				pr_warn("grant table add_to_physmap failed, err=%d\n",
1470 					rc);
1471 				break;
1472 			}
1473 		} while (i-- > start_idx);
1474 
1475 		return rc;
1476 	}
1477 
1478 	/* No need for kzalloc as it is initialized in following hypercall
1479 	 * GNTTABOP_setup_table.
1480 	 */
1481 	frames = kmalloc_array(nr_gframes, sizeof(unsigned long), GFP_ATOMIC);
1482 	if (!frames)
1483 		return -ENOMEM;
1484 
1485 	setup.dom        = DOMID_SELF;
1486 	setup.nr_frames  = nr_gframes;
1487 	set_xen_guest_handle(setup.frame_list, frames);
1488 
1489 	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
1490 	if (rc == -ENOSYS) {
1491 		kfree(frames);
1492 		return -ENOSYS;
1493 	}
1494 
1495 	BUG_ON(rc || setup.status);
1496 
1497 	rc = gnttab_interface->map_frames(frames, nr_gframes);
1498 
1499 	kfree(frames);
1500 
1501 	return rc;
1502 }
1503 
/*
 * Operations for the version 1 grant table layout.  The number of grant
 * references per frame follows from the size of a struct grant_entry_v1.
 */
static const struct gnttab_ops gnttab_v1_ops = {
	.version			= 1,
	.grefs_per_grant_frame		= XEN_PAGE_SIZE /
					  sizeof(struct grant_entry_v1),
	.map_frames			= gnttab_map_frames_v1,
	.unmap_frames			= gnttab_unmap_frames_v1,
	.update_entry			= gnttab_update_entry_v1,
	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
	.read_frame			= gnttab_read_frame_v1,
};
1514 
/*
 * Operations for the version 2 grant table layout.  The number of grant
 * references per frame follows from the size of a union grant_entry_v2.
 */
static const struct gnttab_ops gnttab_v2_ops = {
	.version			= 2,
	.grefs_per_grant_frame		= XEN_PAGE_SIZE /
					  sizeof(union grant_entry_v2),
	.map_frames			= gnttab_map_frames_v2,
	.unmap_frames			= gnttab_unmap_frames_v2,
	.update_entry			= gnttab_update_entry_v2,
	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
	.read_frame			= gnttab_read_frame_v2,
};
1525 
1526 static bool gnttab_need_v2(void)
1527 {
1528 #ifdef CONFIG_X86
1529 	uint32_t base, width;
1530 
1531 	if (xen_pv_domain()) {
1532 		base = xen_cpuid_base();
1533 		if (cpuid_eax(base) < 5)
1534 			return false;	/* Information not available, use V1. */
1535 		width = cpuid_ebx(base + 5) &
1536 			XEN_CPUID_MACHINE_ADDRESS_WIDTH_MASK;
1537 		return width > 32 + PAGE_SHIFT;
1538 	}
1539 #endif
1540 	return !!(max_possible_pfn >> 32);
1541 }
1542 
1543 static void gnttab_request_version(void)
1544 {
1545 	long rc;
1546 	struct gnttab_set_version gsv;
1547 
1548 	if (gnttab_need_v2())
1549 		gsv.version = 2;
1550 	else
1551 		gsv.version = 1;
1552 
1553 	/* Boot parameter overrides automatic selection. */
1554 	if (xen_gnttab_version >= 1 && xen_gnttab_version <= 2)
1555 		gsv.version = xen_gnttab_version;
1556 
1557 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1558 	if (rc == 0 && gsv.version == 2)
1559 		gnttab_interface = &gnttab_v2_ops;
1560 	else
1561 		gnttab_interface = &gnttab_v1_ops;
1562 	pr_info("Grant tables using version %d layout\n",
1563 		gnttab_interface->version);
1564 }
1565 
1566 static int gnttab_setup(void)
1567 {
1568 	unsigned int max_nr_gframes;
1569 
1570 	max_nr_gframes = gnttab_max_grant_frames();
1571 	if (max_nr_gframes < nr_grant_frames)
1572 		return -ENOSYS;
1573 
1574 	if (!xen_pv_domain() && gnttab_shared.addr == NULL) {
1575 		gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
1576 		if (gnttab_shared.addr == NULL) {
1577 			pr_warn("gnttab share frames is not mapped!\n");
1578 			return -ENOMEM;
1579 		}
1580 	}
1581 	return gnttab_map(0, nr_grant_frames - 1);
1582 }
1583 #ifdef CONFIG_HIBERNATE_CALLBACKS
/*
 * Resume callback: request the grant table version again and map the grant
 * frames again.  Returns the result of gnttab_setup().
 */
int gnttab_resume(void)
{
	gnttab_request_version();
	return gnttab_setup();
}
1589 
/*
 * Suspend callback: on PV domains unmap the grant frames through the active
 * interface's unmap_frames hook.  Always returns 0.
 */
int gnttab_suspend(void)
{
	if (xen_pv_domain())
		gnttab_interface->unmap_frames();
	return 0;
}
1596 #endif
1597 
1598 static int gnttab_expand(unsigned int req_entries)
1599 {
1600 	int rc;
1601 	unsigned int cur, extra;
1602 
1603 	cur = nr_grant_frames;
1604 	extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
1605 		 gnttab_interface->grefs_per_grant_frame);
1606 	if (cur + extra > gnttab_max_grant_frames()) {
1607 		pr_warn_ratelimited("xen/grant-table: max_grant_frames reached"
1608 				    " cur=%u extra=%u limit=%u"
1609 				    " gnttab_free_count=%u req_entries=%u\n",
1610 				    cur, extra, gnttab_max_grant_frames(),
1611 				    gnttab_free_count, req_entries);
1612 		return -ENOSPC;
1613 	}
1614 
1615 	rc = gnttab_map(cur, cur + extra - 1);
1616 	if (rc == 0)
1617 		rc = grow_gnttab_list(extra);
1618 
1619 	return rc;
1620 }
1621 
1622 int gnttab_init(void)
1623 {
1624 	int i;
1625 	unsigned long max_nr_grant_frames, max_nr_grefs;
1626 	unsigned int max_nr_glist_frames, nr_glist_frames;
1627 	int ret;
1628 
1629 	gnttab_request_version();
1630 	max_nr_grant_frames = gnttab_max_grant_frames();
1631 	max_nr_grefs = max_nr_grant_frames *
1632 			gnttab_interface->grefs_per_grant_frame;
1633 	nr_grant_frames = 1;
1634 
1635 	/* Determine the maximum number of frames required for the
1636 	 * grant reference free list on the current hypervisor.
1637 	 */
1638 	max_nr_glist_frames = max_nr_grefs / RPP;
1639 
1640 	gnttab_list = kmalloc_objs(grant_ref_t *, max_nr_glist_frames);
1641 	if (gnttab_list == NULL)
1642 		return -ENOMEM;
1643 
1644 	nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
1645 	for (i = 0; i < nr_glist_frames; i++) {
1646 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
1647 		if (gnttab_list[i] == NULL) {
1648 			ret = -ENOMEM;
1649 			goto ini_nomem;
1650 		}
1651 	}
1652 
1653 	gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
1654 	if (!gnttab_free_bitmap) {
1655 		ret = -ENOMEM;
1656 		goto ini_nomem;
1657 	}
1658 
1659 	ret = arch_gnttab_init(max_nr_grant_frames,
1660 			       nr_status_frames(max_nr_grant_frames));
1661 	if (ret < 0)
1662 		goto ini_nomem;
1663 
1664 	if (gnttab_setup() < 0) {
1665 		ret = -ENODEV;
1666 		goto ini_nomem;
1667 	}
1668 
1669 	gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
1670 
1671 	gnttab_set_free(GNTTAB_NR_RESERVED_ENTRIES,
1672 			gnttab_size - GNTTAB_NR_RESERVED_ENTRIES);
1673 
1674 	printk("Grant table initialized\n");
1675 	return 0;
1676 
1677  ini_nomem:
1678 	for (i--; i >= 0; i--)
1679 		free_page((unsigned long)gnttab_list[i]);
1680 	kfree(gnttab_list);
1681 	bitmap_free(gnttab_free_bitmap);
1682 	return ret;
1683 }
1684 EXPORT_SYMBOL_GPL(gnttab_init);
1685 
1686 static int __gnttab_init(void)
1687 {
1688 	if (!xen_domain())
1689 		return -ENODEV;
1690 
1691 	/* Delay grant-table initialization in the PV on HVM case */
1692 	if (xen_hvm_domain() && !xen_pvh_domain())
1693 		return 0;
1694 
1695 	return gnttab_init();
1696 }
1697 /* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
1698  * beforehand to initialize xen_auto_xlat_grant_frames. */
1699 core_initcall_sync(__gnttab_init);
1700