xref: /linux/drivers/xen/grant-table.c (revision f49f4ab95c301dbccad0efe85296d908b8ae7ad4)
1 /******************************************************************************
2  * grant_table.c
3  *
4  * Granting foreign access to our memory reservation.
5  *
6  * Copyright (c) 2005-2006, Christopher Clark
7  * Copyright (c) 2004-2005, K A Fraser
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License version 2
11  * as published by the Free Software Foundation; or, when distributed
12  * separately from the Linux kernel or incorporated into other
13  * software packages, subject to the following license:
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a copy
16  * of this source file (the "Software"), to deal in the Software without
17  * restriction, including without limitation the rights to use, copy, modify,
18  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19  * and to permit persons to whom the Software is furnished to do so, subject to
20  * the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included in
23  * all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31  * IN THE SOFTWARE.
32  */
33 
34 #include <linux/module.h>
35 #include <linux/sched.h>
36 #include <linux/mm.h>
37 #include <linux/slab.h>
38 #include <linux/vmalloc.h>
39 #include <linux/uaccess.h>
40 #include <linux/io.h>
41 #include <linux/delay.h>
42 #include <linux/hardirq.h>
43 
44 #include <xen/xen.h>
45 #include <xen/interface/xen.h>
46 #include <xen/page.h>
47 #include <xen/grant_table.h>
48 #include <xen/interface/memory.h>
49 #include <xen/hvc-console.h>
50 #include <asm/xen/hypercall.h>
51 #include <asm/xen/interface.h>
52 
53 #include <asm/pgtable.h>
54 #include <asm/sync_bitops.h>
55 
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
/* Terminator stored in the last link of a chain of grant references. */
#define GNTTAB_LIST_END 0xffffffff
/*
 * Number of grant entries that fit in one page. The entry size, and thus
 * the result, depends on the current value of grant_table_version.
 */
#define GREFS_PER_GRANT_FRAME \
(grant_table_version == 1 ?                      \
(PAGE_SIZE / sizeof(struct grant_entry_v1)) :   \
(PAGE_SIZE / sizeof(union grant_entry_v2)))

/* Pages of grant_ref_t links, indexed through __gnttab_entry(). */
static grant_ref_t **gnttab_list;
/* Number of grant frames accounted for so far; bumped by grow_gnttab_list(). */
static unsigned int nr_grant_frames;
/* Upper bound that gnttab_max_grant_frames() applies to Xen's answer. */
static unsigned int boot_max_nr_grant_frames;
/* Number of references currently on the free chain. */
static int gnttab_free_count;
/* First reference of the free chain. */
static grant_ref_t gnttab_free_head;
/* Taken around free-chain, callback-list and deferred_list manipulation. */
static DEFINE_SPINLOCK(gnttab_list_lock);
/*
 * Shifted right by PAGE_SHIFT to form the base gpfn passed to
 * XENMEM_add_to_physmap in gnttab_map().
 */
unsigned long xen_hvm_resume_frames;
EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);

/*
 * Address filled in by arch_gnttab_map_shared(), viewable either as an
 * array of v1 entries, an array of v2 entries, or an untyped pointer.
 */
static union {
	struct grant_entry_v1 *v1;
	union grant_entry_v2 *v2;
	void *addr;
} gnttab_shared;
78 
/* Table of function pointers implementing one grant table version. */
struct gnttab_ops {
	/*
	 * Map a list of frames used for storing grant entries. The frames
	 * parameter holds the grant table frame addresses obtained while the
	 * grant table is being set up, nr_gframes is the number of frames to
	 * map. Returning GNTST_okay means success and a negative value means
	 * failure.
	 */
	int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
	/*
	 * Release the frames which were mapped in map_frames (grant entries
	 * and, where present, grant entry status).
	 */
	void (*unmap_frames)(void);
	/*
	 * Introduce a valid entry into the grant table, granting the frame of
	 * this grant entry to a domain for access or transfer. Ref is the
	 * reference of the introduced grant entry, domid is the id of the
	 * granted domain, frame is the page frame to be granted, and flags is
	 * the status of the grant entry to be updated.
	 */
	void (*update_entry)(grant_ref_t ref, domid_t domid,
			     unsigned long frame, unsigned flags);
	/*
	 * Stop granting a grant entry to a domain for access. Ref is the
	 * reference of the grant entry whose access will be stopped;
	 * readonly is not in use in this function. If the grant entry is
	 * currently mapped for reading or writing, just return failure (== 0)
	 * directly and don't tear down the grant access. Otherwise, stop
	 * grant access for this entry and return success (== 1).
	 */
	int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
	/*
	 * Stop granting a grant entry to a domain for transfer. Ref is the
	 * reference of the grant entry whose transfer will be stopped. If the
	 * transfer has not started, just reclaim the grant entry and return
	 * failure (== 0). Otherwise, wait for the transfer to complete and
	 * then return the frame.
	 */
	unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
	/*
	 * Query the status of a grant entry. Ref is the reference of the
	 * queried grant entry, the return value is the status of that entry.
	 * Detailed status (writing/reading) can be extracted from the return
	 * value by bit operations.
	 */
	int (*query_foreign_access)(grant_ref_t ref);
	/*
	 * Grant a domain access to a range of bytes within the page referred
	 * to by an available grant entry. Ref is the reference of the grant
	 * entry which will be sub-page accessed, domid is the id of the
	 * grantee domain, frame is the frame address of the subpage grant,
	 * flags is grant type and flag information, page_off is the offset of
	 * the range of bytes, and length is the number of bytes to be
	 * accessed. May be NULL when the grant table version has no such
	 * operation (callers check for NULL).
	 */
	void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
				     unsigned long frame, int flags,
				     unsigned page_off, unsigned length);
	/*
	 * Redirect an available grant entry on domain A to another grant
	 * reference of domain B, then allow domain C to use the grant
	 * reference of domain B transitively. Ref is an available grant entry
	 * reference on domain A, domid is the id of domain C which accesses
	 * the grant entry transitively, flags is grant type and flag
	 * information, trans_domid is the id of domain B whose grant entry is
	 * finally accessed transitively, trans_gref is the transitive grant
	 * entry reference of domain B. May be NULL when the grant table
	 * version has no such operation (callers check for NULL).
	 */
	void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
				   domid_t trans_domid, grant_ref_t trans_gref);
};
150 
/* Operations table that the version-independent wrappers call through. */
static struct gnttab_ops *gnttab_interface;

/*
 * Status words of the grant entries, indexed by grant reference; read by
 * the v2 query and end-access paths. Kept as a file-level global.
 */
static grant_status_t *grstatus;

/* Selects the entry layout used by GREFS_PER_GRANT_FRAME. */
static int grant_table_version;

/* Singly linked list of callbacks waiting for free grant references. */
static struct gnttab_free_callback *gnttab_free_callback_list;

static int gnttab_expand(unsigned int req_entries);

/* grant_ref_t links per page of gnttab_list. */
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
/* grant_status_t words per page. */
#define SPP (PAGE_SIZE / sizeof(grant_status_t))
164 
165 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
166 {
167 	return &gnttab_list[(entry) / RPP][(entry) % RPP];
168 }
169 /* This can be used as an l-value */
170 #define gnttab_entry(entry) (*__gnttab_entry(entry))
171 
172 static int get_free_entries(unsigned count)
173 {
174 	unsigned long flags;
175 	int ref, rc = 0;
176 	grant_ref_t head;
177 
178 	spin_lock_irqsave(&gnttab_list_lock, flags);
179 
180 	if ((gnttab_free_count < count) &&
181 	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
182 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
183 		return rc;
184 	}
185 
186 	ref = head = gnttab_free_head;
187 	gnttab_free_count -= count;
188 	while (count-- > 1)
189 		head = gnttab_entry(head);
190 	gnttab_free_head = gnttab_entry(head);
191 	gnttab_entry(head) = GNTTAB_LIST_END;
192 
193 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
194 
195 	return ref;
196 }
197 
198 static void do_free_callbacks(void)
199 {
200 	struct gnttab_free_callback *callback, *next;
201 
202 	callback = gnttab_free_callback_list;
203 	gnttab_free_callback_list = NULL;
204 
205 	while (callback != NULL) {
206 		next = callback->next;
207 		if (gnttab_free_count >= callback->count) {
208 			callback->next = NULL;
209 			callback->fn(callback->arg);
210 		} else {
211 			callback->next = gnttab_free_callback_list;
212 			gnttab_free_callback_list = callback;
213 		}
214 		callback = next;
215 	}
216 }
217 
218 static inline void check_free_callbacks(void)
219 {
220 	if (unlikely(gnttab_free_callback_list))
221 		do_free_callbacks();
222 }
223 
224 static void put_free_entry(grant_ref_t ref)
225 {
226 	unsigned long flags;
227 	spin_lock_irqsave(&gnttab_list_lock, flags);
228 	gnttab_entry(ref) = gnttab_free_head;
229 	gnttab_free_head = ref;
230 	gnttab_free_count++;
231 	check_free_callbacks();
232 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
233 }
234 
235 /*
236  * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
237  * Introducing a valid entry into the grant table:
238  *  1. Write ent->domid.
239  *  2. Write ent->frame:
240  *      GTF_permit_access:   Frame to which access is permitted.
241  *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
242  *                           frame, or zero if none.
243  *  3. Write memory barrier (WMB).
244  *  4. Write ent->flags, inc. valid type.
245  */
246 static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
247 				   unsigned long frame, unsigned flags)
248 {
249 	gnttab_shared.v1[ref].domid = domid;
250 	gnttab_shared.v1[ref].frame = frame;
251 	wmb();
252 	gnttab_shared.v1[ref].flags = flags;
253 }
254 
255 static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
256 				   unsigned long frame, unsigned flags)
257 {
258 	gnttab_shared.v2[ref].hdr.domid = domid;
259 	gnttab_shared.v2[ref].full_page.frame = frame;
260 	wmb();
261 	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
262 }
263 
264 /*
265  * Public grant-issuing interface functions
266  */
267 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
268 				     unsigned long frame, int readonly)
269 {
270 	gnttab_interface->update_entry(ref, domid, frame,
271 			   GTF_permit_access | (readonly ? GTF_readonly : 0));
272 }
273 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
274 
275 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
276 				int readonly)
277 {
278 	int ref;
279 
280 	ref = get_free_entries(1);
281 	if (unlikely(ref < 0))
282 		return -ENOSPC;
283 
284 	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
285 
286 	return ref;
287 }
288 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
289 
290 static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
291 					   unsigned long frame, int flags,
292 					   unsigned page_off, unsigned length)
293 {
294 	gnttab_shared.v2[ref].sub_page.frame = frame;
295 	gnttab_shared.v2[ref].sub_page.page_off = page_off;
296 	gnttab_shared.v2[ref].sub_page.length = length;
297 	gnttab_shared.v2[ref].hdr.domid = domid;
298 	wmb();
299 	gnttab_shared.v2[ref].hdr.flags =
300 				GTF_permit_access | GTF_sub_page | flags;
301 }
302 
303 int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
304 					    unsigned long frame, int flags,
305 					    unsigned page_off,
306 					    unsigned length)
307 {
308 	if (flags & (GTF_accept_transfer | GTF_reading |
309 		     GTF_writing | GTF_transitive))
310 		return -EPERM;
311 
312 	if (gnttab_interface->update_subpage_entry == NULL)
313 		return -ENOSYS;
314 
315 	gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
316 					       page_off, length);
317 
318 	return 0;
319 }
320 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
321 
322 int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
323 					int flags, unsigned page_off,
324 					unsigned length)
325 {
326 	int ref, rc;
327 
328 	ref = get_free_entries(1);
329 	if (unlikely(ref < 0))
330 		return -ENOSPC;
331 
332 	rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
333 						     page_off, length);
334 	if (rc < 0) {
335 		put_free_entry(ref);
336 		return rc;
337 	}
338 
339 	return ref;
340 }
341 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
342 
343 bool gnttab_subpage_grants_available(void)
344 {
345 	return gnttab_interface->update_subpage_entry != NULL;
346 }
347 EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
348 
349 static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
350 					 int flags, domid_t trans_domid,
351 					 grant_ref_t trans_gref)
352 {
353 	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
354 	gnttab_shared.v2[ref].transitive.gref = trans_gref;
355 	gnttab_shared.v2[ref].hdr.domid = domid;
356 	wmb();
357 	gnttab_shared.v2[ref].hdr.flags =
358 				GTF_permit_access | GTF_transitive | flags;
359 }
360 
361 int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
362 					  int flags, domid_t trans_domid,
363 					  grant_ref_t trans_gref)
364 {
365 	if (flags & (GTF_accept_transfer | GTF_reading |
366 		     GTF_writing | GTF_sub_page))
367 		return -EPERM;
368 
369 	if (gnttab_interface->update_trans_entry == NULL)
370 		return -ENOSYS;
371 
372 	gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
373 					     trans_gref);
374 
375 	return 0;
376 }
377 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
378 
379 int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
380 				      domid_t trans_domid,
381 				      grant_ref_t trans_gref)
382 {
383 	int ref, rc;
384 
385 	ref = get_free_entries(1);
386 	if (unlikely(ref < 0))
387 		return -ENOSPC;
388 
389 	rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
390 						   trans_domid, trans_gref);
391 	if (rc < 0) {
392 		put_free_entry(ref);
393 		return rc;
394 	}
395 
396 	return ref;
397 }
398 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
399 
400 bool gnttab_trans_grants_available(void)
401 {
402 	return gnttab_interface->update_trans_entry != NULL;
403 }
404 EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
405 
406 static int gnttab_query_foreign_access_v1(grant_ref_t ref)
407 {
408 	return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
409 }
410 
411 static int gnttab_query_foreign_access_v2(grant_ref_t ref)
412 {
413 	return grstatus[ref] & (GTF_reading|GTF_writing);
414 }
415 
416 int gnttab_query_foreign_access(grant_ref_t ref)
417 {
418 	return gnttab_interface->query_foreign_access(ref);
419 }
420 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
421 
422 static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
423 {
424 	u16 flags, nflags;
425 	u16 *pflags;
426 
427 	pflags = &gnttab_shared.v1[ref].flags;
428 	nflags = *pflags;
429 	do {
430 		flags = nflags;
431 		if (flags & (GTF_reading|GTF_writing))
432 			return 0;
433 	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
434 
435 	return 1;
436 }
437 
438 static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
439 {
440 	gnttab_shared.v2[ref].hdr.flags = 0;
441 	mb();
442 	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
443 		return 0;
444 	} else {
445 		/* The read of grstatus needs to have acquire
446 		semantics.  On x86, reads already have
447 		that, and we just need to protect against
448 		compiler reorderings.  On other
449 		architectures we may need a full
450 		barrier. */
451 #ifdef CONFIG_X86
452 		barrier();
453 #else
454 		mb();
455 #endif
456 	}
457 
458 	return 1;
459 }
460 
461 static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
462 {
463 	return gnttab_interface->end_foreign_access_ref(ref, readonly);
464 }
465 
466 int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
467 {
468 	if (_gnttab_end_foreign_access_ref(ref, readonly))
469 		return 1;
470 	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
471 	return 0;
472 }
473 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
474 
/*
 * A grant reference whose access could not be ended yet; queued by
 * gnttab_add_deferred() and retried from gnttab_handle_deferred().
 */
struct deferred_entry {
	struct list_head list;	/* link in deferred_list */
	grant_ref_t ref;	/* reference still to be ended */
	bool ro;		/* readonly argument to pass on the retry */
	uint16_t warn_delay;	/* failed retries left before "still pending" is logged */
	struct page *page;	/* page to free once access has ended, or NULL */
};
/* Manipulated under gnttab_list_lock. */
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(unsigned long);
/* Armed for jiffies + HZ while deferred_list is non-empty. */
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
485 
486 static void gnttab_handle_deferred(unsigned long unused)
487 {
488 	unsigned int nr = 10;
489 	struct deferred_entry *first = NULL;
490 	unsigned long flags;
491 
492 	spin_lock_irqsave(&gnttab_list_lock, flags);
493 	while (nr--) {
494 		struct deferred_entry *entry
495 			= list_first_entry(&deferred_list,
496 					   struct deferred_entry, list);
497 
498 		if (entry == first)
499 			break;
500 		list_del(&entry->list);
501 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
502 		if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
503 			put_free_entry(entry->ref);
504 			if (entry->page) {
505 				pr_debug("freeing g.e. %#x (pfn %#lx)\n",
506 					 entry->ref, page_to_pfn(entry->page));
507 				__free_page(entry->page);
508 			} else
509 				pr_info("freeing g.e. %#x\n", entry->ref);
510 			kfree(entry);
511 			entry = NULL;
512 		} else {
513 			if (!--entry->warn_delay)
514 				pr_info("g.e. %#x still pending\n",
515 					entry->ref);
516 			if (!first)
517 				first = entry;
518 		}
519 		spin_lock_irqsave(&gnttab_list_lock, flags);
520 		if (entry)
521 			list_add_tail(&entry->list, &deferred_list);
522 		else if (list_empty(&deferred_list))
523 			break;
524 	}
525 	if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
526 		deferred_timer.expires = jiffies + HZ;
527 		add_timer(&deferred_timer);
528 	}
529 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
530 }
531 
532 static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
533 				struct page *page)
534 {
535 	struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
536 	const char *what = KERN_WARNING "leaking";
537 
538 	if (entry) {
539 		unsigned long flags;
540 
541 		entry->ref = ref;
542 		entry->ro = readonly;
543 		entry->page = page;
544 		entry->warn_delay = 60;
545 		spin_lock_irqsave(&gnttab_list_lock, flags);
546 		list_add_tail(&entry->list, &deferred_list);
547 		if (!timer_pending(&deferred_timer)) {
548 			deferred_timer.expires = jiffies + HZ;
549 			add_timer(&deferred_timer);
550 		}
551 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
552 		what = KERN_DEBUG "deferring";
553 	}
554 	printk("%s g.e. %#x (pfn %#lx)\n",
555 	       what, ref, page ? page_to_pfn(page) : -1);
556 }
557 
558 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
559 			       unsigned long page)
560 {
561 	if (gnttab_end_foreign_access_ref(ref, readonly)) {
562 		put_free_entry(ref);
563 		if (page != 0)
564 			free_page(page);
565 	} else
566 		gnttab_add_deferred(ref, readonly,
567 				    page ? virt_to_page(page) : NULL);
568 }
569 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
570 
571 int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
572 {
573 	int ref;
574 
575 	ref = get_free_entries(1);
576 	if (unlikely(ref < 0))
577 		return -ENOSPC;
578 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
579 
580 	return ref;
581 }
582 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
583 
584 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
585 				       unsigned long pfn)
586 {
587 	gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
588 }
589 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
590 
591 static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
592 {
593 	unsigned long frame;
594 	u16           flags;
595 	u16          *pflags;
596 
597 	pflags = &gnttab_shared.v1[ref].flags;
598 
599 	/*
600 	 * If a transfer is not even yet started, try to reclaim the grant
601 	 * reference and return failure (== 0).
602 	 */
603 	while (!((flags = *pflags) & GTF_transfer_committed)) {
604 		if (sync_cmpxchg(pflags, flags, 0) == flags)
605 			return 0;
606 		cpu_relax();
607 	}
608 
609 	/* If a transfer is in progress then wait until it is completed. */
610 	while (!(flags & GTF_transfer_completed)) {
611 		flags = *pflags;
612 		cpu_relax();
613 	}
614 
615 	rmb();	/* Read the frame number /after/ reading completion status. */
616 	frame = gnttab_shared.v1[ref].frame;
617 	BUG_ON(frame == 0);
618 
619 	return frame;
620 }
621 
622 static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
623 {
624 	unsigned long frame;
625 	u16           flags;
626 	u16          *pflags;
627 
628 	pflags = &gnttab_shared.v2[ref].hdr.flags;
629 
630 	/*
631 	 * If a transfer is not even yet started, try to reclaim the grant
632 	 * reference and return failure (== 0).
633 	 */
634 	while (!((flags = *pflags) & GTF_transfer_committed)) {
635 		if (sync_cmpxchg(pflags, flags, 0) == flags)
636 			return 0;
637 		cpu_relax();
638 	}
639 
640 	/* If a transfer is in progress then wait until it is completed. */
641 	while (!(flags & GTF_transfer_completed)) {
642 		flags = *pflags;
643 		cpu_relax();
644 	}
645 
646 	rmb();  /* Read the frame number /after/ reading completion status. */
647 	frame = gnttab_shared.v2[ref].full_page.frame;
648 	BUG_ON(frame == 0);
649 
650 	return frame;
651 }
652 
653 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
654 {
655 	return gnttab_interface->end_foreign_transfer_ref(ref);
656 }
657 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
658 
659 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
660 {
661 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
662 	put_free_entry(ref);
663 	return frame;
664 }
665 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
666 
667 void gnttab_free_grant_reference(grant_ref_t ref)
668 {
669 	put_free_entry(ref);
670 }
671 EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
672 
673 void gnttab_free_grant_references(grant_ref_t head)
674 {
675 	grant_ref_t ref;
676 	unsigned long flags;
677 	int count = 1;
678 	if (head == GNTTAB_LIST_END)
679 		return;
680 	spin_lock_irqsave(&gnttab_list_lock, flags);
681 	ref = head;
682 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
683 		ref = gnttab_entry(ref);
684 		count++;
685 	}
686 	gnttab_entry(ref) = gnttab_free_head;
687 	gnttab_free_head = head;
688 	gnttab_free_count += count;
689 	check_free_callbacks();
690 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
691 }
692 EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
693 
694 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
695 {
696 	int h = get_free_entries(count);
697 
698 	if (h < 0)
699 		return -ENOSPC;
700 
701 	*head = h;
702 
703 	return 0;
704 }
705 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
706 
707 int gnttab_empty_grant_references(const grant_ref_t *private_head)
708 {
709 	return (*private_head == GNTTAB_LIST_END);
710 }
711 EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
712 
713 int gnttab_claim_grant_reference(grant_ref_t *private_head)
714 {
715 	grant_ref_t g = *private_head;
716 	if (unlikely(g == GNTTAB_LIST_END))
717 		return -ENOSPC;
718 	*private_head = gnttab_entry(g);
719 	return g;
720 }
721 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
722 
723 void gnttab_release_grant_reference(grant_ref_t *private_head,
724 				    grant_ref_t release)
725 {
726 	gnttab_entry(release) = *private_head;
727 	*private_head = release;
728 }
729 EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
730 
731 void gnttab_request_free_callback(struct gnttab_free_callback *callback,
732 				  void (*fn)(void *), void *arg, u16 count)
733 {
734 	unsigned long flags;
735 	spin_lock_irqsave(&gnttab_list_lock, flags);
736 	if (callback->next)
737 		goto out;
738 	callback->fn = fn;
739 	callback->arg = arg;
740 	callback->count = count;
741 	callback->next = gnttab_free_callback_list;
742 	gnttab_free_callback_list = callback;
743 	check_free_callbacks();
744 out:
745 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
746 }
747 EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
748 
749 void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
750 {
751 	struct gnttab_free_callback **pcb;
752 	unsigned long flags;
753 
754 	spin_lock_irqsave(&gnttab_list_lock, flags);
755 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
756 		if (*pcb == callback) {
757 			*pcb = callback->next;
758 			break;
759 		}
760 	}
761 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
762 }
763 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
764 
765 static int grow_gnttab_list(unsigned int more_frames)
766 {
767 	unsigned int new_nr_grant_frames, extra_entries, i;
768 	unsigned int nr_glist_frames, new_nr_glist_frames;
769 
770 	new_nr_grant_frames = nr_grant_frames + more_frames;
771 	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
772 
773 	nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
774 	new_nr_glist_frames =
775 		(new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
776 	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
777 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
778 		if (!gnttab_list[i])
779 			goto grow_nomem;
780 	}
781 
782 
783 	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
784 	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
785 		gnttab_entry(i) = i + 1;
786 
787 	gnttab_entry(i) = gnttab_free_head;
788 	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
789 	gnttab_free_count += extra_entries;
790 
791 	nr_grant_frames = new_nr_grant_frames;
792 
793 	check_free_callbacks();
794 
795 	return 0;
796 
797 grow_nomem:
798 	for ( ; i >= nr_glist_frames; i--)
799 		free_page((unsigned long) gnttab_list[i]);
800 	return -ENOMEM;
801 }
802 
803 static unsigned int __max_nr_grant_frames(void)
804 {
805 	struct gnttab_query_size query;
806 	int rc;
807 
808 	query.dom = DOMID_SELF;
809 
810 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
811 	if ((rc < 0) || (query.status != GNTST_okay))
812 		return 4; /* Legacy max supported number of frames */
813 
814 	return query.max_nr_frames;
815 }
816 
817 unsigned int gnttab_max_grant_frames(void)
818 {
819 	unsigned int xen_max = __max_nr_grant_frames();
820 
821 	if (xen_max > boot_max_nr_grant_frames)
822 		return boot_max_nr_grant_frames;
823 	return xen_max;
824 }
825 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
826 
827 /* Handling of paged out grant targets (GNTST_eagain) */
828 #define MAX_DELAY 256
829 static inline void
830 gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
831 						const char *func)
832 {
833 	unsigned delay = 1;
834 
835 	do {
836 		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
837 		if (*status == GNTST_eagain)
838 			msleep(delay++);
839 	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
840 
841 	if (delay >= MAX_DELAY) {
842 		printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm);
843 		*status = GNTST_bad_page;
844 	}
845 }
846 
847 void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
848 {
849 	struct gnttab_map_grant_ref *op;
850 
851 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
852 		BUG();
853 	for (op = batch; op < batch + count; op++)
854 		if (op->status == GNTST_eagain)
855 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
856 						&op->status, __func__);
857 }
858 EXPORT_SYMBOL_GPL(gnttab_batch_map);
859 
860 void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
861 {
862 	struct gnttab_copy *op;
863 
864 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
865 		BUG();
866 	for (op = batch; op < batch + count; op++)
867 		if (op->status == GNTST_eagain)
868 			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
869 						&op->status, __func__);
870 }
871 EXPORT_SYMBOL_GPL(gnttab_batch_copy);
872 
873 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
874 		    struct gnttab_map_grant_ref *kmap_ops,
875 		    struct page **pages, unsigned int count)
876 {
877 	int i, ret;
878 	bool lazy = false;
879 	pte_t *pte;
880 	unsigned long mfn;
881 
882 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
883 	if (ret)
884 		return ret;
885 
886 	/* Retry eagain maps */
887 	for (i = 0; i < count; i++)
888 		if (map_ops[i].status == GNTST_eagain)
889 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
890 						&map_ops[i].status, __func__);
891 
892 	if (xen_feature(XENFEAT_auto_translated_physmap))
893 		return ret;
894 
895 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
896 		arch_enter_lazy_mmu_mode();
897 		lazy = true;
898 	}
899 
900 	for (i = 0; i < count; i++) {
901 		/* Do not add to override if the map failed. */
902 		if (map_ops[i].status)
903 			continue;
904 
905 		if (map_ops[i].flags & GNTMAP_contains_pte) {
906 			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
907 				(map_ops[i].host_addr & ~PAGE_MASK));
908 			mfn = pte_mfn(*pte);
909 		} else {
910 			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
911 		}
912 		ret = m2p_add_override(mfn, pages[i], kmap_ops ?
913 				       &kmap_ops[i] : NULL);
914 		if (ret)
915 			return ret;
916 	}
917 
918 	if (lazy)
919 		arch_leave_lazy_mmu_mode();
920 
921 	return ret;
922 }
923 EXPORT_SYMBOL_GPL(gnttab_map_refs);
924 
925 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
926 		      struct gnttab_map_grant_ref *kmap_ops,
927 		      struct page **pages, unsigned int count)
928 {
929 	int i, ret;
930 	bool lazy = false;
931 
932 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
933 	if (ret)
934 		return ret;
935 
936 	if (xen_feature(XENFEAT_auto_translated_physmap))
937 		return ret;
938 
939 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
940 		arch_enter_lazy_mmu_mode();
941 		lazy = true;
942 	}
943 
944 	for (i = 0; i < count; i++) {
945 		ret = m2p_remove_override(pages[i], kmap_ops ?
946 				       &kmap_ops[i] : NULL);
947 		if (ret)
948 			return ret;
949 	}
950 
951 	if (lazy)
952 		arch_leave_lazy_mmu_mode();
953 
954 	return ret;
955 }
956 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
957 
958 static unsigned nr_status_frames(unsigned nr_grant_frames)
959 {
960 	return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
961 }
962 
963 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
964 {
965 	int rc;
966 
967 	rc = arch_gnttab_map_shared(frames, nr_gframes,
968 				    gnttab_max_grant_frames(),
969 				    &gnttab_shared.addr);
970 	BUG_ON(rc);
971 
972 	return 0;
973 }
974 
975 static void gnttab_unmap_frames_v1(void)
976 {
977 	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
978 }
979 
980 static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
981 {
982 	uint64_t *sframes;
983 	unsigned int nr_sframes;
984 	struct gnttab_get_status_frames getframes;
985 	int rc;
986 
987 	nr_sframes = nr_status_frames(nr_gframes);
988 
989 	/* No need for kzalloc as it is initialized in following hypercall
990 	 * GNTTABOP_get_status_frames.
991 	 */
992 	sframes = kmalloc(nr_sframes  * sizeof(uint64_t), GFP_ATOMIC);
993 	if (!sframes)
994 		return -ENOMEM;
995 
996 	getframes.dom        = DOMID_SELF;
997 	getframes.nr_frames  = nr_sframes;
998 	set_xen_guest_handle(getframes.frame_list, sframes);
999 
1000 	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
1001 				       &getframes, 1);
1002 	if (rc == -ENOSYS) {
1003 		kfree(sframes);
1004 		return -ENOSYS;
1005 	}
1006 
1007 	BUG_ON(rc || getframes.status);
1008 
1009 	rc = arch_gnttab_map_status(sframes, nr_sframes,
1010 				    nr_status_frames(gnttab_max_grant_frames()),
1011 				    &grstatus);
1012 	BUG_ON(rc);
1013 	kfree(sframes);
1014 
1015 	rc = arch_gnttab_map_shared(frames, nr_gframes,
1016 				    gnttab_max_grant_frames(),
1017 				    &gnttab_shared.addr);
1018 	BUG_ON(rc);
1019 
1020 	return 0;
1021 }
1022 
1023 static void gnttab_unmap_frames_v2(void)
1024 {
1025 	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
1026 	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
1027 }
1028 
1029 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1030 {
1031 	struct gnttab_setup_table setup;
1032 	xen_pfn_t *frames;
1033 	unsigned int nr_gframes = end_idx + 1;
1034 	int rc;
1035 
1036 	if (xen_hvm_domain()) {
1037 		struct xen_add_to_physmap xatp;
1038 		unsigned int i = end_idx;
1039 		rc = 0;
1040 		/*
1041 		 * Loop backwards, so that the first hypercall has the largest
1042 		 * index, ensuring that the table will grow only once.
1043 		 */
1044 		do {
1045 			xatp.domid = DOMID_SELF;
1046 			xatp.idx = i;
1047 			xatp.space = XENMAPSPACE_grant_table;
1048 			xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
1049 			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
1050 			if (rc != 0) {
1051 				printk(KERN_WARNING
1052 						"grant table add_to_physmap failed, err=%d\n", rc);
1053 				break;
1054 			}
1055 		} while (i-- > start_idx);
1056 
1057 		return rc;
1058 	}
1059 
1060 	/* No need for kzalloc as it is initialized in following hypercall
1061 	 * GNTTABOP_setup_table.
1062 	 */
1063 	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
1064 	if (!frames)
1065 		return -ENOMEM;
1066 
1067 	setup.dom        = DOMID_SELF;
1068 	setup.nr_frames  = nr_gframes;
1069 	set_xen_guest_handle(setup.frame_list, frames);
1070 
1071 	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
1072 	if (rc == -ENOSYS) {
1073 		kfree(frames);
1074 		return -ENOSYS;
1075 	}
1076 
1077 	BUG_ON(rc || setup.status);
1078 
1079 	rc = gnttab_interface->map_frames(frames, nr_gframes);
1080 
1081 	kfree(frames);
1082 
1083 	return rc;
1084 }
1085 
1086 static struct gnttab_ops gnttab_v1_ops = {
1087 	.map_frames			= gnttab_map_frames_v1,
1088 	.unmap_frames			= gnttab_unmap_frames_v1,
1089 	.update_entry			= gnttab_update_entry_v1,
1090 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
1091 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v1,
1092 	.query_foreign_access		= gnttab_query_foreign_access_v1,
1093 };
1094 
1095 static struct gnttab_ops gnttab_v2_ops = {
1096 	.map_frames			= gnttab_map_frames_v2,
1097 	.unmap_frames			= gnttab_unmap_frames_v2,
1098 	.update_entry			= gnttab_update_entry_v2,
1099 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
1100 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v2,
1101 	.query_foreign_access		= gnttab_query_foreign_access_v2,
1102 	.update_subpage_entry		= gnttab_update_subpage_entry_v2,
1103 	.update_trans_entry		= gnttab_update_trans_entry_v2,
1104 };
1105 
1106 static void gnttab_request_version(void)
1107 {
1108 	int rc;
1109 	struct gnttab_set_version gsv;
1110 
1111 	if (xen_hvm_domain())
1112 		gsv.version = 1;
1113 	else
1114 		gsv.version = 2;
1115 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1116 	if (rc == 0 && gsv.version == 2) {
1117 		grant_table_version = 2;
1118 		gnttab_interface = &gnttab_v2_ops;
1119 	} else if (grant_table_version == 2) {
1120 		/*
1121 		 * If we've already used version 2 features,
1122 		 * but then suddenly discover that they're not
1123 		 * available (e.g. migrating to an older
1124 		 * version of Xen), almost unbounded badness
1125 		 * can happen.
1126 		 */
1127 		panic("we need grant tables version 2, but only version 1 is available");
1128 	} else {
1129 		grant_table_version = 1;
1130 		gnttab_interface = &gnttab_v1_ops;
1131 	}
1132 	printk(KERN_INFO "Grant tables using version %d layout.\n",
1133 		grant_table_version);
1134 }
1135 
1136 int gnttab_resume(void)
1137 {
1138 	unsigned int max_nr_gframes;
1139 
1140 	gnttab_request_version();
1141 	max_nr_gframes = gnttab_max_grant_frames();
1142 	if (max_nr_gframes < nr_grant_frames)
1143 		return -ENOSYS;
1144 
1145 	if (xen_pv_domain())
1146 		return gnttab_map(0, nr_grant_frames - 1);
1147 
1148 	if (gnttab_shared.addr == NULL) {
1149 		gnttab_shared.addr = ioremap(xen_hvm_resume_frames,
1150 						PAGE_SIZE * max_nr_gframes);
1151 		if (gnttab_shared.addr == NULL) {
1152 			printk(KERN_WARNING
1153 					"Failed to ioremap gnttab share frames!");
1154 			return -ENOMEM;
1155 		}
1156 	}
1157 
1158 	gnttab_map(0, nr_grant_frames - 1);
1159 
1160 	return 0;
1161 }
1162 
1163 int gnttab_suspend(void)
1164 {
1165 	gnttab_interface->unmap_frames();
1166 	return 0;
1167 }
1168 
1169 static int gnttab_expand(unsigned int req_entries)
1170 {
1171 	int rc;
1172 	unsigned int cur, extra;
1173 
1174 	cur = nr_grant_frames;
1175 	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
1176 		 GREFS_PER_GRANT_FRAME);
1177 	if (cur + extra > gnttab_max_grant_frames())
1178 		return -ENOSPC;
1179 
1180 	rc = gnttab_map(cur, cur + extra - 1);
1181 	if (rc == 0)
1182 		rc = grow_gnttab_list(extra);
1183 
1184 	return rc;
1185 }
1186 
1187 int gnttab_init(void)
1188 {
1189 	int i;
1190 	unsigned int max_nr_glist_frames, nr_glist_frames;
1191 	unsigned int nr_init_grefs;
1192 	int ret;
1193 
1194 	nr_grant_frames = 1;
1195 	boot_max_nr_grant_frames = __max_nr_grant_frames();
1196 
1197 	/* Determine the maximum number of frames required for the
1198 	 * grant reference free list on the current hypervisor.
1199 	 */
1200 	max_nr_glist_frames = (boot_max_nr_grant_frames *
1201 			       GREFS_PER_GRANT_FRAME / RPP);
1202 
1203 	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
1204 			      GFP_KERNEL);
1205 	if (gnttab_list == NULL)
1206 		return -ENOMEM;
1207 
1208 	nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
1209 	for (i = 0; i < nr_glist_frames; i++) {
1210 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
1211 		if (gnttab_list[i] == NULL) {
1212 			ret = -ENOMEM;
1213 			goto ini_nomem;
1214 		}
1215 	}
1216 
1217 	if (gnttab_resume() < 0) {
1218 		ret = -ENODEV;
1219 		goto ini_nomem;
1220 	}
1221 
1222 	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
1223 
1224 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
1225 		gnttab_entry(i) = i + 1;
1226 
1227 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
1228 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
1229 	gnttab_free_head  = NR_RESERVED_ENTRIES;
1230 
1231 	printk("Grant table initialized\n");
1232 	return 0;
1233 
1234  ini_nomem:
1235 	for (i--; i >= 0; i--)
1236 		free_page((unsigned long)gnttab_list[i]);
1237 	kfree(gnttab_list);
1238 	return ret;
1239 }
1240 EXPORT_SYMBOL_GPL(gnttab_init);
1241 
1242 static int __devinit __gnttab_init(void)
1243 {
1244 	/* Delay grant-table initialization in the PV on HVM case */
1245 	if (xen_hvm_domain())
1246 		return 0;
1247 
1248 	if (!xen_pv_domain())
1249 		return -ENODEV;
1250 
1251 	return gnttab_init();
1252 }
1253 
1254 core_initcall(__gnttab_init);
1255