xref: /linux/drivers/xen/grant-table.c (revision 9429ec96c2718c0d1e3317cf60a87a0405223814)
1 /******************************************************************************
2  * grant_table.c
3  *
4  * Granting foreign access to our memory reservation.
5  *
6  * Copyright (c) 2005-2006, Christopher Clark
7  * Copyright (c) 2004-2005, K A Fraser
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License version 2
11  * as published by the Free Software Foundation; or, when distributed
12  * separately from the Linux kernel or incorporated into other
13  * software packages, subject to the following license:
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a copy
16  * of this source file (the "Software"), to deal in the Software without
17  * restriction, including without limitation the rights to use, copy, modify,
18  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19  * and to permit persons to whom the Software is furnished to do so, subject to
20  * the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included in
23  * all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31  * IN THE SOFTWARE.
32  */
33 
34 #include <linux/module.h>
35 #include <linux/sched.h>
36 #include <linux/mm.h>
37 #include <linux/slab.h>
38 #include <linux/vmalloc.h>
39 #include <linux/uaccess.h>
40 #include <linux/io.h>
41 #include <linux/hardirq.h>
42 
43 #include <xen/xen.h>
44 #include <xen/interface/xen.h>
45 #include <xen/page.h>
46 #include <xen/grant_table.h>
47 #include <xen/interface/memory.h>
48 #include <xen/hvc-console.h>
49 #include <asm/xen/hypercall.h>
50 
51 #include <asm/pgtable.h>
52 #include <asm/sync_bitops.h>
53 
54 /* External tools reserve first few grant table entries. */
55 #define NR_RESERVED_ENTRIES 8
/* Terminator value stored in the last link of a grant-reference chain. */
56 #define GNTTAB_LIST_END 0xffffffff
/*
 * Number of grant entries that fit in one page; the entry layout (and
 * therefore this count) depends on the grant table version in use.
 */
57 #define GREFS_PER_GRANT_FRAME \
58 (grant_table_version == 1 ?                      \
59 (PAGE_SIZE / sizeof(struct grant_entry_v1)) :   \
60 (PAGE_SIZE / sizeof(union grant_entry_v2)))
61 
/*
 * Array of pages holding one "next" link per grant reference; indexed
 * through __gnttab_entry().  The links form the free list and the private
 * chains handed out to callers.
 */
62 static grant_ref_t **gnttab_list;
/* Number of grant frames the free list currently accounts for. */
63 static unsigned int nr_grant_frames;
/* Upper bound applied by gnttab_max_grant_frames(). */
64 static unsigned int boot_max_nr_grant_frames;
/* Number of references on the free list, and the first of them. */
65 static int gnttab_free_count;
66 static grant_ref_t gnttab_free_head;
/* Taken around updates of the free list, callback list and deferred list. */
67 static DEFINE_SPINLOCK(gnttab_list_lock);
/*
 * Address whose page frame number is used as the base gpfn when mapping
 * grant table frames into the physmap for HVM domains (see gnttab_map()).
 */
68 unsigned long xen_hvm_resume_frames;
69 EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
70 
/*
 * The mapped shared grant table.  All three members alias the same pointer:
 * v1/v2 give typed access to the entries for the respective table version,
 * addr is the untyped form handed to the arch mapping helpers.
 */
71 static union {
72 	struct grant_entry_v1 *v1;
73 	union grant_entry_v2 *v2;
74 	void *addr;
75 } gnttab_shared;
76 
77 /* Table of version-specific grant table operations. */
78 struct gnttab_ops {
79 	/*
80 	 * Map a list of frames for storing grant entries. The frames parameter
81 	 * holds the grant table frame addresses while the table is being set
82 	 * up, nr_gframes is the number of frames to map. Returning
83 	 * GNTST_okay means success and a negative value means failure.
84 	 */
85 	int (*map_frames)(unsigned long *frames, unsigned int nr_gframes);
86 	/*
87 	 * Release the frames which were mapped in map_frames (grant entries
88 	 * and, where applicable, grant entry status).
89 	 */
90 	void (*unmap_frames)(void);
91 	/*
92 	 * Introduce a valid entry into the grant table, granting the frame of
93 	 * this grant entry to a domain for access or transfer. Ref
94 	 * is the reference of the introduced grant entry, domid is the id of
95 	 * the granted domain, frame is the page frame to be granted, and flags
96 	 * is the status of the grant entry to be written.
97 	 */
98 	void (*update_entry)(grant_ref_t ref, domid_t domid,
99 			     unsigned long frame, unsigned flags);
100 	/*
101 	 * Stop granting a grant entry to a domain for access. Ref is the
102 	 * reference of the grant entry whose grant access will be stopped,
103 	 * readonly is not in use in this function. If the grant entry is
104 	 * currently mapped for reading or writing, just return failure(==0)
105 	 * directly and don't tear down the grant access. Otherwise, stop grant
106 	 * access for this entry and return success(==1).
107 	 */
108 	int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
109 	/*
110 	 * Stop granting a grant entry to a domain for transfer. Ref is the
111 	 * reference of the grant entry whose grant transfer will be stopped. If
112 	 * the transfer has not started, just reclaim the grant entry and return
113 	 * failure(==0). Otherwise, wait for the transfer to complete and then
114 	 * return the frame.
115 	 */
116 	unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
117 	/*
118 	 * Query the status of a grant entry. Ref is the reference of the
119 	 * queried grant entry, the return value is the status of that entry.
120 	 * Detailed status (writing/reading) can be extracted from the return
121 	 * value by bit operations.
122 	 */
123 	int (*query_foreign_access)(grant_ref_t ref);
124 	/*
125 	 * Grant a domain access to a range of bytes within the page referred by
126 	 * an available grant entry. Ref is the reference of the grant entry
127 	 * which will be sub-page accessed, domid is the id of the grantee
128 	 * domain, frame is the frame address of the subpage grant, flags is
129 	 * grant type and flag information, page_off is the offset of the range
130 	 * of bytes, and length is the number of bytes to be accessed.
	 * May be NULL when the table version has no sub-page grants.
131 	 */
132 	void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
133 				     unsigned long frame, int flags,
134 				     unsigned page_off, unsigned length);
135 	/*
136 	 * Redirect an available grant entry on domain A to another grant
137 	 * reference of domain B, then allow domain C to use the grant reference
138 	 * of domain B transitively. Ref is an available grant entry
139 	 * reference on domain A, domid is the id of domain C which accesses the
140 	 * grant entry transitively, flags is grant type and flag information,
141 	 * trans_domid is the id of domain B whose grant entry is finally
142 	 * accessed transitively, trans_gref is the transitive grant entry
143 	 * reference of domain B.
	 * May be NULL when the table version has no transitive grants.
144 	 */
145 	void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
146 				   domid_t trans_domid, grant_ref_t trans_gref);
147 };
148 
/* Operations table for the grant table version currently in use. */
149 static struct gnttab_ops *gnttab_interface;
150 
151 /* Status array of the grant entries (read by the v2 operations). */
152 static grant_status_t *grstatus;
153 
/* Grant table version in use; selects entry layout and gnttab_interface. */
154 static int grant_table_version;
155 
/* Singly linked list of callers waiting for free grant references. */
156 static struct gnttab_free_callback *gnttab_free_callback_list;
157 
/* Forward declaration; used by get_free_entries() to grow the free list. */
158 static int gnttab_expand(unsigned int req_entries);
159 
/* Free-list links per page of gnttab_list. */
160 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
/* Status entries per page of grstatus. */
161 #define SPP (PAGE_SIZE / sizeof(grant_status_t))
162 
163 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
164 {
165 	return &gnttab_list[(entry) / RPP][(entry) % RPP];
166 }
167 /* This can be used as an l-value */
168 #define gnttab_entry(entry) (*__gnttab_entry(entry))
169 
170 static int get_free_entries(unsigned count)
171 {
172 	unsigned long flags;
173 	int ref, rc = 0;
174 	grant_ref_t head;
175 
176 	spin_lock_irqsave(&gnttab_list_lock, flags);
177 
178 	if ((gnttab_free_count < count) &&
179 	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
180 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
181 		return rc;
182 	}
183 
184 	ref = head = gnttab_free_head;
185 	gnttab_free_count -= count;
186 	while (count-- > 1)
187 		head = gnttab_entry(head);
188 	gnttab_free_head = gnttab_entry(head);
189 	gnttab_entry(head) = GNTTAB_LIST_END;
190 
191 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
192 
193 	return ref;
194 }
195 
196 static void do_free_callbacks(void)
197 {
198 	struct gnttab_free_callback *callback, *next;
199 
200 	callback = gnttab_free_callback_list;
201 	gnttab_free_callback_list = NULL;
202 
203 	while (callback != NULL) {
204 		next = callback->next;
205 		if (gnttab_free_count >= callback->count) {
206 			callback->next = NULL;
207 			callback->fn(callback->arg);
208 		} else {
209 			callback->next = gnttab_free_callback_list;
210 			gnttab_free_callback_list = callback;
211 		}
212 		callback = next;
213 	}
214 }
215 
216 static inline void check_free_callbacks(void)
217 {
218 	if (unlikely(gnttab_free_callback_list))
219 		do_free_callbacks();
220 }
221 
222 static void put_free_entry(grant_ref_t ref)
223 {
224 	unsigned long flags;
225 	spin_lock_irqsave(&gnttab_list_lock, flags);
226 	gnttab_entry(ref) = gnttab_free_head;
227 	gnttab_free_head = ref;
228 	gnttab_free_count++;
229 	check_free_callbacks();
230 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
231 }
232 
233 /*
234  * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
235  * Introducing a valid entry into the grant table:
236  *  1. Write ent->domid.
237  *  2. Write ent->frame:
238  *      GTF_permit_access:   Frame to which access is permitted.
239  *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
240  *                           frame, or zero if none.
241  *  3. Write memory barrier (WMB).
242  *  4. Write ent->flags, inc. valid type.
243  */
244 static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
245 				   unsigned long frame, unsigned flags)
246 {
247 	gnttab_shared.v1[ref].domid = domid;
248 	gnttab_shared.v1[ref].frame = frame;
249 	wmb();
250 	gnttab_shared.v1[ref].flags = flags;
251 }
252 
253 static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
254 				   unsigned long frame, unsigned flags)
255 {
256 	gnttab_shared.v2[ref].hdr.domid = domid;
257 	gnttab_shared.v2[ref].full_page.frame = frame;
258 	wmb();
259 	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
260 }
261 
262 /*
263  * Public grant-issuing interface functions
264  */
265 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
266 				     unsigned long frame, int readonly)
267 {
268 	gnttab_interface->update_entry(ref, domid, frame,
269 			   GTF_permit_access | (readonly ? GTF_readonly : 0));
270 }
271 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
272 
273 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
274 				int readonly)
275 {
276 	int ref;
277 
278 	ref = get_free_entries(1);
279 	if (unlikely(ref < 0))
280 		return -ENOSPC;
281 
282 	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
283 
284 	return ref;
285 }
286 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
287 
288 void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
289 				    unsigned long frame, int flags,
290 				    unsigned page_off,
291 				    unsigned length)
292 {
293 	gnttab_shared.v2[ref].sub_page.frame = frame;
294 	gnttab_shared.v2[ref].sub_page.page_off = page_off;
295 	gnttab_shared.v2[ref].sub_page.length = length;
296 	gnttab_shared.v2[ref].hdr.domid = domid;
297 	wmb();
298 	gnttab_shared.v2[ref].hdr.flags =
299 				GTF_permit_access | GTF_sub_page | flags;
300 }
301 
302 int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
303 					    unsigned long frame, int flags,
304 					    unsigned page_off,
305 					    unsigned length)
306 {
307 	if (flags & (GTF_accept_transfer | GTF_reading |
308 		     GTF_writing | GTF_transitive))
309 		return -EPERM;
310 
311 	if (gnttab_interface->update_subpage_entry == NULL)
312 		return -ENOSYS;
313 
314 	gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
315 					       page_off, length);
316 
317 	return 0;
318 }
319 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
320 
321 int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
322 					int flags, unsigned page_off,
323 					unsigned length)
324 {
325 	int ref, rc;
326 
327 	ref = get_free_entries(1);
328 	if (unlikely(ref < 0))
329 		return -ENOSPC;
330 
331 	rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
332 						     page_off, length);
333 	if (rc < 0) {
334 		put_free_entry(ref);
335 		return rc;
336 	}
337 
338 	return ref;
339 }
340 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
341 
342 bool gnttab_subpage_grants_available(void)
343 {
344 	return gnttab_interface->update_subpage_entry != NULL;
345 }
346 EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
347 
348 void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
349 				  int flags, domid_t trans_domid,
350 				  grant_ref_t trans_gref)
351 {
352 	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
353 	gnttab_shared.v2[ref].transitive.gref = trans_gref;
354 	gnttab_shared.v2[ref].hdr.domid = domid;
355 	wmb();
356 	gnttab_shared.v2[ref].hdr.flags =
357 				GTF_permit_access | GTF_transitive | flags;
358 }
359 
360 int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
361 					  int flags, domid_t trans_domid,
362 					  grant_ref_t trans_gref)
363 {
364 	if (flags & (GTF_accept_transfer | GTF_reading |
365 		     GTF_writing | GTF_sub_page))
366 		return -EPERM;
367 
368 	if (gnttab_interface->update_trans_entry == NULL)
369 		return -ENOSYS;
370 
371 	gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
372 					     trans_gref);
373 
374 	return 0;
375 }
376 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
377 
378 int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
379 				      domid_t trans_domid,
380 				      grant_ref_t trans_gref)
381 {
382 	int ref, rc;
383 
384 	ref = get_free_entries(1);
385 	if (unlikely(ref < 0))
386 		return -ENOSPC;
387 
388 	rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
389 						   trans_domid, trans_gref);
390 	if (rc < 0) {
391 		put_free_entry(ref);
392 		return rc;
393 	}
394 
395 	return ref;
396 }
397 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
398 
399 bool gnttab_trans_grants_available(void)
400 {
401 	return gnttab_interface->update_trans_entry != NULL;
402 }
403 EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
404 
405 static int gnttab_query_foreign_access_v1(grant_ref_t ref)
406 {
407 	return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
408 }
409 
410 static int gnttab_query_foreign_access_v2(grant_ref_t ref)
411 {
412 	return grstatus[ref] & (GTF_reading|GTF_writing);
413 }
414 
415 int gnttab_query_foreign_access(grant_ref_t ref)
416 {
417 	return gnttab_interface->query_foreign_access(ref);
418 }
419 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
420 
421 static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
422 {
423 	u16 flags, nflags;
424 	u16 *pflags;
425 
426 	pflags = &gnttab_shared.v1[ref].flags;
427 	nflags = *pflags;
428 	do {
429 		flags = nflags;
430 		if (flags & (GTF_reading|GTF_writing))
431 			return 0;
432 	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
433 
434 	return 1;
435 }
436 
437 static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
438 {
439 	gnttab_shared.v2[ref].hdr.flags = 0;
440 	mb();
441 	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
442 		return 0;
443 	} else {
444 		/* The read of grstatus needs to have acquire
445 		semantics.  On x86, reads already have
446 		that, and we just need to protect against
447 		compiler reorderings.  On other
448 		architectures we may need a full
449 		barrier. */
450 #ifdef CONFIG_X86
451 		barrier();
452 #else
453 		mb();
454 #endif
455 	}
456 
457 	return 1;
458 }
459 
460 static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
461 {
462 	return gnttab_interface->end_foreign_access_ref(ref, readonly);
463 }
464 
465 int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
466 {
467 	if (_gnttab_end_foreign_access_ref(ref, readonly))
468 		return 1;
469 	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
470 	return 0;
471 }
472 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
473 
/*
 * A grant whose foreign access could not be ended immediately; queued on
 * deferred_list and retried from the deferred_timer handler.
 */
474 struct deferred_entry {
/* link in deferred_list */
475 	struct list_head list;
/* the grant reference still in use */
476 	grant_ref_t ref;
/* 'readonly' argument to pass when retrying the end-access operation */
477 	bool ro;
/* remaining failed retries before a "still pending" message is logged */
478 	uint16_t warn_delay;
/* page to free once access has ended, or NULL */
479 	struct page *page;
480 };
/* Pending deferred entries; modified with gnttab_list_lock held. */
481 static LIST_HEAD(deferred_list);
482 static void gnttab_handle_deferred(unsigned long);
/* Timer that runs gnttab_handle_deferred(); armed for jiffies + HZ. */
483 static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
484 
485 static void gnttab_handle_deferred(unsigned long unused)
486 {
487 	unsigned int nr = 10;
488 	struct deferred_entry *first = NULL;
489 	unsigned long flags;
490 
491 	spin_lock_irqsave(&gnttab_list_lock, flags);
492 	while (nr--) {
493 		struct deferred_entry *entry
494 			= list_first_entry(&deferred_list,
495 					   struct deferred_entry, list);
496 
497 		if (entry == first)
498 			break;
499 		list_del(&entry->list);
500 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
501 		if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
502 			put_free_entry(entry->ref);
503 			if (entry->page) {
504 				pr_debug("freeing g.e. %#x (pfn %#lx)\n",
505 					 entry->ref, page_to_pfn(entry->page));
506 				__free_page(entry->page);
507 			} else
508 				pr_info("freeing g.e. %#x\n", entry->ref);
509 			kfree(entry);
510 			entry = NULL;
511 		} else {
512 			if (!--entry->warn_delay)
513 				pr_info("g.e. %#x still pending\n",
514 					entry->ref);
515 			if (!first)
516 				first = entry;
517 		}
518 		spin_lock_irqsave(&gnttab_list_lock, flags);
519 		if (entry)
520 			list_add_tail(&entry->list, &deferred_list);
521 		else if (list_empty(&deferred_list))
522 			break;
523 	}
524 	if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
525 		deferred_timer.expires = jiffies + HZ;
526 		add_timer(&deferred_timer);
527 	}
528 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
529 }
530 
531 static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
532 				struct page *page)
533 {
534 	struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
535 	const char *what = KERN_WARNING "leaking";
536 
537 	if (entry) {
538 		unsigned long flags;
539 
540 		entry->ref = ref;
541 		entry->ro = readonly;
542 		entry->page = page;
543 		entry->warn_delay = 60;
544 		spin_lock_irqsave(&gnttab_list_lock, flags);
545 		list_add_tail(&entry->list, &deferred_list);
546 		if (!timer_pending(&deferred_timer)) {
547 			deferred_timer.expires = jiffies + HZ;
548 			add_timer(&deferred_timer);
549 		}
550 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
551 		what = KERN_DEBUG "deferring";
552 	}
553 	printk("%s g.e. %#x (pfn %#lx)\n",
554 	       what, ref, page ? page_to_pfn(page) : -1);
555 }
556 
557 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
558 			       unsigned long page)
559 {
560 	if (gnttab_end_foreign_access_ref(ref, readonly)) {
561 		put_free_entry(ref);
562 		if (page != 0)
563 			free_page(page);
564 	} else
565 		gnttab_add_deferred(ref, readonly,
566 				    page ? virt_to_page(page) : NULL);
567 }
568 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
569 
570 int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
571 {
572 	int ref;
573 
574 	ref = get_free_entries(1);
575 	if (unlikely(ref < 0))
576 		return -ENOSPC;
577 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
578 
579 	return ref;
580 }
581 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
582 
583 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
584 				       unsigned long pfn)
585 {
586 	gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
587 }
588 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
589 
590 static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
591 {
592 	unsigned long frame;
593 	u16           flags;
594 	u16          *pflags;
595 
596 	pflags = &gnttab_shared.v1[ref].flags;
597 
598 	/*
599 	 * If a transfer is not even yet started, try to reclaim the grant
600 	 * reference and return failure (== 0).
601 	 */
602 	while (!((flags = *pflags) & GTF_transfer_committed)) {
603 		if (sync_cmpxchg(pflags, flags, 0) == flags)
604 			return 0;
605 		cpu_relax();
606 	}
607 
608 	/* If a transfer is in progress then wait until it is completed. */
609 	while (!(flags & GTF_transfer_completed)) {
610 		flags = *pflags;
611 		cpu_relax();
612 	}
613 
614 	rmb();	/* Read the frame number /after/ reading completion status. */
615 	frame = gnttab_shared.v1[ref].frame;
616 	BUG_ON(frame == 0);
617 
618 	return frame;
619 }
620 
621 static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
622 {
623 	unsigned long frame;
624 	u16           flags;
625 	u16          *pflags;
626 
627 	pflags = &gnttab_shared.v2[ref].hdr.flags;
628 
629 	/*
630 	 * If a transfer is not even yet started, try to reclaim the grant
631 	 * reference and return failure (== 0).
632 	 */
633 	while (!((flags = *pflags) & GTF_transfer_committed)) {
634 		if (sync_cmpxchg(pflags, flags, 0) == flags)
635 			return 0;
636 		cpu_relax();
637 	}
638 
639 	/* If a transfer is in progress then wait until it is completed. */
640 	while (!(flags & GTF_transfer_completed)) {
641 		flags = *pflags;
642 		cpu_relax();
643 	}
644 
645 	rmb();  /* Read the frame number /after/ reading completion status. */
646 	frame = gnttab_shared.v2[ref].full_page.frame;
647 	BUG_ON(frame == 0);
648 
649 	return frame;
650 }
651 
652 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
653 {
654 	return gnttab_interface->end_foreign_transfer_ref(ref);
655 }
656 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
657 
658 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
659 {
660 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
661 	put_free_entry(ref);
662 	return frame;
663 }
664 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
665 
666 void gnttab_free_grant_reference(grant_ref_t ref)
667 {
668 	put_free_entry(ref);
669 }
670 EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
671 
672 void gnttab_free_grant_references(grant_ref_t head)
673 {
674 	grant_ref_t ref;
675 	unsigned long flags;
676 	int count = 1;
677 	if (head == GNTTAB_LIST_END)
678 		return;
679 	spin_lock_irqsave(&gnttab_list_lock, flags);
680 	ref = head;
681 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
682 		ref = gnttab_entry(ref);
683 		count++;
684 	}
685 	gnttab_entry(ref) = gnttab_free_head;
686 	gnttab_free_head = head;
687 	gnttab_free_count += count;
688 	check_free_callbacks();
689 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
690 }
691 EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
692 
693 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
694 {
695 	int h = get_free_entries(count);
696 
697 	if (h < 0)
698 		return -ENOSPC;
699 
700 	*head = h;
701 
702 	return 0;
703 }
704 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
705 
706 int gnttab_empty_grant_references(const grant_ref_t *private_head)
707 {
708 	return (*private_head == GNTTAB_LIST_END);
709 }
710 EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
711 
712 int gnttab_claim_grant_reference(grant_ref_t *private_head)
713 {
714 	grant_ref_t g = *private_head;
715 	if (unlikely(g == GNTTAB_LIST_END))
716 		return -ENOSPC;
717 	*private_head = gnttab_entry(g);
718 	return g;
719 }
720 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
721 
722 void gnttab_release_grant_reference(grant_ref_t *private_head,
723 				    grant_ref_t release)
724 {
725 	gnttab_entry(release) = *private_head;
726 	*private_head = release;
727 }
728 EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
729 
730 void gnttab_request_free_callback(struct gnttab_free_callback *callback,
731 				  void (*fn)(void *), void *arg, u16 count)
732 {
733 	unsigned long flags;
734 	spin_lock_irqsave(&gnttab_list_lock, flags);
735 	if (callback->next)
736 		goto out;
737 	callback->fn = fn;
738 	callback->arg = arg;
739 	callback->count = count;
740 	callback->next = gnttab_free_callback_list;
741 	gnttab_free_callback_list = callback;
742 	check_free_callbacks();
743 out:
744 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
745 }
746 EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
747 
748 void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
749 {
750 	struct gnttab_free_callback **pcb;
751 	unsigned long flags;
752 
753 	spin_lock_irqsave(&gnttab_list_lock, flags);
754 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
755 		if (*pcb == callback) {
756 			*pcb = callback->next;
757 			break;
758 		}
759 	}
760 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
761 }
762 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
763 
764 static int grow_gnttab_list(unsigned int more_frames)
765 {
766 	unsigned int new_nr_grant_frames, extra_entries, i;
767 	unsigned int nr_glist_frames, new_nr_glist_frames;
768 
769 	new_nr_grant_frames = nr_grant_frames + more_frames;
770 	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
771 
772 	nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
773 	new_nr_glist_frames =
774 		(new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
775 	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
776 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
777 		if (!gnttab_list[i])
778 			goto grow_nomem;
779 	}
780 
781 
782 	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
783 	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
784 		gnttab_entry(i) = i + 1;
785 
786 	gnttab_entry(i) = gnttab_free_head;
787 	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
788 	gnttab_free_count += extra_entries;
789 
790 	nr_grant_frames = new_nr_grant_frames;
791 
792 	check_free_callbacks();
793 
794 	return 0;
795 
796 grow_nomem:
797 	for ( ; i >= nr_glist_frames; i--)
798 		free_page((unsigned long) gnttab_list[i]);
799 	return -ENOMEM;
800 }
801 
802 static unsigned int __max_nr_grant_frames(void)
803 {
804 	struct gnttab_query_size query;
805 	int rc;
806 
807 	query.dom = DOMID_SELF;
808 
809 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
810 	if ((rc < 0) || (query.status != GNTST_okay))
811 		return 4; /* Legacy max supported number of frames */
812 
813 	return query.max_nr_frames;
814 }
815 
816 unsigned int gnttab_max_grant_frames(void)
817 {
818 	unsigned int xen_max = __max_nr_grant_frames();
819 
820 	if (xen_max > boot_max_nr_grant_frames)
821 		return boot_max_nr_grant_frames;
822 	return xen_max;
823 }
824 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
825 
826 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
827 		    struct gnttab_map_grant_ref *kmap_ops,
828 		    struct page **pages, unsigned int count)
829 {
830 	int i, ret;
831 	bool lazy = false;
832 	pte_t *pte;
833 	unsigned long mfn;
834 
835 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
836 	if (ret)
837 		return ret;
838 
839 	if (xen_feature(XENFEAT_auto_translated_physmap))
840 		return ret;
841 
842 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
843 		arch_enter_lazy_mmu_mode();
844 		lazy = true;
845 	}
846 
847 	for (i = 0; i < count; i++) {
848 		/* Do not add to override if the map failed. */
849 		if (map_ops[i].status)
850 			continue;
851 
852 		if (map_ops[i].flags & GNTMAP_contains_pte) {
853 			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
854 				(map_ops[i].host_addr & ~PAGE_MASK));
855 			mfn = pte_mfn(*pte);
856 		} else {
857 			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
858 		}
859 		ret = m2p_add_override(mfn, pages[i], kmap_ops ?
860 				       &kmap_ops[i] : NULL);
861 		if (ret)
862 			return ret;
863 	}
864 
865 	if (lazy)
866 		arch_leave_lazy_mmu_mode();
867 
868 	return ret;
869 }
870 EXPORT_SYMBOL_GPL(gnttab_map_refs);
871 
872 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
873 		      struct gnttab_map_grant_ref *kmap_ops,
874 		      struct page **pages, unsigned int count)
875 {
876 	int i, ret;
877 	bool lazy = false;
878 
879 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
880 	if (ret)
881 		return ret;
882 
883 	if (xen_feature(XENFEAT_auto_translated_physmap))
884 		return ret;
885 
886 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
887 		arch_enter_lazy_mmu_mode();
888 		lazy = true;
889 	}
890 
891 	for (i = 0; i < count; i++) {
892 		ret = m2p_remove_override(pages[i], kmap_ops ?
893 				       &kmap_ops[i] : NULL);
894 		if (ret)
895 			return ret;
896 	}
897 
898 	if (lazy)
899 		arch_leave_lazy_mmu_mode();
900 
901 	return ret;
902 }
903 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
904 
905 static unsigned nr_status_frames(unsigned nr_grant_frames)
906 {
907 	return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
908 }
909 
910 static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes)
911 {
912 	int rc;
913 
914 	rc = arch_gnttab_map_shared(frames, nr_gframes,
915 				    gnttab_max_grant_frames(),
916 				    &gnttab_shared.addr);
917 	BUG_ON(rc);
918 
919 	return 0;
920 }
921 
922 static void gnttab_unmap_frames_v1(void)
923 {
924 	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
925 }
926 
927 static int gnttab_map_frames_v2(unsigned long *frames, unsigned int nr_gframes)
928 {
929 	uint64_t *sframes;
930 	unsigned int nr_sframes;
931 	struct gnttab_get_status_frames getframes;
932 	int rc;
933 
934 	nr_sframes = nr_status_frames(nr_gframes);
935 
936 	/* No need for kzalloc as it is initialized in following hypercall
937 	 * GNTTABOP_get_status_frames.
938 	 */
939 	sframes = kmalloc(nr_sframes  * sizeof(uint64_t), GFP_ATOMIC);
940 	if (!sframes)
941 		return -ENOMEM;
942 
943 	getframes.dom        = DOMID_SELF;
944 	getframes.nr_frames  = nr_sframes;
945 	set_xen_guest_handle(getframes.frame_list, sframes);
946 
947 	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
948 				       &getframes, 1);
949 	if (rc == -ENOSYS) {
950 		kfree(sframes);
951 		return -ENOSYS;
952 	}
953 
954 	BUG_ON(rc || getframes.status);
955 
956 	rc = arch_gnttab_map_status(sframes, nr_sframes,
957 				    nr_status_frames(gnttab_max_grant_frames()),
958 				    &grstatus);
959 	BUG_ON(rc);
960 	kfree(sframes);
961 
962 	rc = arch_gnttab_map_shared(frames, nr_gframes,
963 				    gnttab_max_grant_frames(),
964 				    &gnttab_shared.addr);
965 	BUG_ON(rc);
966 
967 	return 0;
968 }
969 
970 static void gnttab_unmap_frames_v2(void)
971 {
972 	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
973 	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
974 }
975 
976 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
977 {
978 	struct gnttab_setup_table setup;
979 	unsigned long *frames;
980 	unsigned int nr_gframes = end_idx + 1;
981 	int rc;
982 
983 	if (xen_hvm_domain()) {
984 		struct xen_add_to_physmap xatp;
985 		unsigned int i = end_idx;
986 		rc = 0;
987 		/*
988 		 * Loop backwards, so that the first hypercall has the largest
989 		 * index, ensuring that the table will grow only once.
990 		 */
991 		do {
992 			xatp.domid = DOMID_SELF;
993 			xatp.idx = i;
994 			xatp.space = XENMAPSPACE_grant_table;
995 			xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
996 			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
997 			if (rc != 0) {
998 				printk(KERN_WARNING
999 						"grant table add_to_physmap failed, err=%d\n", rc);
1000 				break;
1001 			}
1002 		} while (i-- > start_idx);
1003 
1004 		return rc;
1005 	}
1006 
1007 	/* No need for kzalloc as it is initialized in following hypercall
1008 	 * GNTTABOP_setup_table.
1009 	 */
1010 	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
1011 	if (!frames)
1012 		return -ENOMEM;
1013 
1014 	setup.dom        = DOMID_SELF;
1015 	setup.nr_frames  = nr_gframes;
1016 	set_xen_guest_handle(setup.frame_list, frames);
1017 
1018 	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
1019 	if (rc == -ENOSYS) {
1020 		kfree(frames);
1021 		return -ENOSYS;
1022 	}
1023 
1024 	BUG_ON(rc || setup.status);
1025 
1026 	rc = gnttab_interface->map_frames(frames, nr_gframes);
1027 
1028 	kfree(frames);
1029 
1030 	return rc;
1031 }
1032 
1033 static struct gnttab_ops gnttab_v1_ops = {
1034 	.map_frames			= gnttab_map_frames_v1,
1035 	.unmap_frames			= gnttab_unmap_frames_v1,
1036 	.update_entry			= gnttab_update_entry_v1,
1037 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
1038 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v1,
1039 	.query_foreign_access		= gnttab_query_foreign_access_v1,
1040 };
1041 
1042 static struct gnttab_ops gnttab_v2_ops = {
1043 	.map_frames			= gnttab_map_frames_v2,
1044 	.unmap_frames			= gnttab_unmap_frames_v2,
1045 	.update_entry			= gnttab_update_entry_v2,
1046 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
1047 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v2,
1048 	.query_foreign_access		= gnttab_query_foreign_access_v2,
1049 	.update_subpage_entry		= gnttab_update_subpage_entry_v2,
1050 	.update_trans_entry		= gnttab_update_trans_entry_v2,
1051 };
1052 
1053 static void gnttab_request_version(void)
1054 {
1055 	int rc;
1056 	struct gnttab_set_version gsv;
1057 
1058 	if (xen_hvm_domain())
1059 		gsv.version = 1;
1060 	else
1061 		gsv.version = 2;
1062 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1063 	if (rc == 0 && gsv.version == 2) {
1064 		grant_table_version = 2;
1065 		gnttab_interface = &gnttab_v2_ops;
1066 	} else if (grant_table_version == 2) {
1067 		/*
1068 		 * If we've already used version 2 features,
1069 		 * but then suddenly discover that they're not
1070 		 * available (e.g. migrating to an older
1071 		 * version of Xen), almost unbounded badness
1072 		 * can happen.
1073 		 */
1074 		panic("we need grant tables version 2, but only version 1 is available");
1075 	} else {
1076 		grant_table_version = 1;
1077 		gnttab_interface = &gnttab_v1_ops;
1078 	}
1079 	printk(KERN_INFO "Grant tables using version %d layout.\n",
1080 		grant_table_version);
1081 }
1082 
1083 int gnttab_resume(void)
1084 {
1085 	unsigned int max_nr_gframes;
1086 
1087 	gnttab_request_version();
1088 	max_nr_gframes = gnttab_max_grant_frames();
1089 	if (max_nr_gframes < nr_grant_frames)
1090 		return -ENOSYS;
1091 
1092 	if (xen_pv_domain())
1093 		return gnttab_map(0, nr_grant_frames - 1);
1094 
1095 	if (gnttab_shared.addr == NULL) {
1096 		gnttab_shared.addr = ioremap(xen_hvm_resume_frames,
1097 						PAGE_SIZE * max_nr_gframes);
1098 		if (gnttab_shared.addr == NULL) {
1099 			printk(KERN_WARNING
1100 					"Failed to ioremap gnttab share frames!");
1101 			return -ENOMEM;
1102 		}
1103 	}
1104 
1105 	gnttab_map(0, nr_grant_frames - 1);
1106 
1107 	return 0;
1108 }
1109 
1110 int gnttab_suspend(void)
1111 {
1112 	gnttab_interface->unmap_frames();
1113 	return 0;
1114 }
1115 
1116 static int gnttab_expand(unsigned int req_entries)
1117 {
1118 	int rc;
1119 	unsigned int cur, extra;
1120 
1121 	cur = nr_grant_frames;
1122 	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
1123 		 GREFS_PER_GRANT_FRAME);
1124 	if (cur + extra > gnttab_max_grant_frames())
1125 		return -ENOSPC;
1126 
1127 	rc = gnttab_map(cur, cur + extra - 1);
1128 	if (rc == 0)
1129 		rc = grow_gnttab_list(extra);
1130 
1131 	return rc;
1132 }
1133 
1134 int gnttab_init(void)
1135 {
1136 	int i;
1137 	unsigned int max_nr_glist_frames, nr_glist_frames;
1138 	unsigned int nr_init_grefs;
1139 	int ret;
1140 
1141 	nr_grant_frames = 1;
1142 	boot_max_nr_grant_frames = __max_nr_grant_frames();
1143 
1144 	/* Determine the maximum number of frames required for the
1145 	 * grant reference free list on the current hypervisor.
1146 	 */
1147 	max_nr_glist_frames = (boot_max_nr_grant_frames *
1148 			       GREFS_PER_GRANT_FRAME / RPP);
1149 
1150 	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
1151 			      GFP_KERNEL);
1152 	if (gnttab_list == NULL)
1153 		return -ENOMEM;
1154 
1155 	nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
1156 	for (i = 0; i < nr_glist_frames; i++) {
1157 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
1158 		if (gnttab_list[i] == NULL) {
1159 			ret = -ENOMEM;
1160 			goto ini_nomem;
1161 		}
1162 	}
1163 
1164 	if (gnttab_resume() < 0) {
1165 		ret = -ENODEV;
1166 		goto ini_nomem;
1167 	}
1168 
1169 	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
1170 
1171 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
1172 		gnttab_entry(i) = i + 1;
1173 
1174 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
1175 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
1176 	gnttab_free_head  = NR_RESERVED_ENTRIES;
1177 
1178 	printk("Grant table initialized\n");
1179 	return 0;
1180 
1181  ini_nomem:
1182 	for (i--; i >= 0; i--)
1183 		free_page((unsigned long)gnttab_list[i]);
1184 	kfree(gnttab_list);
1185 	return ret;
1186 }
1187 EXPORT_SYMBOL_GPL(gnttab_init);
1188 
1189 static int __devinit __gnttab_init(void)
1190 {
1191 	/* Delay grant-table initialization in the PV on HVM case */
1192 	if (xen_hvm_domain())
1193 		return 0;
1194 
1195 	if (!xen_pv_domain())
1196 		return -ENODEV;
1197 
1198 	return gnttab_init();
1199 }
1200 
1201 core_initcall(__gnttab_init);
1202