xref: /linux/drivers/xen/grant-table.c (revision 148f9bb87745ed45f7a11b2cbd3bc0f017d5d257)
1 /******************************************************************************
2  * grant_table.c
3  *
4  * Granting foreign access to our memory reservation.
5  *
6  * Copyright (c) 2005-2006, Christopher Clark
7  * Copyright (c) 2004-2005, K A Fraser
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License version 2
11  * as published by the Free Software Foundation; or, when distributed
12  * separately from the Linux kernel or incorporated into other
13  * software packages, subject to the following license:
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a copy
16  * of this source file (the "Software"), to deal in the Software without
17  * restriction, including without limitation the rights to use, copy, modify,
18  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19  * and to permit persons to whom the Software is furnished to do so, subject to
20  * the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included in
23  * all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31  * IN THE SOFTWARE.
32  */
33 
34 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
35 
36 #include <linux/module.h>
37 #include <linux/sched.h>
38 #include <linux/mm.h>
39 #include <linux/slab.h>
40 #include <linux/vmalloc.h>
41 #include <linux/uaccess.h>
42 #include <linux/io.h>
43 #include <linux/delay.h>
44 #include <linux/hardirq.h>
45 
46 #include <xen/xen.h>
47 #include <xen/interface/xen.h>
48 #include <xen/page.h>
49 #include <xen/grant_table.h>
50 #include <xen/interface/memory.h>
51 #include <xen/hvc-console.h>
52 #include <asm/xen/hypercall.h>
53 #include <asm/xen/interface.h>
54 
55 #include <asm/pgtable.h>
56 #include <asm/sync_bitops.h>
57 
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
/* Sentinel terminating the singly linked free list below. */
#define GNTTAB_LIST_END 0xffffffff

/* Two-level table of grant refs; each slot is one page of grant_ref_t. */
static grant_ref_t **gnttab_list;
/* Number of grant-table frames currently set up with the hypervisor. */
static unsigned int nr_grant_frames;
/* Cap on grant frames recorded at boot; see gnttab_max_grant_frames(). */
static unsigned int boot_max_nr_grant_frames;
/* Number of entries currently on the free list. */
static int gnttab_free_count;
/* Head of the free list, threaded through gnttab_entry() slots. */
static grant_ref_t gnttab_free_head;
/* Protects the free list, free count and the free-callback list. */
static DEFINE_SPINLOCK(gnttab_list_lock);
/* HVM only: address used to re-establish the grant table mapping on resume. */
unsigned long xen_hvm_resume_frames;
EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
70 
/*
 * The grant table shared with Xen.  The one mapping is viewed as v1 or
 * v2 entries depending on the negotiated grant_table_version.
 */
static union {
	struct grant_entry_v1 *v1;
	union grant_entry_v2 *v2;
	void *addr;
} gnttab_shared;
76 
/* This is a structure of function pointers for grant table. */
struct gnttab_ops {
	/*
	 * Mapping a list of frames for storing grant entries. Frames parameter
	 * is used to store grant table address when grant table being setup,
	 * nr_gframes is the number of frames to map grant table. Returning
	 * GNTST_okay means success and negative value means failure.
	 */
	int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
	/*
	 * Release a list of frames which are mapped in map_frames for grant
	 * entry status.
	 */
	void (*unmap_frames)(void);
	/*
	 * Introducing a valid entry into the grant table, granting the frame of
	 * this grant entry to domain for accessing or transferring. Ref
	 * parameter is reference of this introduced grant entry, domid is id of
	 * granted domain, frame is the page frame to be granted, and flags is
	 * status of the grant entry to be updated.
	 */
	void (*update_entry)(grant_ref_t ref, domid_t domid,
			     unsigned long frame, unsigned flags);
	/*
	 * Stop granting a grant entry to domain for accessing. Ref parameter is
	 * reference of a grant entry whose grant access will be stopped,
	 * readonly is not in use in this function. If the grant entry is
	 * currently mapped for reading or writing, just return failure(==0)
	 * directly and don't tear down the grant access. Otherwise, stop grant
	 * access for this entry and return success(==1).
	 */
	int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
	/*
	 * Stop granting a grant entry to domain for transfer. Ref parameter is
	 * reference of a grant entry whose grant transfer will be stopped. If
	 * transfer has not started, just reclaim the grant entry and return
	 * failure(==0). Otherwise, wait for the transfer to complete and then
	 * return the frame.
	 */
	unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
	/*
	 * Query the status of a grant entry. Ref parameter is reference of
	 * queried grant entry, return value is the status of queried entry.
	 * Detailed status(writing/reading) can be gotten from the return value
	 * by bit operations.
	 */
	int (*query_foreign_access)(grant_ref_t ref);
	/*
	 * Grant a domain to access a range of bytes within the page referred by
	 * an available grant entry. Ref parameter is reference of a grant entry
	 * which will be sub-page accessed, domid is id of grantee domain, frame
	 * is frame address of subpage grant, flags is grant type and flag
	 * information, page_off is offset of the range of bytes, and length is
	 * length of bytes to be accessed. NULL when the backend (v1) does not
	 * support sub-page grants.
	 */
	void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
				     unsigned long frame, int flags,
				     unsigned page_off, unsigned length);
	/*
	 * Redirect an available grant entry on domain A to another grant
	 * reference of domain B, then allow domain C to use grant reference
	 * of domain B transitively. Ref parameter is an available grant entry
	 * reference on domain A, domid is id of domain C which accesses grant
	 * entry transitively, flags is grant type and flag information,
	 * trans_domid is id of domain B whose grant entry is finally accessed
	 * transitively, trans_gref is grant entry transitive reference of
	 * domain B. NULL when the backend (v1) does not support transitive
	 * grants.
	 */
	void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
				   domid_t trans_domid, grant_ref_t trans_gref);
};
148 
/* Version-specific operations selected at setup time (v1 or v2). */
static struct gnttab_ops *gnttab_interface;

/*This reflects status of grant entries, so act as a global value*/
/* v2 only: mapped array of per-entry status words (GTF_reading/GTF_writing). */
static grant_status_t *grstatus;

static int grant_table_version;
/* Grant entries per grant-table frame; depends on the entry version. */
static int grefs_per_grant_frame;

/* Singly linked list of callbacks waiting for free entries to appear. */
static struct gnttab_free_callback *gnttab_free_callback_list;

static int gnttab_expand(unsigned int req_entries);

/* Grant refs per page, and status words per page, respectively. */
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
#define SPP (PAGE_SIZE / sizeof(grant_status_t))
163 
164 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
165 {
166 	return &gnttab_list[(entry) / RPP][(entry) % RPP];
167 }
168 /* This can be used as an l-value */
169 #define gnttab_entry(entry) (*__gnttab_entry(entry))
170 
/*
 * Pop @count references off the global free list and return the first.
 * The popped references stay chained through gnttab_entry(), with the
 * last one terminated by GNTTAB_LIST_END, so a caller may walk them
 * (see gnttab_claim_grant_reference()).  Expands the table on demand;
 * returns a negative error if expansion fails.
 */
static int get_free_entries(unsigned count)
{
	unsigned long flags;
	int ref, rc = 0;
	grant_ref_t head;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if ((gnttab_free_count < count) &&
	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		return rc;
	}

	/* Advance to the last of the @count entries being taken ... */
	ref = head = gnttab_free_head;
	gnttab_free_count -= count;
	while (count-- > 1)
		head = gnttab_entry(head);
	/* ... detach the chain from the free list and terminate it. */
	gnttab_free_head = gnttab_entry(head);
	gnttab_entry(head) = GNTTAB_LIST_END;

	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ref;
}
196 
/*
 * Run queued free-space callbacks whose requested entry count is now
 * available; callbacks still short of entries are pushed back onto the
 * (rebuilt) list.  Called with gnttab_list_lock held.  Note that
 * re-queued callbacks are pushed at the head, so their relative order
 * is reversed on each pass.
 */
static void do_free_callbacks(void)
{
	struct gnttab_free_callback *callback, *next;

	/* Take ownership of the whole list; satisfied entries won't return. */
	callback = gnttab_free_callback_list;
	gnttab_free_callback_list = NULL;

	while (callback != NULL) {
		next = callback->next;
		if (gnttab_free_count >= callback->count) {
			/* Clear ->next first: it doubles as "queued" marker. */
			callback->next = NULL;
			callback->fn(callback->arg);
		} else {
			callback->next = gnttab_free_callback_list;
			gnttab_free_callback_list = callback;
		}
		callback = next;
	}
}
216 
217 static inline void check_free_callbacks(void)
218 {
219 	if (unlikely(gnttab_free_callback_list))
220 		do_free_callbacks();
221 }
222 
/* Push a single reference back onto the global free list. */
static void put_free_entry(grant_ref_t ref)
{
	unsigned long flags;
	spin_lock_irqsave(&gnttab_list_lock, flags);
	gnttab_entry(ref) = gnttab_free_head;
	gnttab_free_head = ref;
	gnttab_free_count++;
	/* A new free entry may satisfy a waiting callback. */
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
233 
/*
 * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
 * Introducing a valid entry into the grant table:
 *  1. Write ent->domid.
 *  2. Write ent->frame:
 *      GTF_permit_access:   Frame to which access is permitted.
 *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
 *                           frame, or zero if none.
 *  3. Write memory barrier (WMB).
 *  4. Write ent->flags, inc. valid type.
 */
static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
				   unsigned long frame, unsigned flags)
{
	gnttab_shared.v1[ref].domid = domid;
	gnttab_shared.v1[ref].frame = frame;
	/* Flags must become visible last: they validate the entry. */
	wmb();
	gnttab_shared.v1[ref].flags = flags;
}
253 
/* v2 variant of the above; see ordering contract before _v1. */
static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
				   unsigned long frame, unsigned flags)
{
	gnttab_shared.v2[ref].hdr.domid = domid;
	gnttab_shared.v2[ref].full_page.frame = frame;
	/* Publish the entry only after domid/frame are in place. */
	wmb();
	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
}
262 
263 /*
264  * Public grant-issuing interface functions
265  */
266 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
267 				     unsigned long frame, int readonly)
268 {
269 	gnttab_interface->update_entry(ref, domid, frame,
270 			   GTF_permit_access | (readonly ? GTF_readonly : 0));
271 }
272 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
273 
274 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
275 				int readonly)
276 {
277 	int ref;
278 
279 	ref = get_free_entries(1);
280 	if (unlikely(ref < 0))
281 		return -ENOSPC;
282 
283 	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
284 
285 	return ref;
286 }
287 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
288 
/*
 * v2 only: grant access to @length bytes starting at @page_off within
 * @frame.  Same publish ordering as gnttab_update_entry_v2.
 */
static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
					   unsigned long frame, int flags,
					   unsigned page_off, unsigned length)
{
	gnttab_shared.v2[ref].sub_page.frame = frame;
	gnttab_shared.v2[ref].sub_page.page_off = page_off;
	gnttab_shared.v2[ref].sub_page.length = length;
	gnttab_shared.v2[ref].hdr.domid = domid;
	/* Flags validate the entry; write them last. */
	wmb();
	gnttab_shared.v2[ref].hdr.flags =
				GTF_permit_access | GTF_sub_page | flags;
}
301 
302 int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
303 					    unsigned long frame, int flags,
304 					    unsigned page_off,
305 					    unsigned length)
306 {
307 	if (flags & (GTF_accept_transfer | GTF_reading |
308 		     GTF_writing | GTF_transitive))
309 		return -EPERM;
310 
311 	if (gnttab_interface->update_subpage_entry == NULL)
312 		return -ENOSYS;
313 
314 	gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
315 					       page_off, length);
316 
317 	return 0;
318 }
319 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
320 
321 int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
322 					int flags, unsigned page_off,
323 					unsigned length)
324 {
325 	int ref, rc;
326 
327 	ref = get_free_entries(1);
328 	if (unlikely(ref < 0))
329 		return -ENOSPC;
330 
331 	rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
332 						     page_off, length);
333 	if (rc < 0) {
334 		put_free_entry(ref);
335 		return rc;
336 	}
337 
338 	return ref;
339 }
340 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
341 
342 bool gnttab_subpage_grants_available(void)
343 {
344 	return gnttab_interface->update_subpage_entry != NULL;
345 }
346 EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
347 
/*
 * v2 only: make @ref a transitive grant redirecting to @trans_gref in
 * @trans_domid, usable by @domid.  Same publish ordering as above.
 */
static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
					 int flags, domid_t trans_domid,
					 grant_ref_t trans_gref)
{
	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
	gnttab_shared.v2[ref].transitive.gref = trans_gref;
	gnttab_shared.v2[ref].hdr.domid = domid;
	/* Flags validate the entry; write them last. */
	wmb();
	gnttab_shared.v2[ref].hdr.flags =
				GTF_permit_access | GTF_transitive | flags;
}
359 
360 int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
361 					  int flags, domid_t trans_domid,
362 					  grant_ref_t trans_gref)
363 {
364 	if (flags & (GTF_accept_transfer | GTF_reading |
365 		     GTF_writing | GTF_sub_page))
366 		return -EPERM;
367 
368 	if (gnttab_interface->update_trans_entry == NULL)
369 		return -ENOSYS;
370 
371 	gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
372 					     trans_gref);
373 
374 	return 0;
375 }
376 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
377 
378 int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
379 				      domid_t trans_domid,
380 				      grant_ref_t trans_gref)
381 {
382 	int ref, rc;
383 
384 	ref = get_free_entries(1);
385 	if (unlikely(ref < 0))
386 		return -ENOSPC;
387 
388 	rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
389 						   trans_domid, trans_gref);
390 	if (rc < 0) {
391 		put_free_entry(ref);
392 		return rc;
393 	}
394 
395 	return ref;
396 }
397 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
398 
399 bool gnttab_trans_grants_available(void)
400 {
401 	return gnttab_interface->update_trans_entry != NULL;
402 }
403 EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
404 
405 static int gnttab_query_foreign_access_v1(grant_ref_t ref)
406 {
407 	return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
408 }
409 
410 static int gnttab_query_foreign_access_v2(grant_ref_t ref)
411 {
412 	return grstatus[ref] & (GTF_reading|GTF_writing);
413 }
414 
/*
 * Return nonzero (GTF_reading/GTF_writing bits) while the foreign
 * domain still has @ref mapped.
 */
int gnttab_query_foreign_access(grant_ref_t ref)
{
	return gnttab_interface->query_foreign_access(ref);
}
EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
420 
/*
 * v1: atomically clear the entry's flags, but only if the grant is not
 * currently mapped.  Returns 1 on success, 0 if still in use.  The
 * cmpxchg loop handles the remote end racing flag updates against us.
 */
static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
{
	u16 flags, nflags;
	u16 *pflags;

	pflags = &gnttab_shared.v1[ref].flags;
	nflags = *pflags;
	do {
		flags = nflags;
		if (flags & (GTF_reading|GTF_writing))
			return 0;
	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);

	return 1;
}
436 
/*
 * v2: revoke the grant by clearing the header flags, then check the
 * status word to see whether the remote end still has it mapped.
 * Returns 1 when the grant was successfully ended, 0 if still in use.
 */
static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
{
	gnttab_shared.v2[ref].hdr.flags = 0;
	/* Order the flag clear before the status read. */
	mb();
	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
		return 0;
	} else {
		/*
		 * The read of grstatus needs to have acquire semantics.
		 * On x86, reads already have that, and we just need to
		 * protect against compiler reorderings.  On other
		 * architectures we may need a full barrier.
		 */
#ifdef CONFIG_X86
		barrier();
#else
		mb();
#endif
	}

	return 1;
}
459 
/* Version dispatch; no warning on failure (see the public wrapper). */
static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
{
	return gnttab_interface->end_foreign_access_ref(ref, readonly);
}
464 
465 int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
466 {
467 	if (_gnttab_end_foreign_access_ref(ref, readonly))
468 		return 1;
469 	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
470 	return 0;
471 }
472 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
473 
/*
 * A grant whose revocation failed because the remote end still had it
 * mapped; retried periodically from a timer until it can be freed.
 */
struct deferred_entry {
	struct list_head list;
	grant_ref_t ref;
	bool ro;		/* readonly flag passed to end_foreign_access_ref */
	uint16_t warn_delay;	/* retries left before logging "still pending" */
	struct page *page;	/* page to free once revoked, or NULL */
};
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(unsigned long);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
484 
/*
 * Timer callback: retry up to 10 deferred revocations per tick.  The
 * list lock is dropped around each revocation attempt; entries that
 * are still in use go back to the tail, and @first marks where we
 * started so one pass does not loop forever over stuck entries.
 * Re-arms itself while work remains.
 */
static void gnttab_handle_deferred(unsigned long unused)
{
	unsigned int nr = 10;
	struct deferred_entry *first = NULL;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while (nr--) {
		struct deferred_entry *entry
			= list_first_entry(&deferred_list,
					   struct deferred_entry, list);

		/* Wrapped around to an entry we already re-queued: stop. */
		if (entry == first)
			break;
		list_del(&entry->list);
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
			put_free_entry(entry->ref);
			if (entry->page) {
				pr_debug("freeing g.e. %#x (pfn %#lx)\n",
					 entry->ref, page_to_pfn(entry->page));
				__free_page(entry->page);
			} else
				pr_info("freeing g.e. %#x\n", entry->ref);
			kfree(entry);
			entry = NULL;	/* consumed; don't re-queue below */
		} else {
			if (!--entry->warn_delay)
				pr_info("g.e. %#x still pending\n", entry->ref);
			if (!first)
				first = entry;
		}
		spin_lock_irqsave(&gnttab_list_lock, flags);
		if (entry)
			list_add_tail(&entry->list, &deferred_list);
		else if (list_empty(&deferred_list))
			break;
	}
	if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
		deferred_timer.expires = jiffies + HZ;
		add_timer(&deferred_timer);
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
529 
/*
 * Queue a still-in-use grant for deferred revocation.  If the tracking
 * allocation fails the grant (and its page) are simply leaked, which
 * is safer than freeing memory the remote domain can still write.
 * NOTE(review): the KERN_* level is smuggled in via the "%s" argument
 * at the start of the format — presumably relying on printk parsing
 * the level from the formatted text; verify this holds on the target
 * kernel version.
 */
static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
				struct page *page)
{
	struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	const char *what = KERN_WARNING "leaking";

	if (entry) {
		unsigned long flags;

		entry->ref = ref;
		entry->ro = readonly;
		entry->page = page;
		entry->warn_delay = 60;	/* ~60 retries before "still pending" */
		spin_lock_irqsave(&gnttab_list_lock, flags);
		list_add_tail(&entry->list, &deferred_list);
		if (!timer_pending(&deferred_timer)) {
			deferred_timer.expires = jiffies + HZ;
			add_timer(&deferred_timer);
		}
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		what = KERN_DEBUG "deferring";
	}
	printk("%s g.e. %#x (pfn %#lx)\n",
	       what, ref, page ? page_to_pfn(page) : -1);
}
555 
556 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
557 			       unsigned long page)
558 {
559 	if (gnttab_end_foreign_access_ref(ref, readonly)) {
560 		put_free_entry(ref);
561 		if (page != 0)
562 			free_page(page);
563 	} else
564 		gnttab_add_deferred(ref, readonly,
565 				    page ? virt_to_page(page) : NULL);
566 }
567 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
568 
569 int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
570 {
571 	int ref;
572 
573 	ref = get_free_entries(1);
574 	if (unlikely(ref < 0))
575 		return -ENOSPC;
576 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
577 
578 	return ref;
579 }
580 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
581 
/* Mark @ref as accepting a transfer from @domid into the slot at @pfn. */
void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
				       unsigned long pfn)
{
	gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
588 
/*
 * v1: complete (or abort) an inbound transfer on @ref.  Returns 0 if
 * the transfer never started (entry reclaimed), otherwise spins until
 * completion and returns the transferred frame number.
 */
static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
{
	unsigned long frame;
	u16           flags;
	u16          *pflags;

	pflags = &gnttab_shared.v1[ref].flags;

	/*
	 * If a transfer is not even yet started, try to reclaim the grant
	 * reference and return failure (== 0).
	 */
	while (!((flags = *pflags) & GTF_transfer_committed)) {
		if (sync_cmpxchg(pflags, flags, 0) == flags)
			return 0;
		cpu_relax();
	}

	/* If a transfer is in progress then wait until it is completed. */
	while (!(flags & GTF_transfer_completed)) {
		flags = *pflags;
		cpu_relax();
	}

	rmb();	/* Read the frame number /after/ reading completion status. */
	frame = gnttab_shared.v1[ref].frame;
	BUG_ON(frame == 0);

	return frame;
}
619 
/* v2 variant of the above; identical protocol on the v2 entry layout. */
static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
{
	unsigned long frame;
	u16           flags;
	u16          *pflags;

	pflags = &gnttab_shared.v2[ref].hdr.flags;

	/*
	 * If a transfer is not even yet started, try to reclaim the grant
	 * reference and return failure (== 0).
	 */
	while (!((flags = *pflags) & GTF_transfer_committed)) {
		if (sync_cmpxchg(pflags, flags, 0) == flags)
			return 0;
		cpu_relax();
	}

	/* If a transfer is in progress then wait until it is completed. */
	while (!(flags & GTF_transfer_completed)) {
		flags = *pflags;
		cpu_relax();
	}

	rmb();  /* Read the frame number /after/ reading completion status. */
	frame = gnttab_shared.v2[ref].full_page.frame;
	BUG_ON(frame == 0);

	return frame;
}
650 
/* End a transfer on @ref without freeing the reference; returns the frame. */
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
{
	return gnttab_interface->end_foreign_transfer_ref(ref);
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
656 
657 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
658 {
659 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
660 	put_free_entry(ref);
661 	return frame;
662 }
663 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
664 
/* Return a single grant reference to the global free list. */
void gnttab_free_grant_reference(grant_ref_t ref)
{
	put_free_entry(ref);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
670 
/*
 * Return a whole chain of references (as handed out by
 * gnttab_alloc_grant_references) to the global free list in one splice.
 */
void gnttab_free_grant_references(grant_ref_t head)
{
	grant_ref_t ref;
	unsigned long flags;
	int count = 1;
	if (head == GNTTAB_LIST_END)
		return;
	spin_lock_irqsave(&gnttab_list_lock, flags);
	/* Walk to the tail of the chain, counting entries. */
	ref = head;
	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
		ref = gnttab_entry(ref);
		count++;
	}
	/* Splice the chain onto the front of the free list. */
	gnttab_entry(ref) = gnttab_free_head;
	gnttab_free_head = head;
	gnttab_free_count += count;
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
691 
/*
 * Reserve @count references as a private chain.  On success *head is
 * set to the chain head (claim entries with
 * gnttab_claim_grant_reference).  Returns 0 or -ENOSPC.
 */
int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
	int h = get_free_entries(count);

	if (h < 0)
		return -ENOSPC;

	*head = h;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
704 
705 int gnttab_empty_grant_references(const grant_ref_t *private_head)
706 {
707 	return (*private_head == GNTTAB_LIST_END);
708 }
709 EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
710 
711 int gnttab_claim_grant_reference(grant_ref_t *private_head)
712 {
713 	grant_ref_t g = *private_head;
714 	if (unlikely(g == GNTTAB_LIST_END))
715 		return -ENOSPC;
716 	*private_head = gnttab_entry(g);
717 	return g;
718 }
719 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
720 
/* Push @release back onto a private chain (inverse of claim). */
void gnttab_release_grant_reference(grant_ref_t *private_head,
				    grant_ref_t release)
{
	gnttab_entry(release) = *private_head;
	*private_head = release;
}
EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
728 
729 void gnttab_request_free_callback(struct gnttab_free_callback *callback,
730 				  void (*fn)(void *), void *arg, u16 count)
731 {
732 	unsigned long flags;
733 	spin_lock_irqsave(&gnttab_list_lock, flags);
734 	if (callback->next)
735 		goto out;
736 	callback->fn = fn;
737 	callback->arg = arg;
738 	callback->count = count;
739 	callback->next = gnttab_free_callback_list;
740 	gnttab_free_callback_list = callback;
741 	check_free_callbacks();
742 out:
743 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
744 }
745 EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
746 
747 void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
748 {
749 	struct gnttab_free_callback **pcb;
750 	unsigned long flags;
751 
752 	spin_lock_irqsave(&gnttab_list_lock, flags);
753 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
754 		if (*pcb == callback) {
755 			*pcb = callback->next;
756 			break;
757 		}
758 	}
759 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
760 }
761 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
762 
/*
 * Extend the free-list bookkeeping to cover @more_frames additional
 * grant frames, chaining the new entries onto the free list.  Uses
 * GFP_ATOMIC — presumably called under gnttab_list_lock; confirm at
 * call sites.  Returns 0 or -ENOMEM (after unwinding partial page
 * allocations; the entry at the failing index is NULL, which free_page
 * treats as a no-op).
 */
static int grow_gnttab_list(unsigned int more_frames)
{
	unsigned int new_nr_grant_frames, extra_entries, i;
	unsigned int nr_glist_frames, new_nr_glist_frames;

	BUG_ON(grefs_per_grant_frame == 0);

	new_nr_grant_frames = nr_grant_frames + more_frames;
	extra_entries       = more_frames * grefs_per_grant_frame;

	/* Round entry counts up to whole pages of grant_ref_t. */
	nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
	new_nr_glist_frames =
		(new_nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
		if (!gnttab_list[i])
			goto grow_nomem;
	}


	/* Chain the new entries together ... */
	for (i = grefs_per_grant_frame * nr_grant_frames;
	     i < grefs_per_grant_frame * new_nr_grant_frames - 1; i++)
		gnttab_entry(i) = i + 1;

	/* ... and prepend the chain to the free list. */
	gnttab_entry(i) = gnttab_free_head;
	gnttab_free_head = grefs_per_grant_frame * nr_grant_frames;
	gnttab_free_count += extra_entries;

	nr_grant_frames = new_nr_grant_frames;

	check_free_callbacks();

	return 0;

grow_nomem:
	for ( ; i >= nr_glist_frames; i--)
		free_page((unsigned long) gnttab_list[i]);
	return -ENOMEM;
}
802 
803 static unsigned int __max_nr_grant_frames(void)
804 {
805 	struct gnttab_query_size query;
806 	int rc;
807 
808 	query.dom = DOMID_SELF;
809 
810 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
811 	if ((rc < 0) || (query.status != GNTST_okay))
812 		return 4; /* Legacy max supported number of frames */
813 
814 	return query.max_nr_frames;
815 }
816 
817 unsigned int gnttab_max_grant_frames(void)
818 {
819 	unsigned int xen_max = __max_nr_grant_frames();
820 
821 	if (xen_max > boot_max_nr_grant_frames)
822 		return boot_max_nr_grant_frames;
823 	return xen_max;
824 }
825 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
826 
/* Handling of paged out grant targets (GNTST_eagain) */
#define MAX_DELAY 256
/*
 * Re-issue a single grant-table op while it reports GNTST_eagain
 * (target page currently paged out), sleeping with linearly growing
 * delay between attempts.  After ~MAX_DELAY ms of accumulated backoff
 * the status is forced to GNTST_bad_page and an error logged.
 */
static inline void
gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
						const char *func)
{
	unsigned delay = 1;

	do {
		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
		if (*status == GNTST_eagain)
			msleep(delay++);
	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));

	if (delay >= MAX_DELAY) {
		pr_err("%s: %s eagain grant\n", func, current->comm);
		*status = GNTST_bad_page;
	}
}
846 
847 void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
848 {
849 	struct gnttab_map_grant_ref *op;
850 
851 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
852 		BUG();
853 	for (op = batch; op < batch + count; op++)
854 		if (op->status == GNTST_eagain)
855 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
856 						&op->status, __func__);
857 }
858 EXPORT_SYMBOL_GPL(gnttab_batch_map);
859 
860 void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
861 {
862 	struct gnttab_copy *op;
863 
864 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
865 		BUG();
866 	for (op = batch; op < batch + count; op++)
867 		if (op->status == GNTST_eagain)
868 			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
869 						&op->status, __func__);
870 }
871 EXPORT_SYMBOL_GPL(gnttab_batch_copy);
872 
873 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
874 		    struct gnttab_map_grant_ref *kmap_ops,
875 		    struct page **pages, unsigned int count)
876 {
877 	int i, ret;
878 	bool lazy = false;
879 	pte_t *pte;
880 	unsigned long mfn;
881 
882 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
883 	if (ret)
884 		return ret;
885 
886 	/* Retry eagain maps */
887 	for (i = 0; i < count; i++)
888 		if (map_ops[i].status == GNTST_eagain)
889 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
890 						&map_ops[i].status, __func__);
891 
892 	if (xen_feature(XENFEAT_auto_translated_physmap))
893 		return ret;
894 
895 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
896 		arch_enter_lazy_mmu_mode();
897 		lazy = true;
898 	}
899 
900 	for (i = 0; i < count; i++) {
901 		/* Do not add to override if the map failed. */
902 		if (map_ops[i].status)
903 			continue;
904 
905 		if (map_ops[i].flags & GNTMAP_contains_pte) {
906 			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
907 				(map_ops[i].host_addr & ~PAGE_MASK));
908 			mfn = pte_mfn(*pte);
909 		} else {
910 			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
911 		}
912 		ret = m2p_add_override(mfn, pages[i], kmap_ops ?
913 				       &kmap_ops[i] : NULL);
914 		if (ret)
915 			return ret;
916 	}
917 
918 	if (lazy)
919 		arch_leave_lazy_mmu_mode();
920 
921 	return ret;
922 }
923 EXPORT_SYMBOL_GPL(gnttab_map_refs);
924 
925 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
926 		      struct gnttab_map_grant_ref *kmap_ops,
927 		      struct page **pages, unsigned int count)
928 {
929 	int i, ret;
930 	bool lazy = false;
931 
932 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
933 	if (ret)
934 		return ret;
935 
936 	if (xen_feature(XENFEAT_auto_translated_physmap))
937 		return ret;
938 
939 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
940 		arch_enter_lazy_mmu_mode();
941 		lazy = true;
942 	}
943 
944 	for (i = 0; i < count; i++) {
945 		ret = m2p_remove_override(pages[i], kmap_ops ?
946 				       &kmap_ops[i] : NULL);
947 		if (ret)
948 			return ret;
949 	}
950 
951 	if (lazy)
952 		arch_leave_lazy_mmu_mode();
953 
954 	return ret;
955 }
956 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
957 
958 static unsigned nr_status_frames(unsigned nr_grant_frames)
959 {
960 	BUG_ON(grefs_per_grant_frame == 0);
961 	return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP;
962 }
963 
964 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
965 {
966 	int rc;
967 
968 	rc = arch_gnttab_map_shared(frames, nr_gframes,
969 				    gnttab_max_grant_frames(),
970 				    &gnttab_shared.addr);
971 	BUG_ON(rc);
972 
973 	return 0;
974 }
975 
/* v1: tear down the shared grant-table mapping. */
static void gnttab_unmap_frames_v1(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
}
980 
/*
 * v2: map both the status frames and the shared grant-table frames.
 * Returns -ENOMEM/-ENOSYS on recoverable failure; any other hypercall
 * or arch mapping failure is fatal (BUG).
 */
static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
{
	uint64_t *sframes;
	unsigned int nr_sframes;
	struct gnttab_get_status_frames getframes;
	int rc;

	nr_sframes = nr_status_frames(nr_gframes);

	/* No need for kzalloc as it is initialized in following hypercall
	 * GNTTABOP_get_status_frames.
	 */
	sframes = kmalloc(nr_sframes  * sizeof(uint64_t), GFP_ATOMIC);
	if (!sframes)
		return -ENOMEM;

	getframes.dom        = DOMID_SELF;
	getframes.nr_frames  = nr_sframes;
	set_xen_guest_handle(getframes.frame_list, sframes);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
				       &getframes, 1);
	/* -ENOSYS: hypervisor has no v2 support; caller falls back. */
	if (rc == -ENOSYS) {
		kfree(sframes);
		return -ENOSYS;
	}

	BUG_ON(rc || getframes.status);

	rc = arch_gnttab_map_status(sframes, nr_sframes,
				    nr_status_frames(gnttab_max_grant_frames()),
				    &grstatus);
	BUG_ON(rc);
	kfree(sframes);

	rc = arch_gnttab_map_shared(frames, nr_gframes,
				    gnttab_max_grant_frames(),
				    &gnttab_shared.addr);
	BUG_ON(rc);

	return 0;
}
1023 
/* Tear down both mappings made by gnttab_map_frames_v2(): the shared
 * grant frames and the v2 status frames. */
static void gnttab_unmap_frames_v2(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
}
1029 
/*
 * Make grant-table frames [start_idx, end_idx] available to this domain.
 *
 * HVM guests add the frames to their physmap via XENMEM_add_to_physmap;
 * PV guests ask the hypervisor for the machine frames with
 * GNTTABOP_setup_table and then map them through the version-specific
 * map_frames hook.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -ENOSYS, or the
 * hypercall's failure code).
 */
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct gnttab_setup_table setup;
	xen_pfn_t *frames;
	unsigned int nr_gframes = end_idx + 1;
	int rc;

	if (xen_hvm_domain()) {
		struct xen_add_to_physmap xatp;
		unsigned int i = end_idx;
		rc = 0;
		/*
		 * Loop backwards, so that the first hypercall has the largest
		 * index, ensuring that the table will grow only once.
		 */
		do {
			xatp.domid = DOMID_SELF;
			xatp.idx = i;
			xatp.space = XENMAPSPACE_grant_table;
			xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
			if (rc != 0) {
				pr_warn("grant table add_to_physmap failed, err=%d\n",
					rc);
				break;
			}
		} while (i-- > start_idx);

		return rc;
	}

	/* No need for kzalloc as it is initialized in following hypercall
	 * GNTTABOP_setup_table.
	 */
	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
	if (!frames)
		return -ENOMEM;

	setup.dom        = DOMID_SELF;
	setup.nr_frames  = nr_gframes;
	set_xen_guest_handle(setup.frame_list, frames);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == -ENOSYS) {
		kfree(frames);
		return -ENOSYS;
	}

	/* Other failures are fatal: the table size was validated against
	 * gnttab_max_grant_frames() by the callers. */
	BUG_ON(rc || setup.status);

	rc = gnttab_interface->map_frames(frames, nr_gframes);

	kfree(frames);

	return rc;
}
1086 
/* Version 1 grant-table operations: full-page grants only (no subpage
 * or transitive grants, so those hooks are left unset). */
static struct gnttab_ops gnttab_v1_ops = {
	.map_frames			= gnttab_map_frames_v1,
	.unmap_frames			= gnttab_unmap_frames_v1,
	.update_entry			= gnttab_update_entry_v1,
	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v1,
	.query_foreign_access		= gnttab_query_foreign_access_v1,
};
1095 
/* Version 2 grant-table operations: adds subpage and transitive grant
 * support on top of the v1 feature set. */
static struct gnttab_ops gnttab_v2_ops = {
	.map_frames			= gnttab_map_frames_v2,
	.unmap_frames			= gnttab_unmap_frames_v2,
	.update_entry			= gnttab_update_entry_v2,
	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v2,
	.query_foreign_access		= gnttab_query_foreign_access_v2,
	.update_subpage_entry		= gnttab_update_subpage_entry_v2,
	.update_trans_entry		= gnttab_update_trans_entry_v2,
};
1106 
/*
 * Negotiate the grant-table version with the hypervisor and select the
 * matching ops table.  HVM domains request v1; PV domains request v2
 * but fall back to v1 if the hypervisor refuses.  Panics if v2 features
 * were already in use but are no longer available (e.g. after migrating
 * to an older Xen).
 */
static void gnttab_request_version(void)
{
	int rc;
	struct gnttab_set_version gsv;

	if (xen_hvm_domain())
		gsv.version = 1;
	else
		gsv.version = 2;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
	if (rc == 0 && gsv.version == 2) {
		grant_table_version = 2;
		grefs_per_grant_frame = PAGE_SIZE / sizeof(union grant_entry_v2);
		gnttab_interface = &gnttab_v2_ops;
	} else if (grant_table_version == 2) {
		/*
		 * If we've already used version 2 features,
		 * but then suddenly discover that they're not
		 * available (e.g. migrating to an older
		 * version of Xen), almost unbounded badness
		 * can happen.
		 */
		panic("we need grant tables version 2, but only version 1 is available");
	} else {
		grant_table_version = 1;
		grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1);
		gnttab_interface = &gnttab_v1_ops;
	}
	pr_info("Grant tables using version %d layout\n", grant_table_version);
}
1137 
1138 static int gnttab_setup(void)
1139 {
1140 	unsigned int max_nr_gframes;
1141 
1142 	max_nr_gframes = gnttab_max_grant_frames();
1143 	if (max_nr_gframes < nr_grant_frames)
1144 		return -ENOSYS;
1145 
1146 	if (xen_pv_domain())
1147 		return gnttab_map(0, nr_grant_frames - 1);
1148 
1149 	if (gnttab_shared.addr == NULL) {
1150 		gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
1151 						PAGE_SIZE * max_nr_gframes);
1152 		if (gnttab_shared.addr == NULL) {
1153 			pr_warn("Failed to ioremap gnttab share frames!\n");
1154 			return -ENOMEM;
1155 		}
1156 	}
1157 
1158 	gnttab_map(0, nr_grant_frames - 1);
1159 
1160 	return 0;
1161 }
1162 
/*
 * Re-establish grant tables after resume: re-negotiate the version with
 * the (possibly different) hypervisor, then remap the frames.
 */
int gnttab_resume(void)
{
	int ret;

	gnttab_request_version();
	ret = gnttab_setup();

	return ret;
}
1168 
/* Unmap all grant-table frames before suspend; gnttab_resume() will
 * remap them afterwards.  Always returns 0. */
int gnttab_suspend(void)
{
	gnttab_interface->unmap_frames();
	return 0;
}
1174 
1175 static int gnttab_expand(unsigned int req_entries)
1176 {
1177 	int rc;
1178 	unsigned int cur, extra;
1179 
1180 	BUG_ON(grefs_per_grant_frame == 0);
1181 	cur = nr_grant_frames;
1182 	extra = ((req_entries + (grefs_per_grant_frame-1)) /
1183 		 grefs_per_grant_frame);
1184 	if (cur + extra > gnttab_max_grant_frames())
1185 		return -ENOSPC;
1186 
1187 	rc = gnttab_map(cur, cur + extra - 1);
1188 	if (rc == 0)
1189 		rc = grow_gnttab_list(extra);
1190 
1191 	return rc;
1192 }
1193 
1194 int gnttab_init(void)
1195 {
1196 	int i;
1197 	unsigned int max_nr_glist_frames, nr_glist_frames;
1198 	unsigned int nr_init_grefs;
1199 	int ret;
1200 
1201 	gnttab_request_version();
1202 	nr_grant_frames = 1;
1203 	boot_max_nr_grant_frames = __max_nr_grant_frames();
1204 
1205 	/* Determine the maximum number of frames required for the
1206 	 * grant reference free list on the current hypervisor.
1207 	 */
1208 	BUG_ON(grefs_per_grant_frame == 0);
1209 	max_nr_glist_frames = (boot_max_nr_grant_frames *
1210 			       grefs_per_grant_frame / RPP);
1211 
1212 	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
1213 			      GFP_KERNEL);
1214 	if (gnttab_list == NULL)
1215 		return -ENOMEM;
1216 
1217 	nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
1218 	for (i = 0; i < nr_glist_frames; i++) {
1219 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
1220 		if (gnttab_list[i] == NULL) {
1221 			ret = -ENOMEM;
1222 			goto ini_nomem;
1223 		}
1224 	}
1225 
1226 	if (gnttab_setup() < 0) {
1227 		ret = -ENODEV;
1228 		goto ini_nomem;
1229 	}
1230 
1231 	nr_init_grefs = nr_grant_frames * grefs_per_grant_frame;
1232 
1233 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
1234 		gnttab_entry(i) = i + 1;
1235 
1236 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
1237 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
1238 	gnttab_free_head  = NR_RESERVED_ENTRIES;
1239 
1240 	printk("Grant table initialized\n");
1241 	return 0;
1242 
1243  ini_nomem:
1244 	for (i--; i >= 0; i--)
1245 		free_page((unsigned long)gnttab_list[i]);
1246 	kfree(gnttab_list);
1247 	return ret;
1248 }
1249 EXPORT_SYMBOL_GPL(gnttab_init);
1250 
1251 static int __gnttab_init(void)
1252 {
1253 	/* Delay grant-table initialization in the PV on HVM case */
1254 	if (xen_hvm_domain())
1255 		return 0;
1256 
1257 	if (!xen_pv_domain())
1258 		return -ENODEV;
1259 
1260 	return gnttab_init();
1261 }
1262 
1263 core_initcall(__gnttab_init);
1264