xref: /freebsd/sys/dev/xen/grant_table/grant_table.c (revision b78ee15e9f04ae15c3e1200df974473167524d17)
1 /******************************************************************************
2  * gnttab.c
3  *
4  * Two sets of functionality:
5  * 1. Granting foreign access to our memory reservation.
6  * 2. Accessing others' memory reservations via grant references.
7  * (i.e., mechanisms for both sender and recipient of grant references)
8  *
9  * Copyright (c) 2005, Christopher Clark
10  * Copyright (c) 2004, K A Fraser
11  */
12 
13 #include <sys/cdefs.h>
14 __FBSDID("$FreeBSD$");
15 
16 #include "opt_pmap.h"
17 
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/bus.h>
21 #include <sys/conf.h>
22 #include <sys/module.h>
23 #include <sys/kernel.h>
24 #include <sys/lock.h>
25 #include <sys/malloc.h>
26 #include <sys/mman.h>
27 #include <sys/limits.h>
28 #include <sys/rman.h>
29 #include <machine/resource.h>
30 
31 #include <xen/xen-os.h>
32 #include <xen/hypervisor.h>
33 #include <machine/xen/synch_bitops.h>
34 
35 #include <xen/hypervisor.h>
36 #include <xen/gnttab.h>
37 
38 #include <vm/vm.h>
39 #include <vm/vm_kern.h>
40 #include <vm/vm_extern.h>
41 #include <vm/pmap.h>
42 
/*
 * Compare-and-set helper built on atomic_cmpset_int().
 * NOTE(review): nothing in the visible part of this file uses it (the code
 * below calls synch_cmpxchg() instead) -- confirm before relying on it.
 */
#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c))

/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
/* Number of grant_entry_t slots that fit in one page-sized grant frame. */
#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))

/*
 * Free-list bookkeeping.  gnttab_list is an array of page-sized chunks of
 * grant_ref_t "next" links, addressed through gnttab_entry(); a chain ends
 * with GNTTAB_LIST_END.
 */
static grant_ref_t **gnttab_list;
/* Number of grant frames currently covered by the free list. */
static unsigned int nr_grant_frames;
/* Frame limit sampled from __max_nr_grant_frames() at attach time. */
static unsigned int boot_max_nr_grant_frames;
/* Number of references currently on the global free list. */
static int gnttab_free_count;
/* First reference on the global free list. */
static grant_ref_t gnttab_free_head;
/* Held around updates to the free list, its count and the callback list. */
static struct mtx gnttab_list_lock;

/*
 * Resource representing allocated physical address space
 * for the grant table metainfo
 */
static struct resource *gnttab_pseudo_phys_res;

/* Resource id for allocated physical address space. */
static int gnttab_pseudo_phys_res_id;

/* Kernel virtual mapping of the grant entries; set up by gnttab_map(). */
static grant_entry_t *shared;

/* Singly-linked list of callbacks waiting for free references. */
static struct gnttab_free_callback *gnttab_free_callback_list = NULL;

static int gnttab_expand(unsigned int req_entries);

/* Number of grant_ref_t links stored in one page of gnttab_list. */
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
/* Free-list link slot belonging to grant reference 'entry'. */
#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
73 
74 static int
75 get_free_entries(int count, int *entries)
76 {
77 	int ref, error;
78 	grant_ref_t head;
79 
80 	mtx_lock(&gnttab_list_lock);
81 	if ((gnttab_free_count < count) &&
82 	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
83 		mtx_unlock(&gnttab_list_lock);
84 		return (error);
85 	}
86 	ref = head = gnttab_free_head;
87 	gnttab_free_count -= count;
88 	while (count-- > 1)
89 		head = gnttab_entry(head);
90 	gnttab_free_head = gnttab_entry(head);
91 	gnttab_entry(head) = GNTTAB_LIST_END;
92 	mtx_unlock(&gnttab_list_lock);
93 
94 	*entries = ref;
95 	return (0);
96 }
97 
98 static void
99 do_free_callbacks(void)
100 {
101 	struct gnttab_free_callback *callback, *next;
102 
103 	callback = gnttab_free_callback_list;
104 	gnttab_free_callback_list = NULL;
105 
106 	while (callback != NULL) {
107 		next = callback->next;
108 		if (gnttab_free_count >= callback->count) {
109 			callback->next = NULL;
110 			callback->fn(callback->arg);
111 		} else {
112 			callback->next = gnttab_free_callback_list;
113 			gnttab_free_callback_list = callback;
114 		}
115 		callback = next;
116 	}
117 }
118 
119 static inline void
120 check_free_callbacks(void)
121 {
122 	if (__predict_false(gnttab_free_callback_list != NULL))
123 		do_free_callbacks();
124 }
125 
126 static void
127 put_free_entry(grant_ref_t ref)
128 {
129 
130 	mtx_lock(&gnttab_list_lock);
131 	gnttab_entry(ref) = gnttab_free_head;
132 	gnttab_free_head = ref;
133 	gnttab_free_count++;
134 	check_free_callbacks();
135 	mtx_unlock(&gnttab_list_lock);
136 }
137 
138 /*
139  * Public grant-issuing interface functions
140  */
141 
142 int
143 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
144 	grant_ref_t *result)
145 {
146 	int error, ref;
147 
148 	error = get_free_entries(1, &ref);
149 
150 	if (__predict_false(error))
151 		return (error);
152 
153 	shared[ref].frame = frame;
154 	shared[ref].domid = domid;
155 	wmb();
156 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
157 
158 	if (result)
159 		*result = ref;
160 
161 	return (0);
162 }
163 
164 void
165 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
166 				unsigned long frame, int readonly)
167 {
168 
169 	shared[ref].frame = frame;
170 	shared[ref].domid = domid;
171 	wmb();
172 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
173 }
174 
175 int
176 gnttab_query_foreign_access(grant_ref_t ref)
177 {
178 	uint16_t nflags;
179 
180 	nflags = shared[ref].flags;
181 
182 	return (nflags & (GTF_reading|GTF_writing));
183 }
184 
185 int
186 gnttab_end_foreign_access_ref(grant_ref_t ref)
187 {
188 	uint16_t flags, nflags;
189 
190 	nflags = shared[ref].flags;
191 	do {
192 		if ( (flags = nflags) & (GTF_reading|GTF_writing) ) {
193 			printf("%s: WARNING: g.e. still in use!\n", __func__);
194 			return (0);
195 		}
196 	} while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) !=
197 	       flags);
198 
199 	return (1);
200 }
201 
202 void
203 gnttab_end_foreign_access(grant_ref_t ref, void *page)
204 {
205 	if (gnttab_end_foreign_access_ref(ref)) {
206 		put_free_entry(ref);
207 		if (page != NULL) {
208 			free(page, M_DEVBUF);
209 		}
210 	}
211 	else {
212 		/* XXX This needs to be fixed so that the ref and page are
213 		   placed on a list to be freed up later. */
214 		printf("%s: WARNING: leaking g.e. and page still in use!\n",
215 		       __func__);
216 	}
217 }
218 
219 void
220 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
221 {
222 	grant_ref_t *last_ref;
223 	grant_ref_t  head;
224 	grant_ref_t  tail;
225 
226 	head = GNTTAB_LIST_END;
227 	tail = *refs;
228 	last_ref = refs + count;
229 	while (refs != last_ref) {
230 
231 		if (gnttab_end_foreign_access_ref(*refs)) {
232 			gnttab_entry(*refs) = head;
233 			head = *refs;
234 		} else {
235 			/*
236 			 * XXX This needs to be fixed so that the ref
237 			 * is placed on a list to be freed up later.
238 			 */
239 			printf("%s: WARNING: leaking g.e. still in use!\n",
240 			       __func__);
241 			count--;
242 		}
243 		refs++;
244 	}
245 
246 	if (count != 0) {
247 		mtx_lock(&gnttab_list_lock);
248 		gnttab_free_count += count;
249 		gnttab_entry(tail) = gnttab_free_head;
250 		gnttab_free_head = head;
251 		mtx_unlock(&gnttab_list_lock);
252 	}
253 }
254 
255 int
256 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
257     grant_ref_t *result)
258 {
259 	int error, ref;
260 
261 	error = get_free_entries(1, &ref);
262 	if (__predict_false(error))
263 		return (error);
264 
265 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
266 
267 	*result = ref;
268 	return (0);
269 }
270 
271 void
272 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
273 	unsigned long pfn)
274 {
275 	shared[ref].frame = pfn;
276 	shared[ref].domid = domid;
277 	wmb();
278 	shared[ref].flags = GTF_accept_transfer;
279 }
280 
281 unsigned long
282 gnttab_end_foreign_transfer_ref(grant_ref_t ref)
283 {
284 	unsigned long frame;
285 	uint16_t      flags;
286 
287 	/*
288          * If a transfer is not even yet started, try to reclaim the grant
289          * reference and return failure (== 0).
290          */
291 	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
292 		if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags )
293 			return (0);
294 		cpu_relax();
295 	}
296 
297 	/* If a transfer is in progress then wait until it is completed. */
298 	while (!(flags & GTF_transfer_completed)) {
299 		flags = shared[ref].flags;
300 		cpu_relax();
301 	}
302 
303 	/* Read the frame number /after/ reading completion status. */
304 	rmb();
305 	frame = shared[ref].frame;
306 	KASSERT(frame != 0, ("grant table inconsistent"));
307 
308 	return (frame);
309 }
310 
311 unsigned long
312 gnttab_end_foreign_transfer(grant_ref_t ref)
313 {
314 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
315 
316 	put_free_entry(ref);
317 	return (frame);
318 }
319 
320 void
321 gnttab_free_grant_reference(grant_ref_t ref)
322 {
323 
324 	put_free_entry(ref);
325 }
326 
327 void
328 gnttab_free_grant_references(grant_ref_t head)
329 {
330 	grant_ref_t ref;
331 	int count = 1;
332 
333 	if (head == GNTTAB_LIST_END)
334 		return;
335 
336 	ref = head;
337 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
338 		ref = gnttab_entry(ref);
339 		count++;
340 	}
341 	mtx_lock(&gnttab_list_lock);
342 	gnttab_entry(ref) = gnttab_free_head;
343 	gnttab_free_head = head;
344 	gnttab_free_count += count;
345 	check_free_callbacks();
346 	mtx_unlock(&gnttab_list_lock);
347 }
348 
349 int
350 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
351 {
352 	int ref, error;
353 
354 	error = get_free_entries(count, &ref);
355 	if (__predict_false(error))
356 		return (error);
357 
358 	*head = ref;
359 	return (0);
360 }
361 
362 int
363 gnttab_empty_grant_references(const grant_ref_t *private_head)
364 {
365 
366 	return (*private_head == GNTTAB_LIST_END);
367 }
368 
369 int
370 gnttab_claim_grant_reference(grant_ref_t *private_head)
371 {
372 	grant_ref_t g = *private_head;
373 
374 	if (__predict_false(g == GNTTAB_LIST_END))
375 		return (g);
376 	*private_head = gnttab_entry(g);
377 	return (g);
378 }
379 
380 void
381 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
382 {
383 
384 	gnttab_entry(release) = *private_head;
385 	*private_head = release;
386 }
387 
388 void
389 gnttab_request_free_callback(struct gnttab_free_callback *callback,
390     void (*fn)(void *), void *arg, uint16_t count)
391 {
392 
393 	mtx_lock(&gnttab_list_lock);
394 	if (callback->next)
395 		goto out;
396 	callback->fn = fn;
397 	callback->arg = arg;
398 	callback->count = count;
399 	callback->next = gnttab_free_callback_list;
400 	gnttab_free_callback_list = callback;
401 	check_free_callbacks();
402  out:
403 	mtx_unlock(&gnttab_list_lock);
404 
405 }
406 
407 void
408 gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
409 {
410 	struct gnttab_free_callback **pcb;
411 
412 	mtx_lock(&gnttab_list_lock);
413 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
414 		if (*pcb == callback) {
415 			*pcb = callback->next;
416 			break;
417 		}
418 	}
419 	mtx_unlock(&gnttab_list_lock);
420 }
421 
422 
423 static int
424 grow_gnttab_list(unsigned int more_frames)
425 {
426 	unsigned int new_nr_grant_frames, extra_entries, i;
427 
428 	new_nr_grant_frames = nr_grant_frames + more_frames;
429 	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
430 
431 	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
432 	{
433 		gnttab_list[i] = (grant_ref_t *)
434 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
435 
436 		if (!gnttab_list[i])
437 			goto grow_nomem;
438 	}
439 
440 	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
441 	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
442 		gnttab_entry(i) = i + 1;
443 
444 	gnttab_entry(i) = gnttab_free_head;
445 	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
446 	gnttab_free_count += extra_entries;
447 
448 	nr_grant_frames = new_nr_grant_frames;
449 
450 	check_free_callbacks();
451 
452 	return (0);
453 
454 grow_nomem:
455 	for ( ; i >= nr_grant_frames; i--)
456 		free(gnttab_list[i], M_DEVBUF);
457 	return (ENOMEM);
458 }
459 
460 static unsigned int
461 __max_nr_grant_frames(void)
462 {
463 	struct gnttab_query_size query;
464 	int rc;
465 
466 	query.dom = DOMID_SELF;
467 
468 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
469 	if ((rc < 0) || (query.status != GNTST_okay))
470 		return (4); /* Legacy max supported number of frames */
471 
472 	return (query.max_nr_frames);
473 }
474 
475 static inline
476 unsigned int max_nr_grant_frames(void)
477 {
478 	unsigned int xen_max = __max_nr_grant_frames();
479 
480 	if (xen_max > boot_max_nr_grant_frames)
481 		return (boot_max_nr_grant_frames);
482 	return (xen_max);
483 }
484 
#ifdef notyet
/*
 * XXX needed for backend support
 *
 * Both helpers are compiled out unless "notyet" is defined.
 * NOTE(review): they use set_pte_at()/init_mm/pfn_pte_ma(), which are not
 * provided anywhere in the visible part of this file -- they look like
 * unported code and presumably would not build as-is; confirm.
 */

/*
 * Page-table walk callback: install a kernel mapping for the next frame
 * number in the array that 'data' points to, then advance that array
 * pointer by one element.  Always returns 0.
 */
static int
map_pte_fn(pte_t *pte, struct page *pmd_page,
		      unsigned long addr, void *data)
{
	unsigned long **frames = (unsigned long **)data;

	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
	(*frames)++;
	return 0;
}

/*
 * Page-table walk callback: clear the PTE for 'addr'.  Always returns 0.
 */
static int
unmap_pte_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{

	set_pte_at(&init_mm, addr, pte, __pte(0));
	return 0;
}
#endif
510 
/*
 * Physical base address of the region backing the grant table.  Zero until
 * gnttab_resume() has reserved the region and recorded its start here.
 */
static vm_paddr_t resume_frames;

/*
 * Map grant frames start_idx..end_idx (inclusive) into the kernel.
 *
 * Each frame is first placed into the physical address space at
 * resume_frames + idx * PAGE_SIZE with XENMEM_add_to_physmap, and is then
 * entered into the kernel virtual range that 'shared' points to.  That
 * range is allocated on the first call, sized for max_nr_grant_frames()
 * pages.
 *
 * Panics if the hypercall fails; otherwise always returns 0.
 */
static int
gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct xen_add_to_physmap xatp;
	unsigned int i = end_idx;

	/*
	 * Loop backwards, so that the first hypercall has the largest index,
	 * ensuring that the table will grow only once.
	 */
	do {
		xatp.domid = DOMID_SELF;
		xatp.idx = i;
		xatp.space = XENMAPSPACE_grant_table;
		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
			panic("HYPERVISOR_memory_op failed to map gnttab");
	/*
	 * The comparison uses the value of i before the decrement, so the
	 * loop also terminates correctly when start_idx is 0.
	 */
	} while (i-- > start_idx);

	if (shared == NULL) {
		vm_offset_t area;

		/* First call: reserve KVA for the largest possible table. */
		area = kva_alloc(PAGE_SIZE * max_nr_grant_frames());
		/*
		 * NOTE(review): the allocation is only checked by KASSERT;
		 * presumably nothing catches a failure in kernels built
		 * without assertions -- confirm.
		 */
		KASSERT(area, ("can't allocate VM space for grant table"));
		shared = (grant_entry_t *)area;
	}

	/* Enter the newly placed frames into the kernel mapping. */
	for (i = start_idx; i <= end_idx; i++) {
		pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE,
		    resume_frames + i * PAGE_SIZE);
	}

	return (0);
}
547 
548 int
549 gnttab_resume(device_t dev)
550 {
551 	unsigned int max_nr_gframes, nr_gframes;
552 
553 	nr_gframes = nr_grant_frames;
554 	max_nr_gframes = max_nr_grant_frames();
555 	if (max_nr_gframes < nr_gframes)
556 		return (ENOSYS);
557 
558 	if (!resume_frames) {
559 		KASSERT(dev != NULL,
560 		    ("No resume frames and no device provided"));
561 
562 		gnttab_pseudo_phys_res = xenmem_alloc(dev,
563 		    &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes);
564 		if (gnttab_pseudo_phys_res == NULL)
565 			panic("Unable to reserve physical memory for gnttab");
566 		resume_frames = rman_get_start(gnttab_pseudo_phys_res);
567 	}
568 
569 	return (gnttab_map(0, nr_gframes - 1));
570 }
571 
572 static int
573 gnttab_expand(unsigned int req_entries)
574 {
575 	int error;
576 	unsigned int cur, extra;
577 
578 	cur = nr_grant_frames;
579 	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
580 		 GREFS_PER_GRANT_FRAME);
581 	if (cur + extra > max_nr_grant_frames())
582 		return (ENOSPC);
583 
584 	error = gnttab_map(cur, cur + extra - 1);
585 	if (!error)
586 		error = grow_gnttab_list(extra);
587 
588 	return (error);
589 }
590 
/*
 * Register gnttab_list_lock for initialization as an MTX_DEF mutex named
 * "GNTTAB LOCK".
 * NOTE(review): MTX_SYSINIT presumably performs the mtx_init() during
 * system startup, before any code here takes the lock -- confirm.
 */
MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF);
592 
593 /*------------------ Private Device Attachment Functions  --------------------*/
594 /**
595  * \brief Identify instances of this device type in the system.
596  *
597  * \param driver  The driver performing this identify action.
598  * \param parent  The NewBus parent device for any devices this method adds.
599  */
600 static void
601 granttable_identify(driver_t *driver __unused, device_t parent)
602 {
603 
604 	KASSERT(xen_domain(),
605 	    ("Trying to attach grant-table device on non Xen domain"));
606 	/*
607 	 * A single device instance for our driver is always present
608 	 * in a system operating under Xen.
609 	 */
610 	if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL)
611 		panic("unable to attach Xen Grant-table device");
612 }
613 
614 /**
615  * \brief Probe for the existence of the Xen Grant-table device
616  *
617  * \param dev  NewBus device_t for this instance.
618  *
619  * \return  Always returns 0 indicating success.
620  */
621 static int
622 granttable_probe(device_t dev)
623 {
624 
625 	device_set_desc(dev, "Xen Grant-table Device");
626 	return (BUS_PROBE_NOWILDCARD);
627 }
628 
629 /**
630  * \brief Attach the Xen Grant-table device.
631  *
632  * \param dev  NewBus device_t for this instance.
633  *
634  * \return  On success, 0. Otherwise an errno value indicating the
635  *          type of failure.
636  */
637 static int
638 granttable_attach(device_t dev)
639 {
640 	int i;
641 	unsigned int max_nr_glist_frames;
642 	unsigned int nr_init_grefs;
643 
644 	nr_grant_frames = 1;
645 	boot_max_nr_grant_frames = __max_nr_grant_frames();
646 
647 	/* Determine the maximum number of frames required for the
648 	 * grant reference free list on the current hypervisor.
649 	 */
650 	max_nr_glist_frames = (boot_max_nr_grant_frames *
651 			       GREFS_PER_GRANT_FRAME /
652 			       (PAGE_SIZE / sizeof(grant_ref_t)));
653 
654 	gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *),
655 	    M_DEVBUF, M_NOWAIT);
656 
657 	if (gnttab_list == NULL)
658 		return (ENOMEM);
659 
660 	for (i = 0; i < nr_grant_frames; i++) {
661 		gnttab_list[i] = (grant_ref_t *)
662 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
663 		if (gnttab_list[i] == NULL)
664 			goto ini_nomem;
665 	}
666 
667 	if (gnttab_resume(dev))
668 		return (ENODEV);
669 
670 	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
671 
672 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
673 		gnttab_entry(i) = i + 1;
674 
675 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
676 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
677 	gnttab_free_head  = NR_RESERVED_ENTRIES;
678 
679 	if (bootverbose)
680 		printf("Grant table initialized\n");
681 
682 	return (0);
683 
684 ini_nomem:
685 	for (i--; i >= 0; i--)
686 		free(gnttab_list[i], M_DEVBUF);
687 	free(gnttab_list, M_DEVBUF);
688 	return (ENOMEM);
689 }
690 
691 /*-------------------- Private Device Attachment Data  -----------------------*/
/* NewBus method table: identify, probe and attach are implemented above. */
static device_method_t granttable_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	granttable_identify),
	DEVMETHOD(device_probe,         granttable_probe),
	DEVMETHOD(device_attach,        granttable_attach),

	DEVMETHOD_END
};

/* Driver class "granttable"; the last argument (softc size) is 0. */
DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0);
devclass_t granttable_devclass;

/*
 * Register the driver on the xenpv bus with SI_ORDER_FIRST.
 * NOTE(review): presumably so that the grant table is set up before other
 * xenpv children that depend on it -- confirm.
 */
DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, granttable_devclass,
    NULL, NULL, SI_ORDER_FIRST);
706