xref: /freebsd/sys/dev/xen/grant_table/grant_table.c (revision 7de88bb4a2e872f72b6d443547c50178799b8366)
1 /******************************************************************************
2  * gnttab.c
3  *
4  * Two sets of functionality:
5  * 1. Granting foreign access to our memory reservation.
6  * 2. Accessing others' memory reservations via grant references.
7  * (i.e., mechanisms for both sender and recipient of grant references)
8  *
9  * Copyright (c) 2005, Christopher Clark
10  * Copyright (c) 2004, K A Fraser
11  */
12 
13 #include <sys/cdefs.h>
14 __FBSDID("$FreeBSD$");
15 
16 #include <sys/param.h>
17 #include <sys/systm.h>
18 #include <sys/bus.h>
19 #include <sys/conf.h>
20 #include <sys/module.h>
21 #include <sys/kernel.h>
22 #include <sys/lock.h>
23 #include <sys/malloc.h>
24 #include <sys/mman.h>
25 #include <sys/limits.h>
26 #include <sys/rman.h>
27 #include <machine/resource.h>
28 #include <machine/cpu.h>
29 
30 #include <xen/xen-os.h>
31 #include <xen/hypervisor.h>
32 #include <machine/xen/synch_bitops.h>
33 
34 #include <xen/hypervisor.h>
35 #include <xen/gnttab.h>
36 
37 #include <vm/vm.h>
38 #include <vm/vm_kern.h>
39 #include <vm/vm_extern.h>
40 #include <vm/pmap.h>
41 
/* External tools reserve the first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
/* Number of grant entries that fit in one page-sized grant frame. */
#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))

/*
 * Free-list storage: an array of pointers to page-sized arrays of
 * grant_ref_t.  The slot for a reference holds the "next" link of the
 * list that reference is chained on (see gnttab_entry() below).
 */
static grant_ref_t **gnttab_list;
/* Number of grant frames currently in use; set to 1 at attach time. */
static unsigned int nr_grant_frames;
/* Value of __max_nr_grant_frames() sampled at attach time. */
static unsigned int boot_max_nr_grant_frames;
/* Number of references currently on the global free list. */
static int gnttab_free_count;
/* First reference on the global free list. */
static grant_ref_t gnttab_free_head;
/* Held while the free list and the free-callback list are updated. */
static struct mtx gnttab_list_lock;

/*
 * Resource representing allocated physical address space
 * for the grant table metainfo
 */
static struct resource *gnttab_pseudo_phys_res;

/* Resource id for allocated physical address space. */
static int gnttab_pseudo_phys_res_id;

/* Kernel virtual mapping of the grant entries; set up by gnttab_map(). */
static grant_entry_t *shared;

/* Singly linked list of callbacks waiting for free references. */
static struct gnttab_free_callback *gnttab_free_callback_list = NULL;

static int gnttab_expand(unsigned int req_entries);

/* Number of grant_ref_t links stored in one page of gnttab_list. */
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
/* Lvalue naming the free-list link slot that belongs to 'entry'. */
#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
70 
71 static int
72 get_free_entries(int count, int *entries)
73 {
74 	int ref, error;
75 	grant_ref_t head;
76 
77 	mtx_lock(&gnttab_list_lock);
78 	if ((gnttab_free_count < count) &&
79 	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
80 		mtx_unlock(&gnttab_list_lock);
81 		return (error);
82 	}
83 	ref = head = gnttab_free_head;
84 	gnttab_free_count -= count;
85 	while (count-- > 1)
86 		head = gnttab_entry(head);
87 	gnttab_free_head = gnttab_entry(head);
88 	gnttab_entry(head) = GNTTAB_LIST_END;
89 	mtx_unlock(&gnttab_list_lock);
90 
91 	*entries = ref;
92 	return (0);
93 }
94 
95 static void
96 do_free_callbacks(void)
97 {
98 	struct gnttab_free_callback *callback, *next;
99 
100 	callback = gnttab_free_callback_list;
101 	gnttab_free_callback_list = NULL;
102 
103 	while (callback != NULL) {
104 		next = callback->next;
105 		if (gnttab_free_count >= callback->count) {
106 			callback->next = NULL;
107 			callback->fn(callback->arg);
108 		} else {
109 			callback->next = gnttab_free_callback_list;
110 			gnttab_free_callback_list = callback;
111 		}
112 		callback = next;
113 	}
114 }
115 
116 static inline void
117 check_free_callbacks(void)
118 {
119 	if (__predict_false(gnttab_free_callback_list != NULL))
120 		do_free_callbacks();
121 }
122 
123 static void
124 put_free_entry(grant_ref_t ref)
125 {
126 
127 	mtx_lock(&gnttab_list_lock);
128 	gnttab_entry(ref) = gnttab_free_head;
129 	gnttab_free_head = ref;
130 	gnttab_free_count++;
131 	check_free_callbacks();
132 	mtx_unlock(&gnttab_list_lock);
133 }
134 
135 /*
136  * Public grant-issuing interface functions
137  */
138 
139 int
140 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
141 	grant_ref_t *result)
142 {
143 	int error, ref;
144 
145 	error = get_free_entries(1, &ref);
146 
147 	if (__predict_false(error))
148 		return (error);
149 
150 	shared[ref].frame = frame;
151 	shared[ref].domid = domid;
152 	wmb();
153 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
154 
155 	if (result)
156 		*result = ref;
157 
158 	return (0);
159 }
160 
161 void
162 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
163 				unsigned long frame, int readonly)
164 {
165 
166 	shared[ref].frame = frame;
167 	shared[ref].domid = domid;
168 	wmb();
169 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
170 }
171 
172 int
173 gnttab_query_foreign_access(grant_ref_t ref)
174 {
175 	uint16_t nflags;
176 
177 	nflags = shared[ref].flags;
178 
179 	return (nflags & (GTF_reading|GTF_writing));
180 }
181 
182 int
183 gnttab_end_foreign_access_ref(grant_ref_t ref)
184 {
185 	uint16_t flags, nflags;
186 
187 	nflags = shared[ref].flags;
188 	do {
189 		if ( (flags = nflags) & (GTF_reading|GTF_writing) ) {
190 			printf("%s: WARNING: g.e. still in use!\n", __func__);
191 			return (0);
192 		}
193 	} while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) !=
194 	       flags);
195 
196 	return (1);
197 }
198 
199 void
200 gnttab_end_foreign_access(grant_ref_t ref, void *page)
201 {
202 	if (gnttab_end_foreign_access_ref(ref)) {
203 		put_free_entry(ref);
204 		if (page != NULL) {
205 			free(page, M_DEVBUF);
206 		}
207 	}
208 	else {
209 		/* XXX This needs to be fixed so that the ref and page are
210 		   placed on a list to be freed up later. */
211 		printf("%s: WARNING: leaking g.e. and page still in use!\n",
212 		       __func__);
213 	}
214 }
215 
216 void
217 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
218 {
219 	grant_ref_t *last_ref;
220 	grant_ref_t  head;
221 	grant_ref_t  tail;
222 
223 	head = GNTTAB_LIST_END;
224 	tail = *refs;
225 	last_ref = refs + count;
226 	while (refs != last_ref) {
227 		if (gnttab_end_foreign_access_ref(*refs)) {
228 			gnttab_entry(*refs) = head;
229 			head = *refs;
230 		} else {
231 			/*
232 			 * XXX This needs to be fixed so that the ref
233 			 * is placed on a list to be freed up later.
234 			 */
235 			printf("%s: WARNING: leaking g.e. still in use!\n",
236 			       __func__);
237 			count--;
238 		}
239 		refs++;
240 	}
241 
242 	if (count != 0) {
243 		mtx_lock(&gnttab_list_lock);
244 		gnttab_free_count += count;
245 		gnttab_entry(tail) = gnttab_free_head;
246 		gnttab_free_head = head;
247 		check_free_callbacks();
248 		mtx_unlock(&gnttab_list_lock);
249 	}
250 }
251 
252 int
253 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
254     grant_ref_t *result)
255 {
256 	int error, ref;
257 
258 	error = get_free_entries(1, &ref);
259 	if (__predict_false(error))
260 		return (error);
261 
262 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
263 
264 	*result = ref;
265 	return (0);
266 }
267 
268 void
269 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
270 	unsigned long pfn)
271 {
272 	shared[ref].frame = pfn;
273 	shared[ref].domid = domid;
274 	wmb();
275 	shared[ref].flags = GTF_accept_transfer;
276 }
277 
278 unsigned long
279 gnttab_end_foreign_transfer_ref(grant_ref_t ref)
280 {
281 	unsigned long frame;
282 	uint16_t      flags;
283 
284 	/*
285          * If a transfer is not even yet started, try to reclaim the grant
286          * reference and return failure (== 0).
287          */
288 	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
289 		if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags )
290 			return (0);
291 		cpu_spinwait();
292 	}
293 
294 	/* If a transfer is in progress then wait until it is completed. */
295 	while (!(flags & GTF_transfer_completed)) {
296 		flags = shared[ref].flags;
297 		cpu_spinwait();
298 	}
299 
300 	/* Read the frame number /after/ reading completion status. */
301 	rmb();
302 	frame = shared[ref].frame;
303 	KASSERT(frame != 0, ("grant table inconsistent"));
304 
305 	return (frame);
306 }
307 
308 unsigned long
309 gnttab_end_foreign_transfer(grant_ref_t ref)
310 {
311 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
312 
313 	put_free_entry(ref);
314 	return (frame);
315 }
316 
317 void
318 gnttab_free_grant_reference(grant_ref_t ref)
319 {
320 
321 	put_free_entry(ref);
322 }
323 
324 void
325 gnttab_free_grant_references(grant_ref_t head)
326 {
327 	grant_ref_t ref;
328 	int count = 1;
329 
330 	if (head == GNTTAB_LIST_END)
331 		return;
332 
333 	ref = head;
334 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
335 		ref = gnttab_entry(ref);
336 		count++;
337 	}
338 	mtx_lock(&gnttab_list_lock);
339 	gnttab_entry(ref) = gnttab_free_head;
340 	gnttab_free_head = head;
341 	gnttab_free_count += count;
342 	check_free_callbacks();
343 	mtx_unlock(&gnttab_list_lock);
344 }
345 
346 int
347 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
348 {
349 	int ref, error;
350 
351 	error = get_free_entries(count, &ref);
352 	if (__predict_false(error))
353 		return (error);
354 
355 	*head = ref;
356 	return (0);
357 }
358 
359 int
360 gnttab_empty_grant_references(const grant_ref_t *private_head)
361 {
362 
363 	return (*private_head == GNTTAB_LIST_END);
364 }
365 
366 int
367 gnttab_claim_grant_reference(grant_ref_t *private_head)
368 {
369 	grant_ref_t g = *private_head;
370 
371 	if (__predict_false(g == GNTTAB_LIST_END))
372 		return (g);
373 	*private_head = gnttab_entry(g);
374 	return (g);
375 }
376 
377 void
378 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
379 {
380 
381 	gnttab_entry(release) = *private_head;
382 	*private_head = release;
383 }
384 
385 void
386 gnttab_request_free_callback(struct gnttab_free_callback *callback,
387     void (*fn)(void *), void *arg, uint16_t count)
388 {
389 
390 	mtx_lock(&gnttab_list_lock);
391 	if (callback->next)
392 		goto out;
393 	callback->fn = fn;
394 	callback->arg = arg;
395 	callback->count = count;
396 	callback->next = gnttab_free_callback_list;
397 	gnttab_free_callback_list = callback;
398 	check_free_callbacks();
399  out:
400 	mtx_unlock(&gnttab_list_lock);
401 
402 }
403 
404 void
405 gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
406 {
407 	struct gnttab_free_callback **pcb;
408 
409 	mtx_lock(&gnttab_list_lock);
410 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
411 		if (*pcb == callback) {
412 			*pcb = callback->next;
413 			break;
414 		}
415 	}
416 	mtx_unlock(&gnttab_list_lock);
417 }
418 
419 static int
420 grow_gnttab_list(unsigned int more_frames)
421 {
422 	unsigned int new_nr_grant_frames, extra_entries, i;
423 
424 	new_nr_grant_frames = nr_grant_frames + more_frames;
425 	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
426 
427 	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
428 	{
429 		gnttab_list[i] = (grant_ref_t *)
430 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
431 
432 		if (!gnttab_list[i])
433 			goto grow_nomem;
434 	}
435 
436 	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
437 	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
438 		gnttab_entry(i) = i + 1;
439 
440 	gnttab_entry(i) = gnttab_free_head;
441 	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
442 	gnttab_free_count += extra_entries;
443 
444 	nr_grant_frames = new_nr_grant_frames;
445 
446 	check_free_callbacks();
447 
448 	return (0);
449 
450 grow_nomem:
451 	for ( ; i >= nr_grant_frames; i--)
452 		free(gnttab_list[i], M_DEVBUF);
453 	return (ENOMEM);
454 }
455 
456 static unsigned int
457 __max_nr_grant_frames(void)
458 {
459 	struct gnttab_query_size query;
460 	int rc;
461 
462 	query.dom = DOMID_SELF;
463 
464 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
465 	if ((rc < 0) || (query.status != GNTST_okay))
466 		return (4); /* Legacy max supported number of frames */
467 
468 	return (query.max_nr_frames);
469 }
470 
471 static inline
472 unsigned int max_nr_grant_frames(void)
473 {
474 
475 	return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames));
476 }
477 
#ifdef notyet
/*
 * XXX needed for backend support
 *
 * Not compiled at present; both helpers are kept for future use.
 */

/*
 * Install a PTE for 'addr' built from the frame number '*data' currently
 * points at, then advance that pointer to the next frame number.
 */
static int
map_pte_fn(pte_t *pte, struct page *pmd_page,
		      unsigned long addr, void *data)
{
	unsigned long **framep;

	framep = (unsigned long **)data;
	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*framep)[0], PAGE_KERNEL));
	(*framep)++;

	return (0);
}

/* Clear the PTE for 'addr'. */
static int
unmap_pte_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{
	set_pte_at(&init_mm, addr, pte, __pte(0));

	return (0);
}
#endif
503 
/*
 * Start of the physical address range that the grant frames are mapped
 * at; zero until gnttab_resume() has reserved the range.
 */
static vm_paddr_t resume_frames;
505 
506 static int
507 gnttab_map(unsigned int start_idx, unsigned int end_idx)
508 {
509 	struct xen_add_to_physmap xatp;
510 	unsigned int i = end_idx;
511 
512 	/*
513 	 * Loop backwards, so that the first hypercall has the largest index,
514 	 * ensuring that the table will grow only once.
515 	 */
516 	do {
517 		xatp.domid = DOMID_SELF;
518 		xatp.idx = i;
519 		xatp.space = XENMAPSPACE_grant_table;
520 		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
521 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
522 			panic("HYPERVISOR_memory_op failed to map gnttab");
523 	} while (i-- > start_idx);
524 
525 	if (shared == NULL) {
526 		vm_offset_t area;
527 
528 		area = kva_alloc(PAGE_SIZE * max_nr_grant_frames());
529 		KASSERT(area, ("can't allocate VM space for grant table"));
530 		shared = (grant_entry_t *)area;
531 	}
532 
533 	for (i = start_idx; i <= end_idx; i++) {
534 		pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE,
535 		    resume_frames + i * PAGE_SIZE);
536 	}
537 
538 	return (0);
539 }
540 
541 int
542 gnttab_resume(device_t dev)
543 {
544 	unsigned int max_nr_gframes, nr_gframes;
545 
546 	nr_gframes = nr_grant_frames;
547 	max_nr_gframes = max_nr_grant_frames();
548 	if (max_nr_gframes < nr_gframes)
549 		return (ENOSYS);
550 
551 	if (!resume_frames) {
552 		KASSERT(dev != NULL,
553 		    ("No resume frames and no device provided"));
554 
555 		gnttab_pseudo_phys_res = xenmem_alloc(dev,
556 		    &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes);
557 		if (gnttab_pseudo_phys_res == NULL)
558 			panic("Unable to reserve physical memory for gnttab");
559 		resume_frames = rman_get_start(gnttab_pseudo_phys_res);
560 	}
561 
562 	return (gnttab_map(0, nr_gframes - 1));
563 }
564 
565 static int
566 gnttab_expand(unsigned int req_entries)
567 {
568 	int error;
569 	unsigned int cur, extra;
570 
571 	cur = nr_grant_frames;
572 	extra = howmany(req_entries, GREFS_PER_GRANT_FRAME);
573 	if (cur + extra > max_nr_grant_frames())
574 		return (ENOSPC);
575 
576 	error = gnttab_map(cur, cur + extra - 1);
577 	if (!error)
578 		error = grow_gnttab_list(extra);
579 
580 	return (error);
581 }
582 
/*
 * Initialise gnttab_list_lock as a recursive default mutex.
 * NOTE(review): recursion is presumably needed because free callbacks are
 * invoked with the lock held and may call back into this file - confirm.
 */
MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE);
584 
585 /*------------------ Private Device Attachment Functions  --------------------*/
586 /**
587  * \brief Identify instances of this device type in the system.
588  *
589  * \param driver  The driver performing this identify action.
590  * \param parent  The NewBus parent device for any devices this method adds.
591  */
592 static void
593 granttable_identify(driver_t *driver __unused, device_t parent)
594 {
595 
596 	KASSERT(xen_domain(),
597 	    ("Trying to attach grant-table device on non Xen domain"));
598 	/*
599 	 * A single device instance for our driver is always present
600 	 * in a system operating under Xen.
601 	 */
602 	if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL)
603 		panic("unable to attach Xen Grant-table device");
604 }
605 
606 /**
607  * \brief Probe for the existence of the Xen Grant-table device
608  *
609  * \param dev  NewBus device_t for this instance.
610  *
611  * \return  Always returns 0 indicating success.
612  */
613 static int
614 granttable_probe(device_t dev)
615 {
616 
617 	device_set_desc(dev, "Xen Grant-table Device");
618 	return (BUS_PROBE_NOWILDCARD);
619 }
620 
621 /**
622  * \brief Attach the Xen Grant-table device.
623  *
624  * \param dev  NewBus device_t for this instance.
625  *
626  * \return  On success, 0. Otherwise an errno value indicating the
627  *          type of failure.
628  */
629 static int
630 granttable_attach(device_t dev)
631 {
632 	int i;
633 	unsigned int max_nr_glist_frames;
634 	unsigned int nr_init_grefs;
635 
636 	nr_grant_frames = 1;
637 	boot_max_nr_grant_frames = __max_nr_grant_frames();
638 
639 	/* Determine the maximum number of frames required for the
640 	 * grant reference free list on the current hypervisor.
641 	 */
642 	max_nr_glist_frames = (boot_max_nr_grant_frames *
643 			       GREFS_PER_GRANT_FRAME /
644 			       (PAGE_SIZE / sizeof(grant_ref_t)));
645 
646 	gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *),
647 	    M_DEVBUF, M_NOWAIT);
648 
649 	if (gnttab_list == NULL)
650 		return (ENOMEM);
651 
652 	for (i = 0; i < nr_grant_frames; i++) {
653 		gnttab_list[i] = (grant_ref_t *)
654 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
655 		if (gnttab_list[i] == NULL)
656 			goto ini_nomem;
657 	}
658 
659 	if (gnttab_resume(dev))
660 		return (ENODEV);
661 
662 	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
663 
664 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
665 		gnttab_entry(i) = i + 1;
666 
667 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
668 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
669 	gnttab_free_head  = NR_RESERVED_ENTRIES;
670 
671 	if (bootverbose)
672 		printf("Grant table initialized\n");
673 
674 	return (0);
675 
676 ini_nomem:
677 	for (i--; i >= 0; i--)
678 		free(gnttab_list[i], M_DEVBUF);
679 	free(gnttab_list, M_DEVBUF);
680 	return (ENOMEM);
681 }
682 
/*-------------------- Private Device Attachment Data  -----------------------*/
/* NewBus method table: identify, probe and attach are implemented. */
static device_method_t granttable_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	granttable_identify),
	DEVMETHOD(device_probe,         granttable_probe),
	DEVMETHOD(device_attach,        granttable_attach),

	DEVMETHOD_END
};

/* Driver class built from the method table; the last argument (softc size) is 0. */
DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0);
devclass_t granttable_devclass;

/* Register the driver on the xenpv bus with SI_ORDER_FIRST ordering. */
DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, granttable_devclass,
    NULL, NULL, SI_ORDER_FIRST);
698