xref: /freebsd/sys/dev/xen/grant_table/grant_table.c (revision 5e3190f700637fcfc1a52daeaa4a031fdd2557c7)
1 /******************************************************************************
2  * gnttab.c
3  *
4  * Two sets of functionality:
5  * 1. Granting foreign access to our memory reservation.
6  * 2. Accessing others' memory reservations via grant references.
7  * (i.e., mechanisms for both sender and recipient of grant references)
8  *
9  * Copyright (c) 2005, Christopher Clark
10  * Copyright (c) 2004, K A Fraser
11  */
12 
13 #include <sys/cdefs.h>
14 #include <sys/param.h>
15 #include <sys/systm.h>
16 #include <sys/bus.h>
17 #include <sys/conf.h>
18 #include <sys/module.h>
19 #include <sys/kernel.h>
20 #include <sys/lock.h>
21 #include <sys/malloc.h>
22 #include <sys/mman.h>
23 #include <sys/limits.h>
24 #include <sys/rman.h>
25 #include <machine/resource.h>
26 #include <machine/cpu.h>
27 
28 #include <xen/xen-os.h>
29 #include <xen/hypervisor.h>
30 #include <xen/gnttab.h>
31 
32 #include <vm/vm.h>
33 #include <vm/vm_kern.h>
34 #include <vm/vm_extern.h>
35 #include <vm/pmap.h>
36 
37 /* External tools reserve first few grant table entries. */
38 #define NR_RESERVED_ENTRIES 8
/* Number of grant_entry_v1_t records that fit in one page. */
39 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t))
40 
/*
 * Free-list state.  gnttab_list is an array of page-sized arrays of
 * grant_ref_t, addressed through gnttab_entry(); each slot holds the
 * reference that follows it on whatever chain it currently belongs to.
 */
41 static grant_ref_t **gnttab_list;
/* Grant frames currently accounted for; set to 1 at attach, grown later. */
42 static unsigned int nr_grant_frames;
/* Result of __max_nr_grant_frames() captured when the device attached. */
43 static unsigned int boot_max_nr_grant_frames;
/* Number of references currently on the global free list. */
44 static int gnttab_free_count;
/* First reference on the global free list. */
45 static grant_ref_t gnttab_free_head;
/* Held while the free list, its count and the callback list are updated. */
46 static struct mtx gnttab_list_lock;
47 
48 /*
49  * Resource representing allocated physical address space
50  * for the grant table metainfo
51  */
52 static struct resource *gnttab_pseudo_phys_res;
53 
54 /* Resource id for allocated physical address space. */
55 static int gnttab_pseudo_phys_res_id;
56 
/*
 * Grant entries indexed by grant reference; assigned in gnttab_resume()
 * from the virtual address of gnttab_pseudo_phys_res.
 */
57 static grant_entry_v1_t *shared;
58 
/* Singly linked list of callers waiting for free references. */
59 static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
60 
61 static int gnttab_expand(unsigned int req_entries);
62 
/* Number of grant_ref_t slots per page of gnttab_list. */
63 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
/* Lvalue for the free-list slot that belongs to grant reference 'entry'. */
64 #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
65 
66 static int
67 get_free_entries(int count, int *entries)
68 {
69 	int ref, error;
70 	grant_ref_t head;
71 
72 	mtx_lock(&gnttab_list_lock);
73 	if ((gnttab_free_count < count) &&
74 	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
75 		mtx_unlock(&gnttab_list_lock);
76 		return (error);
77 	}
78 	ref = head = gnttab_free_head;
79 	gnttab_free_count -= count;
80 	while (count-- > 1)
81 		head = gnttab_entry(head);
82 	gnttab_free_head = gnttab_entry(head);
83 	gnttab_entry(head) = GNTTAB_LIST_END;
84 	mtx_unlock(&gnttab_list_lock);
85 
86 	*entries = ref;
87 	return (0);
88 }
89 
90 static void
91 do_free_callbacks(void)
92 {
93 	struct gnttab_free_callback *callback, *next;
94 
95 	callback = gnttab_free_callback_list;
96 	gnttab_free_callback_list = NULL;
97 
98 	while (callback != NULL) {
99 		next = callback->next;
100 		if (gnttab_free_count >= callback->count) {
101 			callback->next = NULL;
102 			callback->fn(callback->arg);
103 		} else {
104 			callback->next = gnttab_free_callback_list;
105 			gnttab_free_callback_list = callback;
106 		}
107 		callback = next;
108 	}
109 }
110 
111 static inline void
112 check_free_callbacks(void)
113 {
114 	if (__predict_false(gnttab_free_callback_list != NULL))
115 		do_free_callbacks();
116 }
117 
118 static void
119 put_free_entry(grant_ref_t ref)
120 {
121 
122 	mtx_lock(&gnttab_list_lock);
123 	gnttab_entry(ref) = gnttab_free_head;
124 	gnttab_free_head = ref;
125 	gnttab_free_count++;
126 	check_free_callbacks();
127 	mtx_unlock(&gnttab_list_lock);
128 }
129 
130 /*
131  * Public grant-issuing interface functions
132  */
133 
134 int
135 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
136 	grant_ref_t *result)
137 {
138 	int error, ref;
139 
140 	error = get_free_entries(1, &ref);
141 
142 	if (__predict_false(error))
143 		return (error);
144 
145 	shared[ref].frame = frame;
146 	shared[ref].domid = domid;
147 	wmb();
148 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
149 
150 	if (result)
151 		*result = ref;
152 
153 	return (0);
154 }
155 
156 void
157 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
158 				unsigned long frame, int readonly)
159 {
160 
161 	shared[ref].frame = frame;
162 	shared[ref].domid = domid;
163 	wmb();
164 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
165 }
166 
167 int
168 gnttab_query_foreign_access(grant_ref_t ref)
169 {
170 	uint16_t nflags;
171 
172 	nflags = shared[ref].flags;
173 
174 	return (nflags & (GTF_reading|GTF_writing));
175 }
176 
177 int
178 gnttab_end_foreign_access_ref(grant_ref_t ref)
179 {
180 	uint16_t flags;
181 
182 	while (!((flags = atomic_load_16(&shared[ref].flags)) &
183 	    (GTF_reading|GTF_writing)))
184 		if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
185 			return (1);
186 
187 	printf("%s: WARNING: g.e. still in use!\n", __func__);
188 	return (0);
189 }
190 
191 void
192 gnttab_end_foreign_access(grant_ref_t ref, void *page)
193 {
194 	if (gnttab_end_foreign_access_ref(ref)) {
195 		put_free_entry(ref);
196 		if (page != NULL) {
197 			free(page, M_DEVBUF);
198 		}
199 	}
200 	else {
201 		/* XXX This needs to be fixed so that the ref and page are
202 		   placed on a list to be freed up later. */
203 		printf("%s: WARNING: leaking g.e. and page still in use!\n",
204 		       __func__);
205 	}
206 }
207 
208 void
209 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
210 {
211 	grant_ref_t *last_ref;
212 	grant_ref_t  head;
213 	grant_ref_t  tail;
214 
215 	head = GNTTAB_LIST_END;
216 	tail = *refs;
217 	last_ref = refs + count;
218 	while (refs != last_ref) {
219 		if (gnttab_end_foreign_access_ref(*refs)) {
220 			gnttab_entry(*refs) = head;
221 			head = *refs;
222 		} else {
223 			/*
224 			 * XXX This needs to be fixed so that the ref
225 			 * is placed on a list to be freed up later.
226 			 */
227 			printf("%s: WARNING: leaking g.e. still in use!\n",
228 			       __func__);
229 			count--;
230 		}
231 		refs++;
232 	}
233 
234 	if (count != 0) {
235 		mtx_lock(&gnttab_list_lock);
236 		gnttab_free_count += count;
237 		gnttab_entry(tail) = gnttab_free_head;
238 		gnttab_free_head = head;
239 		check_free_callbacks();
240 		mtx_unlock(&gnttab_list_lock);
241 	}
242 }
243 
244 int
245 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
246     grant_ref_t *result)
247 {
248 	int error, ref;
249 
250 	error = get_free_entries(1, &ref);
251 	if (__predict_false(error))
252 		return (error);
253 
254 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
255 
256 	*result = ref;
257 	return (0);
258 }
259 
260 void
261 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
262 	unsigned long pfn)
263 {
264 	shared[ref].frame = pfn;
265 	shared[ref].domid = domid;
266 	wmb();
267 	shared[ref].flags = GTF_accept_transfer;
268 }
269 
270 unsigned long
271 gnttab_end_foreign_transfer_ref(grant_ref_t ref)
272 {
273 	unsigned long frame;
274 	uint16_t      flags;
275 
276 	/*
277          * If a transfer is not even yet started, try to reclaim the grant
278          * reference and return failure (== 0).
279 	 *
280 	 * NOTE: This is a loop since the atomic cmpset can fail multiple
281 	 * times.  In normal operation it will be rare to execute more than
282 	 * twice.  Attempting an attack would consume a great deal of
283 	 * attacker resources and be unlikely to prolong the loop very much.
284          */
285 	while (!((flags = atomic_load_16(&shared[ref].flags)) &
286 	    GTF_transfer_committed))
287 		if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
288 			return (0);
289 
290 	/* If a transfer is in progress then wait until it is completed. */
291 	while (!(flags & GTF_transfer_completed)) {
292 		cpu_spinwait();
293 		flags = atomic_load_16(&shared[ref].flags);
294 	}
295 
296 	/* Read the frame number /after/ reading completion status. */
297 	rmb();
298 	frame = shared[ref].frame;
299 	KASSERT(frame != 0, ("grant table inconsistent"));
300 
301 	return (frame);
302 }
303 
304 unsigned long
305 gnttab_end_foreign_transfer(grant_ref_t ref)
306 {
307 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
308 
309 	put_free_entry(ref);
310 	return (frame);
311 }
312 
313 void
314 gnttab_free_grant_reference(grant_ref_t ref)
315 {
316 
317 	put_free_entry(ref);
318 }
319 
320 void
321 gnttab_free_grant_references(grant_ref_t head)
322 {
323 	grant_ref_t ref;
324 	int count = 1;
325 
326 	if (head == GNTTAB_LIST_END)
327 		return;
328 
329 	ref = head;
330 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
331 		ref = gnttab_entry(ref);
332 		count++;
333 	}
334 	mtx_lock(&gnttab_list_lock);
335 	gnttab_entry(ref) = gnttab_free_head;
336 	gnttab_free_head = head;
337 	gnttab_free_count += count;
338 	check_free_callbacks();
339 	mtx_unlock(&gnttab_list_lock);
340 }
341 
342 int
343 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
344 {
345 	int ref, error;
346 
347 	error = get_free_entries(count, &ref);
348 	if (__predict_false(error))
349 		return (error);
350 
351 	*head = ref;
352 	return (0);
353 }
354 
355 int
356 gnttab_empty_grant_references(const grant_ref_t *private_head)
357 {
358 
359 	return (*private_head == GNTTAB_LIST_END);
360 }
361 
362 int
363 gnttab_claim_grant_reference(grant_ref_t *private_head)
364 {
365 	grant_ref_t g = *private_head;
366 
367 	if (__predict_false(g == GNTTAB_LIST_END))
368 		return (g);
369 	*private_head = gnttab_entry(g);
370 	return (g);
371 }
372 
373 void
374 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
375 {
376 
377 	gnttab_entry(release) = *private_head;
378 	*private_head = release;
379 }
380 
381 void
382 gnttab_request_free_callback(struct gnttab_free_callback *callback,
383     void (*fn)(void *), void *arg, uint16_t count)
384 {
385 
386 	mtx_lock(&gnttab_list_lock);
387 	if (callback->next)
388 		goto out;
389 	callback->fn = fn;
390 	callback->arg = arg;
391 	callback->count = count;
392 	callback->next = gnttab_free_callback_list;
393 	gnttab_free_callback_list = callback;
394 	check_free_callbacks();
395  out:
396 	mtx_unlock(&gnttab_list_lock);
397 
398 }
399 
400 void
401 gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
402 {
403 	struct gnttab_free_callback **pcb;
404 
405 	mtx_lock(&gnttab_list_lock);
406 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
407 		if (*pcb == callback) {
408 			*pcb = callback->next;
409 			break;
410 		}
411 	}
412 	mtx_unlock(&gnttab_list_lock);
413 }
414 
415 static int
416 grow_gnttab_list(unsigned int more_frames)
417 {
418 	unsigned int new_nr_grant_frames, extra_entries, i;
419 
420 	new_nr_grant_frames = nr_grant_frames + more_frames;
421 	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
422 
423 	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
424 	{
425 		gnttab_list[i] = (grant_ref_t *)
426 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
427 
428 		if (!gnttab_list[i])
429 			goto grow_nomem;
430 	}
431 
432 	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
433 	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
434 		gnttab_entry(i) = i + 1;
435 
436 	gnttab_entry(i) = gnttab_free_head;
437 	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
438 	gnttab_free_count += extra_entries;
439 
440 	nr_grant_frames = new_nr_grant_frames;
441 
442 	check_free_callbacks();
443 
444 	return (0);
445 
446 grow_nomem:
447 	for ( ; i >= nr_grant_frames; i--)
448 		free(gnttab_list[i], M_DEVBUF);
449 	return (ENOMEM);
450 }
451 
452 static unsigned int
453 __max_nr_grant_frames(void)
454 {
455 	struct gnttab_query_size query;
456 	int rc;
457 
458 	query.dom = DOMID_SELF;
459 
460 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
461 	if ((rc < 0) || (query.status != GNTST_okay))
462 		return (4); /* Legacy max supported number of frames */
463 
464 	return (query.max_nr_frames);
465 }
466 
467 static inline
468 unsigned int max_nr_grant_frames(void)
469 {
470 
471 	return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames));
472 }
473 
/*
 * Everything inside this #ifdef is compiled only when 'notyet' is
 * defined; nothing else in this file references map_pte_fn() or
 * unmap_pte_fn().
 * NOTE(review): the helpers used here (set_pte_at, init_mm, pfn_pte_ma,
 * __pte) are not defined in this file and look like they come from a
 * different kernel's API -- confirm before enabling.
 */
474 #ifdef notyet
475 /*
476  * XXX needed for backend support
477  *
478  */
/*
 * Installs a PTE for 'addr' built from the first frame number in the
 * array that 'data' points to, then advances that array pointer.
 */
479 static int
480 map_pte_fn(pte_t *pte, struct page *pmd_page,
481 		      unsigned long addr, void *data)
482 {
483 	unsigned long **frames = (unsigned long **)data;
484 
485 	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
486 	(*frames)++;
487 	return 0;
488 }
489 
/* Replaces the PTE for 'addr' with __pte(0). */
490 static int
491 unmap_pte_fn(pte_t *pte, struct page *pmd_page,
492 			unsigned long addr, void *data)
493 {
494 
495 	set_pte_at(&init_mm, addr, pte, __pte(0));
496 	return 0;
497 }
498 #endif
499 
500 static vm_paddr_t resume_frames;
501 
502 static void
503 gnttab_map(unsigned int start_idx, unsigned int end_idx)
504 {
505 	struct xen_add_to_physmap xatp;
506 	unsigned int i = end_idx;
507 
508 	/*
509 	 * Loop backwards, so that the first hypercall has the largest index,
510 	 * ensuring that the table will grow only once.
511 	 */
512 	do {
513 		xatp.domid = DOMID_SELF;
514 		xatp.idx = i;
515 		xatp.space = XENMAPSPACE_grant_table;
516 		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
517 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
518 			panic("HYPERVISOR_memory_op failed to map gnttab");
519 	} while (i-- > start_idx);
520 }
521 
522 int
523 gnttab_resume(device_t dev)
524 {
525 	unsigned int max_nr_gframes, nr_gframes;
526 
527 	nr_gframes = nr_grant_frames;
528 	max_nr_gframes = max_nr_grant_frames();
529 	if (max_nr_gframes < nr_gframes)
530 		return (ENOSYS);
531 
532 	if (!resume_frames) {
533 		KASSERT(dev != NULL,
534 		    ("No resume frames and no device provided"));
535 
536 		gnttab_pseudo_phys_res = xenmem_alloc(dev,
537 		    &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes);
538 		if (gnttab_pseudo_phys_res == NULL)
539 			panic("Unable to reserve physical memory for gnttab");
540 		resume_frames = rman_get_start(gnttab_pseudo_phys_res);
541 		shared = rman_get_virtual(gnttab_pseudo_phys_res);
542 	}
543 	gnttab_map(0, nr_gframes - 1);
544 
545 	return (0);
546 }
547 
548 static int
549 gnttab_expand(unsigned int req_entries)
550 {
551 	unsigned int cur, extra;
552 
553 	cur = nr_grant_frames;
554 	extra = howmany(req_entries, GREFS_PER_GRANT_FRAME);
555 	if (cur + extra > max_nr_grant_frames())
556 		return (ENOSPC);
557 
558 	gnttab_map(cur, cur + extra - 1);
559 
560 	return (grow_gnttab_list(extra));
561 }
562 
/*
 * Set up gnttab_list_lock as a default mutex that may be acquired
 * recursively (MTX_RECURSE).  Free-callbacks are invoked by
 * do_free_callbacks() while this lock is held.
 * NOTE(review): presumably recursion is allowed so that such a callback
 * can call back into the allocation/free routines -- confirm.
 */
563 MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE);
564 
565 /*------------------ Private Device Attachment Functions  --------------------*/
566 /**
567  * \brief Identify instances of this device type in the system.
568  *
569  * \param driver  The driver performing this identify action.
570  * \param parent  The NewBus parent device for any devices this method adds.
571  */
572 static void
573 granttable_identify(driver_t *driver __unused, device_t parent)
574 {
575 
576 	KASSERT(xen_domain(),
577 	    ("Trying to attach grant-table device on non Xen domain"));
578 	/*
579 	 * A single device instance for our driver is always present
580 	 * in a system operating under Xen.
581 	 */
582 	if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL)
583 		panic("unable to attach Xen Grant-table device");
584 }
585 
586 /**
587  * \brief Probe for the existence of the Xen Grant-table device
588  *
589  * \param dev  NewBus device_t for this instance.
590  *
591  * \return  Always returns 0 indicating success.
592  */
593 static int
594 granttable_probe(device_t dev)
595 {
596 
597 	device_set_desc(dev, "Xen Grant-table Device");
598 	return (BUS_PROBE_NOWILDCARD);
599 }
600 
601 /**
602  * \brief Attach the Xen Grant-table device.
603  *
604  * \param dev  NewBus device_t for this instance.
605  *
606  * \return  On success, 0. Otherwise an errno value indicating the
607  *          type of failure.
608  */
609 static int
610 granttable_attach(device_t dev)
611 {
612 	int i;
613 	unsigned int max_nr_glist_frames;
614 	unsigned int nr_init_grefs;
615 
616 	nr_grant_frames = 1;
617 	boot_max_nr_grant_frames = __max_nr_grant_frames();
618 
619 	/* Determine the maximum number of frames required for the
620 	 * grant reference free list on the current hypervisor.
621 	 */
622 	max_nr_glist_frames = (boot_max_nr_grant_frames *
623 			       GREFS_PER_GRANT_FRAME /
624 			       (PAGE_SIZE / sizeof(grant_ref_t)));
625 
626 	gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *),
627 	    M_DEVBUF, M_NOWAIT);
628 
629 	if (gnttab_list == NULL)
630 		return (ENOMEM);
631 
632 	for (i = 0; i < nr_grant_frames; i++) {
633 		gnttab_list[i] = (grant_ref_t *)
634 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
635 		if (gnttab_list[i] == NULL)
636 			goto ini_nomem;
637 	}
638 
639 	if (gnttab_resume(dev))
640 		return (ENODEV);
641 
642 	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
643 
644 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
645 		gnttab_entry(i) = i + 1;
646 
647 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
648 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
649 	gnttab_free_head  = NR_RESERVED_ENTRIES;
650 
651 	if (bootverbose)
652 		printf("Grant table initialized\n");
653 
654 	return (0);
655 
656 ini_nomem:
657 	for (i--; i >= 0; i--)
658 		free(gnttab_list[i], M_DEVBUF);
659 	free(gnttab_list, M_DEVBUF);
660 	return (ENOMEM);
661 }
662 
663 /*-------------------- Private Device Attachment Data  -----------------------*/
/*
 * Method table for the grant-table device.  Only identify, probe and
 * attach are provided here.
 */
664 static device_method_t granttable_methods[] = {
665 	/* Device interface */
666 	DEVMETHOD(device_identify,	granttable_identify),
667 	DEVMETHOD(device_probe,         granttable_probe),
668 	DEVMETHOD(device_attach,        granttable_attach),
669 
670 	DEVMETHOD_END
671 };
672 
/* Driver class "granttable"; the softc size argument is 0. */
673 DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0);
674 
/*
 * Register the driver with the "xenpv" bus, using SI_ORDER_FIRST as its
 * ordering and no module event handler.
 */
675 DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL,
676     SI_ORDER_FIRST);
677