xref: /freebsd/sys/dev/xen/grant_table/grant_table.c (revision d9a42747950146bf03cda7f6e25d219253f8a57a)
1 /******************************************************************************
2  * gnttab.c
3  *
4  * Two sets of functionality:
5  * 1. Granting foreign access to our memory reservation.
6  * 2. Accessing others' memory reservations via grant references.
7  * (i.e., mechanisms for both sender and recipient of grant references)
8  *
9  * Copyright (c) 2005, Christopher Clark
10  * Copyright (c) 2004, K A Fraser
11  */
12 
13 #include <sys/cdefs.h>
14 __FBSDID("$FreeBSD$");
15 
16 #include <sys/param.h>
17 #include <sys/systm.h>
18 #include <sys/bus.h>
19 #include <sys/conf.h>
20 #include <sys/module.h>
21 #include <sys/kernel.h>
22 #include <sys/lock.h>
23 #include <sys/malloc.h>
24 #include <sys/mman.h>
25 #include <sys/limits.h>
26 #include <sys/rman.h>
27 #include <machine/resource.h>
28 #include <machine/cpu.h>
29 
30 #include <xen/xen-os.h>
31 #include <xen/hypervisor.h>
32 #include <xen/gnttab.h>
33 
34 #include <vm/vm.h>
35 #include <vm/vm_kern.h>
36 #include <vm/vm_extern.h>
37 #include <vm/pmap.h>
38 
/*
 * The lowest NR_RESERVED_ENTRIES grant references are left to external
 * tools; the free list built at attach time starts right after them.
 */
#define	NR_RESERVED_ENTRIES	8

/* How many version 1 grant entries fit into one page-sized grant frame. */
#define	GREFS_PER_GRANT_FRAME	(PAGE_SIZE / sizeof(grant_entry_v1_t))
42 
43 static grant_ref_t **gnttab_list;
44 static unsigned int nr_grant_frames;
45 static unsigned int boot_max_nr_grant_frames;
46 static int gnttab_free_count;
47 static grant_ref_t gnttab_free_head;
48 static struct mtx gnttab_list_lock;
49 
50 /*
51  * Resource representing allocated physical address space
52  * for the grant table metainfo
53  */
54 static struct resource *gnttab_pseudo_phys_res;
55 
56 /* Resource id for allocated physical address space. */
57 static int gnttab_pseudo_phys_res_id;
58 
59 static grant_entry_v1_t *shared;
60 
61 static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
62 
63 static int gnttab_expand(unsigned int req_entries);
64 
65 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
66 #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
67 
68 static int
69 get_free_entries(int count, int *entries)
70 {
71 	int ref, error;
72 	grant_ref_t head;
73 
74 	mtx_lock(&gnttab_list_lock);
75 	if ((gnttab_free_count < count) &&
76 	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
77 		mtx_unlock(&gnttab_list_lock);
78 		return (error);
79 	}
80 	ref = head = gnttab_free_head;
81 	gnttab_free_count -= count;
82 	while (count-- > 1)
83 		head = gnttab_entry(head);
84 	gnttab_free_head = gnttab_entry(head);
85 	gnttab_entry(head) = GNTTAB_LIST_END;
86 	mtx_unlock(&gnttab_list_lock);
87 
88 	*entries = ref;
89 	return (0);
90 }
91 
92 static void
93 do_free_callbacks(void)
94 {
95 	struct gnttab_free_callback *callback, *next;
96 
97 	callback = gnttab_free_callback_list;
98 	gnttab_free_callback_list = NULL;
99 
100 	while (callback != NULL) {
101 		next = callback->next;
102 		if (gnttab_free_count >= callback->count) {
103 			callback->next = NULL;
104 			callback->fn(callback->arg);
105 		} else {
106 			callback->next = gnttab_free_callback_list;
107 			gnttab_free_callback_list = callback;
108 		}
109 		callback = next;
110 	}
111 }
112 
113 static inline void
114 check_free_callbacks(void)
115 {
116 	if (__predict_false(gnttab_free_callback_list != NULL))
117 		do_free_callbacks();
118 }
119 
120 static void
121 put_free_entry(grant_ref_t ref)
122 {
123 
124 	mtx_lock(&gnttab_list_lock);
125 	gnttab_entry(ref) = gnttab_free_head;
126 	gnttab_free_head = ref;
127 	gnttab_free_count++;
128 	check_free_callbacks();
129 	mtx_unlock(&gnttab_list_lock);
130 }
131 
132 /*
133  * Public grant-issuing interface functions
134  */
135 
136 int
137 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
138 	grant_ref_t *result)
139 {
140 	int error, ref;
141 
142 	error = get_free_entries(1, &ref);
143 
144 	if (__predict_false(error))
145 		return (error);
146 
147 	shared[ref].frame = frame;
148 	shared[ref].domid = domid;
149 	wmb();
150 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
151 
152 	if (result)
153 		*result = ref;
154 
155 	return (0);
156 }
157 
158 void
159 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
160 				unsigned long frame, int readonly)
161 {
162 
163 	shared[ref].frame = frame;
164 	shared[ref].domid = domid;
165 	wmb();
166 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
167 }
168 
169 int
170 gnttab_query_foreign_access(grant_ref_t ref)
171 {
172 	uint16_t nflags;
173 
174 	nflags = shared[ref].flags;
175 
176 	return (nflags & (GTF_reading|GTF_writing));
177 }
178 
179 int
180 gnttab_end_foreign_access_ref(grant_ref_t ref)
181 {
182 	uint16_t flags;
183 
184 	while (!((flags = atomic_load_16(&shared[ref].flags)) &
185 	    (GTF_reading|GTF_writing)))
186 		if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
187 			return (1);
188 
189 	printf("%s: WARNING: g.e. still in use!\n", __func__);
190 	return (0);
191 }
192 
193 void
194 gnttab_end_foreign_access(grant_ref_t ref, void *page)
195 {
196 	if (gnttab_end_foreign_access_ref(ref)) {
197 		put_free_entry(ref);
198 		if (page != NULL) {
199 			free(page, M_DEVBUF);
200 		}
201 	}
202 	else {
203 		/* XXX This needs to be fixed so that the ref and page are
204 		   placed on a list to be freed up later. */
205 		printf("%s: WARNING: leaking g.e. and page still in use!\n",
206 		       __func__);
207 	}
208 }
209 
210 void
211 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
212 {
213 	grant_ref_t *last_ref;
214 	grant_ref_t  head;
215 	grant_ref_t  tail;
216 
217 	head = GNTTAB_LIST_END;
218 	tail = *refs;
219 	last_ref = refs + count;
220 	while (refs != last_ref) {
221 		if (gnttab_end_foreign_access_ref(*refs)) {
222 			gnttab_entry(*refs) = head;
223 			head = *refs;
224 		} else {
225 			/*
226 			 * XXX This needs to be fixed so that the ref
227 			 * is placed on a list to be freed up later.
228 			 */
229 			printf("%s: WARNING: leaking g.e. still in use!\n",
230 			       __func__);
231 			count--;
232 		}
233 		refs++;
234 	}
235 
236 	if (count != 0) {
237 		mtx_lock(&gnttab_list_lock);
238 		gnttab_free_count += count;
239 		gnttab_entry(tail) = gnttab_free_head;
240 		gnttab_free_head = head;
241 		check_free_callbacks();
242 		mtx_unlock(&gnttab_list_lock);
243 	}
244 }
245 
246 int
247 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
248     grant_ref_t *result)
249 {
250 	int error, ref;
251 
252 	error = get_free_entries(1, &ref);
253 	if (__predict_false(error))
254 		return (error);
255 
256 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
257 
258 	*result = ref;
259 	return (0);
260 }
261 
262 void
263 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
264 	unsigned long pfn)
265 {
266 	shared[ref].frame = pfn;
267 	shared[ref].domid = domid;
268 	wmb();
269 	shared[ref].flags = GTF_accept_transfer;
270 }
271 
272 unsigned long
273 gnttab_end_foreign_transfer_ref(grant_ref_t ref)
274 {
275 	unsigned long frame;
276 	uint16_t      flags;
277 
278 	/*
279          * If a transfer is not even yet started, try to reclaim the grant
280          * reference and return failure (== 0).
281 	 *
282 	 * NOTE: This is a loop since the atomic cmpset can fail multiple
283 	 * times.  In normal operation it will be rare to execute more than
284 	 * twice.  Attempting an attack would consume a great deal of
285 	 * attacker resources and be unlikely to prolong the loop very much.
286          */
287 	while (!((flags = atomic_load_16(&shared[ref].flags)) &
288 	    GTF_transfer_committed))
289 		if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
290 			return (0);
291 
292 	/* If a transfer is in progress then wait until it is completed. */
293 	while (!(flags & GTF_transfer_completed)) {
294 		cpu_spinwait();
295 		flags = atomic_load_16(&shared[ref].flags);
296 	}
297 
298 	/* Read the frame number /after/ reading completion status. */
299 	rmb();
300 	frame = shared[ref].frame;
301 	KASSERT(frame != 0, ("grant table inconsistent"));
302 
303 	return (frame);
304 }
305 
306 unsigned long
307 gnttab_end_foreign_transfer(grant_ref_t ref)
308 {
309 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
310 
311 	put_free_entry(ref);
312 	return (frame);
313 }
314 
315 void
316 gnttab_free_grant_reference(grant_ref_t ref)
317 {
318 
319 	put_free_entry(ref);
320 }
321 
322 void
323 gnttab_free_grant_references(grant_ref_t head)
324 {
325 	grant_ref_t ref;
326 	int count = 1;
327 
328 	if (head == GNTTAB_LIST_END)
329 		return;
330 
331 	ref = head;
332 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
333 		ref = gnttab_entry(ref);
334 		count++;
335 	}
336 	mtx_lock(&gnttab_list_lock);
337 	gnttab_entry(ref) = gnttab_free_head;
338 	gnttab_free_head = head;
339 	gnttab_free_count += count;
340 	check_free_callbacks();
341 	mtx_unlock(&gnttab_list_lock);
342 }
343 
344 int
345 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
346 {
347 	int ref, error;
348 
349 	error = get_free_entries(count, &ref);
350 	if (__predict_false(error))
351 		return (error);
352 
353 	*head = ref;
354 	return (0);
355 }
356 
357 int
358 gnttab_empty_grant_references(const grant_ref_t *private_head)
359 {
360 
361 	return (*private_head == GNTTAB_LIST_END);
362 }
363 
364 int
365 gnttab_claim_grant_reference(grant_ref_t *private_head)
366 {
367 	grant_ref_t g = *private_head;
368 
369 	if (__predict_false(g == GNTTAB_LIST_END))
370 		return (g);
371 	*private_head = gnttab_entry(g);
372 	return (g);
373 }
374 
375 void
376 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
377 {
378 
379 	gnttab_entry(release) = *private_head;
380 	*private_head = release;
381 }
382 
383 void
384 gnttab_request_free_callback(struct gnttab_free_callback *callback,
385     void (*fn)(void *), void *arg, uint16_t count)
386 {
387 
388 	mtx_lock(&gnttab_list_lock);
389 	if (callback->next)
390 		goto out;
391 	callback->fn = fn;
392 	callback->arg = arg;
393 	callback->count = count;
394 	callback->next = gnttab_free_callback_list;
395 	gnttab_free_callback_list = callback;
396 	check_free_callbacks();
397  out:
398 	mtx_unlock(&gnttab_list_lock);
399 
400 }
401 
402 void
403 gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
404 {
405 	struct gnttab_free_callback **pcb;
406 
407 	mtx_lock(&gnttab_list_lock);
408 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
409 		if (*pcb == callback) {
410 			*pcb = callback->next;
411 			break;
412 		}
413 	}
414 	mtx_unlock(&gnttab_list_lock);
415 }
416 
417 static int
418 grow_gnttab_list(unsigned int more_frames)
419 {
420 	unsigned int new_nr_grant_frames, extra_entries, i;
421 
422 	new_nr_grant_frames = nr_grant_frames + more_frames;
423 	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
424 
425 	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
426 	{
427 		gnttab_list[i] = (grant_ref_t *)
428 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
429 
430 		if (!gnttab_list[i])
431 			goto grow_nomem;
432 	}
433 
434 	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
435 	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
436 		gnttab_entry(i) = i + 1;
437 
438 	gnttab_entry(i) = gnttab_free_head;
439 	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
440 	gnttab_free_count += extra_entries;
441 
442 	nr_grant_frames = new_nr_grant_frames;
443 
444 	check_free_callbacks();
445 
446 	return (0);
447 
448 grow_nomem:
449 	for ( ; i >= nr_grant_frames; i--)
450 		free(gnttab_list[i], M_DEVBUF);
451 	return (ENOMEM);
452 }
453 
454 static unsigned int
455 __max_nr_grant_frames(void)
456 {
457 	struct gnttab_query_size query;
458 	int rc;
459 
460 	query.dom = DOMID_SELF;
461 
462 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
463 	if ((rc < 0) || (query.status != GNTST_okay))
464 		return (4); /* Legacy max supported number of frames */
465 
466 	return (query.max_nr_frames);
467 }
468 
469 static inline
470 unsigned int max_nr_grant_frames(void)
471 {
472 
473 	return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames));
474 }
475 
#ifdef notyet
/*
 * XXX needed for backend support
 *
 * Everything up to the matching #endif is compiled only when "notyet"
 * is defined.
 */

/*
 * Install a kernel mapping at `addr` for the frame that *data currently
 * points at, then advance *data to the next frame.
 */
static int
map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
	unsigned long **framep = data;

	set_pte_at(&init_mm, addr, pte, pfn_pte_ma(**framep, PAGE_KERNEL));
	(*framep)++;
	return (0);
}

/* Replace the mapping at `addr` with an empty PTE. */
static int
unmap_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr,
    void *data)
{
	set_pte_at(&init_mm, addr, pte, __pte(0));
	return (0);
}
#endif
501 
502 static vm_paddr_t resume_frames;
503 
504 static void
505 gnttab_map(unsigned int start_idx, unsigned int end_idx)
506 {
507 	struct xen_add_to_physmap xatp;
508 	unsigned int i = end_idx;
509 
510 	/*
511 	 * Loop backwards, so that the first hypercall has the largest index,
512 	 * ensuring that the table will grow only once.
513 	 */
514 	do {
515 		xatp.domid = DOMID_SELF;
516 		xatp.idx = i;
517 		xatp.space = XENMAPSPACE_grant_table;
518 		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
519 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
520 			panic("HYPERVISOR_memory_op failed to map gnttab");
521 	} while (i-- > start_idx);
522 }
523 
524 int
525 gnttab_resume(device_t dev)
526 {
527 	unsigned int max_nr_gframes, nr_gframes;
528 
529 	nr_gframes = nr_grant_frames;
530 	max_nr_gframes = max_nr_grant_frames();
531 	if (max_nr_gframes < nr_gframes)
532 		return (ENOSYS);
533 
534 	if (!resume_frames) {
535 		KASSERT(dev != NULL,
536 		    ("No resume frames and no device provided"));
537 
538 		gnttab_pseudo_phys_res = xenmem_alloc(dev,
539 		    &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes);
540 		if (gnttab_pseudo_phys_res == NULL)
541 			panic("Unable to reserve physical memory for gnttab");
542 		resume_frames = rman_get_start(gnttab_pseudo_phys_res);
543 		shared = rman_get_virtual(gnttab_pseudo_phys_res);
544 	}
545 	gnttab_map(0, nr_gframes - 1);
546 
547 	return (0);
548 }
549 
550 static int
551 gnttab_expand(unsigned int req_entries)
552 {
553 	unsigned int cur, extra;
554 
555 	cur = nr_grant_frames;
556 	extra = howmany(req_entries, GREFS_PER_GRANT_FRAME);
557 	if (cur + extra > max_nr_grant_frames())
558 		return (ENOSPC);
559 
560 	gnttab_map(cur, cur + extra - 1);
561 
562 	return (grow_gnttab_list(extra));
563 }
564 
565 MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE);
566 
567 /*------------------ Private Device Attachment Functions  --------------------*/
568 /**
569  * \brief Identify instances of this device type in the system.
570  *
571  * \param driver  The driver performing this identify action.
572  * \param parent  The NewBus parent device for any devices this method adds.
573  */
574 static void
575 granttable_identify(driver_t *driver __unused, device_t parent)
576 {
577 
578 	KASSERT(xen_domain(),
579 	    ("Trying to attach grant-table device on non Xen domain"));
580 	/*
581 	 * A single device instance for our driver is always present
582 	 * in a system operating under Xen.
583 	 */
584 	if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL)
585 		panic("unable to attach Xen Grant-table device");
586 }
587 
588 /**
589  * \brief Probe for the existence of the Xen Grant-table device
590  *
591  * \param dev  NewBus device_t for this instance.
592  *
593  * \return  Always returns 0 indicating success.
594  */
595 static int
596 granttable_probe(device_t dev)
597 {
598 
599 	device_set_desc(dev, "Xen Grant-table Device");
600 	return (BUS_PROBE_NOWILDCARD);
601 }
602 
603 /**
604  * \brief Attach the Xen Grant-table device.
605  *
606  * \param dev  NewBus device_t for this instance.
607  *
608  * \return  On success, 0. Otherwise an errno value indicating the
609  *          type of failure.
610  */
611 static int
612 granttable_attach(device_t dev)
613 {
614 	int i;
615 	unsigned int max_nr_glist_frames;
616 	unsigned int nr_init_grefs;
617 
618 	nr_grant_frames = 1;
619 	boot_max_nr_grant_frames = __max_nr_grant_frames();
620 
621 	/* Determine the maximum number of frames required for the
622 	 * grant reference free list on the current hypervisor.
623 	 */
624 	max_nr_glist_frames = (boot_max_nr_grant_frames *
625 			       GREFS_PER_GRANT_FRAME /
626 			       (PAGE_SIZE / sizeof(grant_ref_t)));
627 
628 	gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *),
629 	    M_DEVBUF, M_NOWAIT);
630 
631 	if (gnttab_list == NULL)
632 		return (ENOMEM);
633 
634 	for (i = 0; i < nr_grant_frames; i++) {
635 		gnttab_list[i] = (grant_ref_t *)
636 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
637 		if (gnttab_list[i] == NULL)
638 			goto ini_nomem;
639 	}
640 
641 	if (gnttab_resume(dev))
642 		return (ENODEV);
643 
644 	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
645 
646 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
647 		gnttab_entry(i) = i + 1;
648 
649 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
650 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
651 	gnttab_free_head  = NR_RESERVED_ENTRIES;
652 
653 	if (bootverbose)
654 		printf("Grant table initialized\n");
655 
656 	return (0);
657 
658 ini_nomem:
659 	for (i--; i >= 0; i--)
660 		free(gnttab_list[i], M_DEVBUF);
661 	free(gnttab_list, M_DEVBUF);
662 	return (ENOMEM);
663 }
664 
665 /*-------------------- Private Device Attachment Data  -----------------------*/
666 static device_method_t granttable_methods[] = {
667 	/* Device interface */
668 	DEVMETHOD(device_identify,	granttable_identify),
669 	DEVMETHOD(device_probe,         granttable_probe),
670 	DEVMETHOD(device_attach,        granttable_attach),
671 
672 	DEVMETHOD_END
673 };
674 
675 DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0);
676 
677 DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL,
678     SI_ORDER_FIRST);
679