xref: /freebsd/sys/dev/xen/grant_table/grant_table.c (revision a03411e84728e9b267056fd31c7d1d9d1dc1b01e)
1 /******************************************************************************
2  * gnttab.c
3  *
4  * Two sets of functionality:
5  * 1. Granting foreign access to our memory reservation.
6  * 2. Accessing others' memory reservations via grant references.
7  * (i.e., mechanisms for both sender and recipient of grant references)
8  *
9  * Copyright (c) 2005, Christopher Clark
10  * Copyright (c) 2004, K A Fraser
11  */
12 
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/bus.h>
16 #include <sys/conf.h>
17 #include <sys/module.h>
18 #include <sys/kernel.h>
19 #include <sys/lock.h>
20 #include <sys/malloc.h>
21 #include <sys/mman.h>
22 #include <sys/limits.h>
23 #include <sys/rman.h>
24 #include <machine/resource.h>
25 #include <machine/cpu.h>
26 
27 #include <xen/xen-os.h>
28 #include <xen/hypervisor.h>
29 #include <xen/gnttab.h>
30 
31 #include <vm/vm.h>
32 #include <vm/vm_kern.h>
33 #include <vm/vm_extern.h>
34 #include <vm/pmap.h>
35 
36 /* External tools reserve first few grant table entries. */
37 #define NR_RESERVED_ENTRIES 8
38 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t))
39 
40 static grant_ref_t **gnttab_list;
41 static unsigned int nr_grant_frames;
42 static unsigned int boot_max_nr_grant_frames;
43 static int gnttab_free_count;
44 static grant_ref_t gnttab_free_head;
45 static struct mtx gnttab_list_lock;
46 
47 /*
48  * Resource representing allocated physical address space
49  * for the grant table metainfo
50  */
51 static struct resource *gnttab_pseudo_phys_res;
52 
53 /* Resource id for allocated physical address space. */
54 static int gnttab_pseudo_phys_res_id;
55 
56 static grant_entry_v1_t *shared;
57 
58 static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
59 
60 static int gnttab_expand(unsigned int req_entries);
61 
62 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
63 #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
64 
65 static int
66 get_free_entries(int count, int *entries)
67 {
68 	int ref, error;
69 	grant_ref_t head;
70 
71 	mtx_lock(&gnttab_list_lock);
72 	if ((gnttab_free_count < count) &&
73 	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
74 		mtx_unlock(&gnttab_list_lock);
75 		return (error);
76 	}
77 	ref = head = gnttab_free_head;
78 	gnttab_free_count -= count;
79 	while (count-- > 1)
80 		head = gnttab_entry(head);
81 	gnttab_free_head = gnttab_entry(head);
82 	gnttab_entry(head) = GNTTAB_LIST_END;
83 	mtx_unlock(&gnttab_list_lock);
84 
85 	*entries = ref;
86 	return (0);
87 }
88 
89 static void
90 do_free_callbacks(void)
91 {
92 	struct gnttab_free_callback *callback, *next;
93 
94 	callback = gnttab_free_callback_list;
95 	gnttab_free_callback_list = NULL;
96 
97 	while (callback != NULL) {
98 		next = callback->next;
99 		if (gnttab_free_count >= callback->count) {
100 			callback->next = NULL;
101 			callback->fn(callback->arg);
102 		} else {
103 			callback->next = gnttab_free_callback_list;
104 			gnttab_free_callback_list = callback;
105 		}
106 		callback = next;
107 	}
108 }
109 
110 static inline void
111 check_free_callbacks(void)
112 {
113 	if (__predict_false(gnttab_free_callback_list != NULL))
114 		do_free_callbacks();
115 }
116 
117 static void
118 put_free_entry(grant_ref_t ref)
119 {
120 
121 	mtx_lock(&gnttab_list_lock);
122 	gnttab_entry(ref) = gnttab_free_head;
123 	gnttab_free_head = ref;
124 	gnttab_free_count++;
125 	check_free_callbacks();
126 	mtx_unlock(&gnttab_list_lock);
127 }
128 
129 /*
130  * Public grant-issuing interface functions
131  */
132 
133 int
134 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
135 	grant_ref_t *result)
136 {
137 	int error, ref;
138 
139 	error = get_free_entries(1, &ref);
140 
141 	if (__predict_false(error))
142 		return (error);
143 
144 	shared[ref].frame = frame;
145 	shared[ref].domid = domid;
146 	wmb();
147 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
148 
149 	if (result)
150 		*result = ref;
151 
152 	return (0);
153 }
154 
155 void
156 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
157 				unsigned long frame, int readonly)
158 {
159 
160 	shared[ref].frame = frame;
161 	shared[ref].domid = domid;
162 	wmb();
163 	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
164 }
165 
166 int
167 gnttab_query_foreign_access(grant_ref_t ref)
168 {
169 	uint16_t nflags;
170 
171 	nflags = shared[ref].flags;
172 
173 	return (nflags & (GTF_reading|GTF_writing));
174 }
175 
176 int
177 gnttab_end_foreign_access_ref(grant_ref_t ref)
178 {
179 	uint16_t flags;
180 
181 	while (!((flags = atomic_load_16(&shared[ref].flags)) &
182 	    (GTF_reading|GTF_writing)))
183 		if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
184 			return (1);
185 
186 	printf("%s: WARNING: g.e. still in use!\n", __func__);
187 	return (0);
188 }
189 
190 void
191 gnttab_end_foreign_access(grant_ref_t ref, void *page)
192 {
193 	if (gnttab_end_foreign_access_ref(ref)) {
194 		put_free_entry(ref);
195 		if (page != NULL) {
196 			free(page, M_DEVBUF);
197 		}
198 	}
199 	else {
200 		/* XXX This needs to be fixed so that the ref and page are
201 		   placed on a list to be freed up later. */
202 		printf("%s: WARNING: leaking g.e. and page still in use!\n",
203 		       __func__);
204 	}
205 }
206 
207 void
208 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
209 {
210 	grant_ref_t *last_ref;
211 	grant_ref_t  head;
212 	grant_ref_t  tail;
213 
214 	head = GNTTAB_LIST_END;
215 	tail = *refs;
216 	last_ref = refs + count;
217 	while (refs != last_ref) {
218 		if (gnttab_end_foreign_access_ref(*refs)) {
219 			gnttab_entry(*refs) = head;
220 			head = *refs;
221 		} else {
222 			/*
223 			 * XXX This needs to be fixed so that the ref
224 			 * is placed on a list to be freed up later.
225 			 */
226 			printf("%s: WARNING: leaking g.e. still in use!\n",
227 			       __func__);
228 			count--;
229 		}
230 		refs++;
231 	}
232 
233 	if (count != 0) {
234 		mtx_lock(&gnttab_list_lock);
235 		gnttab_free_count += count;
236 		gnttab_entry(tail) = gnttab_free_head;
237 		gnttab_free_head = head;
238 		check_free_callbacks();
239 		mtx_unlock(&gnttab_list_lock);
240 	}
241 }
242 
243 int
244 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
245     grant_ref_t *result)
246 {
247 	int error, ref;
248 
249 	error = get_free_entries(1, &ref);
250 	if (__predict_false(error))
251 		return (error);
252 
253 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
254 
255 	*result = ref;
256 	return (0);
257 }
258 
259 void
260 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
261 	unsigned long pfn)
262 {
263 	shared[ref].frame = pfn;
264 	shared[ref].domid = domid;
265 	wmb();
266 	shared[ref].flags = GTF_accept_transfer;
267 }
268 
269 unsigned long
270 gnttab_end_foreign_transfer_ref(grant_ref_t ref)
271 {
272 	unsigned long frame;
273 	uint16_t      flags;
274 
275 	/*
276          * If a transfer is not even yet started, try to reclaim the grant
277          * reference and return failure (== 0).
278 	 *
279 	 * NOTE: This is a loop since the atomic cmpset can fail multiple
280 	 * times.  In normal operation it will be rare to execute more than
281 	 * twice.  Attempting an attack would consume a great deal of
282 	 * attacker resources and be unlikely to prolong the loop very much.
283          */
284 	while (!((flags = atomic_load_16(&shared[ref].flags)) &
285 	    GTF_transfer_committed))
286 		if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
287 			return (0);
288 
289 	/* If a transfer is in progress then wait until it is completed. */
290 	while (!(flags & GTF_transfer_completed)) {
291 		cpu_spinwait();
292 		flags = atomic_load_16(&shared[ref].flags);
293 	}
294 
295 	/* Read the frame number /after/ reading completion status. */
296 	rmb();
297 	frame = shared[ref].frame;
298 	KASSERT(frame != 0, ("grant table inconsistent"));
299 
300 	return (frame);
301 }
302 
303 unsigned long
304 gnttab_end_foreign_transfer(grant_ref_t ref)
305 {
306 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
307 
308 	put_free_entry(ref);
309 	return (frame);
310 }
311 
312 void
313 gnttab_free_grant_reference(grant_ref_t ref)
314 {
315 
316 	put_free_entry(ref);
317 }
318 
319 void
320 gnttab_free_grant_references(grant_ref_t head)
321 {
322 	grant_ref_t ref;
323 	int count = 1;
324 
325 	if (head == GNTTAB_LIST_END)
326 		return;
327 
328 	ref = head;
329 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
330 		ref = gnttab_entry(ref);
331 		count++;
332 	}
333 	mtx_lock(&gnttab_list_lock);
334 	gnttab_entry(ref) = gnttab_free_head;
335 	gnttab_free_head = head;
336 	gnttab_free_count += count;
337 	check_free_callbacks();
338 	mtx_unlock(&gnttab_list_lock);
339 }
340 
341 int
342 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
343 {
344 	int ref, error;
345 
346 	error = get_free_entries(count, &ref);
347 	if (__predict_false(error))
348 		return (error);
349 
350 	*head = ref;
351 	return (0);
352 }
353 
354 int
355 gnttab_empty_grant_references(const grant_ref_t *private_head)
356 {
357 
358 	return (*private_head == GNTTAB_LIST_END);
359 }
360 
361 int
362 gnttab_claim_grant_reference(grant_ref_t *private_head)
363 {
364 	grant_ref_t g = *private_head;
365 
366 	if (__predict_false(g == GNTTAB_LIST_END))
367 		return (g);
368 	*private_head = gnttab_entry(g);
369 	return (g);
370 }
371 
372 void
373 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
374 {
375 
376 	gnttab_entry(release) = *private_head;
377 	*private_head = release;
378 }
379 
380 void
381 gnttab_request_free_callback(struct gnttab_free_callback *callback,
382     void (*fn)(void *), void *arg, uint16_t count)
383 {
384 
385 	mtx_lock(&gnttab_list_lock);
386 	if (callback->next)
387 		goto out;
388 	callback->fn = fn;
389 	callback->arg = arg;
390 	callback->count = count;
391 	callback->next = gnttab_free_callback_list;
392 	gnttab_free_callback_list = callback;
393 	check_free_callbacks();
394  out:
395 	mtx_unlock(&gnttab_list_lock);
396 
397 }
398 
399 void
400 gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
401 {
402 	struct gnttab_free_callback **pcb;
403 
404 	mtx_lock(&gnttab_list_lock);
405 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
406 		if (*pcb == callback) {
407 			*pcb = callback->next;
408 			break;
409 		}
410 	}
411 	mtx_unlock(&gnttab_list_lock);
412 }
413 
414 static int
415 grow_gnttab_list(unsigned int more_frames)
416 {
417 	unsigned int new_nr_grant_frames, extra_entries, i;
418 
419 	new_nr_grant_frames = nr_grant_frames + more_frames;
420 	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
421 
422 	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
423 	{
424 		gnttab_list[i] = (grant_ref_t *)
425 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
426 
427 		if (!gnttab_list[i])
428 			goto grow_nomem;
429 	}
430 
431 	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
432 	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
433 		gnttab_entry(i) = i + 1;
434 
435 	gnttab_entry(i) = gnttab_free_head;
436 	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
437 	gnttab_free_count += extra_entries;
438 
439 	nr_grant_frames = new_nr_grant_frames;
440 
441 	check_free_callbacks();
442 
443 	return (0);
444 
445 grow_nomem:
446 	for ( ; i >= nr_grant_frames; i--)
447 		free(gnttab_list[i], M_DEVBUF);
448 	return (ENOMEM);
449 }
450 
451 static unsigned int
452 __max_nr_grant_frames(void)
453 {
454 	struct gnttab_query_size query;
455 	int rc;
456 
457 	query.dom = DOMID_SELF;
458 
459 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
460 	if ((rc < 0) || (query.status != GNTST_okay))
461 		return (4); /* Legacy max supported number of frames */
462 
463 	return (query.max_nr_frames);
464 }
465 
466 static inline
467 unsigned int max_nr_grant_frames(void)
468 {
469 
470 	return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames));
471 }
472 
#ifdef notyet
/*
 * XXX needed for backend support
 *
 * Only compiled when `notyet' is defined.
 */

/*
 * Install a kernel mapping at `addr' for the frame *data currently points
 * at, then advance that frame pointer to the next frame.
 */
static int
map_pte_fn(pte_t *pte, struct page *pmd_page,
		      unsigned long addr, void *data)
{
	unsigned long **framep = (unsigned long **)data;
	unsigned long mfn = **framep;

	set_pte_at(&init_mm, addr, pte, pfn_pte_ma(mfn, PAGE_KERNEL));
	*framep += 1;
	return 0;
}

/* Clear the page table entry for `addr'. */
static int
unmap_pte_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{
	set_pte_at(&init_mm, addr, pte, __pte(0));
	return 0;
}
#endif
498 
499 static vm_paddr_t resume_frames;
500 
501 static void
502 gnttab_map(unsigned int start_idx, unsigned int end_idx)
503 {
504 	struct xen_add_to_physmap xatp;
505 	unsigned int i = end_idx;
506 
507 	/*
508 	 * Loop backwards, so that the first hypercall has the largest index,
509 	 * ensuring that the table will grow only once.
510 	 */
511 	do {
512 		xatp.domid = DOMID_SELF;
513 		xatp.idx = i;
514 		xatp.space = XENMAPSPACE_grant_table;
515 		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
516 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
517 			panic("HYPERVISOR_memory_op failed to map gnttab");
518 	} while (i-- > start_idx);
519 }
520 
521 int
522 gnttab_resume(device_t dev)
523 {
524 	unsigned int max_nr_gframes, nr_gframes;
525 
526 	nr_gframes = nr_grant_frames;
527 	max_nr_gframes = max_nr_grant_frames();
528 	if (max_nr_gframes < nr_gframes)
529 		return (ENOSYS);
530 
531 	if (!resume_frames) {
532 		KASSERT(dev != NULL,
533 		    ("No resume frames and no device provided"));
534 
535 		gnttab_pseudo_phys_res = xenmem_alloc(dev,
536 		    &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes);
537 		if (gnttab_pseudo_phys_res == NULL)
538 			panic("Unable to reserve physical memory for gnttab");
539 		resume_frames = rman_get_start(gnttab_pseudo_phys_res);
540 		shared = rman_get_virtual(gnttab_pseudo_phys_res);
541 	}
542 	gnttab_map(0, nr_gframes - 1);
543 
544 	return (0);
545 }
546 
547 static int
548 gnttab_expand(unsigned int req_entries)
549 {
550 	unsigned int cur, extra;
551 
552 	cur = nr_grant_frames;
553 	extra = howmany(req_entries, GREFS_PER_GRANT_FRAME);
554 	if (cur + extra > max_nr_grant_frames())
555 		return (ENOSPC);
556 
557 	gnttab_map(cur, cur + extra - 1);
558 
559 	return (grow_gnttab_list(extra));
560 }
561 
562 MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE);
563 
564 /*------------------ Private Device Attachment Functions  --------------------*/
565 /**
566  * \brief Identify instances of this device type in the system.
567  *
568  * \param driver  The driver performing this identify action.
569  * \param parent  The NewBus parent device for any devices this method adds.
570  */
571 static void
572 granttable_identify(driver_t *driver, device_t parent)
573 {
574 
575 	KASSERT(xen_domain(),
576 	    ("Trying to attach grant-table device on non Xen domain"));
577 	/*
578 	 * A single device instance for our driver is always present
579 	 * in a system operating under Xen.
580 	 */
581 	if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL)
582 		panic("unable to attach Xen Grant-table device");
583 }
584 
585 /**
586  * \brief Probe for the existence of the Xen Grant-table device
587  *
588  * \param dev  NewBus device_t for this instance.
589  *
590  * \return  Always returns 0 indicating success.
591  */
592 static int
593 granttable_probe(device_t dev)
594 {
595 
596 	device_set_desc(dev, "Xen Grant-table Device");
597 	return (BUS_PROBE_NOWILDCARD);
598 }
599 
600 /**
601  * \brief Attach the Xen Grant-table device.
602  *
603  * \param dev  NewBus device_t for this instance.
604  *
605  * \return  On success, 0. Otherwise an errno value indicating the
606  *          type of failure.
607  */
608 static int
609 granttable_attach(device_t dev)
610 {
611 	int i;
612 	unsigned int nr_init_grefs;
613 
614 	nr_grant_frames = 1;
615 	boot_max_nr_grant_frames = __max_nr_grant_frames();
616 
617 	gnttab_list = malloc(boot_max_nr_grant_frames * sizeof(grant_ref_t *),
618 	    M_DEVBUF, M_NOWAIT);
619 
620 	if (gnttab_list == NULL)
621 		return (ENOMEM);
622 
623 	for (i = 0; i < nr_grant_frames; i++) {
624 		gnttab_list[i] = (grant_ref_t *)
625 			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
626 		if (gnttab_list[i] == NULL)
627 			goto ini_nomem;
628 	}
629 
630 	if (gnttab_resume(dev))
631 		return (ENODEV);
632 
633 	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
634 
635 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
636 		gnttab_entry(i) = i + 1;
637 
638 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
639 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
640 	gnttab_free_head  = NR_RESERVED_ENTRIES;
641 
642 	if (bootverbose)
643 		printf("Grant table initialized\n");
644 
645 	return (0);
646 
647 ini_nomem:
648 	for (i--; i >= 0; i--)
649 		free(gnttab_list[i], M_DEVBUF);
650 	free(gnttab_list, M_DEVBUF);
651 	return (ENOMEM);
652 }
653 
654 /*-------------------- Private Device Attachment Data  -----------------------*/
655 static device_method_t granttable_methods[] = {
656 	/* Device interface */
657 	DEVMETHOD(device_identify,	granttable_identify),
658 	DEVMETHOD(device_probe,         granttable_probe),
659 	DEVMETHOD(device_attach,        granttable_attach),
660 
661 	DEVMETHOD_END
662 };
663 
664 DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0);
665 
666 DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL,
667     SI_ORDER_FIRST);
668