1 /******************************************************************************
2 * gnttab.c
3 *
4 * Two sets of functionality:
5 * 1. Granting foreign access to our memory reservation.
6 * 2. Accessing others' memory reservations via grant references.
7 * (i.e., mechanisms for both sender and recipient of grant references)
8 *
9 * Copyright (c) 2005, Christopher Clark
10 * Copyright (c) 2004, K A Fraser
11 */
12
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/bus.h>
16 #include <sys/conf.h>
17 #include <sys/module.h>
18 #include <sys/kernel.h>
19 #include <sys/lock.h>
20 #include <sys/malloc.h>
21 #include <sys/mman.h>
22 #include <sys/limits.h>
23 #include <sys/rman.h>
24 #include <machine/resource.h>
25 #include <machine/cpu.h>
26
27 #include <xen/xen-os.h>
28 #include <xen/hypervisor.h>
29 #include <xen/gnttab.h>
30
31 #include <vm/vm.h>
32 #include <vm/vm_kern.h>
33 #include <vm/vm_extern.h>
34 #include <vm/pmap.h>
35
36 /* External tools reserve first few grant table entries. */
37 #define NR_RESERVED_ENTRIES 8
38 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t))
39
40 static grant_ref_t **gnttab_list;
41 static unsigned int nr_grant_frames;
42 static unsigned int boot_max_nr_grant_frames;
43 static int gnttab_free_count;
44 static grant_ref_t gnttab_free_head;
45 static struct mtx gnttab_list_lock;
46
47 /*
48 * Resource representing allocated physical address space
49 * for the grant table metainfo
50 */
51 static struct resource *gnttab_pseudo_phys_res;
52
53 /* Resource id for allocated physical address space. */
54 static int gnttab_pseudo_phys_res_id;
55
56 static grant_entry_v1_t *shared;
57
58 static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
59
60 static int gnttab_expand(unsigned int req_entries);
61
62 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
63 #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
64
65 static int
get_free_entries(int count,int * entries)66 get_free_entries(int count, int *entries)
67 {
68 int ref, error;
69 grant_ref_t head;
70
71 mtx_lock(&gnttab_list_lock);
72 if ((gnttab_free_count < count) &&
73 ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
74 mtx_unlock(&gnttab_list_lock);
75 return (error);
76 }
77 ref = head = gnttab_free_head;
78 gnttab_free_count -= count;
79 while (count-- > 1)
80 head = gnttab_entry(head);
81 gnttab_free_head = gnttab_entry(head);
82 gnttab_entry(head) = GNTTAB_LIST_END;
83 mtx_unlock(&gnttab_list_lock);
84
85 *entries = ref;
86 return (0);
87 }
88
89 static void
do_free_callbacks(void)90 do_free_callbacks(void)
91 {
92 struct gnttab_free_callback *callback, *next;
93
94 callback = gnttab_free_callback_list;
95 gnttab_free_callback_list = NULL;
96
97 while (callback != NULL) {
98 next = callback->next;
99 if (gnttab_free_count >= callback->count) {
100 callback->next = NULL;
101 callback->fn(callback->arg);
102 } else {
103 callback->next = gnttab_free_callback_list;
104 gnttab_free_callback_list = callback;
105 }
106 callback = next;
107 }
108 }
109
110 static inline void
check_free_callbacks(void)111 check_free_callbacks(void)
112 {
113 if (__predict_false(gnttab_free_callback_list != NULL))
114 do_free_callbacks();
115 }
116
117 static void
put_free_entry(grant_ref_t ref)118 put_free_entry(grant_ref_t ref)
119 {
120
121 mtx_lock(&gnttab_list_lock);
122 gnttab_entry(ref) = gnttab_free_head;
123 gnttab_free_head = ref;
124 gnttab_free_count++;
125 check_free_callbacks();
126 mtx_unlock(&gnttab_list_lock);
127 }
128
129 /*
130 * Public grant-issuing interface functions
131 */
132
133 int
gnttab_grant_foreign_access(domid_t domid,unsigned long frame,int readonly,grant_ref_t * result)134 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
135 grant_ref_t *result)
136 {
137 int error, ref;
138
139 error = get_free_entries(1, &ref);
140
141 if (__predict_false(error))
142 return (error);
143
144 shared[ref].frame = frame;
145 shared[ref].domid = domid;
146 wmb();
147 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
148
149 if (result)
150 *result = ref;
151
152 return (0);
153 }
154
155 void
gnttab_grant_foreign_access_ref(grant_ref_t ref,domid_t domid,unsigned long frame,int readonly)156 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
157 unsigned long frame, int readonly)
158 {
159
160 shared[ref].frame = frame;
161 shared[ref].domid = domid;
162 wmb();
163 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
164 }
165
166 int
gnttab_query_foreign_access(grant_ref_t ref)167 gnttab_query_foreign_access(grant_ref_t ref)
168 {
169 uint16_t nflags;
170
171 nflags = shared[ref].flags;
172
173 return (nflags & (GTF_reading|GTF_writing));
174 }
175
176 int
gnttab_end_foreign_access_ref(grant_ref_t ref)177 gnttab_end_foreign_access_ref(grant_ref_t ref)
178 {
179 uint16_t flags;
180
181 while (!((flags = atomic_load_16(&shared[ref].flags)) &
182 (GTF_reading|GTF_writing)))
183 if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
184 return (1);
185
186 printf("%s: WARNING: g.e. still in use!\n", __func__);
187 return (0);
188 }
189
190 void
gnttab_end_foreign_access(grant_ref_t ref,void * page)191 gnttab_end_foreign_access(grant_ref_t ref, void *page)
192 {
193 if (gnttab_end_foreign_access_ref(ref)) {
194 put_free_entry(ref);
195 if (page != NULL) {
196 free(page, M_DEVBUF);
197 }
198 }
199 else {
200 /* XXX This needs to be fixed so that the ref and page are
201 placed on a list to be freed up later. */
202 printf("%s: WARNING: leaking g.e. and page still in use!\n",
203 __func__);
204 }
205 }
206
207 void
gnttab_end_foreign_access_references(u_int count,grant_ref_t * refs)208 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
209 {
210 grant_ref_t *last_ref;
211 grant_ref_t head;
212 grant_ref_t tail;
213
214 head = GNTTAB_LIST_END;
215 tail = *refs;
216 last_ref = refs + count;
217 while (refs != last_ref) {
218 if (gnttab_end_foreign_access_ref(*refs)) {
219 gnttab_entry(*refs) = head;
220 head = *refs;
221 } else {
222 /*
223 * XXX This needs to be fixed so that the ref
224 * is placed on a list to be freed up later.
225 */
226 printf("%s: WARNING: leaking g.e. still in use!\n",
227 __func__);
228 count--;
229 }
230 refs++;
231 }
232
233 if (count != 0) {
234 mtx_lock(&gnttab_list_lock);
235 gnttab_free_count += count;
236 gnttab_entry(tail) = gnttab_free_head;
237 gnttab_free_head = head;
238 check_free_callbacks();
239 mtx_unlock(&gnttab_list_lock);
240 }
241 }
242
243 int
gnttab_grant_foreign_transfer(domid_t domid,unsigned long pfn,grant_ref_t * result)244 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
245 grant_ref_t *result)
246 {
247 int error, ref;
248
249 error = get_free_entries(1, &ref);
250 if (__predict_false(error))
251 return (error);
252
253 gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
254
255 *result = ref;
256 return (0);
257 }
258
259 void
gnttab_grant_foreign_transfer_ref(grant_ref_t ref,domid_t domid,unsigned long pfn)260 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
261 unsigned long pfn)
262 {
263 shared[ref].frame = pfn;
264 shared[ref].domid = domid;
265 wmb();
266 shared[ref].flags = GTF_accept_transfer;
267 }
268
269 unsigned long
gnttab_end_foreign_transfer_ref(grant_ref_t ref)270 gnttab_end_foreign_transfer_ref(grant_ref_t ref)
271 {
272 unsigned long frame;
273 uint16_t flags;
274
275 /*
276 * If a transfer is not even yet started, try to reclaim the grant
277 * reference and return failure (== 0).
278 *
279 * NOTE: This is a loop since the atomic cmpset can fail multiple
280 * times. In normal operation it will be rare to execute more than
281 * twice. Attempting an attack would consume a great deal of
282 * attacker resources and be unlikely to prolong the loop very much.
283 */
284 while (!((flags = atomic_load_16(&shared[ref].flags)) &
285 GTF_transfer_committed))
286 if (atomic_cmpset_16(&shared[ref].flags, flags, 0))
287 return (0);
288
289 /* If a transfer is in progress then wait until it is completed. */
290 while (!(flags & GTF_transfer_completed)) {
291 cpu_spinwait();
292 flags = atomic_load_16(&shared[ref].flags);
293 }
294
295 /* Read the frame number /after/ reading completion status. */
296 rmb();
297 frame = shared[ref].frame;
298 KASSERT(frame != 0, ("grant table inconsistent"));
299
300 return (frame);
301 }
302
303 unsigned long
gnttab_end_foreign_transfer(grant_ref_t ref)304 gnttab_end_foreign_transfer(grant_ref_t ref)
305 {
306 unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
307
308 put_free_entry(ref);
309 return (frame);
310 }
311
312 void
gnttab_free_grant_reference(grant_ref_t ref)313 gnttab_free_grant_reference(grant_ref_t ref)
314 {
315
316 put_free_entry(ref);
317 }
318
319 void
gnttab_free_grant_references(grant_ref_t head)320 gnttab_free_grant_references(grant_ref_t head)
321 {
322 grant_ref_t ref;
323 int count = 1;
324
325 if (head == GNTTAB_LIST_END)
326 return;
327
328 ref = head;
329 while (gnttab_entry(ref) != GNTTAB_LIST_END) {
330 ref = gnttab_entry(ref);
331 count++;
332 }
333 mtx_lock(&gnttab_list_lock);
334 gnttab_entry(ref) = gnttab_free_head;
335 gnttab_free_head = head;
336 gnttab_free_count += count;
337 check_free_callbacks();
338 mtx_unlock(&gnttab_list_lock);
339 }
340
341 int
gnttab_alloc_grant_references(uint16_t count,grant_ref_t * head)342 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
343 {
344 int ref, error;
345
346 error = get_free_entries(count, &ref);
347 if (__predict_false(error))
348 return (error);
349
350 *head = ref;
351 return (0);
352 }
353
354 int
gnttab_empty_grant_references(const grant_ref_t * private_head)355 gnttab_empty_grant_references(const grant_ref_t *private_head)
356 {
357
358 return (*private_head == GNTTAB_LIST_END);
359 }
360
361 int
gnttab_claim_grant_reference(grant_ref_t * private_head)362 gnttab_claim_grant_reference(grant_ref_t *private_head)
363 {
364 grant_ref_t g = *private_head;
365
366 if (__predict_false(g == GNTTAB_LIST_END))
367 return (g);
368 *private_head = gnttab_entry(g);
369 return (g);
370 }
371
372 void
gnttab_release_grant_reference(grant_ref_t * private_head,grant_ref_t release)373 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release)
374 {
375
376 gnttab_entry(release) = *private_head;
377 *private_head = release;
378 }
379
380 void
gnttab_request_free_callback(struct gnttab_free_callback * callback,void (* fn)(void *),void * arg,uint16_t count)381 gnttab_request_free_callback(struct gnttab_free_callback *callback,
382 void (*fn)(void *), void *arg, uint16_t count)
383 {
384
385 mtx_lock(&gnttab_list_lock);
386 if (callback->next)
387 goto out;
388 callback->fn = fn;
389 callback->arg = arg;
390 callback->count = count;
391 callback->next = gnttab_free_callback_list;
392 gnttab_free_callback_list = callback;
393 check_free_callbacks();
394 out:
395 mtx_unlock(&gnttab_list_lock);
396
397 }
398
399 void
gnttab_cancel_free_callback(struct gnttab_free_callback * callback)400 gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
401 {
402 struct gnttab_free_callback **pcb;
403
404 mtx_lock(&gnttab_list_lock);
405 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
406 if (*pcb == callback) {
407 *pcb = callback->next;
408 break;
409 }
410 }
411 mtx_unlock(&gnttab_list_lock);
412 }
413
414 static int
grow_gnttab_list(unsigned int more_frames)415 grow_gnttab_list(unsigned int more_frames)
416 {
417 unsigned int new_nr_grant_frames, extra_entries, i;
418
419 new_nr_grant_frames = nr_grant_frames + more_frames;
420 extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
421
422 for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
423 {
424 gnttab_list[i] = (grant_ref_t *)
425 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
426
427 if (!gnttab_list[i])
428 goto grow_nomem;
429 }
430
431 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
432 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
433 gnttab_entry(i) = i + 1;
434
435 gnttab_entry(i) = gnttab_free_head;
436 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
437 gnttab_free_count += extra_entries;
438
439 nr_grant_frames = new_nr_grant_frames;
440
441 check_free_callbacks();
442
443 return (0);
444
445 grow_nomem:
446 for ( ; i >= nr_grant_frames; i--)
447 free(gnttab_list[i], M_DEVBUF);
448 return (ENOMEM);
449 }
450
451 static unsigned int
__max_nr_grant_frames(void)452 __max_nr_grant_frames(void)
453 {
454 struct gnttab_query_size query;
455 int rc;
456
457 query.dom = DOMID_SELF;
458
459 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
460 if ((rc < 0) || (query.status != GNTST_okay))
461 return (4); /* Legacy max supported number of frames */
462
463 return (query.max_nr_frames);
464 }
465
466 static inline
max_nr_grant_frames(void)467 unsigned int max_nr_grant_frames(void)
468 {
469
470 return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames));
471 }
472
473 #ifdef notyet
474 /*
475 * XXX needed for backend support
476 *
477 */
478 static int
map_pte_fn(pte_t * pte,struct page * pmd_page,unsigned long addr,void * data)479 map_pte_fn(pte_t *pte, struct page *pmd_page,
480 unsigned long addr, void *data)
481 {
482 unsigned long **frames = (unsigned long **)data;
483
484 set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
485 (*frames)++;
486 return 0;
487 }
488
489 static int
unmap_pte_fn(pte_t * pte,struct page * pmd_page,unsigned long addr,void * data)490 unmap_pte_fn(pte_t *pte, struct page *pmd_page,
491 unsigned long addr, void *data)
492 {
493
494 set_pte_at(&init_mm, addr, pte, __pte(0));
495 return 0;
496 }
497 #endif
498
499 static vm_paddr_t resume_frames;
500
501 static void
gnttab_map(unsigned int start_idx,unsigned int end_idx)502 gnttab_map(unsigned int start_idx, unsigned int end_idx)
503 {
504 struct xen_add_to_physmap xatp;
505 unsigned int i = end_idx;
506
507 /*
508 * Loop backwards, so that the first hypercall has the largest index,
509 * ensuring that the table will grow only once.
510 */
511 do {
512 xatp.domid = DOMID_SELF;
513 xatp.idx = i;
514 xatp.space = XENMAPSPACE_grant_table;
515 xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
516 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
517 panic("HYPERVISOR_memory_op failed to map gnttab");
518 } while (i-- > start_idx);
519 }
520
521 int
gnttab_resume(device_t dev)522 gnttab_resume(device_t dev)
523 {
524 unsigned int max_nr_gframes, nr_gframes;
525
526 nr_gframes = nr_grant_frames;
527 max_nr_gframes = max_nr_grant_frames();
528 if (max_nr_gframes < nr_gframes)
529 return (ENOSYS);
530
531 if (!resume_frames) {
532 KASSERT(dev != NULL,
533 ("No resume frames and no device provided"));
534
535 gnttab_pseudo_phys_res = xenmem_alloc(dev,
536 &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes);
537 if (gnttab_pseudo_phys_res == NULL)
538 panic("Unable to reserve physical memory for gnttab");
539 resume_frames = rman_get_start(gnttab_pseudo_phys_res);
540 shared = rman_get_virtual(gnttab_pseudo_phys_res);
541 }
542 gnttab_map(0, nr_gframes - 1);
543
544 return (0);
545 }
546
547 static int
gnttab_expand(unsigned int req_entries)548 gnttab_expand(unsigned int req_entries)
549 {
550 unsigned int cur, extra;
551
552 cur = nr_grant_frames;
553 extra = howmany(req_entries, GREFS_PER_GRANT_FRAME);
554 if (cur + extra > max_nr_grant_frames())
555 return (ENOSPC);
556
557 gnttab_map(cur, cur + extra - 1);
558
559 return (grow_gnttab_list(extra));
560 }
561
562 MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE);
563
564 /*------------------ Private Device Attachment Functions --------------------*/
565 /**
566 * \brief Identify instances of this device type in the system.
567 *
568 * \param driver The driver performing this identify action.
569 * \param parent The NewBus parent device for any devices this method adds.
570 */
571 static void
granttable_identify(driver_t * driver,device_t parent)572 granttable_identify(driver_t *driver, device_t parent)
573 {
574
575 KASSERT(xen_domain(),
576 ("Trying to attach grant-table device on non Xen domain"));
577 /*
578 * A single device instance for our driver is always present
579 * in a system operating under Xen.
580 */
581 if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL)
582 panic("unable to attach Xen Grant-table device");
583 }
584
585 /**
586 * \brief Probe for the existence of the Xen Grant-table device
587 *
588 * \param dev NewBus device_t for this instance.
589 *
590 * \return Always returns 0 indicating success.
591 */
592 static int
granttable_probe(device_t dev)593 granttable_probe(device_t dev)
594 {
595
596 device_set_desc(dev, "Xen Grant-table Device");
597 return (BUS_PROBE_NOWILDCARD);
598 }
599
600 /**
601 * \brief Attach the Xen Grant-table device.
602 *
603 * \param dev NewBus device_t for this instance.
604 *
605 * \return On success, 0. Otherwise an errno value indicating the
606 * type of failure.
607 */
608 static int
granttable_attach(device_t dev)609 granttable_attach(device_t dev)
610 {
611 int i;
612 unsigned int nr_init_grefs;
613
614 nr_grant_frames = 1;
615 boot_max_nr_grant_frames = __max_nr_grant_frames();
616
617 gnttab_list = malloc(boot_max_nr_grant_frames * sizeof(grant_ref_t *),
618 M_DEVBUF, M_NOWAIT);
619
620 if (gnttab_list == NULL)
621 return (ENOMEM);
622
623 for (i = 0; i < nr_grant_frames; i++) {
624 gnttab_list[i] = (grant_ref_t *)
625 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
626 if (gnttab_list[i] == NULL)
627 goto ini_nomem;
628 }
629
630 if (gnttab_resume(dev))
631 return (ENODEV);
632
633 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
634
635 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
636 gnttab_entry(i) = i + 1;
637
638 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
639 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
640 gnttab_free_head = NR_RESERVED_ENTRIES;
641
642 if (bootverbose)
643 printf("Grant table initialized\n");
644
645 return (0);
646
647 ini_nomem:
648 for (i--; i >= 0; i--)
649 free(gnttab_list[i], M_DEVBUF);
650 free(gnttab_list, M_DEVBUF);
651 return (ENOMEM);
652 }
653
654 /*-------------------- Private Device Attachment Data -----------------------*/
655 static device_method_t granttable_methods[] = {
656 /* Device interface */
657 DEVMETHOD(device_identify, granttable_identify),
658 DEVMETHOD(device_probe, granttable_probe),
659 DEVMETHOD(device_attach, granttable_attach),
660
661 DEVMETHOD_END
662 };
663
664 DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0);
665
666 DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL,
667 SI_ORDER_FIRST);
668