xref: /linux/drivers/xen/gntalloc.c (revision 312b62b6610cabea4cb535fd4889c41e9a84afca)
1 /******************************************************************************
2  * gntalloc.c
3  *
4  * Device for creating grant references (in user-space) that may be shared
5  * with other domains.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15  */
16 
17 /*
18  * This driver exists to allow userspace programs in Linux to allocate kernel
19  * memory that will later be shared with another domain.  Without this device,
20  * Linux userspace programs cannot create grant references.
21  *
22  * How this stuff works:
23  *   X -> granting a page to Y
24  *   Y -> mapping the grant from X
25  *
26  *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
27  *   2. X creates an entry in the grant table that says domid(Y) can access P.
28  *      This is done without a hypercall unless the grant table needs expansion.
29  *   3. X gives the grant reference identifier, GREF, to Y.
30  *   4. Y maps the page, either directly into kernel memory for use in a backend
31  *      driver, or via the gntdev device to map into the address space of an
32  *      application running in Y. This is the first point at which Xen does any
33  *      tracking of the page.
34  *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
35  *      to the shared page, and can now communicate with Y over the shared page.
36  *
37  *
38  * NOTE TO USERSPACE LIBRARIES:
39  *   The grant allocation and mmap()ing are, naturally, two separate operations.
40  *   You set up the sharing by calling the create ioctl() and then the mmap().
41  *   Teardown requires munmap() and either close() or ioctl().
42  *
43  * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
44  * reference, this device can be used to consume kernel memory by leaving grant
45  * references mapped by another domain when an application exits. Therefore,
46  * there is a global limit on the number of pages that can be allocated. When
47  * all references to the page are unmapped, it will be freed during the next
48  * grant operation.
49  */
50 
51 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
52 
53 #include <linux/atomic.h>
54 #include <linux/module.h>
55 #include <linux/miscdevice.h>
56 #include <linux/kernel.h>
57 #include <linux/init.h>
58 #include <linux/slab.h>
59 #include <linux/fs.h>
60 #include <linux/device.h>
61 #include <linux/mm.h>
62 #include <linux/uaccess.h>
63 #include <linux/types.h>
64 #include <linux/list.h>
65 #include <linux/highmem.h>
66 
67 #include <xen/xen.h>
68 #include <xen/page.h>
69 #include <xen/grant_table.h>
70 #include <xen/gntalloc.h>
71 #include <xen/events.h>
72 
/* Cap compared against gref_size in the alloc ioctl; mode 0644 module parameter. */
73 static int limit = 1024;
74 module_param(limit, int, 0644);
75 MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
76 		"the gntalloc device");
77 
/* Global list of grefs, linked through gntalloc_gref.next_gref. */
78 static LIST_HEAD(gref_list);
/* Held while gref_list, the per-file lists, gref_size and user counts change. */
79 static DEFINE_MUTEX(gref_mutex);
/* Number of grefs currently charged against 'limit'. */
80 static int gref_size;
81 
/*
 * Unmap notification attached to one gref.  Filled in by the
 * SET_UNMAP_NOTIFY ioctl and acted upon (then cleared) by __del_gref().
 */
82 struct notify_info {
83 	uint16_t pgoff:12;    /* Bits 0-11: Offset of the byte to clear */
84 	uint16_t flags:2;     /* Bits 12-13: Unmap notification flags */
85 	int event;            /* Port (event channel) to notify */
86 };
87 
88 /*
 * Metadata on a grant reference: one instance per shared page.
 *
 * 'users' starts at 1 in add_grefs() (the owning file's reference) and is
 * raised once per mapped page in gntalloc_mmap(); release, dealloc and
 * vma close lower it again.
 */
89 struct gntalloc_gref {
90 	struct list_head next_gref;  /* list entry gref_list */
91 	struct list_head next_file;  /* list entry file->list, if open */
92 	struct page *page;	     /* The shared page */
93 	uint64_t file_index;         /* File offset for mmap() */
94 	unsigned int users;          /* Use count - when zero, waiting on Xen */
95 	grant_ref_t gref_id;         /* The grant reference number */
96 	struct notify_info notify;   /* Unmap notification */
97 };
98 
/* Per-open-file state, stored in filp->private_data by gntalloc_open(). */
99 struct gntalloc_file_private_data {
100 	struct list_head list;	/* grefs of this file, linked via next_file */
101 	uint64_t index;		/* file offset handed to the next alloc ioctl */
102 };
103 
/* Per-mapping state, stored in vma->vm_private_data by gntalloc_mmap(). */
104 struct gntalloc_vma_private_data {
105 	struct gntalloc_gref *gref;	/* first gref covered by the mapping */
106 	int users;			/* set to 1 by mmap, +1 per vma open, -1 per vma close */
107 	int count;			/* number of pages (grefs) in the mapping */
108 };
109 
110 static void __del_gref(struct gntalloc_gref *gref);
111 
112 static void do_cleanup(void)
113 {
114 	struct gntalloc_gref *gref, *n;
115 	list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
116 		if (!gref->users)
117 			__del_gref(gref);
118 	}
119 }
120 
121 static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
122 	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
123 {
124 	int i, rc, readonly;
125 	LIST_HEAD(queue_gref);
126 	LIST_HEAD(queue_file);
127 	struct gntalloc_gref *gref, *next;
128 
129 	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
130 	for (i = 0; i < op->count; i++) {
131 		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
132 		if (!gref) {
133 			rc = -ENOMEM;
134 			goto undo;
135 		}
136 		list_add_tail(&gref->next_gref, &queue_gref);
137 		list_add_tail(&gref->next_file, &queue_file);
138 		gref->users = 1;
139 		gref->file_index = op->index + i * PAGE_SIZE;
140 		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
141 		if (!gref->page) {
142 			rc = -ENOMEM;
143 			goto undo;
144 		}
145 
146 		/* Grant foreign access to the page. */
147 		rc = gnttab_grant_foreign_access(op->domid,
148 						 xen_page_to_gfn(gref->page),
149 						 readonly);
150 		if (rc < 0)
151 			goto undo;
152 		gref_ids[i] = gref->gref_id = rc;
153 	}
154 
155 	/* Add to gref lists. */
156 	mutex_lock(&gref_mutex);
157 	list_splice_tail(&queue_gref, &gref_list);
158 	list_splice_tail(&queue_file, &priv->list);
159 	mutex_unlock(&gref_mutex);
160 
161 	return 0;
162 
163 undo:
164 	mutex_lock(&gref_mutex);
165 	gref_size -= (op->count - i);
166 
167 	list_for_each_entry_safe(gref, next, &queue_file, next_file) {
168 		list_del(&gref->next_file);
169 		__del_gref(gref);
170 	}
171 
172 	/* It's possible for the target domain to map the just-allocated grant
173 	 * references by blindly guessing their IDs; if this is done, then
174 	 * __del_gref will leave them in the queue_gref list. They need to be
175 	 * added to the global list so that we can free them when they are no
176 	 * longer referenced.
177 	 */
178 	if (unlikely(!list_empty(&queue_gref)))
179 		list_splice_tail(&queue_gref, &gref_list);
180 	mutex_unlock(&gref_mutex);
181 	return rc;
182 }
183 
184 static void __del_gref(struct gntalloc_gref *gref)
185 {
186 	if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
187 		uint8_t *tmp = kmap(gref->page);
188 		tmp[gref->notify.pgoff] = 0;
189 		kunmap(gref->page);
190 	}
191 	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
192 		notify_remote_via_evtchn(gref->notify.event);
193 		evtchn_put(gref->notify.event);
194 	}
195 
196 	gref->notify.flags = 0;
197 
198 	if (gref->gref_id) {
199 		if (gnttab_query_foreign_access(gref->gref_id))
200 			return;
201 
202 		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
203 			return;
204 
205 		gnttab_free_grant_reference(gref->gref_id);
206 	}
207 
208 	gref_size--;
209 	list_del(&gref->next_gref);
210 
211 	if (gref->page)
212 		__free_page(gref->page);
213 
214 	kfree(gref);
215 }
216 
217 /* finds contiguous grant references in a file, returns the first */
218 static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
219 		uint64_t index, uint32_t count)
220 {
221 	struct gntalloc_gref *rv = NULL, *gref;
222 	list_for_each_entry(gref, &priv->list, next_file) {
223 		if (gref->file_index == index && !rv)
224 			rv = gref;
225 		if (rv) {
226 			if (gref->file_index != index)
227 				return NULL;
228 			index += PAGE_SIZE;
229 			count--;
230 			if (count == 0)
231 				return rv;
232 		}
233 	}
234 	return NULL;
235 }
236 
237 /*
238  * -------------------------------------
239  *  File operations.
240  * -------------------------------------
241  */
242 static int gntalloc_open(struct inode *inode, struct file *filp)
243 {
244 	struct gntalloc_file_private_data *priv;
245 
246 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
247 	if (!priv)
248 		goto out_nomem;
249 	INIT_LIST_HEAD(&priv->list);
250 
251 	filp->private_data = priv;
252 
253 	pr_debug("%s: priv %p\n", __func__, priv);
254 
255 	return 0;
256 
257 out_nomem:
258 	return -ENOMEM;
259 }
260 
261 static int gntalloc_release(struct inode *inode, struct file *filp)
262 {
263 	struct gntalloc_file_private_data *priv = filp->private_data;
264 	struct gntalloc_gref *gref;
265 
266 	pr_debug("%s: priv %p\n", __func__, priv);
267 
268 	mutex_lock(&gref_mutex);
269 	while (!list_empty(&priv->list)) {
270 		gref = list_entry(priv->list.next,
271 			struct gntalloc_gref, next_file);
272 		list_del(&gref->next_file);
273 		gref->users--;
274 		if (gref->users == 0)
275 			__del_gref(gref);
276 	}
277 	kfree(priv);
278 	mutex_unlock(&gref_mutex);
279 
280 	return 0;
281 }
282 
283 static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
284 		struct ioctl_gntalloc_alloc_gref __user *arg)
285 {
286 	int rc = 0;
287 	struct ioctl_gntalloc_alloc_gref op;
288 	uint32_t *gref_ids;
289 
290 	pr_debug("%s: priv %p\n", __func__, priv);
291 
292 	if (copy_from_user(&op, arg, sizeof(op))) {
293 		rc = -EFAULT;
294 		goto out;
295 	}
296 
297 	gref_ids = kcalloc(op.count, sizeof(gref_ids[0]), GFP_KERNEL);
298 	if (!gref_ids) {
299 		rc = -ENOMEM;
300 		goto out;
301 	}
302 
303 	mutex_lock(&gref_mutex);
304 	/* Clean up pages that were at zero (local) users but were still mapped
305 	 * by remote domains. Since those pages count towards the limit that we
306 	 * are about to enforce, removing them here is a good idea.
307 	 */
308 	do_cleanup();
309 	if (gref_size + op.count > limit) {
310 		mutex_unlock(&gref_mutex);
311 		rc = -ENOSPC;
312 		goto out_free;
313 	}
314 	gref_size += op.count;
315 	op.index = priv->index;
316 	priv->index += op.count * PAGE_SIZE;
317 	mutex_unlock(&gref_mutex);
318 
319 	rc = add_grefs(&op, gref_ids, priv);
320 	if (rc < 0)
321 		goto out_free;
322 
323 	/* Once we finish add_grefs, it is unsafe to touch the new reference,
324 	 * since it is possible for a concurrent ioctl to remove it (by guessing
325 	 * its index). If the userspace application doesn't provide valid memory
326 	 * to write the IDs to, then it will need to close the file in order to
327 	 * release - which it will do by segfaulting when it tries to access the
328 	 * IDs to close them.
329 	 */
330 	if (copy_to_user(arg, &op, sizeof(op))) {
331 		rc = -EFAULT;
332 		goto out_free;
333 	}
334 	if (copy_to_user(arg->gref_ids, gref_ids,
335 			sizeof(gref_ids[0]) * op.count)) {
336 		rc = -EFAULT;
337 		goto out_free;
338 	}
339 
340 out_free:
341 	kfree(gref_ids);
342 out:
343 	return rc;
344 }
345 
346 static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
347 		void __user *arg)
348 {
349 	int i, rc = 0;
350 	struct ioctl_gntalloc_dealloc_gref op;
351 	struct gntalloc_gref *gref, *n;
352 
353 	pr_debug("%s: priv %p\n", __func__, priv);
354 
355 	if (copy_from_user(&op, arg, sizeof(op))) {
356 		rc = -EFAULT;
357 		goto dealloc_grant_out;
358 	}
359 
360 	mutex_lock(&gref_mutex);
361 	gref = find_grefs(priv, op.index, op.count);
362 	if (gref) {
363 		/* Remove from the file list only, and decrease reference count.
364 		 * The later call to do_cleanup() will remove from gref_list and
365 		 * free the memory if the pages aren't mapped anywhere.
366 		 */
367 		for (i = 0; i < op.count; i++) {
368 			n = list_entry(gref->next_file.next,
369 				struct gntalloc_gref, next_file);
370 			list_del(&gref->next_file);
371 			gref->users--;
372 			gref = n;
373 		}
374 	} else {
375 		rc = -EINVAL;
376 	}
377 
378 	do_cleanup();
379 
380 	mutex_unlock(&gref_mutex);
381 dealloc_grant_out:
382 	return rc;
383 }
384 
385 static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
386 		void __user *arg)
387 {
388 	struct ioctl_gntalloc_unmap_notify op;
389 	struct gntalloc_gref *gref;
390 	uint64_t index;
391 	int pgoff;
392 	int rc;
393 
394 	if (copy_from_user(&op, arg, sizeof(op)))
395 		return -EFAULT;
396 
397 	index = op.index & ~(PAGE_SIZE - 1);
398 	pgoff = op.index & (PAGE_SIZE - 1);
399 
400 	mutex_lock(&gref_mutex);
401 
402 	gref = find_grefs(priv, index, 1);
403 	if (!gref) {
404 		rc = -ENOENT;
405 		goto unlock_out;
406 	}
407 
408 	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
409 		rc = -EINVAL;
410 		goto unlock_out;
411 	}
412 
413 	/* We need to grab a reference to the event channel we are going to use
414 	 * to send the notify before releasing the reference we may already have
415 	 * (if someone has called this ioctl twice). This is required so that
416 	 * it is possible to change the clear_byte part of the notification
417 	 * without disturbing the event channel part, which may now be the last
418 	 * reference to that event channel.
419 	 */
420 	if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
421 		if (evtchn_get(op.event_channel_port)) {
422 			rc = -EINVAL;
423 			goto unlock_out;
424 		}
425 	}
426 
427 	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
428 		evtchn_put(gref->notify.event);
429 
430 	gref->notify.flags = op.action;
431 	gref->notify.pgoff = pgoff;
432 	gref->notify.event = op.event_channel_port;
433 	rc = 0;
434 
435  unlock_out:
436 	mutex_unlock(&gref_mutex);
437 	return rc;
438 }
439 
440 static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
441 		unsigned long arg)
442 {
443 	struct gntalloc_file_private_data *priv = filp->private_data;
444 
445 	switch (cmd) {
446 	case IOCTL_GNTALLOC_ALLOC_GREF:
447 		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
448 
449 	case IOCTL_GNTALLOC_DEALLOC_GREF:
450 		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
451 
452 	case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
453 		return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg);
454 
455 	default:
456 		return -ENOIOCTLCMD;
457 	}
458 
459 	return 0;
460 }
461 
462 static void gntalloc_vma_open(struct vm_area_struct *vma)
463 {
464 	struct gntalloc_vma_private_data *priv = vma->vm_private_data;
465 
466 	if (!priv)
467 		return;
468 
469 	mutex_lock(&gref_mutex);
470 	priv->users++;
471 	mutex_unlock(&gref_mutex);
472 }
473 
474 static void gntalloc_vma_close(struct vm_area_struct *vma)
475 {
476 	struct gntalloc_vma_private_data *priv = vma->vm_private_data;
477 	struct gntalloc_gref *gref, *next;
478 	int i;
479 
480 	if (!priv)
481 		return;
482 
483 	mutex_lock(&gref_mutex);
484 	priv->users--;
485 	if (priv->users == 0) {
486 		gref = priv->gref;
487 		for (i = 0; i < priv->count; i++) {
488 			gref->users--;
489 			next = list_entry(gref->next_gref.next,
490 					  struct gntalloc_gref, next_gref);
491 			if (gref->users == 0)
492 				__del_gref(gref);
493 			gref = next;
494 		}
495 		kfree(priv);
496 	}
497 	mutex_unlock(&gref_mutex);
498 }
499 
/* VMA callbacks installed by gntalloc_mmap() to track users of a mapping. */
500 static const struct vm_operations_struct gntalloc_vmops = {
501 	.open = gntalloc_vma_open,
502 	.close = gntalloc_vma_close,
503 };
504 
505 static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
506 {
507 	struct gntalloc_file_private_data *priv = filp->private_data;
508 	struct gntalloc_vma_private_data *vm_priv;
509 	struct gntalloc_gref *gref;
510 	int count = vma_pages(vma);
511 	int rv, i;
512 
513 	if (!(vma->vm_flags & VM_SHARED)) {
514 		pr_err("%s: Mapping must be shared\n", __func__);
515 		return -EINVAL;
516 	}
517 
518 	vm_priv = kmalloc(sizeof(*vm_priv), GFP_KERNEL);
519 	if (!vm_priv)
520 		return -ENOMEM;
521 
522 	mutex_lock(&gref_mutex);
523 
524 	pr_debug("%s: priv %p,%p, page %lu+%d\n", __func__,
525 		       priv, vm_priv, vma->vm_pgoff, count);
526 
527 	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
528 	if (gref == NULL) {
529 		rv = -ENOENT;
530 		pr_debug("%s: Could not find grant reference",
531 				__func__);
532 		kfree(vm_priv);
533 		goto out_unlock;
534 	}
535 
536 	vm_priv->gref = gref;
537 	vm_priv->users = 1;
538 	vm_priv->count = count;
539 
540 	vma->vm_private_data = vm_priv;
541 
542 	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
543 
544 	vma->vm_ops = &gntalloc_vmops;
545 
546 	for (i = 0; i < count; i++) {
547 		gref->users++;
548 		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
549 				gref->page);
550 		if (rv)
551 			goto out_unlock;
552 
553 		gref = list_entry(gref->next_file.next,
554 				struct gntalloc_gref, next_file);
555 	}
556 	rv = 0;
557 
558 out_unlock:
559 	mutex_unlock(&gref_mutex);
560 	return rv;
561 }
562 
/* File operations backing the gntalloc misc device. */
563 static const struct file_operations gntalloc_fops = {
564 	.owner = THIS_MODULE,
565 	.open = gntalloc_open,
566 	.release = gntalloc_release,
567 	.unlocked_ioctl = gntalloc_ioctl,
568 	.mmap = gntalloc_mmap
569 };
570 
571 /*
572  * -------------------------------------
573  * Module creation/destruction.
574  * -------------------------------------
575  */
/* Misc device descriptor: dynamically assigned minor, named "xen/gntalloc". */
576 static struct miscdevice gntalloc_miscdev = {
577 	.minor	= MISC_DYNAMIC_MINOR,
578 	.name	= "xen/gntalloc",
579 	.fops	= &gntalloc_fops,
580 };
581 
582 static int __init gntalloc_init(void)
583 {
584 	int err;
585 
586 	if (!xen_domain())
587 		return -ENODEV;
588 
589 	err = misc_register(&gntalloc_miscdev);
590 	if (err != 0) {
591 		pr_err("Could not register misc gntalloc device\n");
592 		return err;
593 	}
594 
595 	pr_debug("Created grant allocation device at %d,%d\n",
596 			MISC_MAJOR, gntalloc_miscdev.minor);
597 
598 	return 0;
599 }
600 
/* Module exit: unregister the misc device registered by gntalloc_init(). */
601 static void __exit gntalloc_exit(void)
602 {
603 	misc_deregister(&gntalloc_miscdev);
604 }
605 
/* Module entry/exit hooks and metadata. */
606 module_init(gntalloc_init);
607 module_exit(gntalloc_exit);
608 
609 MODULE_LICENSE("GPL");
610 MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
611 		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
612 MODULE_DESCRIPTION("User-space grant reference allocator driver");
613