xref: /freebsd/sys/amd64/sgx/sgx.c (revision 92f340d137ba5d6db7610ba1dae35842e2c9c8ea)
/*-
 * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
 * All rights reserved.
 *
 * This software was developed by BAE Systems, the University of Cambridge
 * Computer Laboratory, and Memorial University under DARPA/AFRL contract
 * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
 * (TC) research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Design overview.
 *
 * The driver provides a character device for the mmap(2) and ioctl(2) system
 * calls, allowing the user to manage isolated compartments ("enclaves") in
 * user VA space.
 *
 * The driver's duties are EPC page management, enclave management, and user
 * data validation.
 *
 * This driver requires hardware support for Intel SGX.
 *
 * /dev/isgx:
 *    .mmap:
 *        sgx_mmap_single() allocates a VM object with the following pager
 *        operations:
 *              a) sgx_pg_ctor():
 *                  VM object constructor does nothing
 *              b) sgx_pg_dtor():
 *                  VM object destructor destroys the SGX enclave associated
 *                  with the object: it frees all the EPC pages allocated for
 *                  the enclave and removes the enclave.
 *              c) sgx_pg_fault():
 *                  VM object fault handler does nothing
 *
 *    .ioctl:
 *        sgx_ioctl():
 *               a) SGX_IOC_ENCLAVE_CREATE
 *                   Adds the enclave SECS page: the initial step of enclave
 *                   creation.
 *               b) SGX_IOC_ENCLAVE_ADD_PAGE
 *                   Adds TCS and REG pages to the enclave.
 *               c) SGX_IOC_ENCLAVE_INIT
 *                   Finalizes enclave creation.
 *
 * Enclave lifecycle:
 *          .-- ECREATE  -- Add SECS page
 *   Kernel |   EADD     -- Add TCS, REG pages
 *    space |   EEXTEND  -- Measure the page (take unique hash)
 *    ENCLS |   EPA      -- Allocate version array page
 *          '-- EINIT    -- Finalize enclave creation
 *   User   .-- EENTER   -- Go to entry point of enclave
 *    space |   EEXIT    -- Exit back to main application
 *    ENCLU '-- ERESUME  -- Resume enclave execution (e.g. after exception)
 *
 * Enclave lifecycle from the driver's point of view:
 *  1) The user calls mmap() on /dev/isgx: we allocate a VM object.
 *  2) The user calls ioctl SGX_IOC_ENCLAVE_CREATE: we look up the VM object
 *     associated with the user process created in step 1, create the SECS
 *     physical page and store it in the enclave's VM object queue at the
 *     special index SGX_SECS_VM_OBJECT_INDEX.
 *  3) The user calls ioctl SGX_IOC_ENCLAVE_ADD_PAGE: we look up the enclave
 *     created in step 2, create a TCS or REG physical page and map it at the
 *     user-specified address within the enclave's VM object.
 *  4) The user finalizes enclave creation with the ioctl
 *     SGX_IOC_ENCLAVE_INIT call.
 *  5) The user can freely enter and exit the enclave using ENCLU
 *     instructions from userspace: the driver does nothing here.
 *  6) The user issues the munmap(2) system call (or the process owning the
 *     enclave dies): we destroy the enclave associated with the object.
 *
 * EPC page types and their indices in the VM object queue:
 *   - The PT_SECS index is special and equals SGX_SECS_VM_OBJECT_INDEX (-1);
 *   - PT_TCS and PT_REG indices are specified by the user in the addr field
 *     of the ioctl request data and are determined as follows:
 *       pidx = OFF_TO_IDX(addp->addr - vmh->base);
 *   - The PT_VA index is special: PT_VA pages are created for PT_REG, PT_TCS
 *     and PT_SECS pages, and the index is determined by the formula:
 *       va_page_idx = - SGX_VA_PAGES_OFFS - (page_idx / SGX_VA_PAGE_SLOTS);
 *     A PT_VA page can hold versions of up to 512 pages, and the slot for
 *     each page within a PT_VA page is determined as follows (a worked
 *     example follows the list):
 *       va_slot_idx = page_idx % SGX_VA_PAGE_SLOTS;
 *   - PT_TRIM is unused.
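 *
 *     As a worked example (an illustration only, assuming
 *     SGX_VA_PAGE_SLOTS == 512): a page added at enclave offset 0x3000 has
 *     page_idx = 3, so its VA page index is
 *     - SGX_VA_PAGES_OFFS - (3 / 512) = - SGX_VA_PAGES_OFFS, and it
 *     occupies slot 3 % 512 = 3 within that VA page.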
 *
 * Locking:
 *    The SGX ENCLS set of instructions has concurrency limitations: some
 *    instructions can't be executed at the same time on different CPUs.
 *    We take the sc->mtx_encls lock around them to prevent concurrent
 *    execution.
 *    The sc->mtx lock is used to manage the list of created enclaves and
 *    the state of the SGX driver.
 *
 * Eviction of EPC pages:
 *    Eviction support is not implemented in this driver; however, the driver
 *    manages VA (version array) pages: it allocates a VA slot for each EPC
 *    page. This will be required for eviction support in the future.
 *    VA pages and slots are currently unused.
 *
 * Intel® 64 and IA-32 Architectures Software Developer's Manual
 * https://software.intel.com/en-us/articles/intel-sdm
 */
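
/*
 * Illustrative userspace usage (a hedged sketch, not part of the driver;
 * error handling is omitted and the request structures are assumed to
 * match those declared in machine/sgx.h):
 *
 *	int fd = open("/dev/isgx", O_RDWR);
 *	(void)mmap(NULL, encl_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 *	struct sgx_enclave_create param = { .src = (uint64_t)secs };
 *	ioctl(fd, SGX_IOC_ENCLAVE_CREATE, &param);
 *	struct sgx_enclave_add_page addp = { .addr = page_addr,
 *	    .src = (uint64_t)page_data, .secinfo = (uint64_t)&secinfo,
 *	    .mrmask = 0xffff };
 *	ioctl(fd, SGX_IOC_ENCLAVE_ADD_PAGE, &addp);
 *	struct sgx_enclave_init initp = { .addr = encl_base,
 *	    .sigstruct = (uint64_t)sigstruct, .einittoken = (uint64_t)token };
 *	ioctl(fd, SGX_IOC_ENCLAVE_INIT, &initp);
 */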

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/conf.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/pmap.h>

#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/cpufunc.h>
#include <machine/sgx.h>
#include <machine/sgxreg.h>

#include <amd64/sgx/sgxvar.h>

#define	SGX_DEBUG
#undef	SGX_DEBUG

#ifdef	SGX_DEBUG
#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
#else
#define	dprintf(fmt, ...)
#endif

static struct cdev_pager_ops sgx_pg_ops;
struct sgx_softc sgx_sc;

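/*
 * Allocate a free EPC page: take a PAGE_SIZE chunk from the EPC vmem arena
 * and translate the returned physical address into an epc_pages[] entry.
 */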
static int
sgx_get_epc_page(struct sgx_softc *sc, struct epc_page **epc)
{
	vmem_addr_t addr;
	int i;

	if (vmem_alloc(sc->vmem_epc, PAGE_SIZE, M_FIRSTFIT | M_NOWAIT,
	    &addr) == 0) {
		i = (addr - sc->epc_base) / PAGE_SIZE;
		*epc = &sc->epc_pages[i];
		return (0);
	}

	return (ENOMEM);
}

static void
sgx_put_epc_page(struct sgx_softc *sc, struct epc_page *epc)
{
	vmem_addr_t addr;

	if (epc == NULL)
		return;

	addr = (epc->index * PAGE_SIZE) + sc->epc_base;
	vmem_free(sc->vmem_epc, addr, PAGE_SIZE);
}

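/*
 * Ensure that the VA (version array) page at the given object index exists:
 * if it does not, allocate an EPC page, convert it into a VA page with the
 * EPA instruction and insert it into the object at that index.
 */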
static int
sgx_va_slot_init_by_index(struct sgx_softc *sc, vm_object_t object,
    uint64_t idx)
{
	struct epc_page *epc;
	vm_page_t page;
	vm_page_t p;
	int ret;

	VM_OBJECT_ASSERT_WLOCKED(object);

	p = vm_page_lookup(object, idx);
	if (p == NULL) {
		ret = sgx_get_epc_page(sc, &epc);
		if (ret) {
			dprintf("%s: No free EPC pages available.\n",
			    __func__);
			return (ret);
		}

		mtx_lock(&sc->mtx_encls);
		sgx_epa((void *)epc->base);
		mtx_unlock(&sc->mtx_encls);

		page = PHYS_TO_VM_PAGE(epc->phys);

		page->valid = VM_PAGE_BITS_ALL;
		vm_page_insert(page, object, idx);
	}

	return (0);
}

static int
sgx_va_slot_init(struct sgx_softc *sc,
    struct sgx_enclave *enclave,
    uint64_t addr)
{
	vm_pindex_t pidx;
	uint64_t va_page_idx;
	uint64_t idx;
	vm_object_t object;
	int ret;

	object = enclave->object;

	VM_OBJECT_ASSERT_WLOCKED(object);

	pidx = OFF_TO_IDX(addr);

	va_page_idx = pidx / SGX_VA_PAGE_SLOTS;
	idx = - SGX_VA_PAGES_OFFS - va_page_idx;

	ret = sgx_va_slot_init_by_index(sc, object, idx);

	return (ret);
}

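/*
 * Look up the vm_map entry covering the given user address and verify that
 * it is backed by an SGX pager object; on success return the entry and a
 * referenced VM object.
 */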
static int
sgx_mem_find(struct sgx_softc *sc, uint64_t addr,
    vm_map_entry_t *entry0, vm_object_t *object0)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;

	map = &curproc->p_vmspace->vm_map;

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		dprintf("%s: Can't find enclave.\n", __func__);
		return (EINVAL);
	}

	object = entry->object.vm_object;
	if (object == NULL || object->handle == NULL) {
		vm_map_unlock_read(map);
		return (EINVAL);
	}

	if (object->type != OBJT_MGTDEVICE ||
	    object->un_pager.devp.ops != &sgx_pg_ops) {
		vm_map_unlock_read(map);
		return (EINVAL);
	}

	vm_object_reference(object);

	*object0 = object;
	*entry0 = entry;
	vm_map_unlock_read(map);

	return (0);
}

static int
sgx_enclave_find(struct sgx_softc *sc, uint64_t addr,
    struct sgx_enclave **encl)
{
	struct sgx_vm_handle *vmh;
	struct sgx_enclave *enclave;
	vm_map_entry_t entry;
	vm_object_t object;
	int ret;

	ret = sgx_mem_find(sc, addr, &entry, &object);
	if (ret)
		return (ret);

	vmh = object->handle;
	if (vmh == NULL) {
		vm_object_deallocate(object);
		return (EINVAL);
	}

	enclave = vmh->enclave;
	if (enclave == NULL || enclave->object == NULL) {
		vm_object_deallocate(object);
		return (EINVAL);
	}

	*encl = enclave;

	return (0);
}

static int
sgx_enclave_alloc(struct sgx_softc *sc, struct secs *secs,
    struct sgx_enclave **enclave0)
{
	struct sgx_enclave *enclave;

	enclave = malloc(sizeof(struct sgx_enclave),
	    M_SGX, M_WAITOK | M_ZERO);

	enclave->base = secs->base;
	enclave->size = secs->size;

	*enclave0 = enclave;

	return (0);
}

static void
sgx_epc_page_remove(struct sgx_softc *sc,
    struct epc_page *epc)
{

	mtx_lock(&sc->mtx_encls);
	sgx_eremove((void *)epc->base);
	mtx_unlock(&sc->mtx_encls);
}

static void
sgx_page_remove(struct sgx_softc *sc, vm_page_t p,
    struct pctrie_iter *pages)
{
	struct epc_page *epc;
	vm_paddr_t pa;
	uint64_t offs;

	if (pages != NULL)
		(void)vm_page_iter_remove(pages, p);
	else
		(void)vm_page_remove(p);

	dprintf("%s: p->pidx %ld\n", __func__, p->pindex);

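	/* Translate the page's physical address into its epc_pages[] slot. */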
	pa = VM_PAGE_TO_PHYS(p);
	epc = &sc->epc_pages[0];
	offs = (pa - epc->phys) / PAGE_SIZE;
	epc = &sc->epc_pages[offs];

	sgx_epc_page_remove(sc, epc);
	sgx_put_epc_page(sc, epc);
}

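/*
 * Tear down an enclave: unlink it from the softc list, then EREMOVE and
 * free every EPC page in its VM object. All pages except SECS go first;
 * the hardware refuses to EREMOVE a SECS page while child pages still
 * exist.
 */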
static void
sgx_enclave_remove(struct sgx_softc *sc,
    struct sgx_enclave *enclave)
{
	struct pctrie_iter pages;
	vm_object_t object;
	vm_page_t p, p_secs;

	mtx_lock(&sc->mtx);
	TAILQ_REMOVE(&sc->enclaves, enclave, next);
	mtx_unlock(&sc->mtx);

	object = enclave->object;

	vm_page_iter_init(&pages, object);
	VM_OBJECT_WLOCK(object);

	/*
	 * First remove all the pages except SECS,
	 * then remove the SECS page.
	 */
restart:
	VM_RADIX_FOREACH(p, &pages) {
		if (p->pindex == SGX_SECS_VM_OBJECT_INDEX)
			continue;
		if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) {
			pctrie_iter_reset(&pages);
			goto restart;
		}
		sgx_page_remove(sc, p, &pages);
	}

	/* Now remove the SECS page. */
	p_secs = vm_page_grab(object, SGX_SECS_VM_OBJECT_INDEX,
	    VM_ALLOC_NOCREAT);
	if (p_secs != NULL)
		sgx_page_remove(sc, p_secs, NULL);

	KASSERT(object->resident_page_count == 0, ("count"));

	VM_OBJECT_WUNLOCK(object);
}

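/*
 * Measure a page with EEXTEND, 256 bytes at a time. mrmask selects which
 * of the page's sixteen 256-byte chunks are included in the enclave
 * measurement.
 */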
static int
sgx_measure_page(struct sgx_softc *sc, struct epc_page *secs,
    struct epc_page *epc, uint16_t mrmask)
{
	int i, j;
	int ret;

	mtx_lock(&sc->mtx_encls);

	for (i = 0, j = 1; i < PAGE_SIZE; i += 0x100, j <<= 1) {
		if (!(j & mrmask))
			continue;

		ret = sgx_eextend((void *)secs->base,
		    (void *)(epc->base + i));
		if (ret == SGX_EFAULT) {
			mtx_unlock(&sc->mtx_encls);
			return (ret);
		}
	}

	mtx_unlock(&sc->mtx_encls);

	return (0);
}

static int
sgx_secs_validate(struct sgx_softc *sc, struct secs *secs)
{
	struct secs_attr *attr;
	int i;

	if (secs->size == 0)
		return (EINVAL);

	/* BASEADDR must be naturally aligned on an SECS.SIZE boundary. */
	if (secs->base & (secs->size - 1))
		return (EINVAL);

	/* SECS.SIZE must be at least 2 pages and a power of 2. */
	if (secs->size < 2 * PAGE_SIZE)
		return (EINVAL);

	if ((secs->size & (secs->size - 1)) != 0)
		return (EINVAL);

	attr = &secs->attributes;

	if (attr->reserved1 != 0 ||
	    attr->reserved2 != 0 ||
	    attr->reserved3 != 0)
		return (EINVAL);

	for (i = 0; i < SECS_ATTR_RSV4_SIZE; i++)
		if (attr->reserved4[i])
			return (EINVAL);

	/*
	 * Intel® Software Guard Extensions Programming Reference
	 * 6.7.2 Relevant Fields in Various Data Structures
	 * 6.7.2.1 SECS.ATTRIBUTES.XFRM
	 * XFRM[1:0] must be set to 0x3.
	 */
	if ((attr->xfrm & 0x3) != 0x3)
		return (EINVAL);

	if (!attr->mode64bit)
		return (EINVAL);

	if (secs->size > sc->enclave_size_max)
		return (EINVAL);

	for (i = 0; i < SECS_RSV1_SIZE; i++)
		if (secs->reserved1[i])
			return (EINVAL);

	for (i = 0; i < SECS_RSV2_SIZE; i++)
		if (secs->reserved2[i])
			return (EINVAL);

	for (i = 0; i < SECS_RSV3_SIZE; i++)
		if (secs->reserved3[i])
			return (EINVAL);

	for (i = 0; i < SECS_RSV4_SIZE; i++)
		if (secs->reserved4[i])
			return (EINVAL);

	return (0);
}

static int
sgx_tcs_validate(struct tcs *tcs)
{
	int i;

	if ((tcs->flags) ||
	    (tcs->ossa & (PAGE_SIZE - 1)) ||
	    (tcs->ofsbasgx & (PAGE_SIZE - 1)) ||
	    (tcs->ogsbasgx & (PAGE_SIZE - 1)) ||
	    ((tcs->fslimit & 0xfff) != 0xfff) ||
	    ((tcs->gslimit & 0xfff) != 0xfff))
		return (EINVAL);

	for (i = 0; i < nitems(tcs->reserved3); i++)
		if (tcs->reserved3[i])
			return (EINVAL);

	return (0);
}

static void
sgx_tcs_dump(struct sgx_softc *sc, struct tcs *t)
{

	dprintf("t->flags %lx\n", t->flags);
	dprintf("t->ossa %lx\n", t->ossa);
	dprintf("t->cssa %x\n", t->cssa);
	dprintf("t->nssa %x\n", t->nssa);
	dprintf("t->oentry %lx\n", t->oentry);
	dprintf("t->ofsbasgx %lx\n", t->ofsbasgx);
	dprintf("t->ogsbasgx %lx\n", t->ogsbasgx);
	dprintf("t->fslimit %x\n", t->fslimit);
	dprintf("t->gslimit %x\n", t->gslimit);
}

static int
sgx_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	struct sgx_vm_handle *vmh;

	vmh = handle;
	if (vmh == NULL) {
		dprintf("%s: vmh not found.\n", __func__);
		return (0);
	}

	dprintf("%s: vmh->base %lx foff 0x%lx size 0x%lx\n",
	    __func__, vmh->base, foff, size);

	return (0);
}

static void
sgx_pg_dtor(void *handle)
{
	struct sgx_vm_handle *vmh;
	struct sgx_softc *sc;

	vmh = handle;
	if (vmh == NULL) {
		dprintf("%s: vmh not found.\n", __func__);
		return;
	}

	sc = vmh->sc;
	if (sc == NULL) {
		dprintf("%s: sc is NULL\n", __func__);
		return;
	}

	if (vmh->enclave == NULL) {
		dprintf("%s: Enclave not found.\n", __func__);
		return;
	}

	sgx_enclave_remove(sc, vmh->enclave);

	free(vmh->enclave, M_SGX);
	free(vmh, M_SGX);
}

static int
sgx_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres)
{

	/*
	 * The purpose of this trivial handler is to handle the race
	 * when the user tries to access the mmapped region before or
	 * during the enclave creation ioctl calls.
	 */

	dprintf("%s: offset 0x%lx\n", __func__, offset);

	return (VM_PAGER_FAIL);
}

static void
sgx_pg_path(void *handle, char *path, size_t len)
{
	strlcpy(path, "sgx", len);
}

static struct cdev_pager_ops sgx_pg_ops = {
	.cdev_pg_ctor = sgx_pg_ctor,
	.cdev_pg_dtor = sgx_pg_dtor,
	.cdev_pg_fault = sgx_pg_fault,
	.cdev_pg_path = sgx_pg_path,
};

static void
sgx_insert_epc_page_by_index(vm_page_t page, vm_object_t object,
    vm_pindex_t pidx)
{

	VM_OBJECT_ASSERT_WLOCKED(object);

	page->valid = VM_PAGE_BITS_ALL;
	vm_page_insert(page, object, pidx);
}

static void
sgx_insert_epc_page(struct sgx_enclave *enclave,
    struct epc_page *epc, uint64_t addr)
{
	vm_pindex_t pidx;
	vm_page_t page;

	VM_OBJECT_ASSERT_WLOCKED(enclave->object);

	pidx = OFF_TO_IDX(addr);
	page = PHYS_TO_VM_PAGE(epc->phys);

	sgx_insert_epc_page_by_index(page, enclave->object, pidx);
}

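/*
 * SGX_IOC_ENCLAVE_CREATE: copy in and validate the user-supplied SECS,
 * allocate an EPC page for it and run ECREATE.
 */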
static int
sgx_ioctl_create(struct sgx_softc *sc, struct sgx_enclave_create *param)
{
	struct sgx_vm_handle *vmh;
	vm_map_entry_t entry;
	vm_page_t p;
	struct page_info pginfo;
	struct secinfo secinfo;
	struct sgx_enclave *enclave;
	struct epc_page *epc;
	struct secs *secs;
	vm_object_t object;
	vm_page_t page;
	int ret;

	epc = NULL;
	secs = NULL;
	enclave = NULL;
	object = NULL;

	/* SGX Enclave Control Structure (SECS) */
	secs = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
	ret = copyin((void *)param->src, secs, sizeof(struct secs));
	if (ret) {
		dprintf("%s: Can't copy SECS.\n", __func__);
		goto error;
	}

	ret = sgx_secs_validate(sc, secs);
	if (ret) {
		dprintf("%s: SECS validation failed.\n", __func__);
		goto error;
	}

	ret = sgx_mem_find(sc, secs->base, &entry, &object);
	if (ret) {
		dprintf("%s: Can't find vm_map.\n", __func__);
		goto error;
	}

	vmh = object->handle;
	if (!vmh) {
		dprintf("%s: Can't find vmh.\n", __func__);
		ret = ENXIO;
		goto error;
	}

	dprintf("%s: entry start %lx offset %lx\n",
	    __func__, entry->start, entry->offset);
	vmh->base = (entry->start - entry->offset);

	ret = sgx_enclave_alloc(sc, secs, &enclave);
	if (ret) {
		dprintf("%s: Can't alloc enclave.\n", __func__);
		goto error;
	}
	enclave->object = object;
	enclave->vmh = vmh;

	memset(&secinfo, 0, sizeof(struct secinfo));
	memset(&pginfo, 0, sizeof(struct page_info));
	pginfo.linaddr = 0;
	pginfo.srcpge = (uint64_t)secs;
	pginfo.secinfo = &secinfo;
	pginfo.secs = 0;

	ret = sgx_get_epc_page(sc, &epc);
	if (ret) {
		dprintf("%s: Failed to get free epc page.\n", __func__);
		goto error;
	}
	enclave->secs_epc_page = epc;

	VM_OBJECT_WLOCK(object);
	p = vm_page_lookup(object, SGX_SECS_VM_OBJECT_INDEX);
	if (p) {
		VM_OBJECT_WUNLOCK(object);
		/* SECS page already added. */
		ret = ENXIO;
		goto error;
	}

	ret = sgx_va_slot_init_by_index(sc, object,
	    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX);
	if (ret) {
		VM_OBJECT_WUNLOCK(object);
		dprintf("%s: Can't init va slot.\n", __func__);
		goto error;
	}

	mtx_lock(&sc->mtx);
	if ((sc->state & SGX_STATE_RUNNING) == 0) {
		mtx_unlock(&sc->mtx);
		/* Remove the VA page that was just created for the SECS page. */
		p = vm_page_grab(enclave->object,
		    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX,
		    VM_ALLOC_NOCREAT);
		sgx_page_remove(sc, p, NULL);
		VM_OBJECT_WUNLOCK(object);
		ret = ENXIO;
		goto error;
	}
	mtx_lock(&sc->mtx_encls);
	ret = sgx_ecreate(&pginfo, (void *)epc->base);
	mtx_unlock(&sc->mtx_encls);
	if (ret == SGX_EFAULT) {
		dprintf("%s: gp fault\n", __func__);
		mtx_unlock(&sc->mtx);
		/* Remove the VA page that was just created for the SECS page. */
		p = vm_page_grab(enclave->object,
		    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX,
		    VM_ALLOC_NOCREAT);
		sgx_page_remove(sc, p, NULL);
		VM_OBJECT_WUNLOCK(object);
		goto error;
	}

	TAILQ_INSERT_TAIL(&sc->enclaves, enclave, next);
	mtx_unlock(&sc->mtx);

	vmh->enclave = enclave;

	page = PHYS_TO_VM_PAGE(epc->phys);
	sgx_insert_epc_page_by_index(page, enclave->object,
	    SGX_SECS_VM_OBJECT_INDEX);

	VM_OBJECT_WUNLOCK(object);

	/* Release the reference. */
	vm_object_deallocate(object);

	free(secs, M_SGX);

	return (0);

error:
	free(secs, M_SGX);
	sgx_put_epc_page(sc, epc);
	free(enclave, M_SGX);
	vm_object_deallocate(object);

	return (ret);
}

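/*
 * SGX_IOC_ENCLAVE_ADD_PAGE: copy in a source page and its SECINFO, run
 * EADD to place a TCS or REG page into the enclave and EEXTEND to measure
 * it, then map the EPC page into the enclave's VM object.
 */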
static int
sgx_ioctl_add_page(struct sgx_softc *sc,
    struct sgx_enclave_add_page *addp)
{
	struct epc_page *secs_epc_page;
	struct sgx_enclave *enclave;
	struct sgx_vm_handle *vmh;
	struct epc_page *epc;
	struct page_info pginfo;
	struct secinfo secinfo;
	vm_object_t object;
	void *tmp_vaddr;
	uint64_t page_type;
	struct tcs *t;
	uint64_t addr;
	uint64_t pidx;
	vm_page_t p;
	int ret;

	tmp_vaddr = NULL;
	epc = NULL;
	object = NULL;

	/* Find and get a reference to the VM object. */
	ret = sgx_enclave_find(sc, addp->addr, &enclave);
	if (ret) {
		dprintf("%s: Failed to find enclave.\n", __func__);
		goto error;
	}

	object = enclave->object;
	KASSERT(object != NULL, ("vm object is NULL\n"));
	vmh = object->handle;

	ret = sgx_get_epc_page(sc, &epc);
	if (ret) {
		dprintf("%s: Failed to get free epc page.\n", __func__);
		goto error;
	}

	memset(&secinfo, 0, sizeof(struct secinfo));
	ret = copyin((void *)addp->secinfo, &secinfo,
	    sizeof(struct secinfo));
	if (ret) {
		dprintf("%s: Failed to copy secinfo.\n", __func__);
		goto error;
	}

	tmp_vaddr = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
	ret = copyin((void *)addp->src, tmp_vaddr, PAGE_SIZE);
	if (ret) {
		dprintf("%s: Failed to copy page.\n", __func__);
		goto error;
	}

	page_type = (secinfo.flags & SECINFO_FLAGS_PT_M) >>
	    SECINFO_FLAGS_PT_S;
	if (page_type != SGX_PT_TCS && page_type != SGX_PT_REG) {
		dprintf("%s: page can't be added.\n", __func__);
		ret = EINVAL;
		goto error;
	}
	if (page_type == SGX_PT_TCS) {
		t = (struct tcs *)tmp_vaddr;
		ret = sgx_tcs_validate(t);
		if (ret) {
			dprintf("%s: TCS page validation failed.\n",
			    __func__);
			goto error;
		}
		sgx_tcs_dump(sc, t);
	}

	addr = (addp->addr - vmh->base);
	pidx = OFF_TO_IDX(addr);

	VM_OBJECT_WLOCK(object);
	p = vm_page_lookup(object, pidx);
	if (p) {
		VM_OBJECT_WUNLOCK(object);
		/* Page already added. */
		ret = ENXIO;
		goto error;
	}

	ret = sgx_va_slot_init(sc, enclave, addr);
	if (ret) {
		VM_OBJECT_WUNLOCK(object);
		dprintf("%s: Can't init va slot.\n", __func__);
		goto error;
	}

	secs_epc_page = enclave->secs_epc_page;
	memset(&pginfo, 0, sizeof(struct page_info));
	pginfo.linaddr = (uint64_t)addp->addr;
	pginfo.srcpge = (uint64_t)tmp_vaddr;
	pginfo.secinfo = &secinfo;
	pginfo.secs = (uint64_t)secs_epc_page->base;

	mtx_lock(&sc->mtx_encls);
	ret = sgx_eadd(&pginfo, (void *)epc->base);
	if (ret == SGX_EFAULT) {
		dprintf("%s: gp fault on eadd\n", __func__);
		mtx_unlock(&sc->mtx_encls);
		VM_OBJECT_WUNLOCK(object);
		goto error;
	}
	mtx_unlock(&sc->mtx_encls);

	ret = sgx_measure_page(sc, enclave->secs_epc_page, epc, addp->mrmask);
	if (ret == SGX_EFAULT) {
		dprintf("%s: gp fault on eextend\n", __func__);
		sgx_epc_page_remove(sc, epc);
		VM_OBJECT_WUNLOCK(object);
		goto error;
	}

	sgx_insert_epc_page(enclave, epc, addr);

	VM_OBJECT_WUNLOCK(object);

	/* Release the reference. */
	vm_object_deallocate(object);

	free(tmp_vaddr, M_SGX);

	return (0);

error:
	free(tmp_vaddr, M_SGX);
	sgx_put_epc_page(sc, epc);
	vm_object_deallocate(object);

	return (ret);
}

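/*
 * SGX_IOC_ENCLAVE_INIT: copy in SIGSTRUCT and EINITTOKEN and run EINIT to
 * finalize enclave creation, retrying a few times if an unmasked event
 * (e.g. an interrupt) arrives during the long-running instruction.
 */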
static int
sgx_ioctl_init(struct sgx_softc *sc, struct sgx_enclave_init *initp)
{
	struct epc_page *secs_epc_page;
	struct sgx_enclave *enclave;
	struct thread *td;
	void *tmp_vaddr;
	void *einittoken;
	void *sigstruct;
	vm_object_t object;
	int retry;
	int ret;

	td = curthread;
	tmp_vaddr = NULL;
	object = NULL;

	dprintf("%s: addr %lx, sigstruct %lx, einittoken %lx\n",
	    __func__, initp->addr, initp->sigstruct, initp->einittoken);

	/* Find and get a reference to the VM object. */
	ret = sgx_enclave_find(sc, initp->addr, &enclave);
	if (ret) {
		dprintf("%s: Failed to find enclave.\n", __func__);
		goto error;
	}

	object = enclave->object;

	tmp_vaddr = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
	sigstruct = tmp_vaddr;
	einittoken = (void *)((uint64_t)sigstruct + PAGE_SIZE / 2);

	ret = copyin((void *)initp->sigstruct, sigstruct,
	    SGX_SIGSTRUCT_SIZE);
	if (ret) {
		dprintf("%s: Failed to copy SIGSTRUCT page.\n", __func__);
		goto error;
	}

	ret = copyin((void *)initp->einittoken, einittoken,
	    SGX_EINITTOKEN_SIZE);
	if (ret) {
		dprintf("%s: Failed to copy EINITTOKEN page.\n", __func__);
		goto error;
	}

	secs_epc_page = enclave->secs_epc_page;
	retry = 16;
	do {
		mtx_lock(&sc->mtx_encls);
		ret = sgx_einit(sigstruct, (void *)secs_epc_page->base,
		    einittoken);
		mtx_unlock(&sc->mtx_encls);
		dprintf("%s: sgx_einit returned %d\n", __func__, ret);
	} while (ret == SGX_UNMASKED_EVENT && retry--);

	if (ret) {
		dprintf("%s: Failed to init enclave: %d\n", __func__, ret);
		td->td_retval[0] = ret;
		ret = 0;
	}

error:
	free(tmp_vaddr, M_SGX);

	/* Release the reference. */
	vm_object_deallocate(object);

	return (ret);
}

static int
sgx_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct sgx_enclave_add_page *addp;
	struct sgx_enclave_create *param;
	struct sgx_enclave_init *initp;
	struct sgx_softc *sc;
	int ret;
	int len;

	sc = &sgx_sc;

	len = IOCPARM_LEN(cmd);

	dprintf("%s: cmd %lx, addr %lx, len %d\n",
	    __func__, cmd, (uint64_t)addr, len);

	if (len > SGX_IOCTL_MAX_DATA_LEN)
		return (EINVAL);

	switch (cmd) {
	case SGX_IOC_ENCLAVE_CREATE:
		param = (struct sgx_enclave_create *)addr;
		ret = sgx_ioctl_create(sc, param);
		break;
	case SGX_IOC_ENCLAVE_ADD_PAGE:
		addp = (struct sgx_enclave_add_page *)addr;
		ret = sgx_ioctl_add_page(sc, addp);
		break;
	case SGX_IOC_ENCLAVE_INIT:
		initp = (struct sgx_enclave_init *)addr;
		ret = sgx_ioctl_init(sc, initp);
		break;
	default:
		return (EINVAL);
	}

	return (ret);
}

static int
sgx_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
    vm_size_t mapsize, struct vm_object **objp, int nprot)
{
	struct sgx_vm_handle *vmh;
	struct sgx_softc *sc;

	sc = &sgx_sc;

	dprintf("%s: mapsize 0x%lx, offset %lx\n",
	    __func__, mapsize, *offset);

	vmh = malloc(sizeof(struct sgx_vm_handle),
	    M_SGX, M_WAITOK | M_ZERO);
	vmh->sc = sc;
	vmh->size = mapsize;
	vmh->mem = cdev_pager_allocate(vmh, OBJT_MGTDEVICE, &sgx_pg_ops,
	    mapsize, nprot, *offset, NULL);
	if (vmh->mem == NULL) {
		free(vmh, M_SGX);
		return (ENOMEM);
	}

	VM_OBJECT_WLOCK(vmh->mem);
	vm_object_set_flag(vmh->mem, OBJ_PG_DTOR);
	VM_OBJECT_WUNLOCK(vmh->mem);

	*objp = vmh->mem;

	return (0);
}

static struct cdevsw sgx_cdevsw = {
	.d_version =		D_VERSION,
	.d_ioctl =		sgx_ioctl,
	.d_mmap_single =	sgx_mmap_single,
	.d_name =		"Intel SGX",
};

static int
sgx_get_epc_area(struct sgx_softc *sc)
{
	vm_offset_t epc_base_vaddr;
	u_int cp[4];
	int error;
	int i;

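	/*
	 * CPUID.(EAX=SGX_CPUID, ECX=2) enumerates the EPC section: bits of
	 * the physical base are split across EAX/EBX and the size across
	 * ECX/EDX, per the masks applied below.
	 */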
	cpuid_count(SGX_CPUID, 0x2, cp);

	sc->epc_base = ((uint64_t)(cp[1] & 0xfffff) << 32) +
	    (cp[0] & 0xfffff000);
	sc->epc_size = ((uint64_t)(cp[3] & 0xfffff) << 32) +
	    (cp[2] & 0xfffff000);
	sc->npages = sc->epc_size / SGX_PAGE_SIZE;

	if (sc->epc_size == 0 || sc->epc_base == 0) {
		printf("%s: Incorrect EPC data: EPC base %lx, size %lu\n",
		    __func__, sc->epc_base, sc->epc_size);
		return (EINVAL);
	}

	if (cp[3] & 0xffff)
		sc->enclave_size_max = (1 << ((cp[3] >> 8) & 0xff));
	else
		sc->enclave_size_max = SGX_ENCL_SIZE_MAX_DEF;

	epc_base_vaddr = (vm_offset_t)pmap_mapdev_attr(sc->epc_base,
	    sc->epc_size, VM_MEMATTR_DEFAULT);

	sc->epc_pages = malloc(sizeof(struct epc_page) * sc->npages,
	    M_SGX, M_WAITOK | M_ZERO);

	for (i = 0; i < sc->npages; i++) {
		sc->epc_pages[i].base = epc_base_vaddr + SGX_PAGE_SIZE * i;
		sc->epc_pages[i].phys = sc->epc_base + SGX_PAGE_SIZE * i;
		sc->epc_pages[i].index = i;
	}

	sc->vmem_epc = vmem_create("SGX EPC", sc->epc_base, sc->epc_size,
	    PAGE_SIZE, PAGE_SIZE, M_FIRSTFIT | M_WAITOK);
	if (sc->vmem_epc == NULL) {
		printf("%s: Can't create vmem arena.\n", __func__);
		free(sc->epc_pages, M_SGX);
		return (EINVAL);
	}

	error = vm_phys_fictitious_reg_range(sc->epc_base,
	    sc->epc_base + sc->epc_size, VM_MEMATTR_DEFAULT);
	if (error) {
		printf("%s: Can't register fictitious space.\n", __func__);
		free(sc->epc_pages, M_SGX);
		return (EINVAL);
	}

	return (0);
}

static void
sgx_put_epc_area(struct sgx_softc *sc)
{

	vm_phys_fictitious_unreg_range(sc->epc_base,
	    sc->epc_base + sc->epc_size);

	free(sc->epc_pages, M_SGX);
}

static int
sgx_load(void)
{
	struct sgx_softc *sc;
	int error;

	sc = &sgx_sc;

	if ((cpu_stdext_feature & CPUID_STDEXT_SGX) == 0)
		return (ENXIO);

	error = sgx_get_epc_area(sc);
	if (error) {
		printf("%s: Failed to get Processor Reserved Memory area.\n",
		    __func__);
		return (ENXIO);
	}

	mtx_init(&sc->mtx_encls, "SGX ENCLS", NULL, MTX_DEF);
	mtx_init(&sc->mtx, "SGX driver", NULL, MTX_DEF);

	TAILQ_INIT(&sc->enclaves);

	sc->sgx_cdev = make_dev(&sgx_cdevsw, 0, UID_ROOT, GID_WHEEL,
	    0600, "isgx");

	sc->state |= SGX_STATE_RUNNING;

	printf("SGX initialized: EPC base 0x%lx size %ld (%d pages)\n",
	    sc->epc_base, sc->epc_size, sc->npages);

	return (0);
}

static int
sgx_unload(void)
{
	struct sgx_softc *sc;

	sc = &sgx_sc;

	if ((sc->state & SGX_STATE_RUNNING) == 0)
		return (0);

	mtx_lock(&sc->mtx);
	if (!TAILQ_EMPTY(&sc->enclaves)) {
		mtx_unlock(&sc->mtx);
		return (EBUSY);
	}
	sc->state &= ~SGX_STATE_RUNNING;
	mtx_unlock(&sc->mtx);

	destroy_dev(sc->sgx_cdev);

	vmem_destroy(sc->vmem_epc);
	sgx_put_epc_area(sc);

	mtx_destroy(&sc->mtx_encls);
	mtx_destroy(&sc->mtx);

	return (0);
}

static int
sgx_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = sgx_load();
		break;
	case MOD_UNLOAD:
		error = sgx_unload();
		break;
	default:
		error = 0;
		break;
	}

	return (error);
}

static moduledata_t sgx_kmod = {
	"sgx",
	sgx_handler,
	NULL
};

DECLARE_MODULE(sgx, sgx_kmod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_VERSION(sgx, 1);