xref: /freebsd/sys/amd64/sgx/sgx.c (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf)
1 /*-
2  * Copyright (c) 2017 Ruslan Bukin <br@bsdpad.com>
3  * All rights reserved.
4  *
5  * This software was developed by BAE Systems, the University of Cambridge
6  * Computer Laboratory, and Memorial University under DARPA/AFRL contract
7  * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing
8  * (TC) research program.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Design overview.
34  *
35  * The driver provides character device for mmap(2) and ioctl(2) system calls
36  * allowing user to manage isolated compartments ("enclaves") in user VA space.
37  *
38  * The driver's duties are EPC page management, enclave management and user
39  * data validation.
40  *
41  * This driver requires Intel SGX support from hardware.
42  *
43  * /dev/isgx:
44  *    .mmap:
45  *        sgx_mmap_single() allocates VM object with following pager
46  *        operations:
47  *              a) sgx_pg_ctor():
48  *                  VM object constructor does nothing
49  *              b) sgx_pg_dtor():
50  *                  VM object destructor destroys the SGX enclave associated
51  *                  with the object: it frees all the EPC pages allocated for
52  *                  enclave and removes the enclave.
53  *              c) sgx_pg_fault():
54  *                  VM object fault handler does nothing
55  *
56  *    .ioctl:
57  *        sgx_ioctl():
58  *               a) SGX_IOC_ENCLAVE_CREATE
59  *                   Adds Enclave SECS page: initial step of enclave creation.
60  *               b) SGX_IOC_ENCLAVE_ADD_PAGE
61  *                   Adds TCS, REG pages to the enclave.
62  *               c) SGX_IOC_ENCLAVE_INIT
63  *                   Finalizes enclave creation.
64  *
65  * Enclave lifecycle:
66  *          .-- ECREATE  -- Add SECS page
67  *   Kernel |   EADD     -- Add TCS, REG pages
68  *    space |   EEXTEND  -- Measure the page (take unique hash)
69  *    ENCLS |   EPA      -- Allocate version array page
70  *          '-- EINIT    -- Finalize enclave creation
71  *   User   .-- EENTER   -- Go to entry point of enclave
72  *    space |   EEXIT    -- Exit back to main application
73  *    ENCLU '-- ERESUME  -- Resume enclave execution (e.g. after exception)
74  *
75  * Enclave lifecycle from driver point of view:
76  *  1) User calls mmap() on /dev/isgx: we allocate a VM object
77  *  2) User calls ioctl SGX_IOC_ENCLAVE_CREATE: we look for the VM object
78  *     associated with user process created on step 1, create SECS physical
79  *     page and store it in enclave's VM object queue by special index
80  *     SGX_SECS_VM_OBJECT_INDEX.
81  *  3) User calls ioctl SGX_IOC_ENCLAVE_ADD_PAGE: we look for enclave created
82  *     on step 2, create TCS or REG physical page and map it to specified by
83  *     user address of enclave VM object.
84  *  4) User finalizes enclave creation with ioctl SGX_IOC_ENCLAVE_INIT call.
85  *  5) User can freely enter to and exit from enclave using ENCLU instructions
86  *     from userspace: the driver does nothing here.
87  *  6) User issues the munmap(2) system call (or the process with the enclave dies):
88  *     we destroy the enclave associated with the object.
89  *
90  * EPC page types and their indexes in VM object queue:
91  *   - PT_SECS index is special and equals SGX_SECS_VM_OBJECT_INDEX (-1);
92  *   - PT_TCS and PT_REG indexes are specified by user in addr field of ioctl
93  *     request data and determined as follows:
94  *       pidx = OFF_TO_IDX(addp->addr - vmh->base);
95  *   - PT_VA index is special, created for PT_REG, PT_TCS and PT_SECS pages
96  *     and determined by formula:
97  *       va_page_idx = - SGX_VA_PAGES_OFFS - (page_idx / SGX_VA_PAGE_SLOTS);
98  *     PT_VA page can hold versions of up to 512 pages, and slot for each
99  *     page in PT_VA page is determined as follows:
100  *       va_slot_idx = page_idx % SGX_VA_PAGE_SLOTS;
101  *   - PT_TRIM is unused.
102  *
103  * Locking:
104  *    SGX ENCLS set of instructions have limitations on concurrency:
105  *    some instructions can't be executed at the same time on different CPUs.
106  *    We use sc->mtx_encls lock around them to prevent concurrent execution.
107  *    sc->mtx lock is used to manage list of created enclaves and the state of
108  *    SGX driver.
109  *
110  * Eviction of EPC pages:
111  *    Eviction support is not implemented in this driver, however the driver
112  *    manages VA (version array) pages: it allocates a VA slot for each EPC
113  *    page. This will be required for eviction support in future.
114  *    VA pages and slots are currently unused.
115  *
116  * Intel® 64 and IA-32 Architectures Software Developer's Manual
117  * https://software.intel.com/en-us/articles/intel-sdm
118  */
119 
120 #include <sys/cdefs.h>
121 #include <sys/param.h>
122 #include <sys/systm.h>
123 #include <sys/ioccom.h>
124 #include <sys/malloc.h>
125 #include <sys/kernel.h>
126 #include <sys/lock.h>
127 #include <sys/mutex.h>
128 #include <sys/rwlock.h>
129 #include <sys/conf.h>
130 #include <sys/module.h>
131 #include <sys/proc.h>
132 #include <sys/vmem.h>
133 #include <sys/vmmeter.h>
134 
135 #include <vm/vm.h>
136 #include <vm/vm_param.h>
137 #include <vm/vm_extern.h>
138 #include <vm/vm_kern.h>
139 #include <vm/vm_page.h>
140 #include <vm/vm_map.h>
141 #include <vm/vm_object.h>
142 #include <vm/vm_pager.h>
143 #include <vm/vm_phys.h>
144 #include <vm/vm_radix.h>
145 #include <vm/pmap.h>
146 
147 #include <machine/md_var.h>
148 #include <machine/specialreg.h>
149 #include <machine/cpufunc.h>
150 #include <machine/sgx.h>
151 #include <machine/sgxreg.h>
152 
153 #include <amd64/sgx/sgxvar.h>
154 
155 #define	SGX_DEBUG
156 #undef	SGX_DEBUG
157 
158 #ifdef	SGX_DEBUG
159 #define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
160 #else
161 #define	dprintf(fmt, ...)
162 #endif
163 
164 static struct cdev_pager_ops sgx_pg_ops;
165 struct sgx_softc sgx_sc;
166 
167 static int
168 sgx_get_epc_page(struct sgx_softc *sc, struct epc_page **epc)
169 {
170 	vmem_addr_t addr;
171 	int i;
172 
173 	if (vmem_alloc(sc->vmem_epc, PAGE_SIZE, M_FIRSTFIT | M_NOWAIT,
174 	    &addr) == 0) {
175 		i = (addr - sc->epc_base) / PAGE_SIZE;
176 		*epc = &sc->epc_pages[i];
177 		return (0);
178 	}
179 
180 	return (ENOMEM);
181 }
182 
183 static void
184 sgx_put_epc_page(struct sgx_softc *sc, struct epc_page *epc)
185 {
186 	vmem_addr_t addr;
187 
188 	if (epc == NULL)
189 		return;
190 
191 	addr = (epc->index * PAGE_SIZE) + sc->epc_base;
192 	vmem_free(sc->vmem_epc, addr, PAGE_SIZE);
193 }
194 
195 static int
196 sgx_va_slot_init_by_index(struct sgx_softc *sc, vm_object_t object,
197     uint64_t idx)
198 {
199 	struct epc_page *epc;
200 	vm_page_t page;
201 	vm_page_t p;
202 	int ret;
203 
204 	VM_OBJECT_ASSERT_WLOCKED(object);
205 
206 	p = vm_page_lookup(object, idx);
207 	if (p == NULL) {
208 		ret = sgx_get_epc_page(sc, &epc);
209 		if (ret) {
210 			dprintf("%s: No free EPC pages available.\n",
211 			    __func__);
212 			return (ret);
213 		}
214 
215 		mtx_lock(&sc->mtx_encls);
216 		sgx_epa((void *)epc->base);
217 		mtx_unlock(&sc->mtx_encls);
218 
219 		page = PHYS_TO_VM_PAGE(epc->phys);
220 
221 		page->valid = VM_PAGE_BITS_ALL;
222 		vm_page_insert(page, object, idx);
223 	}
224 
225 	return (0);
226 }
227 
228 static int
229 sgx_va_slot_init(struct sgx_softc *sc,
230     struct sgx_enclave *enclave,
231     uint64_t addr)
232 {
233 	vm_pindex_t pidx;
234 	uint64_t va_page_idx;
235 	uint64_t idx;
236 	vm_object_t object;
237 	int ret;
238 
239 	object = enclave->object;
240 
241 	VM_OBJECT_ASSERT_WLOCKED(object);
242 
243 	pidx = OFF_TO_IDX(addr);
244 
245 	va_page_idx = pidx / SGX_VA_PAGE_SLOTS;
246 	idx = - SGX_VA_PAGES_OFFS - va_page_idx;
247 
248 	ret = sgx_va_slot_init_by_index(sc, object, idx);
249 
250 	return (ret);
251 }
252 
253 static int
254 sgx_mem_find(struct sgx_softc *sc, uint64_t addr,
255     vm_map_entry_t *entry0, vm_object_t *object0)
256 {
257 	vm_map_t map;
258 	vm_map_entry_t entry;
259 	vm_object_t object;
260 
261 	map = &curproc->p_vmspace->vm_map;
262 
263 	vm_map_lock_read(map);
264 	if (!vm_map_lookup_entry(map, addr, &entry)) {
265 		vm_map_unlock_read(map);
266 		dprintf("%s: Can't find enclave.\n", __func__);
267 		return (EINVAL);
268 	}
269 
270 	object = entry->object.vm_object;
271 	if (object == NULL || object->handle == NULL) {
272 		vm_map_unlock_read(map);
273 		return (EINVAL);
274 	}
275 
276 	if (object->type != OBJT_MGTDEVICE ||
277 	    object->un_pager.devp.ops != &sgx_pg_ops) {
278 		vm_map_unlock_read(map);
279 		return (EINVAL);
280 	}
281 
282 	vm_object_reference(object);
283 
284 	*object0 = object;
285 	*entry0 = entry;
286 	vm_map_unlock_read(map);
287 
288 	return (0);
289 }
290 
291 static int
292 sgx_enclave_find(struct sgx_softc *sc, uint64_t addr,
293     struct sgx_enclave **encl)
294 {
295 	struct sgx_vm_handle *vmh;
296 	struct sgx_enclave *enclave;
297 	vm_map_entry_t entry;
298 	vm_object_t object;
299 	int ret;
300 
301 	ret = sgx_mem_find(sc, addr, &entry, &object);
302 	if (ret)
303 		return (ret);
304 
305 	vmh = object->handle;
306 	if (vmh == NULL) {
307 		vm_object_deallocate(object);
308 		return (EINVAL);
309 	}
310 
311 	enclave = vmh->enclave;
312 	if (enclave == NULL || enclave->object == NULL) {
313 		vm_object_deallocate(object);
314 		return (EINVAL);
315 	}
316 
317 	*encl = enclave;
318 
319 	return (0);
320 }
321 
322 static int
323 sgx_enclave_alloc(struct sgx_softc *sc, struct secs *secs,
324     struct sgx_enclave **enclave0)
325 {
326 	struct sgx_enclave *enclave;
327 
328 	enclave = malloc(sizeof(struct sgx_enclave),
329 	    M_SGX, M_WAITOK | M_ZERO);
330 
331 	enclave->base = secs->base;
332 	enclave->size = secs->size;
333 
334 	*enclave0 = enclave;
335 
336 	return (0);
337 }
338 
339 static void
340 sgx_epc_page_remove(struct sgx_softc *sc,
341     struct epc_page *epc)
342 {
343 
344 	mtx_lock(&sc->mtx_encls);
345 	sgx_eremove((void *)epc->base);
346 	mtx_unlock(&sc->mtx_encls);
347 }
348 
349 static void
350 sgx_page_remove(struct sgx_softc *sc, vm_page_t p)
351 {
352 	struct epc_page *epc;
353 	vm_paddr_t pa;
354 	uint64_t offs;
355 
356 	(void)vm_page_remove(p);
357 
358 	dprintf("%s: p->pidx %ld\n", __func__, p->pindex);
359 
360 	pa = VM_PAGE_TO_PHYS(p);
361 	epc = &sc->epc_pages[0];
362 	offs = (pa - epc->phys) / PAGE_SIZE;
363 	epc = &sc->epc_pages[offs];
364 
365 	sgx_epc_page_remove(sc, epc);
366 	sgx_put_epc_page(sc, epc);
367 }
368 
369 static void
370 sgx_enclave_remove(struct sgx_softc *sc,
371     struct sgx_enclave *enclave)
372 {
373 	vm_object_t object;
374 	vm_page_t p, p_secs, p_next;
375 
376 	mtx_lock(&sc->mtx);
377 	TAILQ_REMOVE(&sc->enclaves, enclave, next);
378 	mtx_unlock(&sc->mtx);
379 
380 	object = enclave->object;
381 
382 	VM_OBJECT_WLOCK(object);
383 
384 	/*
385 	 * First remove all the pages except SECS,
386 	 * then remove SECS page.
387 	 */
388 restart:
389 	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
390 		if (p->pindex == SGX_SECS_VM_OBJECT_INDEX)
391 			continue;
392 		if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0)
393 			goto restart;
394 		sgx_page_remove(sc, p);
395 	}
396 	p_secs = vm_page_grab(object, SGX_SECS_VM_OBJECT_INDEX,
397 	    VM_ALLOC_NOCREAT);
398 	/* Now remove SECS page */
399 	if (p_secs != NULL)
400 		sgx_page_remove(sc, p_secs);
401 
402 	KASSERT(TAILQ_EMPTY(&object->memq) == 1, ("not empty"));
403 	KASSERT(object->resident_page_count == 0, ("count"));
404 
405 	VM_OBJECT_WUNLOCK(object);
406 }
407 
408 static int
409 sgx_measure_page(struct sgx_softc *sc, struct epc_page *secs,
410     struct epc_page *epc, uint16_t mrmask)
411 {
412 	int i, j;
413 	int ret;
414 
415 	mtx_lock(&sc->mtx_encls);
416 
417 	for (i = 0, j = 1; i < PAGE_SIZE; i += 0x100, j <<= 1) {
418 		if (!(j & mrmask))
419 			continue;
420 
421 		ret = sgx_eextend((void *)secs->base,
422 		    (void *)(epc->base + i));
423 		if (ret == SGX_EFAULT) {
424 			mtx_unlock(&sc->mtx_encls);
425 			return (ret);
426 		}
427 	}
428 
429 	mtx_unlock(&sc->mtx_encls);
430 
431 	return (0);
432 }
433 
434 static int
435 sgx_secs_validate(struct sgx_softc *sc, struct secs *secs)
436 {
437 	struct secs_attr *attr;
438 	int i;
439 
440 	if (secs->size == 0)
441 		return (EINVAL);
442 
443 	/* BASEADDR must be naturally aligned on an SECS.SIZE boundary. */
444 	if (secs->base & (secs->size - 1))
445 		return (EINVAL);
446 
447 	/* SECS.SIZE must be at least 2 pages. */
448 	if (secs->size < 2 * PAGE_SIZE)
449 		return (EINVAL);
450 
451 	if ((secs->size & (secs->size - 1)) != 0)
452 		return (EINVAL);
453 
454 	attr = &secs->attributes;
455 
456 	if (attr->reserved1 != 0 ||
457 	    attr->reserved2 != 0 ||
458 	    attr->reserved3 != 0)
459 		return (EINVAL);
460 
461 	for (i = 0; i < SECS_ATTR_RSV4_SIZE; i++)
462 		if (attr->reserved4[i])
463 			return (EINVAL);
464 
465 	/*
466 	 * Intel® Software Guard Extensions Programming Reference
467 	 * 6.7.2 Relevant Fields in Various Data Structures
468 	 * 6.7.2.1 SECS.ATTRIBUTES.XFRM
469 	 * XFRM[1:0] must be set to 0x3.
470 	 */
471 	if ((attr->xfrm & 0x3) != 0x3)
472 		return (EINVAL);
473 
474 	if (!attr->mode64bit)
475 		return (EINVAL);
476 
477 	if (secs->size > sc->enclave_size_max)
478 		return (EINVAL);
479 
480 	for (i = 0; i < SECS_RSV1_SIZE; i++)
481 		if (secs->reserved1[i])
482 			return (EINVAL);
483 
484 	for (i = 0; i < SECS_RSV2_SIZE; i++)
485 		if (secs->reserved2[i])
486 			return (EINVAL);
487 
488 	for (i = 0; i < SECS_RSV3_SIZE; i++)
489 		if (secs->reserved3[i])
490 			return (EINVAL);
491 
492 	for (i = 0; i < SECS_RSV4_SIZE; i++)
493 		if (secs->reserved4[i])
494 			return (EINVAL);
495 
496 	return (0);
497 }
498 
499 static int
500 sgx_tcs_validate(struct tcs *tcs)
501 {
502 	int i;
503 
504 	if ((tcs->flags) ||
505 	    (tcs->ossa & (PAGE_SIZE - 1)) ||
506 	    (tcs->ofsbasgx & (PAGE_SIZE - 1)) ||
507 	    (tcs->ogsbasgx & (PAGE_SIZE - 1)) ||
508 	    ((tcs->fslimit & 0xfff) != 0xfff) ||
509 	    ((tcs->gslimit & 0xfff) != 0xfff))
510 		return (EINVAL);
511 
512 	for (i = 0; i < nitems(tcs->reserved3); i++)
513 		if (tcs->reserved3[i])
514 			return (EINVAL);
515 
516 	return (0);
517 }
518 
519 static void
520 sgx_tcs_dump(struct sgx_softc *sc, struct tcs *t)
521 {
522 
523 	dprintf("t->flags %lx\n", t->flags);
524 	dprintf("t->ossa %lx\n", t->ossa);
525 	dprintf("t->cssa %x\n", t->cssa);
526 	dprintf("t->nssa %x\n", t->nssa);
527 	dprintf("t->oentry %lx\n", t->oentry);
528 	dprintf("t->ofsbasgx %lx\n", t->ofsbasgx);
529 	dprintf("t->ogsbasgx %lx\n", t->ogsbasgx);
530 	dprintf("t->fslimit %x\n", t->fslimit);
531 	dprintf("t->gslimit %x\n", t->gslimit);
532 }
533 
534 static int
535 sgx_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
536     vm_ooffset_t foff, struct ucred *cred, u_short *color)
537 {
538 	struct sgx_vm_handle *vmh;
539 
540 	vmh = handle;
541 	if (vmh == NULL) {
542 		dprintf("%s: vmh not found.\n", __func__);
543 		return (0);
544 	}
545 
546 	dprintf("%s: vmh->base %lx foff 0x%lx size 0x%lx\n",
547 	    __func__, vmh->base, foff, size);
548 
549 	return (0);
550 }
551 
552 static void
553 sgx_pg_dtor(void *handle)
554 {
555 	struct sgx_vm_handle *vmh;
556 	struct sgx_softc *sc;
557 
558 	vmh = handle;
559 	if (vmh == NULL) {
560 		dprintf("%s: vmh not found.\n", __func__);
561 		return;
562 	}
563 
564 	sc = vmh->sc;
565 	if (sc == NULL) {
566 		dprintf("%s: sc is NULL\n", __func__);
567 		return;
568 	}
569 
570 	if (vmh->enclave == NULL) {
571 		dprintf("%s: Enclave not found.\n", __func__);
572 		return;
573 	}
574 
575 	sgx_enclave_remove(sc, vmh->enclave);
576 
577 	free(vmh->enclave, M_SGX);
578 	free(vmh, M_SGX);
579 }
580 
581 static int
582 sgx_pg_fault(vm_object_t object, vm_ooffset_t offset,
583     int prot, vm_page_t *mres)
584 {
585 
586 	/*
587 	 * The purpose of this trivial handler is to handle the race
588 	 * when user tries to access mmaped region before or during
589 	 * enclave creation ioctl calls.
590 	 */
591 
592 	dprintf("%s: offset 0x%lx\n", __func__, offset);
593 
594 	return (VM_PAGER_FAIL);
595 }
596 
597 static struct cdev_pager_ops sgx_pg_ops = {
598 	.cdev_pg_ctor = sgx_pg_ctor,
599 	.cdev_pg_dtor = sgx_pg_dtor,
600 	.cdev_pg_fault = sgx_pg_fault,
601 };
602 
603 static void
604 sgx_insert_epc_page_by_index(vm_page_t page, vm_object_t object,
605     vm_pindex_t pidx)
606 {
607 
608 	VM_OBJECT_ASSERT_WLOCKED(object);
609 
610 	page->valid = VM_PAGE_BITS_ALL;
611 	vm_page_insert(page, object, pidx);
612 }
613 
614 static void
615 sgx_insert_epc_page(struct sgx_enclave *enclave,
616     struct epc_page *epc, uint64_t addr)
617 {
618 	vm_pindex_t pidx;
619 	vm_page_t page;
620 
621 	VM_OBJECT_ASSERT_WLOCKED(enclave->object);
622 
623 	pidx = OFF_TO_IDX(addr);
624 	page = PHYS_TO_VM_PAGE(epc->phys);
625 
626 	sgx_insert_epc_page_by_index(page, enclave->object, pidx);
627 }
628 
629 static int
630 sgx_ioctl_create(struct sgx_softc *sc, struct sgx_enclave_create *param)
631 {
632 	struct sgx_vm_handle *vmh;
633 	vm_map_entry_t entry;
634 	vm_page_t p;
635 	struct page_info pginfo;
636 	struct secinfo secinfo;
637 	struct sgx_enclave *enclave;
638 	struct epc_page *epc;
639 	struct secs *secs;
640 	vm_object_t object;
641 	vm_page_t page;
642 	int ret;
643 
644 	epc = NULL;
645 	secs = NULL;
646 	enclave = NULL;
647 	object = NULL;
648 
649 	/* SGX Enclave Control Structure (SECS) */
650 	secs = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
651 	ret = copyin((void *)param->src, secs, sizeof(struct secs));
652 	if (ret) {
653 		dprintf("%s: Can't copy SECS.\n", __func__);
654 		goto error;
655 	}
656 
657 	ret = sgx_secs_validate(sc, secs);
658 	if (ret) {
659 		dprintf("%s: SECS validation failed.\n", __func__);
660 		goto error;
661 	}
662 
663 	ret = sgx_mem_find(sc, secs->base, &entry, &object);
664 	if (ret) {
665 		dprintf("%s: Can't find vm_map.\n", __func__);
666 		goto error;
667 	}
668 
669 	vmh = object->handle;
670 	if (!vmh) {
671 		dprintf("%s: Can't find vmh.\n", __func__);
672 		ret = ENXIO;
673 		goto error;
674 	}
675 
676 	dprintf("%s: entry start %lx offset %lx\n",
677 	    __func__, entry->start, entry->offset);
678 	vmh->base = (entry->start - entry->offset);
679 
680 	ret = sgx_enclave_alloc(sc, secs, &enclave);
681 	if (ret) {
682 		dprintf("%s: Can't alloc enclave.\n", __func__);
683 		goto error;
684 	}
685 	enclave->object = object;
686 	enclave->vmh = vmh;
687 
688 	memset(&secinfo, 0, sizeof(struct secinfo));
689 	memset(&pginfo, 0, sizeof(struct page_info));
690 	pginfo.linaddr = 0;
691 	pginfo.srcpge = (uint64_t)secs;
692 	pginfo.secinfo = &secinfo;
693 	pginfo.secs = 0;
694 
695 	ret = sgx_get_epc_page(sc, &epc);
696 	if (ret) {
697 		dprintf("%s: Failed to get free epc page.\n", __func__);
698 		goto error;
699 	}
700 	enclave->secs_epc_page = epc;
701 
702 	VM_OBJECT_WLOCK(object);
703 	p = vm_page_lookup(object, SGX_SECS_VM_OBJECT_INDEX);
704 	if (p) {
705 		VM_OBJECT_WUNLOCK(object);
706 		/* SECS page already added. */
707 		ret = ENXIO;
708 		goto error;
709 	}
710 
711 	ret = sgx_va_slot_init_by_index(sc, object,
712 	    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX);
713 	if (ret) {
714 		VM_OBJECT_WUNLOCK(object);
715 		dprintf("%s: Can't init va slot.\n", __func__);
716 		goto error;
717 	}
718 
719 	mtx_lock(&sc->mtx);
720 	if ((sc->state & SGX_STATE_RUNNING) == 0) {
721 		mtx_unlock(&sc->mtx);
722 		/* Remove VA page that was just created for SECS page. */
723 		p = vm_page_grab(enclave->object,
724 		    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX,
725 		    VM_ALLOC_NOCREAT);
726 		sgx_page_remove(sc, p);
727 		VM_OBJECT_WUNLOCK(object);
728 		goto error;
729 	}
730 	mtx_lock(&sc->mtx_encls);
731 	ret = sgx_ecreate(&pginfo, (void *)epc->base);
732 	mtx_unlock(&sc->mtx_encls);
733 	if (ret == SGX_EFAULT) {
734 		dprintf("%s: gp fault\n", __func__);
735 		mtx_unlock(&sc->mtx);
736 		/* Remove VA page that was just created for SECS page. */
737 		p = vm_page_grab(enclave->object,
738 		    - SGX_VA_PAGES_OFFS - SGX_SECS_VM_OBJECT_INDEX,
739 		    VM_ALLOC_NOCREAT);
740 		sgx_page_remove(sc, p);
741 		VM_OBJECT_WUNLOCK(object);
742 		goto error;
743 	}
744 
745 	TAILQ_INSERT_TAIL(&sc->enclaves, enclave, next);
746 	mtx_unlock(&sc->mtx);
747 
748 	vmh->enclave = enclave;
749 
750 	page = PHYS_TO_VM_PAGE(epc->phys);
751 	sgx_insert_epc_page_by_index(page, enclave->object,
752 	    SGX_SECS_VM_OBJECT_INDEX);
753 
754 	VM_OBJECT_WUNLOCK(object);
755 
756 	/* Release the reference. */
757 	vm_object_deallocate(object);
758 
759 	free(secs, M_SGX);
760 
761 	return (0);
762 
763 error:
764 	free(secs, M_SGX);
765 	sgx_put_epc_page(sc, epc);
766 	free(enclave, M_SGX);
767 	vm_object_deallocate(object);
768 
769 	return (ret);
770 }
771 
772 static int
773 sgx_ioctl_add_page(struct sgx_softc *sc,
774     struct sgx_enclave_add_page *addp)
775 {
776 	struct epc_page *secs_epc_page;
777 	struct sgx_enclave *enclave;
778 	struct sgx_vm_handle *vmh;
779 	struct epc_page *epc;
780 	struct page_info pginfo;
781 	struct secinfo secinfo;
782 	vm_object_t object;
783 	void *tmp_vaddr;
784 	uint64_t page_type;
785 	struct tcs *t;
786 	uint64_t addr;
787 	uint64_t pidx;
788 	vm_page_t p;
789 	int ret;
790 
791 	tmp_vaddr = NULL;
792 	epc = NULL;
793 	object = NULL;
794 
795 	/* Find and get reference to VM object. */
796 	ret = sgx_enclave_find(sc, addp->addr, &enclave);
797 	if (ret) {
798 		dprintf("%s: Failed to find enclave.\n", __func__);
799 		goto error;
800 	}
801 
802 	object = enclave->object;
803 	KASSERT(object != NULL, ("vm object is NULL\n"));
804 	vmh = object->handle;
805 
806 	ret = sgx_get_epc_page(sc, &epc);
807 	if (ret) {
808 		dprintf("%s: Failed to get free epc page.\n", __func__);
809 		goto error;
810 	}
811 
812 	memset(&secinfo, 0, sizeof(struct secinfo));
813 	ret = copyin((void *)addp->secinfo, &secinfo,
814 	    sizeof(struct secinfo));
815 	if (ret) {
816 		dprintf("%s: Failed to copy secinfo.\n", __func__);
817 		goto error;
818 	}
819 
820 	tmp_vaddr = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
821 	ret = copyin((void *)addp->src, tmp_vaddr, PAGE_SIZE);
822 	if (ret) {
823 		dprintf("%s: Failed to copy page.\n", __func__);
824 		goto error;
825 	}
826 
827 	page_type = (secinfo.flags & SECINFO_FLAGS_PT_M) >>
828 	    SECINFO_FLAGS_PT_S;
829 	if (page_type != SGX_PT_TCS && page_type != SGX_PT_REG) {
830 		dprintf("%s: page can't be added.\n", __func__);
831 		goto error;
832 	}
833 	if (page_type == SGX_PT_TCS) {
834 		t = (struct tcs *)tmp_vaddr;
835 		ret = sgx_tcs_validate(t);
836 		if (ret) {
837 			dprintf("%s: TCS page validation failed.\n",
838 			    __func__);
839 			goto error;
840 		}
841 		sgx_tcs_dump(sc, t);
842 	}
843 
844 	addr = (addp->addr - vmh->base);
845 	pidx = OFF_TO_IDX(addr);
846 
847 	VM_OBJECT_WLOCK(object);
848 	p = vm_page_lookup(object, pidx);
849 	if (p) {
850 		VM_OBJECT_WUNLOCK(object);
851 		/* Page already added. */
852 		ret = ENXIO;
853 		goto error;
854 	}
855 
856 	ret = sgx_va_slot_init(sc, enclave, addr);
857 	if (ret) {
858 		VM_OBJECT_WUNLOCK(object);
859 		dprintf("%s: Can't init va slot.\n", __func__);
860 		goto error;
861 	}
862 
863 	secs_epc_page = enclave->secs_epc_page;
864 	memset(&pginfo, 0, sizeof(struct page_info));
865 	pginfo.linaddr = (uint64_t)addp->addr;
866 	pginfo.srcpge = (uint64_t)tmp_vaddr;
867 	pginfo.secinfo = &secinfo;
868 	pginfo.secs = (uint64_t)secs_epc_page->base;
869 
870 	mtx_lock(&sc->mtx_encls);
871 	ret = sgx_eadd(&pginfo, (void *)epc->base);
872 	if (ret == SGX_EFAULT) {
873 		dprintf("%s: gp fault on eadd\n", __func__);
874 		mtx_unlock(&sc->mtx_encls);
875 		VM_OBJECT_WUNLOCK(object);
876 		goto error;
877 	}
878 	mtx_unlock(&sc->mtx_encls);
879 
880 	ret = sgx_measure_page(sc, enclave->secs_epc_page, epc, addp->mrmask);
881 	if (ret == SGX_EFAULT) {
882 		dprintf("%s: gp fault on eextend\n", __func__);
883 		sgx_epc_page_remove(sc, epc);
884 		VM_OBJECT_WUNLOCK(object);
885 		goto error;
886 	}
887 
888 	sgx_insert_epc_page(enclave, epc, addr);
889 
890 	VM_OBJECT_WUNLOCK(object);
891 
892 	/* Release the reference. */
893 	vm_object_deallocate(object);
894 
895 	free(tmp_vaddr, M_SGX);
896 
897 	return (0);
898 
899 error:
900 	free(tmp_vaddr, M_SGX);
901 	sgx_put_epc_page(sc, epc);
902 	vm_object_deallocate(object);
903 
904 	return (ret);
905 }
906 
907 static int
908 sgx_ioctl_init(struct sgx_softc *sc, struct sgx_enclave_init *initp)
909 {
910 	struct epc_page *secs_epc_page;
911 	struct sgx_enclave *enclave;
912 	struct thread *td;
913 	void *tmp_vaddr;
914 	void *einittoken;
915 	void *sigstruct;
916 	vm_object_t object;
917 	int retry;
918 	int ret;
919 
920 	td = curthread;
921 	tmp_vaddr = NULL;
922 	object = NULL;
923 
924 	dprintf("%s: addr %lx, sigstruct %lx, einittoken %lx\n",
925 	    __func__, initp->addr, initp->sigstruct, initp->einittoken);
926 
927 	/* Find and get reference to VM object. */
928 	ret = sgx_enclave_find(sc, initp->addr, &enclave);
929 	if (ret) {
930 		dprintf("%s: Failed to find enclave.\n", __func__);
931 		goto error;
932 	}
933 
934 	object = enclave->object;
935 
936 	tmp_vaddr = malloc(PAGE_SIZE, M_SGX, M_WAITOK | M_ZERO);
937 	sigstruct = tmp_vaddr;
938 	einittoken = (void *)((uint64_t)sigstruct + PAGE_SIZE / 2);
939 
940 	ret = copyin((void *)initp->sigstruct, sigstruct,
941 	    SGX_SIGSTRUCT_SIZE);
942 	if (ret) {
943 		dprintf("%s: Failed to copy SIGSTRUCT page.\n", __func__);
944 		goto error;
945 	}
946 
947 	ret = copyin((void *)initp->einittoken, einittoken,
948 	    SGX_EINITTOKEN_SIZE);
949 	if (ret) {
950 		dprintf("%s: Failed to copy EINITTOKEN page.\n", __func__);
951 		goto error;
952 	}
953 
954 	secs_epc_page = enclave->secs_epc_page;
955 	retry = 16;
956 	do {
957 		mtx_lock(&sc->mtx_encls);
958 		ret = sgx_einit(sigstruct, (void *)secs_epc_page->base,
959 		    einittoken);
960 		mtx_unlock(&sc->mtx_encls);
961 		dprintf("%s: sgx_einit returned %d\n", __func__, ret);
962 	} while (ret == SGX_UNMASKED_EVENT && retry--);
963 
964 	if (ret) {
965 		dprintf("%s: Failed init enclave: %d\n", __func__, ret);
966 		td->td_retval[0] = ret;
967 		ret = 0;
968 	}
969 
970 error:
971 	free(tmp_vaddr, M_SGX);
972 
973 	/* Release the reference. */
974 	vm_object_deallocate(object);
975 
976 	return (ret);
977 }
978 
979 static int
980 sgx_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
981     struct thread *td)
982 {
983 	struct sgx_enclave_add_page *addp;
984 	struct sgx_enclave_create *param;
985 	struct sgx_enclave_init *initp;
986 	struct sgx_softc *sc;
987 	int ret;
988 	int len;
989 
990 	sc = &sgx_sc;
991 
992 	len = IOCPARM_LEN(cmd);
993 
994 	dprintf("%s: cmd %lx, addr %lx, len %d\n",
995 	    __func__, cmd, (uint64_t)addr, len);
996 
997 	if (len > SGX_IOCTL_MAX_DATA_LEN)
998 		return (EINVAL);
999 
1000 	switch (cmd) {
1001 	case SGX_IOC_ENCLAVE_CREATE:
1002 		param = (struct sgx_enclave_create *)addr;
1003 		ret = sgx_ioctl_create(sc, param);
1004 		break;
1005 	case SGX_IOC_ENCLAVE_ADD_PAGE:
1006 		addp = (struct sgx_enclave_add_page *)addr;
1007 		ret = sgx_ioctl_add_page(sc, addp);
1008 		break;
1009 	case SGX_IOC_ENCLAVE_INIT:
1010 		initp = (struct sgx_enclave_init *)addr;
1011 		ret = sgx_ioctl_init(sc, initp);
1012 		break;
1013 	default:
1014 		return (EINVAL);
1015 	}
1016 
1017 	return (ret);
1018 }
1019 
1020 static int
1021 sgx_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
1022     vm_size_t mapsize, struct vm_object **objp, int nprot)
1023 {
1024 	struct sgx_vm_handle *vmh;
1025 	struct sgx_softc *sc;
1026 
1027 	sc = &sgx_sc;
1028 
1029 	dprintf("%s: mapsize 0x%lx, offset %lx\n",
1030 	    __func__, mapsize, *offset);
1031 
1032 	vmh = malloc(sizeof(struct sgx_vm_handle),
1033 	    M_SGX, M_WAITOK | M_ZERO);
1034 	vmh->sc = sc;
1035 	vmh->size = mapsize;
1036 	vmh->mem = cdev_pager_allocate(vmh, OBJT_MGTDEVICE, &sgx_pg_ops,
1037 	    mapsize, nprot, *offset, NULL);
1038 	if (vmh->mem == NULL) {
1039 		free(vmh, M_SGX);
1040 		return (ENOMEM);
1041 	}
1042 
1043 	VM_OBJECT_WLOCK(vmh->mem);
1044 	vm_object_set_flag(vmh->mem, OBJ_PG_DTOR);
1045 	VM_OBJECT_WUNLOCK(vmh->mem);
1046 
1047 	*objp = vmh->mem;
1048 
1049 	return (0);
1050 }
1051 
1052 static struct cdevsw sgx_cdevsw = {
1053 	.d_version =		D_VERSION,
1054 	.d_ioctl =		sgx_ioctl,
1055 	.d_mmap_single =	sgx_mmap_single,
1056 	.d_name =		"Intel SGX",
1057 };
1058 
1059 static int
1060 sgx_get_epc_area(struct sgx_softc *sc)
1061 {
1062 	vm_offset_t epc_base_vaddr;
1063 	u_int cp[4];
1064 	int error;
1065 	int i;
1066 
1067 	cpuid_count(SGX_CPUID, 0x2, cp);
1068 
1069 	sc->epc_base = ((uint64_t)(cp[1] & 0xfffff) << 32) +
1070 	    (cp[0] & 0xfffff000);
1071 	sc->epc_size = ((uint64_t)(cp[3] & 0xfffff) << 32) +
1072 	    (cp[2] & 0xfffff000);
1073 	sc->npages = sc->epc_size / SGX_PAGE_SIZE;
1074 
1075 	if (sc->epc_size == 0 || sc->epc_base == 0) {
1076 		printf("%s: Incorrect EPC data: EPC base %lx, size %lu\n",
1077 		    __func__, sc->epc_base, sc->epc_size);
1078 		return (EINVAL);
1079 	}
1080 
1081 	if (cp[3] & 0xffff)
1082 		sc->enclave_size_max = (1 << ((cp[3] >> 8) & 0xff));
1083 	else
1084 		sc->enclave_size_max = SGX_ENCL_SIZE_MAX_DEF;
1085 
1086 	epc_base_vaddr = (vm_offset_t)pmap_mapdev_attr(sc->epc_base,
1087 	    sc->epc_size, VM_MEMATTR_DEFAULT);
1088 
1089 	sc->epc_pages = malloc(sizeof(struct epc_page) * sc->npages,
1090 	    M_DEVBUF, M_WAITOK | M_ZERO);
1091 
1092 	for (i = 0; i < sc->npages; i++) {
1093 		sc->epc_pages[i].base = epc_base_vaddr + SGX_PAGE_SIZE * i;
1094 		sc->epc_pages[i].phys = sc->epc_base + SGX_PAGE_SIZE * i;
1095 		sc->epc_pages[i].index = i;
1096 	}
1097 
1098 	sc->vmem_epc = vmem_create("SGX EPC", sc->epc_base, sc->epc_size,
1099 	    PAGE_SIZE, PAGE_SIZE, M_FIRSTFIT | M_WAITOK);
1100 	if (sc->vmem_epc == NULL) {
1101 		printf("%s: Can't create vmem arena.\n", __func__);
1102 		free(sc->epc_pages, M_SGX);
1103 		return (EINVAL);
1104 	}
1105 
1106 	error = vm_phys_fictitious_reg_range(sc->epc_base,
1107 	    sc->epc_base + sc->epc_size, VM_MEMATTR_DEFAULT);
1108 	if (error) {
1109 		printf("%s: Can't register fictitious space.\n", __func__);
1110 		free(sc->epc_pages, M_SGX);
1111 		return (EINVAL);
1112 	}
1113 
1114 	return (0);
1115 }
1116 
1117 static void
1118 sgx_put_epc_area(struct sgx_softc *sc)
1119 {
1120 
1121 	vm_phys_fictitious_unreg_range(sc->epc_base,
1122 	    sc->epc_base + sc->epc_size);
1123 
1124 	free(sc->epc_pages, M_SGX);
1125 }
1126 
1127 static int
1128 sgx_load(void)
1129 {
1130 	struct sgx_softc *sc;
1131 	int error;
1132 
1133 	sc = &sgx_sc;
1134 
1135 	if ((cpu_stdext_feature & CPUID_STDEXT_SGX) == 0)
1136 		return (ENXIO);
1137 
1138 	error = sgx_get_epc_area(sc);
1139 	if (error) {
1140 		printf("%s: Failed to get Processor Reserved Memory area.\n",
1141 		    __func__);
1142 		return (ENXIO);
1143 	}
1144 
1145 	mtx_init(&sc->mtx_encls, "SGX ENCLS", NULL, MTX_DEF);
1146 	mtx_init(&sc->mtx, "SGX driver", NULL, MTX_DEF);
1147 
1148 	TAILQ_INIT(&sc->enclaves);
1149 
1150 	sc->sgx_cdev = make_dev(&sgx_cdevsw, 0, UID_ROOT, GID_WHEEL,
1151 	    0600, "isgx");
1152 
1153 	sc->state |= SGX_STATE_RUNNING;
1154 
1155 	printf("SGX initialized: EPC base 0x%lx size %ld (%d pages)\n",
1156 	    sc->epc_base, sc->epc_size, sc->npages);
1157 
1158 	return (0);
1159 }
1160 
1161 static int
1162 sgx_unload(void)
1163 {
1164 	struct sgx_softc *sc;
1165 
1166 	sc = &sgx_sc;
1167 
1168 	if ((sc->state & SGX_STATE_RUNNING) == 0)
1169 		return (0);
1170 
1171 	mtx_lock(&sc->mtx);
1172 	if (!TAILQ_EMPTY(&sc->enclaves)) {
1173 		mtx_unlock(&sc->mtx);
1174 		return (EBUSY);
1175 	}
1176 	sc->state &= ~SGX_STATE_RUNNING;
1177 	mtx_unlock(&sc->mtx);
1178 
1179 	destroy_dev(sc->sgx_cdev);
1180 
1181 	vmem_destroy(sc->vmem_epc);
1182 	sgx_put_epc_area(sc);
1183 
1184 	mtx_destroy(&sc->mtx_encls);
1185 	mtx_destroy(&sc->mtx);
1186 
1187 	return (0);
1188 }
1189 
1190 static int
1191 sgx_handler(module_t mod, int what, void *arg)
1192 {
1193 	int error;
1194 
1195 	switch (what) {
1196 	case MOD_LOAD:
1197 		error = sgx_load();
1198 		break;
1199 	case MOD_UNLOAD:
1200 		error = sgx_unload();
1201 		break;
1202 	default:
1203 		error = 0;
1204 		break;
1205 	}
1206 
1207 	return (error);
1208 }
1209 
1210 static moduledata_t sgx_kmod = {
1211 	"sgx",
1212 	sgx_handler,
1213 	NULL
1214 };
1215 
1216 DECLARE_MODULE(sgx, sgx_kmod, SI_SUB_LAST, SI_ORDER_ANY);
1217 MODULE_VERSION(sgx, 1);
1218