xref: /linux/arch/x86/kernel/cpu/sgx/encl.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*  Copyright(c) 2016-20 Intel Corporation. */
3 
4 #include <linux/lockdep.h>
5 #include <linux/mm.h>
6 #include <linux/mman.h>
7 #include <linux/shmem_fs.h>
8 #include <linux/suspend.h>
9 #include <linux/sched/mm.h>
10 #include <asm/sgx.h>
11 #include "encl.h"
12 #include "encls.h"
13 #include "sgx.h"
14 
15 static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
16 			    struct sgx_backing *backing);
17 
18 #define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
19 /*
20  * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
21  * determine the page index associated with the first PCMD entry
22  * within a PCMD page.
23  */
24 #define PCMD_FIRST_MASK GENMASK(4, 0)
25 
26 /**
27  * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
28  *                               a PCMD page is in process of being reclaimed.
29  * @encl:        Enclave to which PCMD page belongs
30  * @start_addr:  Address of enclave page using first entry within the PCMD page
31  *
32  * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is
33  * stored. The PCMD data of a reclaimed enclave page contains enough
34  * information for the processor to verify the page at the time
35  * it is loaded back into the Enclave Page Cache (EPC).
36  *
37  * The backing storage to which enclave pages are reclaimed is laid out as
38  * follows:
39  * Encrypted enclave pages:SECS page:PCMD pages
40  *
41  * Each PCMD page contains the PCMD metadata of
42  * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
43  *
44  * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
45  * process of getting data (and thus soon being non-empty). (b) is tested with
46  * a check if an enclave page sharing the PCMD page is in the process of being
47  * reclaimed.
48  *
49  * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
50  * intends to reclaim that enclave page - it means that the PCMD page
51  * associated with that enclave page is about to get some data and thus
52  * even if the PCMD page is empty, it should not be truncated.
53  *
54  * Context: Enclave mutex (&sgx_encl->lock) must be held.
55  * Return: 1 if the reclaimer is about to write to the PCMD page
56  *         0 if the reclaimer has no intention to write to the PCMD page
57  */
58 static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
59 				     unsigned long start_addr)
60 {
61 	int reclaimed = 0;
62 	int i;
63 
64 	/*
65 	 * PCMD_FIRST_MASK is based on number of PCMD entries within
66 	 * PCMD page being 32.
67 	 */
68 	BUILD_BUG_ON(PCMDS_PER_PAGE != 32);
69 
70 	for (i = 0; i < PCMDS_PER_PAGE; i++) {
71 		struct sgx_encl_page *entry;
72 		unsigned long addr;
73 
74 		addr = start_addr + i * PAGE_SIZE;
75 
76 		/*
77 		 * Stop when reaching the SECS page - it does not
78 		 * have a page_array entry and its reclaim is
79 		 * started and completed with enclave mutex held so
80 		 * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
81 		 * flag.
82 		 */
83 		if (addr == encl->base + encl->size)
84 			break;
85 
86 		entry = xa_load(&encl->page_array, PFN_DOWN(addr));
87 		if (!entry)
88 			continue;
89 
90 		/*
91 		 * VA page slot ID uses same bit as the flag so it is important
92 		 * to ensure that the page is not already in backing store.
93 		 */
94 		if (entry->epc_page &&
95 		    (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
96 			reclaimed = 1;
97 			break;
98 		}
99 	}
100 
101 	return reclaimed;
102 }
103 
104 /*
105  * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's
106  * follow right after the EPC data in the backing storage. In addition to the
107  * visible enclave pages, there's one extra page slot for SECS, before PCMD
108  * structs.
109  */
110 static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
111 							    unsigned long page_index)
112 {
113 	pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);
114 
115 	return epc_end_off + page_index * sizeof(struct sgx_pcmd);
116 }
117 
118 /*
119  * Free a page from the backing storage in the given page index.
120  */
121 static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
122 {
123 	struct inode *inode = file_inode(encl->backing);
124 
125 	shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
126 }
127 
128 /*
129  * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
130  * Pages" in the SDM.
131  */
132 static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
133 			   struct sgx_epc_page *epc_page,
134 			   struct sgx_epc_page *secs_page)
135 {
136 	unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
137 	struct sgx_encl *encl = encl_page->encl;
138 	pgoff_t page_index, page_pcmd_off;
139 	unsigned long pcmd_first_page;
140 	struct sgx_pageinfo pginfo;
141 	struct sgx_backing b;
142 	bool pcmd_page_empty;
143 	u8 *pcmd_page;
144 	int ret;
145 
146 	if (secs_page)
147 		page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
148 	else
149 		page_index = PFN_DOWN(encl->size);
150 
151 	/*
152 	 * Address of enclave page using the first entry within the PCMD page.
153 	 */
154 	pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;
155 
156 	page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
157 
158 	ret = sgx_encl_lookup_backing(encl, page_index, &b);
159 	if (ret)
160 		return ret;
161 
162 	pginfo.addr = encl_page->desc & PAGE_MASK;
163 	pginfo.contents = (unsigned long)kmap_local_page(b.contents);
164 	pcmd_page = kmap_local_page(b.pcmd);
165 	pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
166 
167 	if (secs_page)
168 		pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
169 	else
170 		pginfo.secs = 0;
171 
172 	ret = __eldu(&pginfo, sgx_get_epc_virt_addr(epc_page),
173 		     sgx_get_epc_virt_addr(encl_page->va_page->epc_page) + va_offset);
174 	if (ret) {
175 		if (encls_failed(ret))
176 			ENCLS_WARN(ret, "ELDU");
177 
178 		ret = -EFAULT;
179 	}
180 
181 	memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
182 	set_page_dirty(b.pcmd);
183 
184 	/*
185 	 * The area for the PCMD in the page was zeroed above.  Check if the
186 	 * whole page is now empty meaning that all PCMD's have been zeroed:
187 	 */
188 	pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
189 
190 	kunmap_local(pcmd_page);
191 	kunmap_local((void *)(unsigned long)pginfo.contents);
192 
193 	get_page(b.pcmd);
194 	sgx_encl_put_backing(&b);
195 
196 	sgx_encl_truncate_backing_page(encl, page_index);
197 
198 	if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
199 		sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
200 		pcmd_page = kmap_local_page(b.pcmd);
201 		if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
202 			pr_warn("PCMD page not empty after truncate.\n");
203 		kunmap_local(pcmd_page);
204 	}
205 
206 	put_page(b.pcmd);
207 
208 	return ret;
209 }
210 
211 static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
212 					  struct sgx_epc_page *secs_page)
213 {
214 
215 	unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
216 	struct sgx_encl *encl = encl_page->encl;
217 	struct sgx_epc_page *epc_page;
218 	int ret;
219 
220 	epc_page = sgx_alloc_epc_page(encl_page, false);
221 	if (IS_ERR(epc_page))
222 		return epc_page;
223 
224 	ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
225 	if (ret) {
226 		sgx_encl_free_epc_page(epc_page);
227 		return ERR_PTR(ret);
228 	}
229 
230 	sgx_free_va_slot(encl_page->va_page, va_offset);
231 	list_move(&encl_page->va_page->list, &encl->va_pages);
232 	encl_page->desc &= ~SGX_ENCL_PAGE_VA_OFFSET_MASK;
233 	encl_page->epc_page = epc_page;
234 
235 	return epc_page;
236 }
237 
238 /*
239  * Ensure the SECS page is not swapped out.  Must be called with encl->lock
240  * to protect the enclave states including SECS and ensure the SECS page is
241  * not swapped out again while being used.
242  */
243 static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
244 {
245 	struct sgx_epc_page *epc_page = encl->secs.epc_page;
246 
247 	if (!epc_page)
248 		epc_page = sgx_encl_eldu(&encl->secs, NULL);
249 
250 	return epc_page;
251 }
252 
253 static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
254 						  struct sgx_encl_page *entry)
255 {
256 	struct sgx_epc_page *epc_page;
257 
258 	/* Entry successfully located. */
259 	if (entry->epc_page) {
260 		if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)
261 			return ERR_PTR(-EBUSY);
262 
263 		return entry;
264 	}
265 
266 	epc_page = sgx_encl_load_secs(encl);
267 	if (IS_ERR(epc_page))
268 		return ERR_CAST(epc_page);
269 
270 	epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
271 	if (IS_ERR(epc_page))
272 		return ERR_CAST(epc_page);
273 
274 	encl->secs_child_cnt++;
275 	sgx_mark_page_reclaimable(entry->epc_page);
276 
277 	return entry;
278 }
279 
280 static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
281 						       unsigned long addr,
282 						       vm_flags_t vm_flags)
283 {
284 	unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
285 	struct sgx_encl_page *entry;
286 
287 	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
288 	if (!entry)
289 		return ERR_PTR(-EFAULT);
290 
291 	/*
292 	 * Verify that the page has equal or higher build time
293 	 * permissions than the VMA permissions (i.e. the subset of {VM_READ,
294 	 * VM_WRITE, VM_EXECUTE} in vma->vm_flags).
295 	 */
296 	if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
297 		return ERR_PTR(-EFAULT);
298 
299 	return __sgx_encl_load_page(encl, entry);
300 }
301 
302 struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
303 					 unsigned long addr)
304 {
305 	struct sgx_encl_page *entry;
306 
307 	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
308 	if (!entry)
309 		return ERR_PTR(-EFAULT);
310 
311 	return __sgx_encl_load_page(encl, entry);
312 }
313 
314 /**
315  * sgx_encl_eaug_page() - Dynamically add page to initialized enclave
316  * @vma:	VMA obtained from fault info from where page is accessed
317  * @encl:	enclave accessing the page
318  * @addr:	address that triggered the page fault
319  *
320  * When an initialized enclave accesses a page with no backing EPC page
321  * on a SGX2 system then the EPC can be added dynamically via the SGX2
322  * ENCLS[EAUG] instruction.
323  *
324  * Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when PTE was installed
325  * successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM as error otherwise.
326  */
327 static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
328 				     struct sgx_encl *encl, unsigned long addr)
329 {
330 	vm_fault_t vmret = VM_FAULT_SIGBUS;
331 	struct sgx_pageinfo pginfo = {0};
332 	struct sgx_encl_page *encl_page;
333 	struct sgx_epc_page *epc_page;
334 	struct sgx_va_page *va_page;
335 	unsigned long phys_addr;
336 	u64 secinfo_flags;
337 	int ret;
338 
339 	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
340 		return VM_FAULT_SIGBUS;
341 
342 	/*
343 	 * Ignore internal permission checking for dynamically added pages.
344 	 * They matter only for data added during the pre-initialization
345 	 * phase. The enclave decides the permissions by the means of
346 	 * EACCEPT, EACCEPTCOPY and EMODPE.
347 	 */
348 	secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
349 	encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
350 	if (IS_ERR(encl_page))
351 		return VM_FAULT_OOM;
352 
353 	mutex_lock(&encl->lock);
354 
355 	epc_page = sgx_encl_load_secs(encl);
356 	if (IS_ERR(epc_page)) {
357 		if (PTR_ERR(epc_page) == -EBUSY)
358 			vmret = VM_FAULT_NOPAGE;
359 		goto err_out_unlock;
360 	}
361 
362 	epc_page = sgx_alloc_epc_page(encl_page, false);
363 	if (IS_ERR(epc_page)) {
364 		if (PTR_ERR(epc_page) == -EBUSY)
365 			vmret =  VM_FAULT_NOPAGE;
366 		goto err_out_unlock;
367 	}
368 
369 	va_page = sgx_encl_grow(encl, false);
370 	if (IS_ERR(va_page)) {
371 		if (PTR_ERR(va_page) == -EBUSY)
372 			vmret = VM_FAULT_NOPAGE;
373 		goto err_out_epc;
374 	}
375 
376 	if (va_page)
377 		list_add(&va_page->list, &encl->va_pages);
378 
379 	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
380 			encl_page, GFP_KERNEL);
381 	/*
382 	 * If ret == -EBUSY then page was created in another flow while
383 	 * running without encl->lock
384 	 */
385 	if (ret)
386 		goto err_out_shrink;
387 
388 	pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
389 	pginfo.addr = encl_page->desc & PAGE_MASK;
390 	pginfo.metadata = 0;
391 
392 	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
393 	if (ret)
394 		goto err_out;
395 
396 	encl_page->encl = encl;
397 	encl_page->epc_page = epc_page;
398 	encl_page->type = SGX_PAGE_TYPE_REG;
399 	encl->secs_child_cnt++;
400 
401 	sgx_mark_page_reclaimable(encl_page->epc_page);
402 
403 	phys_addr = sgx_get_epc_phys_addr(epc_page);
404 	/*
405 	 * Do not undo everything when creating PTE entry fails - next #PF
406 	 * would find page ready for a PTE.
407 	 */
408 	vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
409 	if (vmret != VM_FAULT_NOPAGE) {
410 		mutex_unlock(&encl->lock);
411 		return VM_FAULT_SIGBUS;
412 	}
413 	mutex_unlock(&encl->lock);
414 	return VM_FAULT_NOPAGE;
415 
416 err_out:
417 	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
418 
419 err_out_shrink:
420 	sgx_encl_shrink(encl, va_page);
421 err_out_epc:
422 	sgx_encl_free_epc_page(epc_page);
423 err_out_unlock:
424 	mutex_unlock(&encl->lock);
425 	kfree(encl_page);
426 
427 	return vmret;
428 }
429 
430 static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
431 {
432 	unsigned long addr = (unsigned long)vmf->address;
433 	struct vm_area_struct *vma = vmf->vma;
434 	struct sgx_encl_page *entry;
435 	unsigned long phys_addr;
436 	struct sgx_encl *encl;
437 	vm_fault_t ret;
438 
439 	encl = vma->vm_private_data;
440 
441 	/*
442 	 * It's very unlikely but possible that allocating memory for the
443 	 * mm_list entry of a forked process failed in sgx_vma_open(). When
444 	 * this happens, vm_private_data is set to NULL.
445 	 */
446 	if (unlikely(!encl))
447 		return VM_FAULT_SIGBUS;
448 
449 	/*
450 	 * The page_array keeps track of all enclave pages, whether they
451 	 * are swapped out or not. If there is no entry for this page and
452 	 * the system supports SGX2 then it is possible to dynamically add
453 	 * a new enclave page. This is only possible for an initialized
454 	 * enclave that will be checked for right away.
455 	 */
456 	if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
457 	    (!xa_load(&encl->page_array, PFN_DOWN(addr))))
458 		return sgx_encl_eaug_page(vma, encl, addr);
459 
460 	mutex_lock(&encl->lock);
461 
462 	entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
463 	if (IS_ERR(entry)) {
464 		mutex_unlock(&encl->lock);
465 
466 		if (PTR_ERR(entry) == -EBUSY)
467 			return VM_FAULT_NOPAGE;
468 
469 		return VM_FAULT_SIGBUS;
470 	}
471 
472 	phys_addr = sgx_get_epc_phys_addr(entry->epc_page);
473 
474 	ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
475 	if (ret != VM_FAULT_NOPAGE) {
476 		mutex_unlock(&encl->lock);
477 
478 		return VM_FAULT_SIGBUS;
479 	}
480 
481 	sgx_encl_test_and_clear_young(vma->vm_mm, entry);
482 	mutex_unlock(&encl->lock);
483 
484 	return VM_FAULT_NOPAGE;
485 }
486 
487 static void sgx_vma_open(struct vm_area_struct *vma)
488 {
489 	struct sgx_encl *encl = vma->vm_private_data;
490 
491 	/*
492 	 * It's possible but unlikely that vm_private_data is NULL. This can
493 	 * happen in a grandchild of a process, when sgx_encl_mm_add() had
494 	 * failed to allocate memory in this callback.
495 	 */
496 	if (unlikely(!encl))
497 		return;
498 
499 	if (sgx_encl_mm_add(encl, vma->vm_mm))
500 		vma->vm_private_data = NULL;
501 }
502 
503 
504 /**
505  * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
506  * @encl:		an enclave pointer
507  * @start:		lower bound of the address range, inclusive
508  * @end:		upper bound of the address range, exclusive
509  * @vm_flags:		VMA flags
510  *
511  * Iterate through the enclave pages contained within [@start, @end) to verify
512  * that the permissions requested by a subset of {VM_READ, VM_WRITE, VM_EXEC}
513  * do not contain any permissions that are not contained in the build time
514  * permissions of any of the enclave pages within the given address range.
515  *
516  * An enclave creator must declare the strongest permissions that will be
517  * needed for each enclave page. This ensures that mappings have the identical
518  * or weaker permissions than the earlier declared permissions.
519  *
520  * Return: 0 on success, -EACCES otherwise
521  */
522 int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
523 		     unsigned long end, vm_flags_t vm_flags)
524 {
525 	vm_flags_t vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
526 	struct sgx_encl_page *page;
527 	unsigned long count = 0;
528 	int ret = 0;
529 
530 	XA_STATE(xas, &encl->page_array, PFN_DOWN(start));
531 
532 	/* Disallow mapping outside enclave's address range. */
533 	if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags) &&
534 	    (start < encl->base || end > encl->base + encl->size))
535 		return -EACCES;
536 
537 	/*
538 	 * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
539 	 * conflict with the enclave page permissions.
540 	 */
541 	if (current->personality & READ_IMPLIES_EXEC)
542 		return -EACCES;
543 
544 	mutex_lock(&encl->lock);
545 	xas_lock(&xas);
546 	xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
547 		if (~page->vm_max_prot_bits & vm_prot_bits) {
548 			ret = -EACCES;
549 			break;
550 		}
551 
552 		/* Reschedule on every XA_CHECK_SCHED iteration. */
553 		if (!(++count % XA_CHECK_SCHED)) {
554 			xas_pause(&xas);
555 			xas_unlock(&xas);
556 			mutex_unlock(&encl->lock);
557 
558 			cond_resched();
559 
560 			mutex_lock(&encl->lock);
561 			xas_lock(&xas);
562 		}
563 	}
564 	xas_unlock(&xas);
565 	mutex_unlock(&encl->lock);
566 
567 	return ret;
568 }
569 
570 static int sgx_vma_mprotect(struct vm_area_struct *vma, unsigned long start,
571 			    unsigned long end, unsigned long newflags)
572 {
573 	return sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
574 }
575 
576 static int sgx_encl_debug_read(struct sgx_encl *encl, struct sgx_encl_page *page,
577 			       unsigned long addr, void *data)
578 {
579 	unsigned long offset = addr & ~PAGE_MASK;
580 	int ret;
581 
582 
583 	ret = __edbgrd(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
584 	if (ret)
585 		return -EIO;
586 
587 	return 0;
588 }
589 
590 static int sgx_encl_debug_write(struct sgx_encl *encl, struct sgx_encl_page *page,
591 				unsigned long addr, void *data)
592 {
593 	unsigned long offset = addr & ~PAGE_MASK;
594 	int ret;
595 
596 	ret = __edbgwr(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
597 	if (ret)
598 		return -EIO;
599 
600 	return 0;
601 }
602 
603 /*
604  * Load an enclave page to EPC if required, and take encl->lock.
605  */
606 static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
607 						   unsigned long addr,
608 						   vm_flags_t vm_flags)
609 {
610 	struct sgx_encl_page *entry;
611 
612 	for ( ; ; ) {
613 		mutex_lock(&encl->lock);
614 
615 		entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
616 		if (PTR_ERR(entry) != -EBUSY)
617 			break;
618 
619 		mutex_unlock(&encl->lock);
620 	}
621 
622 	if (IS_ERR(entry))
623 		mutex_unlock(&encl->lock);
624 
625 	return entry;
626 }
627 
628 static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
629 			  void *buf, int len, int write)
630 {
631 	struct sgx_encl *encl = vma->vm_private_data;
632 	struct sgx_encl_page *entry = NULL;
633 	char data[sizeof(unsigned long)];
634 	unsigned long align;
635 	int offset;
636 	int cnt;
637 	int ret = 0;
638 	int i;
639 
640 	/*
641 	 * If process was forked, VMA is still there but vm_private_data is set
642 	 * to NULL.
643 	 */
644 	if (!encl)
645 		return -EFAULT;
646 
647 	if (!test_bit(SGX_ENCL_DEBUG, &encl->flags))
648 		return -EFAULT;
649 
650 	for (i = 0; i < len; i += cnt) {
651 		entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK,
652 					      vma->vm_flags);
653 		if (IS_ERR(entry)) {
654 			ret = PTR_ERR(entry);
655 			break;
656 		}
657 
658 		align = ALIGN_DOWN(addr + i, sizeof(unsigned long));
659 		offset = (addr + i) & (sizeof(unsigned long) - 1);
660 		cnt = sizeof(unsigned long) - offset;
661 		cnt = min(cnt, len - i);
662 
663 		ret = sgx_encl_debug_read(encl, entry, align, data);
664 		if (ret)
665 			goto out;
666 
667 		if (write) {
668 			memcpy(data + offset, buf + i, cnt);
669 			ret = sgx_encl_debug_write(encl, entry, align, data);
670 			if (ret)
671 				goto out;
672 		} else {
673 			memcpy(buf + i, data + offset, cnt);
674 		}
675 
676 out:
677 		mutex_unlock(&encl->lock);
678 
679 		if (ret)
680 			break;
681 	}
682 
683 	return ret < 0 ? ret : i;
684 }
685 
686 const struct vm_operations_struct sgx_vm_ops = {
687 	.fault = sgx_vma_fault,
688 	.mprotect = sgx_vma_mprotect,
689 	.open = sgx_vma_open,
690 	.access = sgx_vma_access,
691 };
692 
693 /**
694  * sgx_encl_release - Destroy an enclave instance
695  * @ref:	address of a kref inside &sgx_encl
696  *
697  * Used together with kref_put(). Frees all the resources associated with the
698  * enclave and the instance itself.
699  */
700 void sgx_encl_release(struct kref *ref)
701 {
702 	struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
703 	unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
704 	struct sgx_va_page *va_page;
705 	struct sgx_encl_page *entry;
706 	unsigned long count = 0;
707 
708 	XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));
709 
710 	xas_lock(&xas);
711 	xas_for_each(&xas, entry, max_page_index) {
712 		if (entry->epc_page) {
713 			/*
714 			 * The page and its radix tree entry cannot be freed
715 			 * if the page is being held by the reclaimer.
716 			 */
717 			if (sgx_unmark_page_reclaimable(entry->epc_page))
718 				continue;
719 
720 			sgx_encl_free_epc_page(entry->epc_page);
721 			encl->secs_child_cnt--;
722 			entry->epc_page = NULL;
723 		}
724 
725 		kfree(entry);
726 		/*
727 		 * Invoke scheduler on every XA_CHECK_SCHED iteration
728 		 * to prevent soft lockups.
729 		 */
730 		if (!(++count % XA_CHECK_SCHED)) {
731 			xas_pause(&xas);
732 			xas_unlock(&xas);
733 
734 			cond_resched();
735 
736 			xas_lock(&xas);
737 		}
738 	}
739 	xas_unlock(&xas);
740 
741 	xa_destroy(&encl->page_array);
742 
743 	if (!encl->secs_child_cnt && encl->secs.epc_page) {
744 		sgx_encl_free_epc_page(encl->secs.epc_page);
745 		encl->secs.epc_page = NULL;
746 	}
747 
748 	while (!list_empty(&encl->va_pages)) {
749 		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
750 					   list);
751 		list_del(&va_page->list);
752 		sgx_encl_free_epc_page(va_page->epc_page);
753 		kfree(va_page);
754 	}
755 
756 	if (encl->backing)
757 		fput(encl->backing);
758 
759 	cleanup_srcu_struct(&encl->srcu);
760 
761 	WARN_ON_ONCE(!list_empty(&encl->mm_list));
762 
763 	/* Detect EPC page leak's. */
764 	WARN_ON_ONCE(encl->secs_child_cnt);
765 	WARN_ON_ONCE(encl->secs.epc_page);
766 
767 	kfree(encl);
768 	sgx_dec_usage_count();
769 }
770 
771 /*
772  * 'mm' is exiting and no longer needs mmu notifications.
773  */
774 static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
775 				     struct mm_struct *mm)
776 {
777 	struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
778 	struct sgx_encl_mm *tmp = NULL;
779 	bool found = false;
780 
781 	/*
782 	 * The enclave itself can remove encl_mm.  Note, objects can't be moved
783 	 * off an RCU protected list, but deletion is ok.
784 	 */
785 	spin_lock(&encl_mm->encl->mm_lock);
786 	list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
787 		if (tmp == encl_mm) {
788 			list_del_rcu(&encl_mm->list);
789 			found = true;
790 			break;
791 		}
792 	}
793 	spin_unlock(&encl_mm->encl->mm_lock);
794 
795 	if (found) {
796 		synchronize_srcu(&encl_mm->encl->srcu);
797 		mmu_notifier_put(mn);
798 	}
799 }
800 
801 static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
802 {
803 	struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
804 
805 	/* 'encl_mm' is going away, put encl_mm->encl reference: */
806 	kref_put(&encl_mm->encl->refcount, sgx_encl_release);
807 
808 	kfree(encl_mm);
809 }
810 
811 static const struct mmu_notifier_ops sgx_mmu_notifier_ops = {
812 	.release		= sgx_mmu_notifier_release,
813 	.free_notifier		= sgx_mmu_notifier_free,
814 };
815 
816 static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
817 					    struct mm_struct *mm)
818 {
819 	struct sgx_encl_mm *encl_mm = NULL;
820 	struct sgx_encl_mm *tmp;
821 	int idx;
822 
823 	idx = srcu_read_lock(&encl->srcu);
824 
825 	list_for_each_entry_rcu(tmp, &encl->mm_list, list) {
826 		if (tmp->mm == mm) {
827 			encl_mm = tmp;
828 			break;
829 		}
830 	}
831 
832 	srcu_read_unlock(&encl->srcu, idx);
833 
834 	return encl_mm;
835 }
836 
837 int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
838 {
839 	struct sgx_encl_mm *encl_mm;
840 	int ret;
841 
842 	/*
843 	 * Even though a single enclave may be mapped into an mm more than once,
844 	 * each 'mm' only appears once on encl->mm_list. This is guaranteed by
845 	 * holding the mm's mmap lock for write before an mm can be added or
846 	 * remove to an encl->mm_list.
847 	 */
848 	mmap_assert_write_locked(mm);
849 
850 	/*
851 	 * It's possible that an entry already exists in the mm_list, because it
852 	 * is removed only on VFS release or process exit.
853 	 */
854 	if (sgx_encl_find_mm(encl, mm))
855 		return 0;
856 
857 	encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL);
858 	if (!encl_mm)
859 		return -ENOMEM;
860 
861 	/* Grab a refcount for the encl_mm->encl reference: */
862 	kref_get(&encl->refcount);
863 	encl_mm->encl = encl;
864 	encl_mm->mm = mm;
865 	encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;
866 
867 	ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm);
868 	if (ret) {
869 		kfree(encl_mm);
870 		return ret;
871 	}
872 
873 	spin_lock(&encl->mm_lock);
874 	list_add_rcu(&encl_mm->list, &encl->mm_list);
875 	/* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
876 	smp_wmb();
877 	encl->mm_list_version++;
878 	spin_unlock(&encl->mm_lock);
879 
880 	return 0;
881 }
882 
883 /**
884  * sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
885  * @encl: the enclave
886  *
887  * Some SGX functions require that no cached linear-to-physical address
888  * mappings are present before they can succeed. For example, ENCLS[EWB]
889  * copies a page from the enclave page cache to regular main memory but
890  * it fails if it cannot ensure that there are no cached
891  * linear-to-physical address mappings referring to the page.
892  *
893  * SGX hardware flushes all cached linear-to-physical mappings on a CPU
894  * when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
895  * Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
896  * address mappings are cleared but coordination with the tracking done within
897  * the SGX hardware is needed to support the SGX functions that depend on this
898  * cache clearing.
899  *
900  * When the ENCLS[ETRACK] function is issued on an enclave the hardware
901  * tracks threads operating inside the enclave at that time. The SGX
902  * hardware tracking require that all the identified threads must have
903  * exited the enclave in order to flush the mappings before a function such
904  * as ENCLS[EWB] will be permitted
905  *
906  * The following flow is used to support SGX functions that require that
907  * no cached linear-to-physical address mappings are present:
908  * 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
909  * 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
910  *    accessing the enclave.
911  * 3) Send IPI to identified CPUs, kicking them out of the enclave and
912  *    thus flushing all locally cached linear-to-physical address mappings.
913  * 4) Execute SGX function.
914  *
915  * Context: It is required to call this function after ENCLS[ETRACK].
916  *          This will ensure that if any new mm appears (racing with
917  *          sgx_encl_mm_add()) then the new mm will enter into the
918  *          enclave with fresh linear-to-physical address mappings.
919  *
920  *          It is required that all IPIs are completed before a new
921  *          ENCLS[ETRACK] is issued so be sure to protect steps 1 to 3
922  *          of the above flow with the enclave's mutex.
923  *
924  * Return: cpumask of CPUs that might be accessing @encl
925  */
926 const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
927 {
928 	cpumask_t *cpumask = &encl->cpumask;
929 	struct sgx_encl_mm *encl_mm;
930 	int idx;
931 
932 	cpumask_clear(cpumask);
933 
934 	idx = srcu_read_lock(&encl->srcu);
935 
936 	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
937 		if (!mmget_not_zero(encl_mm->mm))
938 			continue;
939 
940 		cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));
941 
942 		mmput_async(encl_mm->mm);
943 	}
944 
945 	srcu_read_unlock(&encl->srcu, idx);
946 
947 	return cpumask;
948 }
949 
950 static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
951 					      pgoff_t index)
952 {
953 	struct address_space *mapping = encl->backing->f_mapping;
954 	gfp_t gfpmask = mapping_gfp_mask(mapping);
955 
956 	return shmem_read_mapping_page_gfp(mapping, index, gfpmask);
957 }
958 
959 /**
960  * __sgx_encl_get_backing() - Pin the backing storage
961  * @encl:	an enclave pointer
962  * @page_index:	enclave page index
963  * @backing:	data for accessing backing storage for the page
964  *
965  * Pin the backing storage pages for storing the encrypted contents and Paging
966  * Crypto MetaData (PCMD) of an enclave page.
967  *
968  * Return:
969  *   0 on success,
970  *   -errno otherwise.
971  */
972 static int __sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
973 			 struct sgx_backing *backing)
974 {
975 	pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
976 	struct page *contents;
977 	struct page *pcmd;
978 
979 	contents = sgx_encl_get_backing_page(encl, page_index);
980 	if (IS_ERR(contents))
981 		return PTR_ERR(contents);
982 
983 	pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
984 	if (IS_ERR(pcmd)) {
985 		put_page(contents);
986 		return PTR_ERR(pcmd);
987 	}
988 
989 	backing->contents = contents;
990 	backing->pcmd = pcmd;
991 	backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
992 
993 	return 0;
994 }
995 
996 /*
997  * When called from ksgxd, returns the mem_cgroup of a struct mm stored
998  * in the enclave's mm_list. When not called from ksgxd, just returns
999  * the mem_cgroup of the current task.
1000  */
1001 static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
1002 {
1003 	struct mem_cgroup *memcg = NULL;
1004 	struct sgx_encl_mm *encl_mm;
1005 	int idx;
1006 
1007 	/*
1008 	 * If called from normal task context, return the mem_cgroup
1009 	 * of the current task's mm. The remainder of the handling is for
1010 	 * ksgxd.
1011 	 */
1012 	if (!current_is_ksgxd())
1013 		return get_mem_cgroup_from_mm(current->mm);
1014 
1015 	/*
1016 	 * Search the enclave's mm_list to find an mm associated with
1017 	 * this enclave to charge the allocation to.
1018 	 */
1019 	idx = srcu_read_lock(&encl->srcu);
1020 
1021 	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
1022 		if (!mmget_not_zero(encl_mm->mm))
1023 			continue;
1024 
1025 		memcg = get_mem_cgroup_from_mm(encl_mm->mm);
1026 
1027 		mmput_async(encl_mm->mm);
1028 
1029 		break;
1030 	}
1031 
1032 	srcu_read_unlock(&encl->srcu, idx);
1033 
1034 	/*
1035 	 * In the rare case that there isn't an mm associated with
1036 	 * the enclave, set memcg to the current active mem_cgroup.
1037 	 * This will be the root mem_cgroup if there is no active
1038 	 * mem_cgroup.
1039 	 */
1040 	if (!memcg)
1041 		return get_mem_cgroup_from_mm(NULL);
1042 
1043 	return memcg;
1044 }
1045 
/**
 * sgx_encl_alloc_backing() - create a new backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Create a backing page for an enclave page so that its data can later be
 * loaded back into an EPC page with ELDU. The enclave's mem_cgroup is made
 * the active memcg for the duration of the backing page lookup so that
 * shmem page allocations are charged to the enclave; when called from
 * ksgxd that mem_cgroup is taken from one of the mms in the enclave's
 * mm_list. The previously active memcg is restored before returning.
 *
 * On success a reference is held on the new backing page; drop it with a
 * corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
			   struct sgx_backing *backing)
{
	struct mem_cgroup *charge_memcg;
	struct mem_cgroup *prev_memcg;
	int err;

	/* Switch the active memcg to the one the enclave is charged to. */
	charge_memcg = sgx_encl_get_mem_cgroup(encl);
	prev_memcg = set_active_memcg(charge_memcg);

	err = __sgx_encl_get_backing(encl, page_index, backing);

	/* Restore the previous active memcg and drop our reference. */
	set_active_memcg(prev_memcg);
	mem_cgroup_put(charge_memcg);

	return err;
}
1077 
/**
 * sgx_encl_lookup_backing() - retrieve an existing backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Look up the backing page used to load data back into an EPC page with
 * ELDU. No memcg is set up here, so the caller must make sure that a lookup
 * (rather than sgx_encl_alloc_backing()) is appropriate: used incorrectly,
 * this causes an allocation which is not accounted for.
 *
 * On success a reference is held on the existing backing page; drop it with
 * a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
			   struct sgx_backing *backing)
{
	int err;

	err = __sgx_encl_get_backing(encl, page_index, backing);

	return err;
}
1100 
1101 /**
1102  * sgx_encl_put_backing() - Unpin the backing storage
1103  * @backing:	data for accessing backing storage for the page
1104  */
1105 void sgx_encl_put_backing(struct sgx_backing *backing)
1106 {
1107 	put_page(backing->pcmd);
1108 	put_page(backing->contents);
1109 }
1110 
1111 static int sgx_encl_test_and_clear_young_cb(pte_t *ptep, unsigned long addr,
1112 					    void *data)
1113 {
1114 	pte_t pte;
1115 	int ret;
1116 
1117 	ret = pte_young(*ptep);
1118 	if (ret) {
1119 		pte = pte_mkold(*ptep);
1120 		set_pte_at((struct mm_struct *)data, addr, ptep, pte);
1121 	}
1122 
1123 	return ret;
1124 }
1125 
1126 /**
1127  * sgx_encl_test_and_clear_young() - Test and reset the accessed bit
1128  * @mm:		mm_struct that is checked
1129  * @page:	enclave page to be tested for recent access
1130  *
1131  * Checks the Access (A) bit from the PTE corresponding to the enclave page and
1132  * clears it.
1133  *
1134  * Return: 1 if the page has been recently accessed and 0 if not.
1135  */
1136 int sgx_encl_test_and_clear_young(struct mm_struct *mm,
1137 				  struct sgx_encl_page *page)
1138 {
1139 	unsigned long addr = page->desc & PAGE_MASK;
1140 	struct sgx_encl *encl = page->encl;
1141 	struct vm_area_struct *vma;
1142 	int ret;
1143 
1144 	ret = sgx_encl_find(mm, addr, &vma);
1145 	if (ret)
1146 		return 0;
1147 
1148 	if (encl != vma->vm_private_data)
1149 		return 0;
1150 
1151 	ret = apply_to_page_range(vma->vm_mm, addr, PAGE_SIZE,
1152 				  sgx_encl_test_and_clear_young_cb, vma->vm_mm);
1153 	if (ret < 0)
1154 		return 0;
1155 
1156 	return ret;
1157 }
1158 
1159 struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
1160 					  unsigned long offset,
1161 					  u64 secinfo_flags)
1162 {
1163 	struct sgx_encl_page *encl_page;
1164 	unsigned long prot;
1165 
1166 	encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
1167 	if (!encl_page)
1168 		return ERR_PTR(-ENOMEM);
1169 
1170 	encl_page->desc = encl->base + offset;
1171 	encl_page->encl = encl;
1172 
1173 	prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
1174 	       _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
1175 	       _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
1176 
1177 	/*
1178 	 * TCS pages must always RW set for CPU access while the SECINFO
1179 	 * permissions are *always* zero - the CPU ignores the user provided
1180 	 * values and silently overwrites them with zero permissions.
1181 	 */
1182 	if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
1183 		prot |= PROT_READ | PROT_WRITE;
1184 
1185 	/* Calculate maximum of the VM flags for the page. */
1186 	encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
1187 
1188 	return encl_page;
1189 }
1190 
1191 /**
1192  * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
1193  * @encl: the enclave
1194  * @addr: page aligned pointer to single page for which PTEs will be removed
1195  *
1196  * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
1197  * @addr from each VMA. Ensure that page fault handler is ready to handle
1198  * new mappings of @addr before calling this function.
1199  */
1200 void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
1201 {
1202 	unsigned long mm_list_version;
1203 	struct sgx_encl_mm *encl_mm;
1204 	struct vm_area_struct *vma;
1205 	int idx, ret;
1206 
1207 	do {
1208 		mm_list_version = encl->mm_list_version;
1209 
1210 		/* Pairs with smp_wmb() in sgx_encl_mm_add(). */
1211 		smp_rmb();
1212 
1213 		idx = srcu_read_lock(&encl->srcu);
1214 
1215 		list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
1216 			if (!mmget_not_zero(encl_mm->mm))
1217 				continue;
1218 
1219 			mmap_read_lock(encl_mm->mm);
1220 
1221 			ret = sgx_encl_find(encl_mm->mm, addr, &vma);
1222 			if (!ret && encl == vma->vm_private_data)
1223 				zap_vma_ptes(vma, addr, PAGE_SIZE);
1224 
1225 			mmap_read_unlock(encl_mm->mm);
1226 
1227 			mmput_async(encl_mm->mm);
1228 		}
1229 
1230 		srcu_read_unlock(&encl->srcu, idx);
1231 	} while (unlikely(encl->mm_list_version != mm_list_version));
1232 }
1233 
1234 /**
1235  * sgx_alloc_va_page() - Allocate a Version Array (VA) page
1236  * @reclaim: Reclaim EPC pages directly if none available. Enclave
1237  *           mutex should not be held if this is set.
1238  *
1239  * Allocate a free EPC page and convert it to a Version Array (VA) page.
1240  *
1241  * Return:
1242  *   a VA page,
1243  *   -errno otherwise
1244  */
1245 struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
1246 {
1247 	struct sgx_epc_page *epc_page;
1248 	int ret;
1249 
1250 	epc_page = sgx_alloc_epc_page(NULL, reclaim);
1251 	if (IS_ERR(epc_page))
1252 		return ERR_CAST(epc_page);
1253 
1254 	ret = __epa(sgx_get_epc_virt_addr(epc_page));
1255 	if (ret) {
1256 		WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
1257 		sgx_encl_free_epc_page(epc_page);
1258 		return ERR_PTR(-EFAULT);
1259 	}
1260 
1261 	return epc_page;
1262 }
1263 
1264 /**
1265  * sgx_alloc_va_slot - allocate a VA slot
1266  * @va_page:	a &struct sgx_va_page instance
1267  *
1268  * Allocates a slot from a &struct sgx_va_page instance.
1269  *
1270  * Return: offset of the slot inside the VA page
1271  */
1272 unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page)
1273 {
1274 	int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);
1275 
1276 	if (slot < SGX_VA_SLOT_COUNT)
1277 		set_bit(slot, va_page->slots);
1278 
1279 	return slot << 3;
1280 }
1281 
1282 /**
1283  * sgx_free_va_slot - free a VA slot
1284  * @va_page:	a &struct sgx_va_page instance
1285  * @offset:	offset of the slot inside the VA page
1286  *
1287  * Frees a slot from a &struct sgx_va_page instance.
1288  */
1289 void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset)
1290 {
1291 	clear_bit(offset >> 3, va_page->slots);
1292 }
1293 
1294 /**
1295  * sgx_va_page_full - is the VA page full?
1296  * @va_page:	a &struct sgx_va_page instance
1297  *
1298  * Return: true if all slots have been taken
1299  */
1300 bool sgx_va_page_full(struct sgx_va_page *va_page)
1301 {
1302 	int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);
1303 
1304 	return slot == SGX_VA_SLOT_COUNT;
1305 }
1306 
1307 /**
1308  * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
1309  * @page:	EPC page to be freed
1310  *
1311  * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and
1312  * only upon success, it puts the page back to free page list.  Otherwise, it
1313  * gives a WARNING to indicate page is leaked.
1314  */
1315 void sgx_encl_free_epc_page(struct sgx_epc_page *page)
1316 {
1317 	int ret;
1318 
1319 	WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
1320 
1321 	ret = __eremove(sgx_get_epc_virt_addr(page));
1322 	if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
1323 		return;
1324 
1325 	sgx_free_epc_page(page);
1326 }
1327