/*-
 * Copyright (c) 2004 Marcel Moolenaar
 * Copyright (c) 2001 Doug Rabson
 * Copyright (c) 2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/efi.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/clock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <isa/rtc.h>
#include <machine/efi.h>
#include <machine/md_var.h>
#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>

/* The EFI regions we're allowed to map. */
#define EFI_ALLOWED_TYPES_MASK ( \
        1u << EFI_MD_TYPE_BS_CODE | 1u << EFI_MD_TYPE_BS_DATA | \
        1u << EFI_MD_TYPE_RT_CODE | 1u << EFI_MD_TYPE_RT_DATA | \
        1u << EFI_MD_TYPE_FIRMWARE \
)
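
/*
 * Illustrative sketch (not from the original source): a descriptor type
 * is tested against the mask by bit position, e.g.:
 *
 *        if (((1u << md_type) & EFI_ALLOWED_TYPES_MASK) == 0)
 *                continue;        // this region type may not be mapped
 */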

static pml5_entry_t *efi_pml5;
static pml4_entry_t *efi_pml4;
static vm_object_t obj_1t1_pt;
static vm_page_t efi_pmltop_page;
static vm_pindex_t efi_1t1_idx;

void
efi_destroy_1t1_map(void)
{
        struct pctrie_iter pages;
        vm_page_t m;

        if (obj_1t1_pt != NULL) {
                /*
                 * Drop the wiring of each page-table page, leaving only
                 * the object reference, so that vm_object_deallocate()
                 * can free the pages.
                 */
                vm_page_iter_init(&pages, obj_1t1_pt);
                VM_OBJECT_RLOCK(obj_1t1_pt);
                VM_RADIX_FOREACH(m, &pages)
                        m->ref_count = VPRC_OBJREF;
                vm_wire_sub(obj_1t1_pt->resident_page_count);
                VM_OBJECT_RUNLOCK(obj_1t1_pt);
                vm_object_deallocate(obj_1t1_pt);
        }

        obj_1t1_pt = NULL;
        efi_pml4 = NULL;
        efi_pml5 = NULL;
        efi_pmltop_page = NULL;
}

/*
 * Map a physical address from EFI runtime space into KVA space.  Returns
 * 0 to indicate a failed mapping so that the caller may handle the error.
 */
vm_offset_t
efi_phys_to_kva(vm_paddr_t paddr)
{

        if (paddr >= dmaplimit)
                return (0);
        return (PHYS_TO_DMAP(paddr));
}
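
/*
 * Usage sketch (illustrative only; "md" is a hypothetical descriptor):
 *
 *        vm_offset_t kva;
 *
 *        kva = efi_phys_to_kva(md->md_phys);
 *        if (kva == 0)
 *                return (EFAULT);        // paddr not covered by the DMAP
 */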

/*
 * Allocate the next page-table page: grab a zeroed, wired page from
 * obj_1t1_pt at the next free page index.
 */
static vm_page_t
efi_1t1_page(void)
{

        return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY |
            VM_ALLOC_WIRED | VM_ALLOC_ZERO));
}

/*
 * Return the PTE for va in the 1:1 map, allocating any intermediate
 * page-table pages that are still missing.
 */
static pt_entry_t *
efi_1t1_pte(vm_offset_t va)
{
        pml5_entry_t *pml5e;
        pml4_entry_t *pml4e;
        pdp_entry_t *pdpe;
        pd_entry_t *pde;
        pt_entry_t *pte;
        vm_page_t m;
        vm_pindex_t pml5_idx, pml4_idx, pdp_idx, pd_idx;
        vm_paddr_t mphys;

        pml4_idx = pmap_pml4e_index(va);
        if (la57) {
                pml5_idx = pmap_pml5e_index(va);
                pml5e = &efi_pml5[pml5_idx];
                if (*pml5e == 0) {
                        m = efi_1t1_page();
                        mphys = VM_PAGE_TO_PHYS(m);
                        *pml5e = mphys | X86_PG_RW | X86_PG_V;
                } else {
                        mphys = *pml5e & PG_FRAME;
                }
                pml4e = (pml4_entry_t *)PHYS_TO_DMAP(mphys);
                pml4e = &pml4e[pml4_idx];
        } else {
                pml4e = &efi_pml4[pml4_idx];
        }

        if (*pml4e == 0) {
                m = efi_1t1_page();
                mphys = VM_PAGE_TO_PHYS(m);
                *pml4e = mphys | X86_PG_RW | X86_PG_V;
        } else {
                mphys = *pml4e & PG_FRAME;
        }

        pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys);
        pdp_idx = pmap_pdpe_index(va);
        pdpe += pdp_idx;
        if (*pdpe == 0) {
                m = efi_1t1_page();
                mphys = VM_PAGE_TO_PHYS(m);
                *pdpe = mphys | X86_PG_RW | X86_PG_V;
        } else {
                mphys = *pdpe & PG_FRAME;
        }

        pde = (pd_entry_t *)PHYS_TO_DMAP(mphys);
        pd_idx = pmap_pde_index(va);
        pde += pd_idx;
        if (*pde == 0) {
                m = efi_1t1_page();
                mphys = VM_PAGE_TO_PHYS(m);
                *pde = mphys | X86_PG_RW | X86_PG_V;
        } else {
                mphys = *pde & PG_FRAME;
        }

        pte = (pt_entry_t *)PHYS_TO_DMAP(mphys);
        pte += pmap_pte_index(va);
        KASSERT(*pte == 0, ("va %#jx *pt %#jx", va, *pte));

        return (pte);
}
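
/*
 * Worked example for the walk above (illustrative): for va 0x40201000
 * the indices are pmap_pml4e_index() == 0 (bits 47:39),
 * pmap_pdpe_index() == 1 (bits 38:30), pmap_pde_index() == 1
 * (bits 29:21) and pmap_pte_index() == 1 (bits 20:12), so the PTE ends
 * up in slot 1 of each of the three lower levels.
 */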

bool
efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz)
{
        struct efi_md *p;
        pt_entry_t *pte;
        void *pml;
        vm_page_t m;
        vm_offset_t va;
        uint64_t idx;
        int bits, i, mode;
        bool map_pz = true;

        obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 +
            NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG),
            VM_PROT_ALL, 0, NULL);
        efi_1t1_idx = 0;
        VM_OBJECT_WLOCK(obj_1t1_pt);
        efi_pmltop_page = efi_1t1_page();
        VM_OBJECT_WUNLOCK(obj_1t1_pt);
        pml = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_pmltop_page));
        if (la57) {
                efi_pml5 = pml;
                pmap_pinit_pml5(efi_pmltop_page);
        } else {
                efi_pml4 = pml;
                pmap_pinit_pml4(efi_pmltop_page);
        }

        if ((efi_map_regs & ~EFI_ALLOWED_TYPES_MASK) != 0) {
                printf("Ignoring the following runtime EFI regions: %#x\n",
                    efi_map_regs & ~EFI_ALLOWED_TYPES_MASK);
                efi_map_regs &= EFI_ALLOWED_TYPES_MASK;
        }

        for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p,
            descsz)) {
                if ((p->md_attr & EFI_MD_ATTR_RT) == 0 &&
                    !EFI_MAP_BOOTTYPE_ALLOWED(p->md_type))
                        continue;
                if (p->md_virt != 0 && p->md_virt != p->md_phys) {
                        if (bootverbose)
                                printf("EFI Runtime entry %d is mapped\n", i);
                        goto fail;
                }
                if ((p->md_phys & EFI_PAGE_MASK) != 0) {
                        if (bootverbose)
                                printf("EFI Runtime entry %d is not aligned\n",
                                    i);
                        goto fail;
                }
                if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys ||
                    p->md_phys + p->md_pages * EFI_PAGE_SIZE >=
                    VM_MAXUSER_ADDRESS) {
234 printf("EFI Runtime entry %d is not in mappable for RT:"
235 "base %#016jx %#jx pages\n",
                            i, (uintmax_t)p->md_phys,
                            (uintmax_t)p->md_pages);
                        goto fail;
                }
                if ((p->md_attr & EFI_MD_ATTR_WB) != 0)
                        mode = VM_MEMATTR_WRITE_BACK;
                else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
                        mode = VM_MEMATTR_WRITE_THROUGH;
                else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
                        mode = VM_MEMATTR_WRITE_COMBINING;
                else if ((p->md_attr & EFI_MD_ATTR_WP) != 0)
                        mode = VM_MEMATTR_WRITE_PROTECTED;
                else if ((p->md_attr & EFI_MD_ATTR_UC) != 0)
                        mode = VM_MEMATTR_UNCACHEABLE;
                else {
                        if (bootverbose)
                                printf("EFI Runtime entry %d mapping "
                                    "attributes unsupported\n", i);
                        mode = VM_MEMATTR_UNCACHEABLE;
                }
                bits = pmap_cache_bits(kernel_pmap, mode, false) | X86_PG_RW |
                    X86_PG_V;
                VM_OBJECT_WLOCK(obj_1t1_pt);
                for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++,
                    va += PAGE_SIZE) {
                        pte = efi_1t1_pte(va);
                        pte_store(pte, va | bits);

                        m = PHYS_TO_VM_PAGE(va);
                        if (m != NULL && VM_PAGE_TO_PHYS(m) == 0) {
                                vm_page_init_page(m, va, -1,
                                    VM_FREEPOOL_DEFAULT);
                                m->order = VM_NFREEORDER + 1; /* invalid */
                                m->pool = VM_NFREEPOOL + 1; /* invalid */
                                pmap_page_set_memattr_noflush(m, mode);
                        }
                }
                VM_OBJECT_WUNLOCK(obj_1t1_pt);
                if (p->md_phys == 0)
                        map_pz = false;
        }

        /*
         * Some BIOSes tend to access phys 0 during efirt calls,
         * so map it if we haven't yet.
         */
        if (map_pz) {
                VM_OBJECT_WLOCK(obj_1t1_pt);
                pte = efi_1t1_pte(0);
                /* Assume Write-Back */
                bits = pmap_cache_bits(kernel_pmap, VM_MEMATTR_WRITE_BACK,
                    false) | X86_PG_RW | X86_PG_V;
                pte_store(pte, bits);
                VM_OBJECT_WUNLOCK(obj_1t1_pt);
        }

        return (true);

fail:
        efi_destroy_1t1_map();
        return (false);
}
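
/*
 * Usage sketch (illustrative; a hypothetical MI caller):
 *
 *        if (!efi_create_1t1_map(map, ndesc, descsz))
 *                return (ENOMEM);
 *        ...
 *        efi_destroy_1t1_map();
 */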

/*
 * Create an environment for the EFI runtime code call.  The most
 * important part is creating the required 1:1 physical->virtual
 * mappings for the runtime segments.  To do that, we manually create a
 * page table which unmaps userspace but gives the correct kernel
 * mappings.  The 1:1 mappings for runtime segments usually occupy the
 * low 4G of the physical address map.
 *
 * The 1:1 mappings were chosen over the SetVirtualAddressMap() EFI RT
 * service, because there are some BIOSes which fail to correctly
 * relocate themselves on the call, requiring both 1:1 and virtual
 * mappings.  As a result, we must provide the 1:1 mapping anyway, so
 * there is no reason to bother with the virtual map, and no need to
 * add complexity to the loader.
 *
 * There is no need to disable interrupts around the change of %cr3:
 * the kernel mappings remain correct, since we only replaced the
 * userspace portion of the VA.  Interrupt handlers must not access
 * userspace.  Having interrupts enabled also avoids the problem of a
 * long-running firmware/SMM operation negatively affecting IPIs,
 * esp. TLB shootdown requests.
 */
int
efi_arch_enter(void)
{
        pmap_t curpmap;
        uint64_t cr3;

        curpmap = PCPU_GET(curpmap);
        PMAP_LOCK_ASSERT(curpmap, MA_OWNED);
        curthread->td_md.md_efirt_dis_pf = vm_fault_disable_pagefaults();

        /*
         * The IPI TLB shootdown handler invltlb_pcid_handler() reloads
         * %cr3 from curpmap->pm_cr3, which would disable the runtime
         * segments mappings.  Block the handler's action by setting
         * curpmap to an impossible value.  See also the comment in
         * pmap.c:pmap_activate_sw().
         */
        if (pmap_pcid_enabled && !invpcid_works)
                PCPU_SET(curpmap, NULL);

        cr3 = VM_PAGE_TO_PHYS(efi_pmltop_page);
        if (pmap_pcid_enabled)
                cr3 |= pmap_get_pcid(curpmap);
        load_cr3(cr3);
        /*
         * If PCID is enabled, the cleared CR3_PCID_SAVE bit in the
         * loaded %cr3 causes TLB invalidation.
         */
        if (!pmap_pcid_enabled)
                invltlb();
        return (0);
}

void
efi_arch_leave(void)
{
        pmap_t curpmap;
        uint64_t cr3;

        curpmap = &curproc->p_vmspace->vm_pmap;
        cr3 = curpmap->pm_cr3;
        if (pmap_pcid_enabled) {
                cr3 |= pmap_get_pcid(curpmap);
                if (!invpcid_works)
                        PCPU_SET(curpmap, curpmap);
        }
        load_cr3(cr3);
        if (!pmap_pcid_enabled)
                invltlb();
        vm_fault_enable_pagefaults(curthread->td_md.md_efirt_dis_pf);
}
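
/*
 * Usage sketch (illustrative; "rt_gettime", "tm" and "status" are
 * hypothetical): the MI code is expected to bracket each runtime call
 * with the pair above while holding the current pmap lock:
 *
 *        pmap_t pm = &curproc->p_vmspace->vm_pmap;
 *
 *        PMAP_LOCK(pm);
 *        if (efi_arch_enter() == 0) {
 *                status = rt_gettime(&tm, NULL);        // hypothetical call
 *                efi_arch_leave();
 *        }
 *        PMAP_UNLOCK(pm);
 */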

/* XXX debug stuff */
static int
efi_time_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
        struct efi_tm tm;
        int error, val;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        error = efi_get_time(&tm);
        if (error == 0) {
                uprintf("EFI reports: Year %d Month %d Day %d Hour %d Min %d "
                    "Sec %d\n", tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour,
                    tm.tm_min, tm.tm_sec);
        }
        return (error);
}

SYSCTL_PROC(_debug, OID_AUTO, efi_time,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
    efi_time_sysctl_handler, "I",
    "");
397