xref: /linux/arch/x86/mm/ioremap.c (revision f66d6acccbc08b4146f4c2cf9445241f70f5517d)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PC's
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/ioremap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
#include <linux/cc_platform.h>
#include <linux/efi.h>
#include <linux/pgtable.h>
#include <linux/kmsan.h>

#include <asm/set_memory.h>
#include <asm/e820/api.h>
#include <asm/efi.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/memtype.h>
#include <asm/setup.h>

#include "physaddr.h"

/*
 * Descriptor controlling ioremap() behavior.
 */
struct ioremap_desc {
	unsigned int flags;
};

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			enum page_cache_mode pcm)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WT:
		err = _set_memory_wt(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

/* Does the range (or a subset of it) contain normal RAM? */
static unsigned int __ioremap_check_ram(struct resource *res)
{
	unsigned long start_pfn, stop_pfn;
	unsigned long i;

	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
		return 0;

	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
	if (stop_pfn > start_pfn) {
		for (i = 0; i < (stop_pfn - start_pfn); ++i)
			if (pfn_valid(start_pfn + i) &&
			    !PageReserved(pfn_to_page(start_pfn + i)))
				return IORES_MAP_SYSTEM_RAM;
	}

	return 0;
}

/*
 * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
 * there the whole memory is already encrypted.
 */
static unsigned int __ioremap_check_encrypted(struct resource *res)
{
	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return 0;

	switch (res->desc) {
	case IORES_DESC_NONE:
	case IORES_DESC_RESERVED:
		break;
	default:
		return IORES_MAP_ENCRYPTED;
	}

	return 0;
}

/*
 * The EFI runtime services data area is not covered by walk_mem_res(), but must
 * be mapped encrypted when SEV is active.
 */
static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
{
	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return;

	if (x86_platform.hyper.is_private_mmio(addr)) {
		desc->flags |= IORES_MAP_ENCRYPTED;
		return;
	}

	if (!IS_ENABLED(CONFIG_EFI))
		return;

	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
	    (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
	     efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
		desc->flags |= IORES_MAP_ENCRYPTED;
}

static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
	struct ioremap_desc *desc = arg;

	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
		desc->flags |= __ioremap_check_ram(res);

	if (!(desc->flags & IORES_MAP_ENCRYPTED))
		desc->flags |= __ioremap_check_encrypted(res);

	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
			       (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
}

/*
 * To avoid multiple resource walks, this function walks resources marked as
 * IORESOURCE_MEM and IORESOURCE_BUSY, looking for system RAM and/or a
 * resource not described as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
 *
 * After that, deal with misc other ranges in __ioremap_check_other() which do
 * not fall into the above category.
 */
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
				struct ioremap_desc *desc)
{
	u64 start, end;

	start = (u64)addr;
	end = start + size - 1;
	memset(desc, 0, sizeof(struct ioremap_desc));

	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);

	__ioremap_check_other(addr, desc);
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates kernel huge I/O mapping when
 * the physical address is aligned by a huge page size (1GB or 2MB) and
 * the requested size is at least the huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
 * Therefore, the mapping code falls back to use a smaller page toward 4KB
 * when a mapping range is covered by non-WB type of MTRRs.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *
__ioremap_caller(resource_size_t phys_addr, unsigned long size,
		 enum page_cache_mode pcm, void *caller, bool encrypted)
{
	unsigned long offset, vaddr;
	resource_size_t last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct ioremap_desc io_desc;
	struct vm_struct *area;
	enum page_cache_mode new_pcm;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	__ioremap_check_mem(phys_addr, size, &io_desc);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
			  &phys_addr, &last_addr);
		return NULL;
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	/*
	 * Mask out any bits not part of the actual physical
	 * address, like memory encryption bits.
	 */
	phys_addr &= PHYSICAL_PAGE_MASK;

	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
						pcm, &new_pcm);
	if (retval) {
		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
		return NULL;
	}

	if (pcm != new_pcm) {
		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
			printk(KERN_ERR
		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				pcm, new_pcm);
			goto err_free_memtype;
		}
		pcm = new_pcm;
	}

	/*
	 * If the page being mapped is in memory and SEV is active, then
	 * make sure the memory encryption attribute is enabled in the
	 * resulting mapping.
	 * In TDX guests, memory is marked private by default. If encryption
	 * is not requested (via the 'encrypted' argument), explicitly set
	 * the decrypted attribute on all ioremapped memory.
	 */
	prot = PAGE_KERNEL_IO;
	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
		prot = pgprot_encrypted(prot);
	else
		prot = pgprot_decrypted(prot);

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC));
		break;
	case _PAGE_CACHE_MODE_UC_MINUS:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
		break;
	case _PAGE_CACHE_MODE_WC:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WC));
		break;
	case _PAGE_CACHE_MODE_WT:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WT));
		break;
	case _PAGE_CACHE_MODE_WB:
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		goto err_free_memtype;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;

	if (memtype_kernel_map_sync(phys_addr, size, pcm))
		goto err_free_area;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
		goto err_free_area;

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	/*
	 * Check if the request spans more than any BAR in the iomem resource
	 * tree.
	 */
	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
		pr_warn("caller %pS mapping multiple BARs\n", caller);

	return ret_addr;
err_free_area:
	free_vm_area(area);
err_free_memtype:
	memtype_free(phys_addr, phys_addr + size);
	return NULL;
}

/**
 * ioremap     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap performs a platform-specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
	 *
	 * Till we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS. Drivers that are certain they need or can already
	 * be converted over to strong UC can use ioremap_uc().
	 */
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap);
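
/*
 * Example (illustrative only): the typical driver-side pattern for the
 * helpers above - map an MMIO range, access it with the mmio accessors,
 * then unmap it. The BAR index and the register offsets are hypothetical.
 *
 *	void __iomem *regs;
 *	u32 status;
 *
 *	regs = ioremap(pci_resource_start(pdev, 0),
 *		       pci_resource_len(pdev, 0));
 *	if (!regs)
 *		return -ENOMEM;
 *
 *	writel(1, regs + REG_CTRL);		(hypothetical offset)
 *	status = readl(regs + REG_STATUS);	(hypothetical offset)
 *
 *	iounmap(regs);
 */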

/**
 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_uc performs a platform-specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncachable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC.  This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many busses. In particular driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL_GPL(ioremap_uc);

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
					__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wc);

/**
 * ioremap_wt	-	map memory into CPU space write through
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
					__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wt);

void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0), true);
}
EXPORT_SYMBOL(ioremap_encrypted);

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_cache);

void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size,
				pgprot2cachemode(__pgprot(prot_val)),
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr)))
		return;

	/*
	 * The PCI/ISA range special-casing was removed from __ioremap()
	 * so this check, in theory, can be removed. However, there are
	 * cases where iounmap() is called for addresses not obtained via
	 * ioremap() (vga16fb for example). Add a warning so that these
	 * cases can be caught and fixed.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
		return;
	}

	mmiotrace_iounmap(addr);

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	/*
	 * Use the vm area unlocked, assuming the caller ensures there isn't
	 * another iounmap for the same address in parallel. Reuse of the
	 * virtual address is prevented by leaving it in the global lists
	 * until we're done with it. cpa takes care of the direct mappings.
	 */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	kmsan_iounmap_page_range((unsigned long)addr,
		(unsigned long)addr + get_vm_area_size(p));
	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
	unsigned long start  = phys &  PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr;

	/* memremap() maps if RAM, otherwise falls back to ioremap() */
	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);

	/* Only add the offset on success and return NULL if memremap() failed */
	if (vaddr)
		vaddr += offset;

	return vaddr;
}

void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
	memunmap((void *)((unsigned long)addr & PAGE_MASK));
}

#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
 * Examine the physical address to determine if it is an area of memory
 * that should be mapped decrypted.  If the memory is not part of the
 * kernel usable area, it was accessed and created decrypted, so these
 * areas should be mapped decrypted. And since the encryption key can
 * change across reboots, persistent memory should also be mapped
 * decrypted.
 *
 * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
 * only persistent memory should be mapped decrypted.
 */
static bool memremap_should_map_decrypted(resource_size_t phys_addr,
					  unsigned long size)
{
	int is_pmem;

	/*
	 * Check if the address is part of a persistent memory region.
	 * This check covers areas added by E820, EFI and ACPI.
	 */
	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
				    IORES_DESC_PERSISTENT_MEMORY);
	if (is_pmem != REGION_DISJOINT)
		return true;

	/*
	 * Check if the non-volatile attribute is set for an EFI
	 * reserved area.
	 */
	if (efi_enabled(EFI_BOOT)) {
		switch (efi_mem_type(phys_addr)) {
		case EFI_RESERVED_TYPE:
			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
				return true;
			break;
		default:
			break;
		}
	}

	/* Check if the address is outside the kernel usable area */
	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
	case E820_TYPE_RESERVED:
	case E820_TYPE_ACPI:
	case E820_TYPE_NVS:
	case E820_TYPE_UNUSABLE:
		/* For SEV, these areas are encrypted */
		if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
			break;
		fallthrough;

	case E820_TYPE_PRAM:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is EFI data. Check
 * it against the boot params structure and EFI tables and memory types.
 */
static bool memremap_is_efi_data(resource_size_t phys_addr,
				 unsigned long size)
{
	u64 paddr;

	/* Check if the address is part of EFI boot/runtime data */
	if (!efi_enabled(EFI_BOOT))
		return false;

	paddr = boot_params.efi_info.efi_memmap_hi;
	paddr <<= 32;
	paddr |= boot_params.efi_info.efi_memmap;
	if (phys_addr == paddr)
		return true;

	paddr = boot_params.efi_info.efi_systab_hi;
	paddr <<= 32;
	paddr |= boot_params.efi_info.efi_systab;
	if (phys_addr == paddr)
		return true;

	if (efi_is_table_address(phys_addr))
		return true;

	switch (efi_mem_type(phys_addr)) {
	case EFI_BOOT_SERVICES_DATA:
	case EFI_RUNTIME_SERVICES_DATA:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain.
 */
static bool memremap_is_setup_data(resource_size_t phys_addr,
				   unsigned long size)
{
	struct setup_indirect *indirect;
	struct setup_data *data;
	u64 paddr, paddr_next;

	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		unsigned int len;

		if (phys_addr == paddr)
			return true;

		data = memremap(paddr, sizeof(*data),
				MEMREMAP_WB | MEMREMAP_DEC);
		if (!data) {
			pr_warn("failed to memremap setup_data entry\n");
			return false;
		}

		paddr_next = data->next;
		len = data->len;

		if ((phys_addr > paddr) &&
		    (phys_addr < (paddr + sizeof(struct setup_data) + len))) {
			memunmap(data);
			return true;
		}

		if (data->type == SETUP_INDIRECT) {
			memunmap(data);
			data = memremap(paddr, sizeof(*data) + len,
					MEMREMAP_WB | MEMREMAP_DEC);
			if (!data) {
				pr_warn("failed to memremap indirect setup_data\n");
				return false;
			}

			indirect = (struct setup_indirect *)data->data;

			if (indirect->type != SETUP_INDIRECT) {
				paddr = indirect->addr;
				len = indirect->len;
			}
		}

		memunmap(data);

		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain (early boot version).
 */
static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
						unsigned long size)
{
	struct setup_indirect *indirect;
	struct setup_data *data;
	u64 paddr, paddr_next;

	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		unsigned int len, size;

		if (phys_addr == paddr)
			return true;

		data = early_memremap_decrypted(paddr, sizeof(*data));
		if (!data) {
			pr_warn("failed to early memremap setup_data entry\n");
			return false;
		}

		size = sizeof(*data);

		paddr_next = data->next;
		len = data->len;

		if ((phys_addr > paddr) &&
		    (phys_addr < (paddr + sizeof(struct setup_data) + len))) {
			early_memunmap(data, sizeof(*data));
			return true;
		}

		if (data->type == SETUP_INDIRECT) {
			size += len;
			early_memunmap(data, sizeof(*data));
			data = early_memremap_decrypted(paddr, size);
			if (!data) {
				pr_warn("failed to early memremap indirect setup_data\n");
				return false;
			}

			indirect = (struct setup_indirect *)data->data;

			if (indirect->type != SETUP_INDIRECT) {
				paddr = indirect->addr;
				len = indirect->len;
			}
		}

		early_memunmap(data, size);

		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}

/*
 * Architecture function to determine if RAM remap is allowed. By default, a
 * RAM remap will map the data as encrypted. Determine if a RAM remap should
 * not be done so that the data will be mapped decrypted.
 */
bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
				 unsigned long flags)
{
	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
		return true;

	if (flags & MEMREMAP_ENC)
		return true;

	if (flags & MEMREMAP_DEC)
		return false;

	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
		if (memremap_is_setup_data(phys_addr, size) ||
		    memremap_is_efi_data(phys_addr, size))
			return false;
	}

	return !memremap_should_map_decrypted(phys_addr, size);
}

/*
 * Architecture override of __weak function to adjust the protection attributes
 * used when remapping memory. By default, early_memremap() will map the data
 * as encrypted. Determine if an encrypted mapping should not be done and set
 * the appropriate protection attributes.
 */
pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
					     unsigned long size,
					     pgprot_t prot)
{
	bool encrypted_prot;

	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
		return prot;

	encrypted_prot = true;

	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
		if (early_memremap_is_setup_data(phys_addr, size) ||
		    memremap_is_efi_data(phys_addr, size))
			encrypted_prot = false;
	}

	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
		encrypted_prot = false;

	return encrypted_prot ? pgprot_encrypted(prot)
			      : pgprot_decrypted(prot);
}

bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
{
	return arch_memremap_can_ram_remap(phys_addr, size, 0);
}

/* Remap memory with encryption */
void __init *early_memremap_encrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
}

/*
 * Remap memory with encryption and write-protected - cannot be called
 * before pat_init() is called
 */
void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	if (!x86_has_pat_wp())
		return NULL;
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
}

/* Remap memory without encryption */
void __init *early_memremap_decrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
}

/*
 * Remap memory without encryption and write-protected - cannot be called
 * before pat_init() is called
 */
void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	if (!x86_has_pat_wp())
		return NULL;
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
}
#endif	/* CONFIG_AMD_MEM_ENCRYPT */
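
/*
 * Example (illustrative only): how a caller interacts with the hooks above.
 * Passing MEMREMAP_DEC makes arch_memremap_can_ram_remap() return false, so
 * the range is not reused from the (encrypted) direct mapping and ends up
 * mapped decrypted, as done for setup_data earlier in this file. The
 * physical range below is hypothetical.
 *
 *	void *va;
 *
 *	va = memremap(phys, len, MEMREMAP_WB | MEMREMAP_DEC);
 *	if (!va)
 *		return -ENOMEM;
 *	...
 *	memunmap(va);
 */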

static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3_pa());
	pgd_t *pgd = &base[pgd_index(addr)];
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

bool __init is_early_ioremap_ptep(pte_t *ptep)
{
	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	/* Sanitize 'prot' against any unsupported bits: */
	pgprot_val(flags) &= __supported_pte_mask;

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	flush_tlb_one_kernel(addr);
}