xref: /linux/arch/x86/mm/ioremap.c (revision 34dc1baba215b826e454b8d19e4f24adbeb7d00d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Re-map IO memory to kernel address space so that we can access it.
4  * This is needed for high PCI addresses that aren't mapped in the
5  * 640k-1MB IO memory area on PC's
6  *
7  * (C) Copyright 1995 1996 Linus Torvalds
8  */
9 
10 #include <linux/memblock.h>
11 #include <linux/init.h>
12 #include <linux/io.h>
13 #include <linux/ioport.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/mmiotrace.h>
17 #include <linux/cc_platform.h>
18 #include <linux/efi.h>
19 #include <linux/pgtable.h>
20 #include <linux/kmsan.h>
21 
22 #include <asm/set_memory.h>
23 #include <asm/e820/api.h>
24 #include <asm/efi.h>
25 #include <asm/fixmap.h>
26 #include <asm/tlbflush.h>
27 #include <asm/pgalloc.h>
28 #include <asm/memtype.h>
29 #include <asm/setup.h>
30 
31 #include "physaddr.h"
32 
/*
 * Descriptor controlling ioremap() behavior.
 *
 * @flags accumulates IORES_MAP_* bits (IORES_MAP_SYSTEM_RAM and
 * IORES_MAP_ENCRYPTED in this file) collected while the range to be
 * mapped is being checked.
 */
struct ioremap_desc {
	unsigned int flags;
};
39 
40 /*
41  * Fix up the linear direct mapping of the kernel to avoid cache attribute
42  * conflicts.
43  */
44 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
45 			enum page_cache_mode pcm)
46 {
47 	unsigned long nrpages = size >> PAGE_SHIFT;
48 	int err;
49 
50 	switch (pcm) {
51 	case _PAGE_CACHE_MODE_UC:
52 	default:
53 		err = _set_memory_uc(vaddr, nrpages);
54 		break;
55 	case _PAGE_CACHE_MODE_WC:
56 		err = _set_memory_wc(vaddr, nrpages);
57 		break;
58 	case _PAGE_CACHE_MODE_WT:
59 		err = _set_memory_wt(vaddr, nrpages);
60 		break;
61 	case _PAGE_CACHE_MODE_WB:
62 		err = _set_memory_wb(vaddr, nrpages);
63 		break;
64 	}
65 
66 	return err;
67 }
68 
69 /* Does the range (or a subset of) contain normal RAM? */
70 static unsigned int __ioremap_check_ram(struct resource *res)
71 {
72 	unsigned long start_pfn, stop_pfn;
73 	unsigned long i;
74 
75 	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
76 		return 0;
77 
78 	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
79 	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
80 	if (stop_pfn > start_pfn) {
81 		for (i = 0; i < (stop_pfn - start_pfn); ++i)
82 			if (pfn_valid(start_pfn + i) &&
83 			    !PageReserved(pfn_to_page(start_pfn + i)))
84 				return IORES_MAP_SYSTEM_RAM;
85 	}
86 
87 	return 0;
88 }
89 
90 /*
91  * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
92  * there the whole memory is already encrypted.
93  */
94 static unsigned int __ioremap_check_encrypted(struct resource *res)
95 {
96 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
97 		return 0;
98 
99 	switch (res->desc) {
100 	case IORES_DESC_NONE:
101 	case IORES_DESC_RESERVED:
102 		break;
103 	default:
104 		return IORES_MAP_ENCRYPTED;
105 	}
106 
107 	return 0;
108 }
109 
110 /*
111  * The EFI runtime services data area is not covered by walk_mem_res(), but must
112  * be mapped encrypted when SEV is active.
113  */
114 static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
115 {
116 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
117 		return;
118 
119 	if (x86_platform.hyper.is_private_mmio(addr)) {
120 		desc->flags |= IORES_MAP_ENCRYPTED;
121 		return;
122 	}
123 
124 	if (!IS_ENABLED(CONFIG_EFI))
125 		return;
126 
127 	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
128 	    (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
129 	     efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
130 		desc->flags |= IORES_MAP_ENCRYPTED;
131 }
132 
133 static int __ioremap_collect_map_flags(struct resource *res, void *arg)
134 {
135 	struct ioremap_desc *desc = arg;
136 
137 	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
138 		desc->flags |= __ioremap_check_ram(res);
139 
140 	if (!(desc->flags & IORES_MAP_ENCRYPTED))
141 		desc->flags |= __ioremap_check_encrypted(res);
142 
143 	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
144 			       (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
145 }
146 
147 /*
148  * To avoid multiple resource walks, this function walks resources marked as
149  * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a
150  * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
151  *
152  * After that, deal with misc other ranges in __ioremap_check_other() which do
153  * not fall into the above category.
154  */
155 static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
156 				struct ioremap_desc *desc)
157 {
158 	u64 start, end;
159 
160 	start = (u64)addr;
161 	end = start + size - 1;
162 	memset(desc, 0, sizeof(struct ioremap_desc));
163 
164 	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
165 
166 	__ioremap_check_other(addr, desc);
167 }
168 
169 /*
170  * Remap an arbitrary physical address space into the kernel virtual
171  * address space. It transparently creates kernel huge I/O mapping when
172  * the physical address is aligned by a huge page size (1GB or 2MB) and
173  * the requested size is at least the huge page size.
174  *
175  * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
176  * Therefore, the mapping code falls back to use a smaller page toward 4KB
177  * when a mapping range is covered by non-WB type of MTRRs.
178  *
179  * NOTE! We need to allow non-page-aligned mappings too: we will obviously
180  * have to convert them into an offset in a page-aligned mapping, but the
181  * caller shouldn't need to know that small detail.
182  */
183 static void __iomem *
184 __ioremap_caller(resource_size_t phys_addr, unsigned long size,
185 		 enum page_cache_mode pcm, void *caller, bool encrypted)
186 {
187 	unsigned long offset, vaddr;
188 	resource_size_t last_addr;
189 	const resource_size_t unaligned_phys_addr = phys_addr;
190 	const unsigned long unaligned_size = size;
191 	struct ioremap_desc io_desc;
192 	struct vm_struct *area;
193 	enum page_cache_mode new_pcm;
194 	pgprot_t prot;
195 	int retval;
196 	void __iomem *ret_addr;
197 
198 	/* Don't allow wraparound or zero size */
199 	last_addr = phys_addr + size - 1;
200 	if (!size || last_addr < phys_addr)
201 		return NULL;
202 
203 	if (!phys_addr_valid(phys_addr)) {
204 		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
205 		       (unsigned long long)phys_addr);
206 		WARN_ON_ONCE(1);
207 		return NULL;
208 	}
209 
210 	__ioremap_check_mem(phys_addr, size, &io_desc);
211 
212 	/*
213 	 * Don't allow anybody to remap normal RAM that we're using..
214 	 */
215 	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
216 		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
217 			  &phys_addr, &last_addr);
218 		return NULL;
219 	}
220 
221 	/*
222 	 * Mappings have to be page-aligned
223 	 */
224 	offset = phys_addr & ~PAGE_MASK;
225 	phys_addr &= PAGE_MASK;
226 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
227 
228 	/*
229 	 * Mask out any bits not part of the actual physical
230 	 * address, like memory encryption bits.
231 	 */
232 	phys_addr &= PHYSICAL_PAGE_MASK;
233 
234 	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
235 						pcm, &new_pcm);
236 	if (retval) {
237 		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
238 		return NULL;
239 	}
240 
241 	if (pcm != new_pcm) {
242 		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
243 			printk(KERN_ERR
244 		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
245 				(unsigned long long)phys_addr,
246 				(unsigned long long)(phys_addr + size),
247 				pcm, new_pcm);
248 			goto err_free_memtype;
249 		}
250 		pcm = new_pcm;
251 	}
252 
253 	/*
254 	 * If the page being mapped is in memory and SEV is active then
255 	 * make sure the memory encryption attribute is enabled in the
256 	 * resulting mapping.
257 	 * In TDX guests, memory is marked private by default. If encryption
258 	 * is not requested (using encrypted), explicitly set decrypt
259 	 * attribute in all IOREMAPPED memory.
260 	 */
261 	prot = PAGE_KERNEL_IO;
262 	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
263 		prot = pgprot_encrypted(prot);
264 	else
265 		prot = pgprot_decrypted(prot);
266 
267 	switch (pcm) {
268 	case _PAGE_CACHE_MODE_UC:
269 	default:
270 		prot = __pgprot(pgprot_val(prot) |
271 				cachemode2protval(_PAGE_CACHE_MODE_UC));
272 		break;
273 	case _PAGE_CACHE_MODE_UC_MINUS:
274 		prot = __pgprot(pgprot_val(prot) |
275 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
276 		break;
277 	case _PAGE_CACHE_MODE_WC:
278 		prot = __pgprot(pgprot_val(prot) |
279 				cachemode2protval(_PAGE_CACHE_MODE_WC));
280 		break;
281 	case _PAGE_CACHE_MODE_WT:
282 		prot = __pgprot(pgprot_val(prot) |
283 				cachemode2protval(_PAGE_CACHE_MODE_WT));
284 		break;
285 	case _PAGE_CACHE_MODE_WB:
286 		break;
287 	}
288 
289 	/*
290 	 * Ok, go for it..
291 	 */
292 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
293 	if (!area)
294 		goto err_free_memtype;
295 	area->phys_addr = phys_addr;
296 	vaddr = (unsigned long) area->addr;
297 
298 	if (memtype_kernel_map_sync(phys_addr, size, pcm))
299 		goto err_free_area;
300 
301 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
302 		goto err_free_area;
303 
304 	ret_addr = (void __iomem *) (vaddr + offset);
305 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
306 
307 	/*
308 	 * Check if the request spans more than any BAR in the iomem resource
309 	 * tree.
310 	 */
311 	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
312 		pr_warn("caller %pS mapping multiple BARs\n", caller);
313 
314 	return ret_addr;
315 err_free_area:
316 	free_vm_area(area);
317 err_free_memtype:
318 	memtype_free(phys_addr, phys_addr + size);
319 	return NULL;
320 }
321 
322 /**
323  * ioremap     -   map bus memory into CPU space
324  * @phys_addr:    bus address of the memory
325  * @size:      size of the resource to map
326  *
327  * ioremap performs a platform specific sequence of operations to
328  * make bus memory CPU accessible via the readb/readw/readl/writeb/
329  * writew/writel functions and the other mmio helpers. The returned
330  * address is not guaranteed to be usable directly as a virtual
331  * address.
332  *
333  * This version of ioremap ensures that the memory is marked uncachable
334  * on the CPU as well as honouring existing caching rules from things like
335  * the PCI bus. Note that there are other caches and buffers on many
336  * busses. In particular driver authors should read up on PCI writes
337  *
338  * It's useful if some control registers are in such an area and
339  * write combining or read caching is not desirable:
340  *
341  * Must be freed with iounmap.
342  */
343 void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
344 {
345 	/*
346 	 * Ideally, this should be:
347 	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
348 	 *
349 	 * Till we fix all X drivers to use ioremap_wc(), we will use
350 	 * UC MINUS. Drivers that are certain they need or can already
351 	 * be converted over to strong UC can use ioremap_uc().
352 	 */
353 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
354 
355 	return __ioremap_caller(phys_addr, size, pcm,
356 				__builtin_return_address(0), false);
357 }
358 EXPORT_SYMBOL(ioremap);
359 
360 /**
361  * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
362  * @phys_addr:    bus address of the memory
363  * @size:      size of the resource to map
364  *
365  * ioremap_uc performs a platform specific sequence of operations to
366  * make bus memory CPU accessible via the readb/readw/readl/writeb/
367  * writew/writel functions and the other mmio helpers. The returned
368  * address is not guaranteed to be usable directly as a virtual
369  * address.
370  *
371  * This version of ioremap ensures that the memory is marked with a strong
372  * preference as completely uncachable on the CPU when possible. For non-PAT
373  * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
374  * systems this will set the PAT entry for the pages as strong UC.  This call
375  * will honor existing caching rules from things like the PCI bus. Note that
376  * there are other caches and buffers on many busses. In particular driver
377  * authors should read up on PCI writes.
378  *
379  * It's useful if some control registers are in such an area and
380  * write combining or read caching is not desirable:
381  *
382  * Must be freed with iounmap.
383  */
384 void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
385 {
386 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
387 
388 	return __ioremap_caller(phys_addr, size, pcm,
389 				__builtin_return_address(0), false);
390 }
391 EXPORT_SYMBOL_GPL(ioremap_uc);
392 
393 /**
394  * ioremap_wc	-	map memory into CPU space write combined
395  * @phys_addr:	bus address of the memory
396  * @size:	size of the resource to map
397  *
398  * This version of ioremap ensures that the memory is marked write combining.
399  * Write combining allows faster writes to some hardware devices.
400  *
401  * Must be freed with iounmap.
402  */
403 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
404 {
405 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
406 					__builtin_return_address(0), false);
407 }
408 EXPORT_SYMBOL(ioremap_wc);
409 
410 /**
411  * ioremap_wt	-	map memory into CPU space write through
412  * @phys_addr:	bus address of the memory
413  * @size:	size of the resource to map
414  *
415  * This version of ioremap ensures that the memory is marked write through.
416  * Write through stores data into memory while keeping the cache up-to-date.
417  *
418  * Must be freed with iounmap.
419  */
420 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
421 {
422 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
423 					__builtin_return_address(0), false);
424 }
425 EXPORT_SYMBOL(ioremap_wt);
426 
427 void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
428 {
429 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
430 				__builtin_return_address(0), true);
431 }
432 EXPORT_SYMBOL(ioremap_encrypted);
433 
434 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
435 {
436 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
437 				__builtin_return_address(0), false);
438 }
439 EXPORT_SYMBOL(ioremap_cache);
440 
441 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
442 				unsigned long prot_val)
443 {
444 	return __ioremap_caller(phys_addr, size,
445 				pgprot2cachemode(__pgprot(prot_val)),
446 				__builtin_return_address(0), false);
447 }
448 EXPORT_SYMBOL(ioremap_prot);
449 
450 /**
451  * iounmap - Free a IO remapping
452  * @addr: virtual address from ioremap_*
453  *
454  * Caller must ensure there is only one unmapping for the same pointer.
455  */
456 void iounmap(volatile void __iomem *addr)
457 {
458 	struct vm_struct *p, *o;
459 
460 	if ((void __force *)addr <= high_memory)
461 		return;
462 
463 	/*
464 	 * The PCI/ISA range special-casing was removed from __ioremap()
465 	 * so this check, in theory, can be removed. However, there are
466 	 * cases where iounmap() is called for addresses not obtained via
467 	 * ioremap() (vga16fb for example). Add a warning so that these
468 	 * cases can be caught and fixed.
469 	 */
470 	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
471 	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
472 		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
473 		return;
474 	}
475 
476 	mmiotrace_iounmap(addr);
477 
478 	addr = (volatile void __iomem *)
479 		(PAGE_MASK & (unsigned long __force)addr);
480 
481 	/* Use the vm area unlocked, assuming the caller
482 	   ensures there isn't another iounmap for the same address
483 	   in parallel. Reuse of the virtual address is prevented by
484 	   leaving it in the global lists until we're done with it.
485 	   cpa takes care of the direct mappings. */
486 	p = find_vm_area((void __force *)addr);
487 
488 	if (!p) {
489 		printk(KERN_ERR "iounmap: bad address %p\n", addr);
490 		dump_stack();
491 		return;
492 	}
493 
494 	kmsan_iounmap_page_range((unsigned long)addr,
495 		(unsigned long)addr + get_vm_area_size(p));
496 	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
497 
498 	/* Finally remove it */
499 	o = remove_vm_area((void __force *)addr);
500 	BUG_ON(p != o || o == NULL);
501 	kfree(p);
502 }
503 EXPORT_SYMBOL(iounmap);
504 
505 /*
506  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
507  * access
508  */
509 void *xlate_dev_mem_ptr(phys_addr_t phys)
510 {
511 	unsigned long start  = phys &  PAGE_MASK;
512 	unsigned long offset = phys & ~PAGE_MASK;
513 	void *vaddr;
514 
515 	/* memremap() maps if RAM, otherwise falls back to ioremap() */
516 	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
517 
518 	/* Only add the offset on success and return NULL if memremap() failed */
519 	if (vaddr)
520 		vaddr += offset;
521 
522 	return vaddr;
523 }
524 
525 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
526 {
527 	memunmap((void *)((unsigned long)addr & PAGE_MASK));
528 }
529 
530 #ifdef CONFIG_AMD_MEM_ENCRYPT
531 /*
532  * Examine the physical address to determine if it is an area of memory
533  * that should be mapped decrypted.  If the memory is not part of the
534  * kernel usable area it was accessed and created decrypted, so these
535  * areas should be mapped decrypted. And since the encryption key can
536  * change across reboots, persistent memory should also be mapped
537  * decrypted.
538  *
539  * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
540  * only persistent memory should be mapped decrypted.
541  */
542 static bool memremap_should_map_decrypted(resource_size_t phys_addr,
543 					  unsigned long size)
544 {
545 	int is_pmem;
546 
547 	/*
548 	 * Check if the address is part of a persistent memory region.
549 	 * This check covers areas added by E820, EFI and ACPI.
550 	 */
551 	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
552 				    IORES_DESC_PERSISTENT_MEMORY);
553 	if (is_pmem != REGION_DISJOINT)
554 		return true;
555 
556 	/*
557 	 * Check if the non-volatile attribute is set for an EFI
558 	 * reserved area.
559 	 */
560 	if (efi_enabled(EFI_BOOT)) {
561 		switch (efi_mem_type(phys_addr)) {
562 		case EFI_RESERVED_TYPE:
563 			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
564 				return true;
565 			break;
566 		default:
567 			break;
568 		}
569 	}
570 
571 	/* Check if the address is outside kernel usable area */
572 	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
573 	case E820_TYPE_RESERVED:
574 	case E820_TYPE_ACPI:
575 	case E820_TYPE_NVS:
576 	case E820_TYPE_UNUSABLE:
577 		/* For SEV, these areas are encrypted */
578 		if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
579 			break;
580 		fallthrough;
581 
582 	case E820_TYPE_PRAM:
583 		return true;
584 	default:
585 		break;
586 	}
587 
588 	return false;
589 }
590 
591 /*
592  * Examine the physical address to determine if it is EFI data. Check
593  * it against the boot params structure and EFI tables and memory types.
594  */
595 static bool memremap_is_efi_data(resource_size_t phys_addr,
596 				 unsigned long size)
597 {
598 	u64 paddr;
599 
600 	/* Check if the address is part of EFI boot/runtime data */
601 	if (!efi_enabled(EFI_BOOT))
602 		return false;
603 
604 	paddr = boot_params.efi_info.efi_memmap_hi;
605 	paddr <<= 32;
606 	paddr |= boot_params.efi_info.efi_memmap;
607 	if (phys_addr == paddr)
608 		return true;
609 
610 	paddr = boot_params.efi_info.efi_systab_hi;
611 	paddr <<= 32;
612 	paddr |= boot_params.efi_info.efi_systab;
613 	if (phys_addr == paddr)
614 		return true;
615 
616 	if (efi_is_table_address(phys_addr))
617 		return true;
618 
619 	switch (efi_mem_type(phys_addr)) {
620 	case EFI_BOOT_SERVICES_DATA:
621 	case EFI_RUNTIME_SERVICES_DATA:
622 		return true;
623 	default:
624 		break;
625 	}
626 
627 	return false;
628 }
629 
630 /*
631  * Examine the physical address to determine if it is boot data by checking
632  * it against the boot params setup_data chain.
633  */
634 static bool memremap_is_setup_data(resource_size_t phys_addr,
635 				   unsigned long size)
636 {
637 	struct setup_indirect *indirect;
638 	struct setup_data *data;
639 	u64 paddr, paddr_next;
640 
641 	paddr = boot_params.hdr.setup_data;
642 	while (paddr) {
643 		unsigned int len;
644 
645 		if (phys_addr == paddr)
646 			return true;
647 
648 		data = memremap(paddr, sizeof(*data),
649 				MEMREMAP_WB | MEMREMAP_DEC);
650 		if (!data) {
651 			pr_warn("failed to memremap setup_data entry\n");
652 			return false;
653 		}
654 
655 		paddr_next = data->next;
656 		len = data->len;
657 
658 		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
659 			memunmap(data);
660 			return true;
661 		}
662 
663 		if (data->type == SETUP_INDIRECT) {
664 			memunmap(data);
665 			data = memremap(paddr, sizeof(*data) + len,
666 					MEMREMAP_WB | MEMREMAP_DEC);
667 			if (!data) {
668 				pr_warn("failed to memremap indirect setup_data\n");
669 				return false;
670 			}
671 
672 			indirect = (struct setup_indirect *)data->data;
673 
674 			if (indirect->type != SETUP_INDIRECT) {
675 				paddr = indirect->addr;
676 				len = indirect->len;
677 			}
678 		}
679 
680 		memunmap(data);
681 
682 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
683 			return true;
684 
685 		paddr = paddr_next;
686 	}
687 
688 	return false;
689 }
690 
691 /*
692  * Examine the physical address to determine if it is boot data by checking
693  * it against the boot params setup_data chain (early boot version).
694  */
695 static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
696 						unsigned long size)
697 {
698 	struct setup_indirect *indirect;
699 	struct setup_data *data;
700 	u64 paddr, paddr_next;
701 
702 	paddr = boot_params.hdr.setup_data;
703 	while (paddr) {
704 		unsigned int len, size;
705 
706 		if (phys_addr == paddr)
707 			return true;
708 
709 		data = early_memremap_decrypted(paddr, sizeof(*data));
710 		if (!data) {
711 			pr_warn("failed to early memremap setup_data entry\n");
712 			return false;
713 		}
714 
715 		size = sizeof(*data);
716 
717 		paddr_next = data->next;
718 		len = data->len;
719 
720 		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
721 			early_memunmap(data, sizeof(*data));
722 			return true;
723 		}
724 
725 		if (data->type == SETUP_INDIRECT) {
726 			size += len;
727 			early_memunmap(data, sizeof(*data));
728 			data = early_memremap_decrypted(paddr, size);
729 			if (!data) {
730 				pr_warn("failed to early memremap indirect setup_data\n");
731 				return false;
732 			}
733 
734 			indirect = (struct setup_indirect *)data->data;
735 
736 			if (indirect->type != SETUP_INDIRECT) {
737 				paddr = indirect->addr;
738 				len = indirect->len;
739 			}
740 		}
741 
742 		early_memunmap(data, size);
743 
744 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
745 			return true;
746 
747 		paddr = paddr_next;
748 	}
749 
750 	return false;
751 }
752 
753 /*
754  * Architecture function to determine if RAM remap is allowed. By default, a
755  * RAM remap will map the data as encrypted. Determine if a RAM remap should
756  * not be done so that the data will be mapped decrypted.
757  */
758 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
759 				 unsigned long flags)
760 {
761 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
762 		return true;
763 
764 	if (flags & MEMREMAP_ENC)
765 		return true;
766 
767 	if (flags & MEMREMAP_DEC)
768 		return false;
769 
770 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
771 		if (memremap_is_setup_data(phys_addr, size) ||
772 		    memremap_is_efi_data(phys_addr, size))
773 			return false;
774 	}
775 
776 	return !memremap_should_map_decrypted(phys_addr, size);
777 }
778 
779 /*
780  * Architecture override of __weak function to adjust the protection attributes
781  * used when remapping memory. By default, early_memremap() will map the data
782  * as encrypted. Determine if an encrypted mapping should not be done and set
783  * the appropriate protection attributes.
784  */
785 pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
786 					     unsigned long size,
787 					     pgprot_t prot)
788 {
789 	bool encrypted_prot;
790 
791 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
792 		return prot;
793 
794 	encrypted_prot = true;
795 
796 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
797 		if (early_memremap_is_setup_data(phys_addr, size) ||
798 		    memremap_is_efi_data(phys_addr, size))
799 			encrypted_prot = false;
800 	}
801 
802 	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
803 		encrypted_prot = false;
804 
805 	return encrypted_prot ? pgprot_encrypted(prot)
806 			      : pgprot_decrypted(prot);
807 }
808 
809 bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
810 {
811 	return arch_memremap_can_ram_remap(phys_addr, size, 0);
812 }
813 
814 /* Remap memory with encryption */
815 void __init *early_memremap_encrypted(resource_size_t phys_addr,
816 				      unsigned long size)
817 {
818 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
819 }
820 
821 /*
822  * Remap memory with encryption and write-protected - cannot be called
823  * before pat_init() is called
824  */
825 void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
826 					 unsigned long size)
827 {
828 	if (!x86_has_pat_wp())
829 		return NULL;
830 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
831 }
832 
833 /* Remap memory without encryption */
834 void __init *early_memremap_decrypted(resource_size_t phys_addr,
835 				      unsigned long size)
836 {
837 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
838 }
839 
840 /*
841  * Remap memory without encryption and write-protected - cannot be called
842  * before pat_init() is called
843  */
844 void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
845 					 unsigned long size)
846 {
847 	if (!x86_has_pat_wp())
848 		return NULL;
849 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
850 }
851 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
852 
/*
 * One page worth of PTEs used for the boot-time ioremap fixmap range:
 * early_ioremap_init() installs it under the pmd covering FIX_BTMAP_BEGIN
 * and early_ioremap_pte() hands out entries from it.
 */
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
854 
855 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
856 {
857 	/* Don't assume we're using swapper_pg_dir at this point */
858 	pgd_t *base = __va(read_cr3_pa());
859 	pgd_t *pgd = &base[pgd_index(addr)];
860 	p4d_t *p4d = p4d_offset(pgd, addr);
861 	pud_t *pud = pud_offset(p4d, addr);
862 	pmd_t *pmd = pmd_offset(pud, addr);
863 
864 	return pmd;
865 }
866 
867 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
868 {
869 	return &bm_pte[pte_index(addr)];
870 }
871 
872 bool __init is_early_ioremap_ptep(pte_t *ptep)
873 {
874 	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
875 }
876 
877 void __init early_ioremap_init(void)
878 {
879 	pmd_t *pmd;
880 
881 #ifdef CONFIG_X86_64
882 	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
883 #else
884 	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
885 #endif
886 
887 	early_ioremap_setup();
888 
889 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
890 	memset(bm_pte, 0, sizeof(bm_pte));
891 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
892 
893 	/*
894 	 * The boot-ioremap range spans multiple pmds, for which
895 	 * we are not prepared:
896 	 */
897 #define __FIXADDR_TOP (-PAGE_SIZE)
898 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
899 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
900 #undef __FIXADDR_TOP
901 	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
902 		WARN_ON(1);
903 		printk(KERN_WARNING "pmd %p != %p\n",
904 		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
905 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
906 			fix_to_virt(FIX_BTMAP_BEGIN));
907 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
908 			fix_to_virt(FIX_BTMAP_END));
909 
910 		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
911 		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
912 		       FIX_BTMAP_BEGIN);
913 	}
914 }
915 
916 void __init __early_set_fixmap(enum fixed_addresses idx,
917 			       phys_addr_t phys, pgprot_t flags)
918 {
919 	unsigned long addr = __fix_to_virt(idx);
920 	pte_t *pte;
921 
922 	if (idx >= __end_of_fixed_addresses) {
923 		BUG();
924 		return;
925 	}
926 	pte = early_ioremap_pte(addr);
927 
928 	/* Sanitize 'prot' against any unsupported bits: */
929 	pgprot_val(flags) &= __supported_pte_mask;
930 
931 	if (pgprot_val(flags))
932 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
933 	else
934 		pte_clear(&init_mm, addr, pte);
935 	flush_tlb_one_kernel(addr);
936 }
937