// SPDX-License-Identifier: GPL-2.0-only
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs.
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/ioremap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
#include <linux/cc_platform.h>
#include <linux/efi.h>
#include <linux/pgtable.h>
#include <linux/kmsan.h>

#include <asm/set_memory.h>
#include <asm/e820/api.h>
#include <asm/efi.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/memtype.h>
#include <asm/setup.h>

#include "physaddr.h"

/*
 * Descriptor controlling ioremap() behavior.
 */
struct ioremap_desc {
	unsigned int flags;
};

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			enum page_cache_mode pcm)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WT:
		err = _set_memory_wt(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

/* Does the range (or a subset of it) contain normal RAM? */
static unsigned int __ioremap_check_ram(struct resource *res)
{
	unsigned long start_pfn, stop_pfn;
	unsigned long i;

	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
		return 0;

	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
	if (stop_pfn > start_pfn) {
		for (i = 0; i < (stop_pfn - start_pfn); ++i)
			if (pfn_valid(start_pfn + i) &&
			    !PageReserved(pfn_to_page(start_pfn + i)))
				return IORES_MAP_SYSTEM_RAM;
	}

	return 0;
}

/*
 * In a SEV guest, resources described as NONE or RESERVED should not be
 * mapped encrypted, since the whole of the guest's memory is already
 * encrypted.
 */
static unsigned int __ioremap_check_encrypted(struct resource *res)
{
	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return 0;

	switch (res->desc) {
	case IORES_DESC_NONE:
	case IORES_DESC_RESERVED:
		break;
	default:
		return IORES_MAP_ENCRYPTED;
	}

	return 0;
}

/*
 * The EFI runtime services data area is not covered by walk_mem_res(), but must
 * be mapped encrypted when SEV is active.
 */
static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
{
	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return;

	if (x86_platform.hyper.is_private_mmio(addr)) {
		desc->flags |= IORES_MAP_ENCRYPTED;
		return;
	}

	if (!IS_ENABLED(CONFIG_EFI))
		return;

	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
	    (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
	     efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
		desc->flags |= IORES_MAP_ENCRYPTED;
}

static int __ioremap_collect_map_flags(struct resource *res, void *arg)
{
	struct ioremap_desc *desc = arg;

	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
		desc->flags |= __ioremap_check_ram(res);

	if (!(desc->flags & IORES_MAP_ENCRYPTED))
		desc->flags |= __ioremap_check_encrypted(res);

	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
			       (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
}

/*
 * To avoid multiple resource walks, this function walks resources marked as
 * IORESOURCE_MEM and IORESOURCE_BUSY, looking for system RAM and/or a
 * resource not described as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
 *
 * After that, deal with misc other ranges in __ioremap_check_other() which do
 * not fall into the above category.
 */
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
				struct ioremap_desc *desc)
{
	u64 start, end;

	start = (u64)addr;
	end = start + size - 1;
	memset(desc, 0, sizeof(struct ioremap_desc));

	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);

	__ioremap_check_other(addr, desc);
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates kernel huge I/O mappings when
 * the physical address is aligned to a huge page size (1GB or 2MB) and
 * the requested size is at least the huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
 * Therefore, the mapping code falls back to using smaller pages, down to
 * 4KB, when a mapping range is covered by a non-WB type of MTRR.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *
__ioremap_caller(resource_size_t phys_addr, unsigned long size,
		 enum page_cache_mode pcm, void *caller, bool encrypted)
{
	unsigned long offset, vaddr;
	resource_size_t last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct ioremap_desc io_desc;
	struct vm_struct *area;
	enum page_cache_mode new_pcm;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	__ioremap_check_mem(phys_addr, size, &io_desc);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
			  &phys_addr, &last_addr);
		return NULL;
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	/*
	 * Mask out any bits not part of the actual physical
	 * address, like memory encryption bits.
	 */
	phys_addr &= PHYSICAL_PAGE_MASK;

	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
						pcm, &new_pcm);
	if (retval) {
		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
		return NULL;
	}

	if (pcm != new_pcm) {
		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
			printk(KERN_ERR
		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				pcm, new_pcm);
			goto err_free_memtype;
		}
		pcm = new_pcm;
	}

	/*
	 * If the page being mapped is in memory and SEV is active then
	 * make sure the memory encryption attribute is enabled in the
	 * resulting mapping.
	 * In TDX guests, memory is marked private by default. If encryption
	 * is not requested (via the 'encrypted' argument), explicitly set
	 * the decrypted attribute on all ioremapped memory.
	 */
	prot = PAGE_KERNEL_IO;
	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
		prot = pgprot_encrypted(prot);
	else
		prot = pgprot_decrypted(prot);

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC));
		break;
	case _PAGE_CACHE_MODE_UC_MINUS:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
		break;
	case _PAGE_CACHE_MODE_WC:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WC));
		break;
	case _PAGE_CACHE_MODE_WT:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WT));
		break;
	case _PAGE_CACHE_MODE_WB:
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		goto err_free_memtype;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;

	if (memtype_kernel_map_sync(phys_addr, size, pcm))
		goto err_free_area;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
		goto err_free_area;

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	/*
	 * Check if the request spans more than any BAR in the iomem resource
	 * tree.
	 */
	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
		pr_warn("caller %pS mapping multiple BARs\n", caller);

	return ret_addr;
err_free_area:
	free_vm_area(area);
err_free_memtype:
	memtype_free(phys_addr, phys_addr + size);
	return NULL;
}

/**
 * ioremap     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
	 *
	 * Till we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS. Drivers that are certain they need or can already
	 * be converted over to strong UC can use ioremap_uc().
	 */
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap);
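/*
 * Editor's note: an illustrative usage sketch, not part of the original
 * file. A typical PCI driver maps a register BAR with ioremap(), accesses
 * it only through the MMIO accessors and releases it with iounmap(). The
 * FOO_CTRL offset and the surrounding driver context are hypothetical.
 *
 *	void __iomem *regs;
 *
 *	regs = ioremap(pci_resource_start(pdev, 0),
 *		       pci_resource_len(pdev, 0));
 *	if (!regs)
 *		return -ENOMEM;
 *
 *	writel(0x1, regs + FOO_CTRL);
 *	...
 *	iounmap(regs);
 */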

/**
 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_uc performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncachable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC.  This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many busses. In particular driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL_GPL(ioremap_uc);

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
					__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wc);
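/*
 * Editor's note: an illustrative sketch, not part of the original file.
 * Write-combining is typically requested for prefetchable, framebuffer-like
 * BARs where streaming write throughput matters more than strict ordering
 * of individual stores; the BAR index used here is hypothetical.
 *
 *	void __iomem *fb;
 *
 *	fb = ioremap_wc(pci_resource_start(pdev, 2),
 *			pci_resource_len(pdev, 2));
 *	if (!fb)
 *		return -ENOMEM;
 *	memset_io(fb, 0, pci_resource_len(pdev, 2));
 *	...
 *	iounmap(fb);
 */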

/**
 * ioremap_wt	-	map memory into CPU space write through
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
					__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_wt);

void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0), true);
}
EXPORT_SYMBOL(ioremap_encrypted);
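/*
 * Editor's note: an illustrative sketch, not part of the original file.
 * ioremap_encrypted() is for callers that must access memory with the
 * SME/SEV encryption bit set in the mapping, e.g. a crash-dump style
 * reader copying a page of the previous kernel; 'buf', 'offset' and
 * 'csize' are hypothetical.
 *
 *	void __iomem *vaddr;
 *
 *	vaddr = ioremap_encrypted(pfn << PAGE_SHIFT, PAGE_SIZE);
 *	if (!vaddr)
 *		return -ENOMEM;
 *	memcpy_fromio(buf, vaddr + offset, csize);
 *	iounmap(vaddr);
 */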

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_cache);

void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size,
				pgprot2cachemode(__pgprot(prot_val)),
				__builtin_return_address(0), false);
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr)))
		return;

	/*
	 * The PCI/ISA range special-casing was removed from __ioremap()
	 * so this check, in theory, can be removed. However, there are
	 * cases where iounmap() is called for addresses not obtained via
	 * ioremap() (vga16fb for example). Add a warning so that these
	 * cases can be caught and fixed.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
		return;
	}

	mmiotrace_iounmap(addr);

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	/*
	 * Use the vm area unlocked, assuming the caller ensures there isn't
	 * another iounmap for the same address in parallel. Reuse of the
	 * virtual address is prevented by leaving it in the global lists
	 * until we're done with it. cpa takes care of the direct mappings.
	 */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	kmsan_iounmap_page_range((unsigned long)addr,
		(unsigned long)addr + get_vm_area_size(p));
	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
	unsigned long start  = phys &  PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr;

	/* memremap() maps if RAM, otherwise falls back to ioremap() */
	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);

	/* Only add the offset on success and return NULL if memremap() failed */
	if (vaddr)
		vaddr += offset;

	return vaddr;
}

void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
	memunmap((void *)((unsigned long)addr & PAGE_MASK));
}
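/*
 * Editor's note: an illustrative sketch, not part of the original file.
 * A /dev/mem style read pairs the two helpers above around a temporary
 * kernel mapping of one page; 'buf' and 'count' are hypothetical.
 *
 *	void *ptr;
 *
 *	ptr = xlate_dev_mem_ptr(phys);
 *	if (!ptr)
 *		return -EFAULT;
 *	if (copy_to_user(buf, ptr, count))
 *		err = -EFAULT;
 *	unxlate_dev_mem_ptr(phys, ptr);
 */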

#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
 * Examine the physical address to determine if it is an area of memory
 * that should be mapped decrypted.  If the memory is not part of the
 * kernel's usable area, it was accessed and created decrypted, so these
 * areas should be mapped decrypted. And since the encryption key can
 * change across reboots, persistent memory should also be mapped
 * decrypted.
 *
 * If SEV is active, that implies that BIOS/UEFI also ran encrypted, so
 * only persistent memory should be mapped decrypted.
 */
static bool memremap_should_map_decrypted(resource_size_t phys_addr,
					  unsigned long size)
{
	int is_pmem;

	/*
	 * Check if the address is part of a persistent memory region.
	 * This check covers areas added by E820, EFI and ACPI.
	 */
	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
				    IORES_DESC_PERSISTENT_MEMORY);
	if (is_pmem != REGION_DISJOINT)
		return true;

	/*
	 * Check if the non-volatile attribute is set for an EFI
	 * reserved area.
	 */
	if (efi_enabled(EFI_BOOT)) {
		switch (efi_mem_type(phys_addr)) {
		case EFI_RESERVED_TYPE:
			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
				return true;
			break;
		default:
			break;
		}
	}

	/* Check if the address is outside kernel usable area */
	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
	case E820_TYPE_RESERVED:
	case E820_TYPE_ACPI:
	case E820_TYPE_NVS:
	case E820_TYPE_UNUSABLE:
		/* For SEV, these areas are encrypted */
		if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
			break;
		fallthrough;

	case E820_TYPE_PRAM:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is EFI data. Check
 * it against the boot params structure and EFI tables and memory types.
 */
static bool memremap_is_efi_data(resource_size_t phys_addr,
				 unsigned long size)
{
	u64 paddr;

	/* Check if the address is part of EFI boot/runtime data */
	if (!efi_enabled(EFI_BOOT))
		return false;

	paddr = boot_params.efi_info.efi_memmap_hi;
	paddr <<= 32;
	paddr |= boot_params.efi_info.efi_memmap;
	if (phys_addr == paddr)
		return true;

	paddr = boot_params.efi_info.efi_systab_hi;
	paddr <<= 32;
	paddr |= boot_params.efi_info.efi_systab;
	if (phys_addr == paddr)
		return true;

	if (efi_is_table_address(phys_addr))
		return true;

	switch (efi_mem_type(phys_addr)) {
	case EFI_BOOT_SERVICES_DATA:
	case EFI_RUNTIME_SERVICES_DATA:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain.
 */
static bool memremap_is_setup_data(resource_size_t phys_addr,
				   unsigned long size)
{
	struct setup_indirect *indirect;
	struct setup_data *data;
	u64 paddr, paddr_next;

	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		unsigned int len;

		if (phys_addr == paddr)
			return true;

		data = memremap(paddr, sizeof(*data),
				MEMREMAP_WB | MEMREMAP_DEC);
		if (!data) {
			pr_warn("failed to memremap setup_data entry\n");
			return false;
		}

		paddr_next = data->next;
		len = data->len;

		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
			memunmap(data);
			return true;
		}

		if (data->type == SETUP_INDIRECT) {
			memunmap(data);
			data = memremap(paddr, sizeof(*data) + len,
					MEMREMAP_WB | MEMREMAP_DEC);
			if (!data) {
				pr_warn("failed to memremap indirect setup_data\n");
				return false;
			}

			indirect = (struct setup_indirect *)data->data;

			if (indirect->type != SETUP_INDIRECT) {
				paddr = indirect->addr;
				len = indirect->len;
			}
		}

		memunmap(data);

		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}

691 
692 /*
693  * Examine the physical address to determine if it is boot data by checking
694  * it against the boot params setup_data chain (early boot version).
695  */
696 static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
697 						unsigned long size)
698 {
699 	struct setup_indirect *indirect;
700 	struct setup_data *data;
701 	u64 paddr, paddr_next;
702 
703 	paddr = boot_params.hdr.setup_data;
704 	while (paddr) {
705 		unsigned int len, size;
706 
707 		if (phys_addr == paddr)
708 			return true;
709 
710 		data = early_memremap_decrypted(paddr, sizeof(*data));
711 		if (!data) {
712 			pr_warn("failed to early memremap setup_data entry\n");
713 			return false;
714 		}
715 
716 		size = sizeof(*data);
717 
718 		paddr_next = data->next;
719 		len = data->len;
720 
721 		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
722 			early_memunmap(data, sizeof(*data));
723 			return true;
724 		}
725 
726 		if (data->type == SETUP_INDIRECT) {
727 			size += len;
728 			early_memunmap(data, sizeof(*data));
729 			data = early_memremap_decrypted(paddr, size);
730 			if (!data) {
731 				pr_warn("failed to early memremap indirect setup_data\n");
732 				return false;
733 			}
734 
735 			indirect = (struct setup_indirect *)data->data;
736 
737 			if (indirect->type != SETUP_INDIRECT) {
738 				paddr = indirect->addr;
739 				len = indirect->len;
740 			}
741 		}
742 
743 		early_memunmap(data, size);
744 
745 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
746 			return true;
747 
748 		paddr = paddr_next;
749 	}
750 
751 	return false;
752 }
753 
754 /*
755  * Architecture function to determine if RAM remap is allowed. By default, a
756  * RAM remap will map the data as encrypted. Determine if a RAM remap should
757  * not be done so that the data will be mapped decrypted.
758  */
759 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
760 				 unsigned long flags)
761 {
762 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
763 		return true;
764 
765 	if (flags & MEMREMAP_ENC)
766 		return true;
767 
768 	if (flags & MEMREMAP_DEC)
769 		return false;
770 
771 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
772 		if (memremap_is_setup_data(phys_addr, size) ||
773 		    memremap_is_efi_data(phys_addr, size))
774 			return false;
775 	}
776 
777 	return !memremap_should_map_decrypted(phys_addr, size);
778 }
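/*
 * Editor's note: an illustrative sketch, not part of the original file.
 * Callers of memremap() can steer this hook with the MEMREMAP_ENC and
 * MEMREMAP_DEC flags; for example, the setup_data walker above asks for
 * an explicitly decrypted mapping:
 *
 *	data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC);
 *	if (!data)
 *		return false;
 *	...
 *	memunmap(data);
 */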

/*
 * Architecture override of __weak function to adjust the protection attributes
 * used when remapping memory. By default, early_memremap() will map the data
 * as encrypted. Determine if an encrypted mapping should not be done and set
 * the appropriate protection attributes.
 */
pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
					     unsigned long size,
					     pgprot_t prot)
{
	bool encrypted_prot;

	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
		return prot;

	encrypted_prot = true;

	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
		if (early_memremap_is_setup_data(phys_addr, size) ||
		    memremap_is_efi_data(phys_addr, size))
			encrypted_prot = false;
	}

	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
		encrypted_prot = false;

	return encrypted_prot ? pgprot_encrypted(prot)
			      : pgprot_decrypted(prot);
}

bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
{
	return arch_memremap_can_ram_remap(phys_addr, size, 0);
}

/* Remap memory with encryption */
void __init *early_memremap_encrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
}

/*
 * Remap memory with encryption and write protection - cannot be called
 * before pat_init() is called.
 */
void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	if (!x86_has_pat_wp())
		return NULL;
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
}

/* Remap memory without encryption */
void __init *early_memremap_decrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
}

/*
 * Remap memory without encryption and with write protection - cannot be
 * called before pat_init() is called.
 */
void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	if (!x86_has_pat_wp())
		return NULL;
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
}
#endif	/* CONFIG_AMD_MEM_ENCRYPT */

static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3_pa());
	pgd_t *pgd = &base[pgd_index(addr)];
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

bool __init is_early_ioremap_ptep(pte_t *ptep)
{
	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	/* Sanitize 'flags' against any unsupported bits: */
	pgprot_val(flags) &= __supported_pte_mask;

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	flush_tlb_one_kernel(addr);
}
938