xref: /linux/arch/x86/mm/ioremap.c (revision e814f3fd16acfb7f9966773953de8f740a1e3202)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Re-map IO memory to kernel address space so that we can access it.
4  * This is needed for high PCI addresses that aren't mapped in the
5  * 640k-1MB IO memory area on PCs.
6  *
7  * (C) Copyright 1995 1996 Linus Torvalds
8  */
9 
10 #include <linux/memblock.h>
11 #include <linux/init.h>
12 #include <linux/io.h>
13 #include <linux/ioport.h>
14 #include <linux/ioremap.h>
15 #include <linux/slab.h>
16 #include <linux/vmalloc.h>
17 #include <linux/mmiotrace.h>
18 #include <linux/cc_platform.h>
19 #include <linux/efi.h>
20 #include <linux/pgtable.h>
21 #include <linux/kmsan.h>
22 
23 #include <asm/set_memory.h>
24 #include <asm/e820/api.h>
25 #include <asm/efi.h>
26 #include <asm/fixmap.h>
27 #include <asm/tlbflush.h>
28 #include <asm/pgalloc.h>
29 #include <asm/memtype.h>
30 #include <asm/setup.h>
31 
32 #include "physaddr.h"
33 
34 /*
35  * Descriptor controlling ioremap() behavior.
36  */
37 struct ioremap_desc {
38 	unsigned int flags;
39 };
40 
41 /*
42  * Fix up the linear direct mapping of the kernel to avoid cache attribute
43  * conflicts.
44  */
45 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
46 			enum page_cache_mode pcm)
47 {
48 	unsigned long nrpages = size >> PAGE_SHIFT;
49 	int err;
50 
51 	switch (pcm) {
52 	case _PAGE_CACHE_MODE_UC:
53 	default:
54 		err = _set_memory_uc(vaddr, nrpages);
55 		break;
56 	case _PAGE_CACHE_MODE_WC:
57 		err = _set_memory_wc(vaddr, nrpages);
58 		break;
59 	case _PAGE_CACHE_MODE_WT:
60 		err = _set_memory_wt(vaddr, nrpages);
61 		break;
62 	case _PAGE_CACHE_MODE_WB:
63 		err = _set_memory_wb(vaddr, nrpages);
64 		break;
65 	}
66 
67 	return err;
68 }
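
/*
 * Illustrative sketch (assumption, not part of this file): memtype_kernel_map_sync(),
 * which __ioremap_caller() below calls, ends up in this helper so that the
 * kernel's direct mapping of an already-mapped page picks up the same cache
 * mode as the new ioremap, roughly:
 *
 *	ioremap_change_attr((unsigned long)__va(phys_addr), size,
 *			    _PAGE_CACHE_MODE_WC);
 */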
69 
70 /* Does the range (or any part of it) contain normal RAM? */
71 static unsigned int __ioremap_check_ram(struct resource *res)
72 {
73 	unsigned long start_pfn, stop_pfn;
74 	unsigned long i;
75 
76 	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
77 		return 0;
78 
79 	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
80 	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
81 	if (stop_pfn > start_pfn) {
82 		for (i = 0; i < (stop_pfn - start_pfn); ++i)
83 			if (pfn_valid(start_pfn + i) &&
84 			    !PageReserved(pfn_to_page(start_pfn + i)))
85 				return IORES_MAP_SYSTEM_RAM;
86 	}
87 
88 	return 0;
89 }
90 
91 /*
92  * In a SEV guest, NONE and RESERVED resources should not be mapped encrypted,
93  * because in such a guest the whole memory is already encrypted.
94  */
95 static unsigned int __ioremap_check_encrypted(struct resource *res)
96 {
97 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
98 		return 0;
99 
100 	switch (res->desc) {
101 	case IORES_DESC_NONE:
102 	case IORES_DESC_RESERVED:
103 		break;
104 	default:
105 		return IORES_MAP_ENCRYPTED;
106 	}
107 
108 	return 0;
109 }
110 
111 /*
112  * The EFI runtime services data area is not covered by walk_mem_res(), but must
113  * be mapped encrypted when SEV is active.
114  */
115 static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
116 {
117 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
118 		return;
119 
120 	if (x86_platform.hyper.is_private_mmio(addr)) {
121 		desc->flags |= IORES_MAP_ENCRYPTED;
122 		return;
123 	}
124 
125 	if (!IS_ENABLED(CONFIG_EFI))
126 		return;
127 
128 	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
129 	    (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
130 	     efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
131 		desc->flags |= IORES_MAP_ENCRYPTED;
132 }
133 
134 static int __ioremap_collect_map_flags(struct resource *res, void *arg)
135 {
136 	struct ioremap_desc *desc = arg;
137 
138 	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
139 		desc->flags |= __ioremap_check_ram(res);
140 
141 	if (!(desc->flags & IORES_MAP_ENCRYPTED))
142 		desc->flags |= __ioremap_check_encrypted(res);
143 
144 	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
145 			       (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
146 }
147 
148 /*
149  * To avoid multiple resource walks, this function walks resources marked as
150  * IORESOURCE_MEM and IORESOURCE_BUSY, looking for system RAM and/or a
151  * resource not described as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
152  *
153  * After that, deal with misc other ranges in __ioremap_check_other() which do
154  * not fall into the above category.
155  */
156 static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
157 				struct ioremap_desc *desc)
158 {
159 	u64 start, end;
160 
161 	start = (u64)addr;
162 	end = start + size - 1;
163 	memset(desc, 0, sizeof(struct ioremap_desc));
164 
165 	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
166 
167 	__ioremap_check_other(addr, desc);
168 }
169 
170 /*
171  * Remap an arbitrary physical address space into the kernel virtual
172  * address space. It transparently creates a kernel huge I/O mapping when
173  * the physical address is aligned to a huge page size (1GB or 2MB) and
174  * the requested size is at least the huge page size.
175  *
176  * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
177  * Therefore, the mapping code falls back to using smaller pages, down to 4KB,
178  * when a mapping range is covered by non-WB type MTRRs.
179  *
180  * NOTE! We need to allow non-page-aligned mappings too: we will obviously
181  * have to convert them into an offset in a page-aligned mapping, but the
182  * caller shouldn't need to know that small detail.
183  */
184 static void __iomem *
185 __ioremap_caller(resource_size_t phys_addr, unsigned long size,
186 		 enum page_cache_mode pcm, void *caller, bool encrypted)
187 {
188 	unsigned long offset, vaddr;
189 	resource_size_t last_addr;
190 	const resource_size_t unaligned_phys_addr = phys_addr;
191 	const unsigned long unaligned_size = size;
192 	struct ioremap_desc io_desc;
193 	struct vm_struct *area;
194 	enum page_cache_mode new_pcm;
195 	pgprot_t prot;
196 	int retval;
197 	void __iomem *ret_addr;
198 
199 	/* Don't allow wraparound or zero size */
200 	last_addr = phys_addr + size - 1;
201 	if (!size || last_addr < phys_addr)
202 		return NULL;
203 
204 	if (!phys_addr_valid(phys_addr)) {
205 		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
206 		       (unsigned long long)phys_addr);
207 		WARN_ON_ONCE(1);
208 		return NULL;
209 	}
210 
211 	__ioremap_check_mem(phys_addr, size, &io_desc);
212 
213 	/*
214 	 * Don't allow anybody to remap normal RAM that we're using..
215 	 */
216 	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
217 		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
218 			  &phys_addr, &last_addr);
219 		return NULL;
220 	}
221 
222 	/*
223 	 * Mappings have to be page-aligned
224 	 */
225 	offset = phys_addr & ~PAGE_MASK;
226 	phys_addr &= PAGE_MASK;
227 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
228 
229 	/*
230 	 * Mask out any bits not part of the actual physical
231 	 * address, like memory encryption bits.
232 	 */
233 	phys_addr &= PHYSICAL_PAGE_MASK;
234 
235 	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
236 						pcm, &new_pcm);
237 	if (retval) {
238 		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
239 		return NULL;
240 	}
241 
242 	if (pcm != new_pcm) {
243 		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
244 			printk(KERN_ERR
245 		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
246 				(unsigned long long)phys_addr,
247 				(unsigned long long)(phys_addr + size),
248 				pcm, new_pcm);
249 			goto err_free_memtype;
250 		}
251 		pcm = new_pcm;
252 	}
253 
254 	/*
255 	 * If the page being mapped is in memory and SEV is active then
256 	 * make sure the memory encryption attribute is enabled in the
257 	 * resulting mapping.
258 	 * In TDX guests, memory is marked private by default. If encryption
259	 * is not requested (via the 'encrypted' argument), explicitly set the
260	 * decrypted attribute on all ioremapped memory.
261 	 */
262 	prot = PAGE_KERNEL_IO;
263 	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
264 		prot = pgprot_encrypted(prot);
265 	else
266 		prot = pgprot_decrypted(prot);
267 
268 	switch (pcm) {
269 	case _PAGE_CACHE_MODE_UC:
270 	default:
271 		prot = __pgprot(pgprot_val(prot) |
272 				cachemode2protval(_PAGE_CACHE_MODE_UC));
273 		break;
274 	case _PAGE_CACHE_MODE_UC_MINUS:
275 		prot = __pgprot(pgprot_val(prot) |
276 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
277 		break;
278 	case _PAGE_CACHE_MODE_WC:
279 		prot = __pgprot(pgprot_val(prot) |
280 				cachemode2protval(_PAGE_CACHE_MODE_WC));
281 		break;
282 	case _PAGE_CACHE_MODE_WT:
283 		prot = __pgprot(pgprot_val(prot) |
284 				cachemode2protval(_PAGE_CACHE_MODE_WT));
285 		break;
286 	case _PAGE_CACHE_MODE_WB:
287 		break;
288 	}
289 
290 	/*
291 	 * Ok, go for it..
292 	 */
293 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
294 	if (!area)
295 		goto err_free_memtype;
296 	area->phys_addr = phys_addr;
297 	vaddr = (unsigned long) area->addr;
298 
299 	if (memtype_kernel_map_sync(phys_addr, size, pcm))
300 		goto err_free_area;
301 
302 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
303 		goto err_free_area;
304 
305 	ret_addr = (void __iomem *) (vaddr + offset);
306 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
307 
308 	/*
309	 * Check if the request spans more than a single BAR in the iomem resource
310 	 * tree.
311 	 */
312 	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
313 		pr_warn("caller %pS mapping multiple BARs\n", caller);
314 
315 	return ret_addr;
316 err_free_area:
317 	free_vm_area(area);
318 err_free_memtype:
319 	memtype_free(phys_addr, phys_addr + size);
320 	return NULL;
321 }
322 
323 /**
324  * ioremap     -   map bus memory into CPU space
325  * @phys_addr:    bus address of the memory
326  * @size:      size of the resource to map
327  *
328  * ioremap performs a platform specific sequence of operations to
329  * make bus memory CPU accessible via the readb/readw/readl/writeb/
330  * writew/writel functions and the other mmio helpers. The returned
331  * address is not guaranteed to be usable directly as a virtual
332  * address.
333  *
334  * This version of ioremap ensures that the memory is marked uncachable
335  * on the CPU as well as honouring existing caching rules from things like
336  * the PCI bus. Note that there are other caches and buffers on many
337  * busses. In particular, driver authors should read up on PCI writes.
338  *
339  * It's useful if some control registers are in such an area and
340  * write combining or read caching is not desirable.
341  *
342  * Must be freed with iounmap.
343  */
344 void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
345 {
346 	/*
347 	 * Ideally, this should be:
348 	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
349 	 *
350 	 * Till we fix all X drivers to use ioremap_wc(), we will use
351 	 * UC MINUS. Drivers that are certain they need or can already
352 	 * be converted over to strong UC can use ioremap_uc().
353 	 */
354 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
355 
356 	return __ioremap_caller(phys_addr, size, pcm,
357 				__builtin_return_address(0), false);
358 }
359 EXPORT_SYMBOL(ioremap);
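
/*
 * Illustrative usage sketch (assumption, not part of this file): a PCI driver
 * would typically map an MMIO BAR with ioremap(), access it only through the
 * mmio helpers and release it with iounmap(). 'pdev' and the 0x10 register
 * offset are hypothetical:
 *
 *	void __iomem *regs;
 *
 *	regs = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
 *	if (!regs)
 *		return -ENOMEM;
 *	writel(0x1, regs + 0x10);
 *	iounmap(regs);
 */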
360 
361 /**
362  * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
363  * @phys_addr:    bus address of the memory
364  * @size:      size of the resource to map
365  *
366  * ioremap_uc performs a platform specific sequence of operations to
367  * make bus memory CPU accessible via the readb/readw/readl/writeb/
368  * writew/writel functions and the other mmio helpers. The returned
369  * address is not guaranteed to be usable directly as a virtual
370  * address.
371  *
372  * This version of ioremap ensures that the memory is marked with a strong
373  * preference as completely uncachable on the CPU when possible. For non-PAT
374  * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
375  * systems this will set the PAT entry for the pages as strong UC.  This call
376  * will honor existing caching rules from things like the PCI bus. Note that
377  * there are other caches and buffers on many busses. In particular driver
378  * authors should read up on PCI writes.
379  *
380  * It's useful if some control registers are in such an area and
381  * write combining or read caching is not desirable.
382  *
383  * Must be freed with iounmap.
384  */
385 void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
386 {
387 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
388 
389 	return __ioremap_caller(phys_addr, size, pcm,
390 				__builtin_return_address(0), false);
391 }
392 EXPORT_SYMBOL_GPL(ioremap_uc);
393 
394 /**
395  * ioremap_wc	-	map memory into CPU space write combined
396  * @phys_addr:	bus address of the memory
397  * @size:	size of the resource to map
398  *
399  * This version of ioremap ensures that the memory is marked write combining.
400  * Write combining allows faster writes to some hardware devices.
401  *
402  * Must be freed with iounmap.
403  */
404 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
405 {
406 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
407 					__builtin_return_address(0), false);
408 }
409 EXPORT_SYMBOL(ioremap_wc);
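
/*
 * Illustrative sketch (assumption, not part of this file): a framebuffer-style
 * driver might map its memory aperture write-combined so that streaming writes
 * can be batched by the CPU. 'fb_phys' and 'fb_size' are hypothetical:
 *
 *	void __iomem *fb = ioremap_wc(fb_phys, fb_size);
 *
 *	if (!fb)
 *		return -ENOMEM;
 *	memset_io(fb, 0, fb_size);
 *	iounmap(fb);
 */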
410 
411 /**
412  * ioremap_wt	-	map memory into CPU space write through
413  * @phys_addr:	bus address of the memory
414  * @size:	size of the resource to map
415  *
416  * This version of ioremap ensures that the memory is marked write through.
417  * Write through stores data into memory while keeping the cache up-to-date.
418  *
419  * Must be freed with iounmap.
420  */
421 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
422 {
423 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
424 					__builtin_return_address(0), false);
425 }
426 EXPORT_SYMBOL(ioremap_wt);
427 
428 void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
429 {
430 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
431 				__builtin_return_address(0), true);
432 }
433 EXPORT_SYMBOL(ioremap_encrypted);
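
/*
 * Illustrative sketch (assumption, not part of this file): the kdump/vmcore
 * path reads memory of a crashed SME/SEV kernel one page at a time through an
 * encrypted mapping, roughly:
 *
 *	vaddr = ioremap_encrypted(pfn << PAGE_SHIFT, PAGE_SIZE);
 *	if (!vaddr)
 *		return -ENOMEM;
 *	...
 *	iounmap(vaddr);
 */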
434 
435 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
436 {
437 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
438 				__builtin_return_address(0), false);
439 }
440 EXPORT_SYMBOL(ioremap_cache);
441 
442 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
443 				unsigned long prot_val)
444 {
445 	return __ioremap_caller(phys_addr, size,
446 				pgprot2cachemode(__pgprot(prot_val)),
447 				__builtin_return_address(0), false);
448 }
449 EXPORT_SYMBOL(ioremap_prot);
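
/*
 * Illustrative sketch (assumption): ioremap_prot() takes raw page protection
 * bits, and pgprot2cachemode() reduces them to one of the cache modes handled
 * by __ioremap_caller(). Passing the value of PAGE_KERNEL, for example, yields
 * a write-back mapping equivalent to ioremap_cache(). 'phys' and 'size' are
 * hypothetical:
 *
 *	void __iomem *p = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
 */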
450 
451 /**
452  * iounmap - Free an IO remapping
453  * @addr: virtual address from ioremap_*
454  *
455  * Caller must ensure there is only one unmapping for the same pointer.
456  */
457 void iounmap(volatile void __iomem *addr)
458 {
459 	struct vm_struct *p, *o;
460 
461 	if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr)))
462 		return;
463 
464 	/*
465 	 * The PCI/ISA range special-casing was removed from __ioremap()
466 	 * so this check, in theory, can be removed. However, there are
467 	 * cases where iounmap() is called for addresses not obtained via
468 	 * ioremap() (vga16fb for example). Add a warning so that these
469 	 * cases can be caught and fixed.
470 	 */
471 	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
472 	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
473 		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
474 		return;
475 	}
476 
477 	mmiotrace_iounmap(addr);
478 
479 	addr = (volatile void __iomem *)
480 		(PAGE_MASK & (unsigned long __force)addr);
481 
482	/* Use the vm area unlocked, assuming the caller ensures there isn't
483	 * another iounmap for the same address in parallel. Reuse of the
484	 * virtual address is prevented by leaving it in the global lists
485	 * until we're done with it. cpa takes care of the direct mappings.
486	 */
487 	p = find_vm_area((void __force *)addr);
488 
489 	if (!p) {
490 		printk(KERN_ERR "iounmap: bad address %p\n", addr);
491 		dump_stack();
492 		return;
493 	}
494 
495 	kmsan_iounmap_page_range((unsigned long)addr,
496 		(unsigned long)addr + get_vm_area_size(p));
497 	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
498 
499 	/* Finally remove it */
500 	o = remove_vm_area((void __force *)addr);
501 	BUG_ON(p != o || o == NULL);
502 	kfree(p);
503 }
504 EXPORT_SYMBOL(iounmap);
505 
506 /*
507  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
508  * access
509  */
510 void *xlate_dev_mem_ptr(phys_addr_t phys)
511 {
512 	unsigned long start  = phys &  PAGE_MASK;
513 	unsigned long offset = phys & ~PAGE_MASK;
514 	void *vaddr;
515 
516 	/* memremap() maps RAM directly; otherwise it falls back to ioremap() */
517 	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
518 
519 	/* Only add the offset on success and return NULL if memremap() failed */
520 	if (vaddr)
521 		vaddr += offset;
522 
523 	return vaddr;
524 }
525 
526 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
527 {
528 	memunmap((void *)((unsigned long)addr & PAGE_MASK));
529 }
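
/*
 * Illustrative sketch (assumption, simplified from how drivers/char/mem.c uses
 * this pair): a /dev/mem read maps one page, copies out of it, then drops the
 * temporary mapping again:
 *
 *	ptr = xlate_dev_mem_ptr(p);
 *	if (!ptr)
 *		return -EFAULT;
 *	if (copy_to_user(buf, ptr, sz))
 *		err = -EFAULT;
 *	unxlate_dev_mem_ptr(p, ptr);
 */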
530 
531 #ifdef CONFIG_AMD_MEM_ENCRYPT
532 /*
533  * Examine the physical address to determine if it is an area of memory
534  * that should be mapped decrypted.  If the memory is not part of the
535  * kernel usable area, it was accessed and created decrypted, so these
536  * areas should be mapped decrypted. And since the encryption key can
537  * change across reboots, persistent memory should also be mapped
538  * decrypted.
539  *
540  * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
541  * only persistent memory should be mapped decrypted.
542  */
543 static bool memremap_should_map_decrypted(resource_size_t phys_addr,
544 					  unsigned long size)
545 {
546 	int is_pmem;
547 
548 	/*
549 	 * Check if the address is part of a persistent memory region.
550 	 * This check covers areas added by E820, EFI and ACPI.
551 	 */
552 	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
553 				    IORES_DESC_PERSISTENT_MEMORY);
554 	if (is_pmem != REGION_DISJOINT)
555 		return true;
556 
557 	/*
558 	 * Check if the non-volatile attribute is set for an EFI
559 	 * reserved area.
560 	 */
561 	if (efi_enabled(EFI_BOOT)) {
562 		switch (efi_mem_type(phys_addr)) {
563 		case EFI_RESERVED_TYPE:
564 			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
565 				return true;
566 			break;
567 		default:
568 			break;
569 		}
570 	}
571 
572 	/* Check if the address is outside kernel usable area */
573 	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
574 	case E820_TYPE_RESERVED:
575 	case E820_TYPE_ACPI:
576 	case E820_TYPE_NVS:
577 	case E820_TYPE_UNUSABLE:
578 		/* For SEV, these areas are encrypted */
579 		if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
580 			break;
581 		fallthrough;
582 
583 	case E820_TYPE_PRAM:
584 		return true;
585 	default:
586 		break;
587 	}
588 
589 	return false;
590 }
591 
592 /*
593  * Examine the physical address to determine if it is EFI data. Check
594  * it against the boot params structure, the EFI tables and memory types.
595  */
596 static bool memremap_is_efi_data(resource_size_t phys_addr)
597 {
598 	u64 paddr;
599 
600 	/* Check if the address is part of EFI boot/runtime data */
601 	if (!efi_enabled(EFI_BOOT))
602 		return false;
603 
604 	paddr = boot_params.efi_info.efi_memmap_hi;
605 	paddr <<= 32;
606 	paddr |= boot_params.efi_info.efi_memmap;
607 	if (phys_addr == paddr)
608 		return true;
609 
610 	paddr = boot_params.efi_info.efi_systab_hi;
611 	paddr <<= 32;
612 	paddr |= boot_params.efi_info.efi_systab;
613 	if (phys_addr == paddr)
614 		return true;
615 
616 	if (efi_is_table_address(phys_addr))
617 		return true;
618 
619 	switch (efi_mem_type(phys_addr)) {
620 	case EFI_BOOT_SERVICES_DATA:
621 	case EFI_RUNTIME_SERVICES_DATA:
622 		return true;
623 	default:
624 		break;
625 	}
626 
627 	return false;
628 }
629 
630 /*
631  * Examine the physical address to determine if it is boot data by checking
632  * it against the boot params setup_data chain.
633  */
634 static bool __ref __memremap_is_setup_data(resource_size_t phys_addr, bool early)
635 {
636 	unsigned int setup_data_sz = sizeof(struct setup_data);
637 	struct setup_indirect *indirect;
638 	struct setup_data *data;
639 	u64 paddr, paddr_next;
640 
641 	paddr = boot_params.hdr.setup_data;
642 	while (paddr) {
643 		unsigned int len, size;
644 
645 		if (phys_addr == paddr)
646 			return true;
647 
648 		if (early)
649 			data = early_memremap_decrypted(paddr, setup_data_sz);
650 		else
651 			data = memremap(paddr, setup_data_sz, MEMREMAP_WB | MEMREMAP_DEC);
652 		if (!data) {
653 			pr_warn("failed to remap setup_data entry\n");
654 			return false;
655 		}
656 
657 		size = setup_data_sz;
658 
659 		paddr_next = data->next;
660 		len = data->len;
661 
662 		if ((phys_addr > paddr) &&
663 		    (phys_addr < (paddr + setup_data_sz + len))) {
664 			if (early)
665 				early_memunmap(data, setup_data_sz);
666 			else
667 				memunmap(data);
668 			return true;
669 		}
670 
671 		if (data->type == SETUP_INDIRECT) {
672 			size += len;
673 			if (early) {
674 				early_memunmap(data, setup_data_sz);
675 				data = early_memremap_decrypted(paddr, size);
676 			} else {
677 				memunmap(data);
678 				data = memremap(paddr, size, MEMREMAP_WB | MEMREMAP_DEC);
679 			}
680 			if (!data) {
681 				pr_warn("failed to remap indirect setup_data\n");
682 				return false;
683 			}
684 
685 			indirect = (struct setup_indirect *)data->data;
686 
687 			if (indirect->type != SETUP_INDIRECT) {
688 				paddr = indirect->addr;
689 				len = indirect->len;
690 			}
691 		}
692 
693 		if (early)
694 			early_memunmap(data, size);
695 		else
696 			memunmap(data);
697 
698 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
699 			return true;
700 
701 		paddr = paddr_next;
702 	}
703 
704 	return false;
705 }
706 
707 static bool memremap_is_setup_data(resource_size_t phys_addr)
708 {
709 	return __memremap_is_setup_data(phys_addr, false);
710 }
711 
712 static bool __init early_memremap_is_setup_data(resource_size_t phys_addr)
713 {
714 	return __memremap_is_setup_data(phys_addr, true);
715 }
716 
717 /*
718  * Architecture function to determine if RAM remap is allowed. By default, a
719  * RAM remap will map the data as encrypted. Determine if a RAM remap should
720  * not be done so that the data will be mapped decrypted.
721  */
722 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
723 				 unsigned long flags)
724 {
725 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
726 		return true;
727 
728 	if (flags & MEMREMAP_ENC)
729 		return true;
730 
731 	if (flags & MEMREMAP_DEC)
732 		return false;
733 
734 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
735 		if (memremap_is_setup_data(phys_addr) ||
736 		    memremap_is_efi_data(phys_addr))
737 			return false;
738 	}
739 
740 	return !memremap_should_map_decrypted(phys_addr, size);
741 }
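
/*
 * Illustrative sketch (assumption, not part of this file): a caller that needs
 * a decrypted view of a RAM range can force one with MEMREMAP_DEC, which the
 * check above honours before any of the heuristics, just as
 * __memremap_is_setup_data() does for setup_data entries:
 *
 *	void *va = memremap(paddr, len, MEMREMAP_WB | MEMREMAP_DEC);
 *
 *	if (!va)
 *		return -ENOMEM;
 *	...
 *	memunmap(va);
 */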
742 
743 /*
744  * Architecture override of __weak function to adjust the protection attributes
745  * used when remapping memory. By default, early_memremap() will map the data
746  * as encrypted. Determine if an encrypted mapping should not be done and set
747  * the appropriate protection attributes.
748  */
749 pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
750 					     unsigned long size,
751 					     pgprot_t prot)
752 {
753 	bool encrypted_prot;
754 
755 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
756 		return prot;
757 
758 	encrypted_prot = true;
759 
760 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
761 		if (early_memremap_is_setup_data(phys_addr) ||
762 		    memremap_is_efi_data(phys_addr))
763 			encrypted_prot = false;
764 	}
765 
766 	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
767 		encrypted_prot = false;
768 
769 	return encrypted_prot ? pgprot_encrypted(prot)
770 			      : pgprot_decrypted(prot);
771 }
772 
773 bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
774 {
775 	return arch_memremap_can_ram_remap(phys_addr, size, 0);
776 }
777 
778 /* Remap memory with encryption */
779 void __init *early_memremap_encrypted(resource_size_t phys_addr,
780 				      unsigned long size)
781 {
782 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
783 }
784 
785 /*
786  * Remap memory encrypted and write-protected - cannot be called
787  * before pat_init() is called
788  */
789 void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
790 					 unsigned long size)
791 {
792 	if (!x86_has_pat_wp())
793 		return NULL;
794 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
795 }
796 
797 /* Remap memory without encryption */
798 void __init *early_memremap_decrypted(resource_size_t phys_addr,
799 				      unsigned long size)
800 {
801 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
802 }
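
/*
 * Illustrative sketch (mirrors the early path of __memremap_is_setup_data()
 * above): early boot code peeks at a setup_data entry through a temporary
 * decrypted mapping and releases it again with early_memunmap():
 *
 *	data = early_memremap_decrypted(paddr, sizeof(struct setup_data));
 *	if (!data)
 *		return false;
 *	...
 *	early_memunmap(data, sizeof(struct setup_data));
 */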
803 
804 /*
805  * Remap memory decrypted and write-protected - cannot be called
806  * before pat_init() is called
807  */
808 void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
809 					 unsigned long size)
810 {
811 	if (!x86_has_pat_wp())
812 		return NULL;
813 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
814 }
815 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
816 
817 static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
818 
819 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
820 {
821 	/* Don't assume we're using swapper_pg_dir at this point */
822 	pgd_t *base = __va(read_cr3_pa());
823 	pgd_t *pgd = &base[pgd_index(addr)];
824 	p4d_t *p4d = p4d_offset(pgd, addr);
825 	pud_t *pud = pud_offset(p4d, addr);
826 	pmd_t *pmd = pmd_offset(pud, addr);
827 
828 	return pmd;
829 }
830 
831 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
832 {
833 	return &bm_pte[pte_index(addr)];
834 }
835 
836 bool __init is_early_ioremap_ptep(pte_t *ptep)
837 {
838 	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
839 }
840 
841 void __init early_ioremap_init(void)
842 {
843 	pmd_t *pmd;
844 
845 #ifdef CONFIG_X86_64
846 	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
847 #else
848 	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
849 #endif
850 
851 	early_ioremap_setup();
852 
853 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
854 	memset(bm_pte, 0, sizeof(bm_pte));
855 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
856 
857 	/*
858 	 * The boot-ioremap range spans multiple pmds, for which
859 	 * we are not prepared:
860 	 */
861 #define __FIXADDR_TOP (-PAGE_SIZE)
862 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
863 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
864 #undef __FIXADDR_TOP
865 	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
866 		WARN_ON(1);
867 		printk(KERN_WARNING "pmd %p != %p\n",
868 		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
869 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
870 			fix_to_virt(FIX_BTMAP_BEGIN));
871 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
872 			fix_to_virt(FIX_BTMAP_END));
873 
874 		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
875 		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
876 		       FIX_BTMAP_BEGIN);
877 	}
878 }
879 
880 void __init __early_set_fixmap(enum fixed_addresses idx,
881 			       phys_addr_t phys, pgprot_t flags)
882 {
883 	unsigned long addr = __fix_to_virt(idx);
884 	pte_t *pte;
885 
886 	if (idx >= __end_of_fixed_addresses) {
887 		BUG();
888 		return;
889 	}
890 	pte = early_ioremap_pte(addr);
891 
892 	/* Sanitize 'prot' against any unsupported bits: */
893 	pgprot_val(flags) &= __supported_pte_mask;
894 
895 	if (pgprot_val(flags))
896 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
897 	else
898 		pte_clear(&init_mm, addr, pte);
899 	flush_tlb_one_kernel(addr);
900 }
901