xref: /linux/arch/x86/mm/ioremap.c (revision 6863aaa88516292b885fdce5dd91925a00c3a3de)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Re-map IO memory to kernel address space so that we can access it.
4  * This is needed for high PCI addresses that aren't mapped in the
5  * 640k-1MB IO memory area on PC's
6  *
7  * (C) Copyright 1995 1996 Linus Torvalds
8  */
9 
10 #include <linux/memblock.h>
11 #include <linux/init.h>
12 #include <linux/io.h>
13 #include <linux/ioport.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/mmiotrace.h>
17 #include <linux/cc_platform.h>
18 #include <linux/efi.h>
19 #include <linux/pgtable.h>
20 #include <linux/kmsan.h>
21 
22 #include <asm/set_memory.h>
23 #include <asm/e820/api.h>
24 #include <asm/efi.h>
25 #include <asm/fixmap.h>
26 #include <asm/tlbflush.h>
27 #include <asm/pgalloc.h>
28 #include <asm/memtype.h>
29 #include <asm/setup.h>
30 
31 #include "physaddr.h"
32 
/*
 * Descriptor controlling ioremap() behavior.
 */
struct ioremap_desc {
	unsigned int flags;	/* IORES_MAP_* bits collected while walking the range */
};
39 
40 /*
41  * Fix up the linear direct mapping of the kernel to avoid cache attribute
42  * conflicts.
43  */
44 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
45 			enum page_cache_mode pcm)
46 {
47 	unsigned long nrpages = size >> PAGE_SHIFT;
48 	int err;
49 
50 	switch (pcm) {
51 	case _PAGE_CACHE_MODE_UC:
52 	default:
53 		err = _set_memory_uc(vaddr, nrpages);
54 		break;
55 	case _PAGE_CACHE_MODE_WC:
56 		err = _set_memory_wc(vaddr, nrpages);
57 		break;
58 	case _PAGE_CACHE_MODE_WT:
59 		err = _set_memory_wt(vaddr, nrpages);
60 		break;
61 	case _PAGE_CACHE_MODE_WB:
62 		err = _set_memory_wb(vaddr, nrpages);
63 		break;
64 	}
65 
66 	return err;
67 }
68 
69 /* Does the range (or a subset of) contain normal RAM? */
70 static unsigned int __ioremap_check_ram(struct resource *res)
71 {
72 	unsigned long start_pfn, stop_pfn;
73 	unsigned long i;
74 
75 	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
76 		return 0;
77 
78 	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
79 	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
80 	if (stop_pfn > start_pfn) {
81 		for (i = 0; i < (stop_pfn - start_pfn); ++i)
82 			if (pfn_valid(start_pfn + i) &&
83 			    !PageReserved(pfn_to_page(start_pfn + i)))
84 				return IORES_MAP_SYSTEM_RAM;
85 	}
86 
87 	return 0;
88 }
89 
90 /*
91  * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
92  * there the whole memory is already encrypted.
93  */
94 static unsigned int __ioremap_check_encrypted(struct resource *res)
95 {
96 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
97 		return 0;
98 
99 	switch (res->desc) {
100 	case IORES_DESC_NONE:
101 	case IORES_DESC_RESERVED:
102 		break;
103 	default:
104 		return IORES_MAP_ENCRYPTED;
105 	}
106 
107 	return 0;
108 }
109 
110 /*
111  * The EFI runtime services data area is not covered by walk_mem_res(), but must
112  * be mapped encrypted when SEV is active.
113  */
114 static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
115 {
116 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
117 		return;
118 
119 	if (!IS_ENABLED(CONFIG_EFI))
120 		return;
121 
122 	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
123 	    (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
124 	     efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
125 		desc->flags |= IORES_MAP_ENCRYPTED;
126 }
127 
128 static int __ioremap_collect_map_flags(struct resource *res, void *arg)
129 {
130 	struct ioremap_desc *desc = arg;
131 
132 	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
133 		desc->flags |= __ioremap_check_ram(res);
134 
135 	if (!(desc->flags & IORES_MAP_ENCRYPTED))
136 		desc->flags |= __ioremap_check_encrypted(res);
137 
138 	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
139 			       (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
140 }
141 
142 /*
143  * To avoid multiple resource walks, this function walks resources marked as
144  * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a
145  * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
146  *
147  * After that, deal with misc other ranges in __ioremap_check_other() which do
148  * not fall into the above category.
149  */
150 static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
151 				struct ioremap_desc *desc)
152 {
153 	u64 start, end;
154 
155 	start = (u64)addr;
156 	end = start + size - 1;
157 	memset(desc, 0, sizeof(struct ioremap_desc));
158 
159 	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
160 
161 	__ioremap_check_other(addr, desc);
162 }
163 
164 /*
165  * Remap an arbitrary physical address space into the kernel virtual
166  * address space. It transparently creates kernel huge I/O mapping when
167  * the physical address is aligned by a huge page size (1GB or 2MB) and
168  * the requested size is at least the huge page size.
169  *
170  * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
171  * Therefore, the mapping code falls back to use a smaller page toward 4KB
172  * when a mapping range is covered by non-WB type of MTRRs.
173  *
174  * NOTE! We need to allow non-page-aligned mappings too: we will obviously
175  * have to convert them into an offset in a page-aligned mapping, but the
176  * caller shouldn't need to know that small detail.
177  */
178 static void __iomem *
179 __ioremap_caller(resource_size_t phys_addr, unsigned long size,
180 		 enum page_cache_mode pcm, void *caller, bool encrypted)
181 {
182 	unsigned long offset, vaddr;
183 	resource_size_t last_addr;
184 	const resource_size_t unaligned_phys_addr = phys_addr;
185 	const unsigned long unaligned_size = size;
186 	struct ioremap_desc io_desc;
187 	struct vm_struct *area;
188 	enum page_cache_mode new_pcm;
189 	pgprot_t prot;
190 	int retval;
191 	void __iomem *ret_addr;
192 
193 	/* Don't allow wraparound or zero size */
194 	last_addr = phys_addr + size - 1;
195 	if (!size || last_addr < phys_addr)
196 		return NULL;
197 
198 	if (!phys_addr_valid(phys_addr)) {
199 		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
200 		       (unsigned long long)phys_addr);
201 		WARN_ON_ONCE(1);
202 		return NULL;
203 	}
204 
205 	__ioremap_check_mem(phys_addr, size, &io_desc);
206 
207 	/*
208 	 * Don't allow anybody to remap normal RAM that we're using..
209 	 */
210 	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
211 		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
212 			  &phys_addr, &last_addr);
213 		return NULL;
214 	}
215 
216 	/*
217 	 * Mappings have to be page-aligned
218 	 */
219 	offset = phys_addr & ~PAGE_MASK;
220 	phys_addr &= PHYSICAL_PAGE_MASK;
221 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
222 
223 	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
224 						pcm, &new_pcm);
225 	if (retval) {
226 		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
227 		return NULL;
228 	}
229 
230 	if (pcm != new_pcm) {
231 		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
232 			printk(KERN_ERR
233 		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
234 				(unsigned long long)phys_addr,
235 				(unsigned long long)(phys_addr + size),
236 				pcm, new_pcm);
237 			goto err_free_memtype;
238 		}
239 		pcm = new_pcm;
240 	}
241 
242 	/*
243 	 * If the page being mapped is in memory and SEV is active then
244 	 * make sure the memory encryption attribute is enabled in the
245 	 * resulting mapping.
246 	 * In TDX guests, memory is marked private by default. If encryption
247 	 * is not requested (using encrypted), explicitly set decrypt
248 	 * attribute in all IOREMAPPED memory.
249 	 */
250 	prot = PAGE_KERNEL_IO;
251 	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
252 		prot = pgprot_encrypted(prot);
253 	else
254 		prot = pgprot_decrypted(prot);
255 
256 	switch (pcm) {
257 	case _PAGE_CACHE_MODE_UC:
258 	default:
259 		prot = __pgprot(pgprot_val(prot) |
260 				cachemode2protval(_PAGE_CACHE_MODE_UC));
261 		break;
262 	case _PAGE_CACHE_MODE_UC_MINUS:
263 		prot = __pgprot(pgprot_val(prot) |
264 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
265 		break;
266 	case _PAGE_CACHE_MODE_WC:
267 		prot = __pgprot(pgprot_val(prot) |
268 				cachemode2protval(_PAGE_CACHE_MODE_WC));
269 		break;
270 	case _PAGE_CACHE_MODE_WT:
271 		prot = __pgprot(pgprot_val(prot) |
272 				cachemode2protval(_PAGE_CACHE_MODE_WT));
273 		break;
274 	case _PAGE_CACHE_MODE_WB:
275 		break;
276 	}
277 
278 	/*
279 	 * Ok, go for it..
280 	 */
281 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
282 	if (!area)
283 		goto err_free_memtype;
284 	area->phys_addr = phys_addr;
285 	vaddr = (unsigned long) area->addr;
286 
287 	if (memtype_kernel_map_sync(phys_addr, size, pcm))
288 		goto err_free_area;
289 
290 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
291 		goto err_free_area;
292 
293 	ret_addr = (void __iomem *) (vaddr + offset);
294 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
295 
296 	/*
297 	 * Check if the request spans more than any BAR in the iomem resource
298 	 * tree.
299 	 */
300 	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
301 		pr_warn("caller %pS mapping multiple BARs\n", caller);
302 
303 	return ret_addr;
304 err_free_area:
305 	free_vm_area(area);
306 err_free_memtype:
307 	memtype_free(phys_addr, phys_addr + size);
308 	return NULL;
309 }
310 
311 /**
312  * ioremap     -   map bus memory into CPU space
313  * @phys_addr:    bus address of the memory
314  * @size:      size of the resource to map
315  *
316  * ioremap performs a platform specific sequence of operations to
317  * make bus memory CPU accessible via the readb/readw/readl/writeb/
318  * writew/writel functions and the other mmio helpers. The returned
319  * address is not guaranteed to be usable directly as a virtual
320  * address.
321  *
322  * This version of ioremap ensures that the memory is marked uncachable
323  * on the CPU as well as honouring existing caching rules from things like
324  * the PCI bus. Note that there are other caches and buffers on many
325  * busses. In particular driver authors should read up on PCI writes
326  *
327  * It's useful if some control registers are in such an area and
328  * write combining or read caching is not desirable:
329  *
330  * Must be freed with iounmap.
331  */
332 void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
333 {
334 	/*
335 	 * Ideally, this should be:
336 	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
337 	 *
338 	 * Till we fix all X drivers to use ioremap_wc(), we will use
339 	 * UC MINUS. Drivers that are certain they need or can already
340 	 * be converted over to strong UC can use ioremap_uc().
341 	 */
342 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
343 
344 	return __ioremap_caller(phys_addr, size, pcm,
345 				__builtin_return_address(0), false);
346 }
347 EXPORT_SYMBOL(ioremap);
348 
349 /**
350  * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
351  * @phys_addr:    bus address of the memory
352  * @size:      size of the resource to map
353  *
354  * ioremap_uc performs a platform specific sequence of operations to
355  * make bus memory CPU accessible via the readb/readw/readl/writeb/
356  * writew/writel functions and the other mmio helpers. The returned
357  * address is not guaranteed to be usable directly as a virtual
358  * address.
359  *
360  * This version of ioremap ensures that the memory is marked with a strong
361  * preference as completely uncachable on the CPU when possible. For non-PAT
362  * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
363  * systems this will set the PAT entry for the pages as strong UC.  This call
364  * will honor existing caching rules from things like the PCI bus. Note that
365  * there are other caches and buffers on many busses. In particular driver
366  * authors should read up on PCI writes.
367  *
368  * It's useful if some control registers are in such an area and
369  * write combining or read caching is not desirable:
370  *
371  * Must be freed with iounmap.
372  */
373 void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
374 {
375 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
376 
377 	return __ioremap_caller(phys_addr, size, pcm,
378 				__builtin_return_address(0), false);
379 }
380 EXPORT_SYMBOL_GPL(ioremap_uc);
381 
382 /**
383  * ioremap_wc	-	map memory into CPU space write combined
384  * @phys_addr:	bus address of the memory
385  * @size:	size of the resource to map
386  *
387  * This version of ioremap ensures that the memory is marked write combining.
388  * Write combining allows faster writes to some hardware devices.
389  *
390  * Must be freed with iounmap.
391  */
392 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
393 {
394 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
395 					__builtin_return_address(0), false);
396 }
397 EXPORT_SYMBOL(ioremap_wc);
398 
399 /**
400  * ioremap_wt	-	map memory into CPU space write through
401  * @phys_addr:	bus address of the memory
402  * @size:	size of the resource to map
403  *
404  * This version of ioremap ensures that the memory is marked write through.
405  * Write through stores data into memory while keeping the cache up-to-date.
406  *
407  * Must be freed with iounmap.
408  */
409 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
410 {
411 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
412 					__builtin_return_address(0), false);
413 }
414 EXPORT_SYMBOL(ioremap_wt);
415 
416 void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
417 {
418 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
419 				__builtin_return_address(0), true);
420 }
421 EXPORT_SYMBOL(ioremap_encrypted);
422 
423 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
424 {
425 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
426 				__builtin_return_address(0), false);
427 }
428 EXPORT_SYMBOL(ioremap_cache);
429 
430 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
431 				unsigned long prot_val)
432 {
433 	return __ioremap_caller(phys_addr, size,
434 				pgprot2cachemode(__pgprot(prot_val)),
435 				__builtin_return_address(0), false);
436 }
437 EXPORT_SYMBOL(ioremap_prot);
438 
439 /**
440  * iounmap - Free a IO remapping
441  * @addr: virtual address from ioremap_*
442  *
443  * Caller must ensure there is only one unmapping for the same pointer.
444  */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	/* Addresses at or below high_memory are in the direct map, not vmalloc space. */
	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * The PCI/ISA range special-casing was removed from __ioremap()
	 * so this check, in theory, can be removed. However, there are
	 * cases where iounmap() is called for addresses not obtained via
	 * ioremap() (vga16fb for example). Add a warning so that these
	 * cases can be caught and fixed.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
		return;
	}

	mmiotrace_iounmap(addr);

	/* Strip the sub-page offset that __ioremap_caller() folded in. */
	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	/* Use the vm area unlocked, assuming the caller
	   ensures there isn't another iounmap for the same address
	   in parallel. Reuse of the virtual address is prevented by
	   leaving it in the global lists until we're done with it.
	   cpa takes care of the direct mappings. */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	/* Drop KMSAN metadata and the PAT memtype reservation for the range. */
	kmsan_iounmap_page_range((unsigned long)addr,
		(unsigned long)addr + get_vm_area_size(p));
	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);
493 
494 /*
495  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
496  * access
497  */
498 void *xlate_dev_mem_ptr(phys_addr_t phys)
499 {
500 	unsigned long start  = phys &  PAGE_MASK;
501 	unsigned long offset = phys & ~PAGE_MASK;
502 	void *vaddr;
503 
504 	/* memremap() maps if RAM, otherwise falls back to ioremap() */
505 	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
506 
507 	/* Only add the offset on success and return NULL if memremap() failed */
508 	if (vaddr)
509 		vaddr += offset;
510 
511 	return vaddr;
512 }
513 
514 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
515 {
516 	memunmap((void *)((unsigned long)addr & PAGE_MASK));
517 }
518 
519 #ifdef CONFIG_AMD_MEM_ENCRYPT
520 /*
521  * Examine the physical address to determine if it is an area of memory
522  * that should be mapped decrypted.  If the memory is not part of the
523  * kernel usable area it was accessed and created decrypted, so these
524  * areas should be mapped decrypted. And since the encryption key can
525  * change across reboots, persistent memory should also be mapped
526  * decrypted.
527  *
528  * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
529  * only persistent memory should be mapped decrypted.
530  */
static bool memremap_should_map_decrypted(resource_size_t phys_addr,
					  unsigned long size)
{
	int is_pmem;

	/*
	 * Check if the address is part of a persistent memory region.
	 * This check covers areas added by E820, EFI and ACPI.
	 */
	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
				    IORES_DESC_PERSISTENT_MEMORY);
	if (is_pmem != REGION_DISJOINT)
		return true;

	/*
	 * Check if the non-volatile attribute is set for an EFI
	 * reserved area.
	 */
	if (efi_enabled(EFI_BOOT)) {
		switch (efi_mem_type(phys_addr)) {
		case EFI_RESERVED_TYPE:
			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
				return true;
			break;
		default:
			break;
		}
	}

	/* Check if the address is outside kernel usable area */
	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
	case E820_TYPE_RESERVED:
	case E820_TYPE_ACPI:
	case E820_TYPE_NVS:
	case E820_TYPE_UNUSABLE:
		/*
		 * For SEV, these areas are encrypted (BIOS/UEFI also ran
		 * encrypted), so do NOT fall through to the decrypted case.
		 */
		if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
			break;
		fallthrough;

	case E820_TYPE_PRAM:
		/* Persistent memory survives reboots; key changes → decrypted. */
		return true;
	default:
		break;
	}

	return false;
}
579 
580 /*
581  * Examine the physical address to determine if it is EFI data. Check
582  * it against the boot params structure and EFI tables and memory types.
583  */
584 static bool memremap_is_efi_data(resource_size_t phys_addr,
585 				 unsigned long size)
586 {
587 	u64 paddr;
588 
589 	/* Check if the address is part of EFI boot/runtime data */
590 	if (!efi_enabled(EFI_BOOT))
591 		return false;
592 
593 	paddr = boot_params.efi_info.efi_memmap_hi;
594 	paddr <<= 32;
595 	paddr |= boot_params.efi_info.efi_memmap;
596 	if (phys_addr == paddr)
597 		return true;
598 
599 	paddr = boot_params.efi_info.efi_systab_hi;
600 	paddr <<= 32;
601 	paddr |= boot_params.efi_info.efi_systab;
602 	if (phys_addr == paddr)
603 		return true;
604 
605 	if (efi_is_table_address(phys_addr))
606 		return true;
607 
608 	switch (efi_mem_type(phys_addr)) {
609 	case EFI_BOOT_SERVICES_DATA:
610 	case EFI_RUNTIME_SERVICES_DATA:
611 		return true;
612 	default:
613 		break;
614 	}
615 
616 	return false;
617 }
618 
/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain.
 */
static bool memremap_is_setup_data(resource_size_t phys_addr,
				   unsigned long size)
{
	struct setup_indirect *indirect;
	struct setup_data *data;
	u64 paddr, paddr_next;

	/* Walk the singly-linked setup_data chain from boot_params. */
	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		unsigned int len;

		/* The header address itself counts as boot data. */
		if (phys_addr == paddr)
			return true;

		/* setup_data is always mapped decrypted, hence MEMREMAP_DEC. */
		data = memremap(paddr, sizeof(*data),
				MEMREMAP_WB | MEMREMAP_DEC);
		if (!data) {
			pr_warn("failed to memremap setup_data entry\n");
			return false;
		}

		paddr_next = data->next;
		len = data->len;

		/* Inside this entry's payload (header start handled above)? */
		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
			memunmap(data);
			return true;
		}

		if (data->type == SETUP_INDIRECT) {
			/* Re-map including the payload to read the indirect descriptor. */
			memunmap(data);
			data = memremap(paddr, sizeof(*data) + len,
					MEMREMAP_WB | MEMREMAP_DEC);
			if (!data) {
				pr_warn("failed to memremap indirect setup_data\n");
				return false;
			}

			indirect = (struct setup_indirect *)data->data;

			/*
			 * Redirect the range check to the indirect payload,
			 * unless the entry (bogusly) claims to point at
			 * another SETUP_INDIRECT.
			 */
			if (indirect->type != SETUP_INDIRECT) {
				paddr = indirect->addr;
				len = indirect->len;
			}
		}

		memunmap(data);

		/* Range check again, possibly against the indirect target. */
		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}
679 
/*
 * Examine the physical address to determine if it is boot data by checking
 * it against the boot params setup_data chain (early boot version).
 */
static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
						unsigned long size)
{
	struct setup_indirect *indirect;
	struct setup_data *data;
	u64 paddr, paddr_next;

	/* Walk the singly-linked setup_data chain from boot_params. */
	paddr = boot_params.hdr.setup_data;
	while (paddr) {
		/*
		 * NOTE(review): the local 'size' shadows the function's
		 * 'size' parameter (which is otherwise unused here) —
		 * consider renaming for clarity.
		 */
		unsigned int len, size;

		/* The header address itself counts as boot data. */
		if (phys_addr == paddr)
			return true;

		data = early_memremap_decrypted(paddr, sizeof(*data));
		if (!data) {
			pr_warn("failed to early memremap setup_data entry\n");
			return false;
		}

		/* Track the currently mapped length for the final early_memunmap(). */
		size = sizeof(*data);

		paddr_next = data->next;
		len = data->len;

		/* Inside this entry's payload (header start handled above)? */
		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
			early_memunmap(data, sizeof(*data));
			return true;
		}

		if (data->type == SETUP_INDIRECT) {
			/* Re-map including the payload to read the indirect descriptor. */
			size += len;
			early_memunmap(data, sizeof(*data));
			data = early_memremap_decrypted(paddr, size);
			if (!data) {
				pr_warn("failed to early memremap indirect setup_data\n");
				return false;
			}

			indirect = (struct setup_indirect *)data->data;

			/*
			 * Redirect the range check to the indirect payload,
			 * unless the entry (bogusly) claims to point at
			 * another SETUP_INDIRECT.
			 */
			if (indirect->type != SETUP_INDIRECT) {
				paddr = indirect->addr;
				len = indirect->len;
			}
		}

		early_memunmap(data, size);

		/* Range check again, possibly against the indirect target. */
		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
			return true;

		paddr = paddr_next;
	}

	return false;
}
741 
742 /*
743  * Architecture function to determine if RAM remap is allowed. By default, a
744  * RAM remap will map the data as encrypted. Determine if a RAM remap should
745  * not be done so that the data will be mapped decrypted.
746  */
747 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
748 				 unsigned long flags)
749 {
750 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
751 		return true;
752 
753 	if (flags & MEMREMAP_ENC)
754 		return true;
755 
756 	if (flags & MEMREMAP_DEC)
757 		return false;
758 
759 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
760 		if (memremap_is_setup_data(phys_addr, size) ||
761 		    memremap_is_efi_data(phys_addr, size))
762 			return false;
763 	}
764 
765 	return !memremap_should_map_decrypted(phys_addr, size);
766 }
767 
768 /*
769  * Architecture override of __weak function to adjust the protection attributes
770  * used when remapping memory. By default, early_memremap() will map the data
771  * as encrypted. Determine if an encrypted mapping should not be done and set
772  * the appropriate protection attributes.
773  */
774 pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
775 					     unsigned long size,
776 					     pgprot_t prot)
777 {
778 	bool encrypted_prot;
779 
780 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
781 		return prot;
782 
783 	encrypted_prot = true;
784 
785 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
786 		if (early_memremap_is_setup_data(phys_addr, size) ||
787 		    memremap_is_efi_data(phys_addr, size))
788 			encrypted_prot = false;
789 	}
790 
791 	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
792 		encrypted_prot = false;
793 
794 	return encrypted_prot ? pgprot_encrypted(prot)
795 			      : pgprot_decrypted(prot);
796 }
797 
/*
 * Whether a physical range would be mapped encrypted by a plain remap
 * (no MEMREMAP_ENC/MEMREMAP_DEC flags).
 */
bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
{
	return arch_memremap_can_ram_remap(phys_addr, size, 0);
}
802 
/* Remap memory with encryption */
void __init *early_memremap_encrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
}
809 
/*
 * Remap memory with encryption and write-protected - cannot be called
 * before pat_init() is called (the WP cache mode needs a PAT entry).
 */
void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	/* No WP PAT slot configured -> cannot honor the request. */
	if (!x86_has_pat_wp())
		return NULL;
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
}
821 
/* Remap memory without encryption */
void __init *early_memremap_decrypted(resource_size_t phys_addr,
				      unsigned long size)
{
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
}
828 
/*
 * Remap memory without encryption and write-protected - cannot be called
 * before pat_init() is called (the WP cache mode needs a PAT entry).
 */
void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
					 unsigned long size)
{
	/* No WP PAT slot configured -> cannot honor the request. */
	if (!x86_has_pat_wp())
		return NULL;
	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
}
840 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
841 
/* Statically allocated PTE page backing the early (boot-time) fixmap slots. */
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
843 
/* Walk the currently active page tables (from CR3) down to the PMD for @addr. */
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3_pa());
	pgd_t *pgd = &base[pgd_index(addr)];
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}
855 
/* Return the PTE slot in the static bm_pte page that covers @addr. */
static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}
860 
861 bool __init is_early_ioremap_ptep(pte_t *ptep)
862 {
863 	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
864 }
865 
/* Set up the boot-time ioremap machinery: hook bm_pte into the fixmap PMD. */
void __init early_ioremap_init(void)
{
	pmd_t *pmd;

	/* The first fixmap slot must not straddle a PMD boundary. */
#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	/* Install the static bm_pte page as the PTE table for the BTMAP range. */
	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	/* Temporarily pin __FIXADDR_TOP so __fix_to_virt() is compile-time constant. */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}
904 
/*
 * Install (non-zero @flags) or clear (zero @flags) the early-fixmap PTE
 * for slot @idx, mapping physical address @phys, then flush the TLB entry.
 */
void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	/* Sanitize 'prot' against any unsupported bits: */
	pgprot_val(flags) &= __supported_pte_mask;

	/* Zero flags means "clear the mapping" rather than "map with no flags". */
	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	flush_tlb_one_kernel(addr);
}
926