xref: /linux/arch/x86/mm/ioremap.c (revision 17e548405a81665fd14cee960db7d093d1396400)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Re-map IO memory to kernel address space so that we can access it.
4  * This is needed for high PCI addresses that aren't mapped in the
5  * 640k-1MB IO memory area on PC's
6  *
7  * (C) Copyright 1995 1996 Linus Torvalds
8  */
9 
10 #include <linux/memblock.h>
11 #include <linux/init.h>
12 #include <linux/io.h>
13 #include <linux/ioport.h>
14 #include <linux/ioremap.h>
15 #include <linux/slab.h>
16 #include <linux/vmalloc.h>
17 #include <linux/mmiotrace.h>
18 #include <linux/cc_platform.h>
19 #include <linux/efi.h>
20 #include <linux/pgtable.h>
21 #include <linux/kmsan.h>
22 
23 #include <asm/set_memory.h>
24 #include <asm/e820/api.h>
25 #include <asm/efi.h>
26 #include <asm/fixmap.h>
27 #include <asm/tlbflush.h>
28 #include <asm/pgalloc.h>
29 #include <asm/memtype.h>
30 #include <asm/setup.h>
31 
32 #include "physaddr.h"
33 
/*
 * Descriptor controlling ioremap() behavior.
 *
 * Zeroed and filled in by __ioremap_check_mem(), then read by
 * __ioremap_caller().
 */
struct ioremap_desc {
	/* Bitmask of IORES_MAP_SYSTEM_RAM and/or IORES_MAP_ENCRYPTED */
	unsigned int flags;
};
40 
41 /*
42  * Fix up the linear direct mapping of the kernel to avoid cache attribute
43  * conflicts.
44  */
45 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
46 			enum page_cache_mode pcm)
47 {
48 	unsigned long nrpages = size >> PAGE_SHIFT;
49 	int err;
50 
51 	switch (pcm) {
52 	case _PAGE_CACHE_MODE_UC:
53 	default:
54 		err = _set_memory_uc(vaddr, nrpages);
55 		break;
56 	case _PAGE_CACHE_MODE_WC:
57 		err = _set_memory_wc(vaddr, nrpages);
58 		break;
59 	case _PAGE_CACHE_MODE_WT:
60 		err = _set_memory_wt(vaddr, nrpages);
61 		break;
62 	case _PAGE_CACHE_MODE_WB:
63 		err = _set_memory_wb(vaddr, nrpages);
64 		break;
65 	}
66 
67 	return err;
68 }
69 
70 /* Does the range (or a subset of) contain normal RAM? */
71 static unsigned int __ioremap_check_ram(struct resource *res)
72 {
73 	unsigned long start_pfn, stop_pfn;
74 	unsigned long i;
75 
76 	if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
77 		return 0;
78 
79 	start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
80 	stop_pfn = (res->end + 1) >> PAGE_SHIFT;
81 	if (stop_pfn > start_pfn) {
82 		for (i = 0; i < (stop_pfn - start_pfn); ++i)
83 			if (pfn_valid(start_pfn + i) &&
84 			    !PageReserved(pfn_to_page(start_pfn + i)))
85 				return IORES_MAP_SYSTEM_RAM;
86 	}
87 
88 	return 0;
89 }
90 
91 /*
92  * In a SEV guest, NONE and RESERVED should not be mapped encrypted because
93  * there the whole memory is already encrypted.
94  */
95 static unsigned int __ioremap_check_encrypted(struct resource *res)
96 {
97 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
98 		return 0;
99 
100 	switch (res->desc) {
101 	case IORES_DESC_NONE:
102 	case IORES_DESC_RESERVED:
103 		break;
104 	default:
105 		return IORES_MAP_ENCRYPTED;
106 	}
107 
108 	return 0;
109 }
110 
111 /*
112  * The EFI runtime services data area is not covered by walk_mem_res(), but must
113  * be mapped encrypted when SEV is active.
114  */
115 static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
116 {
117 	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
118 		return;
119 
120 	if (x86_platform.hyper.is_private_mmio(addr)) {
121 		desc->flags |= IORES_MAP_ENCRYPTED;
122 		return;
123 	}
124 
125 	if (!IS_ENABLED(CONFIG_EFI))
126 		return;
127 
128 	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
129 	    (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
130 	     efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
131 		desc->flags |= IORES_MAP_ENCRYPTED;
132 }
133 
134 static int __ioremap_collect_map_flags(struct resource *res, void *arg)
135 {
136 	struct ioremap_desc *desc = arg;
137 
138 	if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
139 		desc->flags |= __ioremap_check_ram(res);
140 
141 	if (!(desc->flags & IORES_MAP_ENCRYPTED))
142 		desc->flags |= __ioremap_check_encrypted(res);
143 
144 	return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
145 			       (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
146 }
147 
148 /*
149  * To avoid multiple resource walks, this function walks resources marked as
150  * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a
151  * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
152  *
153  * After that, deal with misc other ranges in __ioremap_check_other() which do
154  * not fall into the above category.
155  */
156 static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
157 				struct ioremap_desc *desc)
158 {
159 	u64 start, end;
160 
161 	start = (u64)addr;
162 	end = start + size - 1;
163 	memset(desc, 0, sizeof(struct ioremap_desc));
164 
165 	walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
166 
167 	__ioremap_check_other(addr, desc);
168 }
169 
170 /*
171  * Remap an arbitrary physical address space into the kernel virtual
172  * address space. It transparently creates kernel huge I/O mapping when
173  * the physical address is aligned by a huge page size (1GB or 2MB) and
174  * the requested size is at least the huge page size.
175  *
176  * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
177  * Therefore, the mapping code falls back to use a smaller page toward 4KB
178  * when a mapping range is covered by non-WB type of MTRRs.
179  *
180  * NOTE! We need to allow non-page-aligned mappings too: we will obviously
181  * have to convert them into an offset in a page-aligned mapping, but the
182  * caller shouldn't need to know that small detail.
183  */
184 static void __iomem *
185 __ioremap_caller(resource_size_t phys_addr, unsigned long size,
186 		 enum page_cache_mode pcm, void *caller, bool encrypted)
187 {
188 	unsigned long offset, vaddr;
189 	resource_size_t last_addr;
190 	const resource_size_t unaligned_phys_addr = phys_addr;
191 	const unsigned long unaligned_size = size;
192 	struct ioremap_desc io_desc;
193 	struct vm_struct *area;
194 	enum page_cache_mode new_pcm;
195 	pgprot_t prot;
196 	int retval;
197 	void __iomem *ret_addr;
198 
199 	/* Don't allow wraparound or zero size */
200 	last_addr = phys_addr + size - 1;
201 	if (!size || last_addr < phys_addr)
202 		return NULL;
203 
204 	if (!phys_addr_valid(phys_addr)) {
205 		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
206 		       (unsigned long long)phys_addr);
207 		WARN_ON_ONCE(1);
208 		return NULL;
209 	}
210 
211 	__ioremap_check_mem(phys_addr, size, &io_desc);
212 
213 	/*
214 	 * Don't allow anybody to remap normal RAM that we're using..
215 	 */
216 	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
217 		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
218 			  &phys_addr, &last_addr);
219 		return NULL;
220 	}
221 
222 	/*
223 	 * Mappings have to be page-aligned
224 	 */
225 	offset = phys_addr & ~PAGE_MASK;
226 	phys_addr &= PAGE_MASK;
227 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
228 
229 	/*
230 	 * Mask out any bits not part of the actual physical
231 	 * address, like memory encryption bits.
232 	 */
233 	phys_addr &= PHYSICAL_PAGE_MASK;
234 
235 	retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
236 						pcm, &new_pcm);
237 	if (retval) {
238 		printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval);
239 		return NULL;
240 	}
241 
242 	if (pcm != new_pcm) {
243 		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
244 			printk(KERN_ERR
245 		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
246 				(unsigned long long)phys_addr,
247 				(unsigned long long)(phys_addr + size),
248 				pcm, new_pcm);
249 			goto err_free_memtype;
250 		}
251 		pcm = new_pcm;
252 	}
253 
254 	/*
255 	 * If the page being mapped is in memory and SEV is active then
256 	 * make sure the memory encryption attribute is enabled in the
257 	 * resulting mapping.
258 	 * In TDX guests, memory is marked private by default. If encryption
259 	 * is not requested (using encrypted), explicitly set decrypt
260 	 * attribute in all IOREMAPPED memory.
261 	 */
262 	prot = PAGE_KERNEL_IO;
263 	if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
264 		prot = pgprot_encrypted(prot);
265 	else
266 		prot = pgprot_decrypted(prot);
267 
268 	switch (pcm) {
269 	case _PAGE_CACHE_MODE_UC:
270 	default:
271 		prot = __pgprot(pgprot_val(prot) |
272 				cachemode2protval(_PAGE_CACHE_MODE_UC));
273 		break;
274 	case _PAGE_CACHE_MODE_UC_MINUS:
275 		prot = __pgprot(pgprot_val(prot) |
276 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
277 		break;
278 	case _PAGE_CACHE_MODE_WC:
279 		prot = __pgprot(pgprot_val(prot) |
280 				cachemode2protval(_PAGE_CACHE_MODE_WC));
281 		break;
282 	case _PAGE_CACHE_MODE_WT:
283 		prot = __pgprot(pgprot_val(prot) |
284 				cachemode2protval(_PAGE_CACHE_MODE_WT));
285 		break;
286 	case _PAGE_CACHE_MODE_WB:
287 		break;
288 	}
289 
290 	/*
291 	 * Ok, go for it..
292 	 */
293 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
294 	if (!area)
295 		goto err_free_memtype;
296 	area->phys_addr = phys_addr;
297 	vaddr = (unsigned long) area->addr;
298 
299 	if (memtype_kernel_map_sync(phys_addr, size, pcm))
300 		goto err_free_area;
301 
302 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
303 		goto err_free_area;
304 
305 	ret_addr = (void __iomem *) (vaddr + offset);
306 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
307 
308 	/*
309 	 * Check if the request spans more than any BAR in the iomem resource
310 	 * tree.
311 	 */
312 	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
313 		pr_warn("caller %pS mapping multiple BARs\n", caller);
314 
315 	return ret_addr;
316 err_free_area:
317 	free_vm_area(area);
318 err_free_memtype:
319 	memtype_free(phys_addr, phys_addr + size);
320 	return NULL;
321 }
322 
323 /**
324  * ioremap     -   map bus memory into CPU space
325  * @phys_addr:    bus address of the memory
326  * @size:      size of the resource to map
327  *
328  * ioremap performs a platform specific sequence of operations to
329  * make bus memory CPU accessible via the readb/readw/readl/writeb/
330  * writew/writel functions and the other mmio helpers. The returned
331  * address is not guaranteed to be usable directly as a virtual
332  * address.
333  *
334  * This version of ioremap ensures that the memory is marked uncachable
335  * on the CPU as well as honouring existing caching rules from things like
336  * the PCI bus. Note that there are other caches and buffers on many
337  * busses. In particular driver authors should read up on PCI writes
338  *
339  * It's useful if some control registers are in such an area and
340  * write combining or read caching is not desirable:
341  *
342  * Must be freed with iounmap.
343  */
344 void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
345 {
346 	/*
347 	 * Ideally, this should be:
348 	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
349 	 *
350 	 * Till we fix all X drivers to use ioremap_wc(), we will use
351 	 * UC MINUS. Drivers that are certain they need or can already
352 	 * be converted over to strong UC can use ioremap_uc().
353 	 */
354 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
355 
356 	return __ioremap_caller(phys_addr, size, pcm,
357 				__builtin_return_address(0), false);
358 }
359 EXPORT_SYMBOL(ioremap);
360 
361 /**
362  * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
363  * @phys_addr:    bus address of the memory
364  * @size:      size of the resource to map
365  *
366  * ioremap_uc performs a platform specific sequence of operations to
367  * make bus memory CPU accessible via the readb/readw/readl/writeb/
368  * writew/writel functions and the other mmio helpers. The returned
369  * address is not guaranteed to be usable directly as a virtual
370  * address.
371  *
372  * This version of ioremap ensures that the memory is marked with a strong
373  * preference as completely uncachable on the CPU when possible. For non-PAT
374  * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
375  * systems this will set the PAT entry for the pages as strong UC.  This call
376  * will honor existing caching rules from things like the PCI bus. Note that
377  * there are other caches and buffers on many busses. In particular driver
378  * authors should read up on PCI writes.
379  *
380  * It's useful if some control registers are in such an area and
381  * write combining or read caching is not desirable:
382  *
383  * Must be freed with iounmap.
384  */
385 void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
386 {
387 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
388 
389 	return __ioremap_caller(phys_addr, size, pcm,
390 				__builtin_return_address(0), false);
391 }
392 EXPORT_SYMBOL_GPL(ioremap_uc);
393 
394 /**
395  * ioremap_wc	-	map memory into CPU space write combined
396  * @phys_addr:	bus address of the memory
397  * @size:	size of the resource to map
398  *
399  * This version of ioremap ensures that the memory is marked write combining.
400  * Write combining allows faster writes to some hardware devices.
401  *
402  * Must be freed with iounmap.
403  */
404 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
405 {
406 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
407 					__builtin_return_address(0), false);
408 }
409 EXPORT_SYMBOL(ioremap_wc);
410 
411 /**
412  * ioremap_wt	-	map memory into CPU space write through
413  * @phys_addr:	bus address of the memory
414  * @size:	size of the resource to map
415  *
416  * This version of ioremap ensures that the memory is marked write through.
417  * Write through stores data into memory while keeping the cache up-to-date.
418  *
419  * Must be freed with iounmap.
420  */
421 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
422 {
423 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
424 					__builtin_return_address(0), false);
425 }
426 EXPORT_SYMBOL(ioremap_wt);
427 
428 void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
429 {
430 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
431 				__builtin_return_address(0), true);
432 }
433 EXPORT_SYMBOL(ioremap_encrypted);
434 
435 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
436 {
437 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
438 				__builtin_return_address(0), false);
439 }
440 EXPORT_SYMBOL(ioremap_cache);
441 
442 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
443 			   pgprot_t prot)
444 {
445 	return __ioremap_caller(phys_addr, size,
446 				pgprot2cachemode(prot),
447 				__builtin_return_address(0), false);
448 }
449 EXPORT_SYMBOL(ioremap_prot);
450 
451 /**
452  * iounmap - Free a IO remapping
453  * @addr: virtual address from ioremap_*
454  *
455  * Caller must ensure there is only one unmapping for the same pointer.
456  */
457 void iounmap(volatile void __iomem *addr)
458 {
459 	struct vm_struct *p, *o;
460 
461 	if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr)))
462 		return;
463 
464 	/*
465 	 * The PCI/ISA range special-casing was removed from __ioremap()
466 	 * so this check, in theory, can be removed. However, there are
467 	 * cases where iounmap() is called for addresses not obtained via
468 	 * ioremap() (vga16fb for example). Add a warning so that these
469 	 * cases can be caught and fixed.
470 	 */
471 	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
472 	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
473 		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
474 		return;
475 	}
476 
477 	mmiotrace_iounmap(addr);
478 
479 	addr = (volatile void __iomem *)
480 		(PAGE_MASK & (unsigned long __force)addr);
481 
482 	/* Use the vm area unlocked, assuming the caller
483 	   ensures there isn't another iounmap for the same address
484 	   in parallel. Reuse of the virtual address is prevented by
485 	   leaving it in the global lists until we're done with it.
486 	   cpa takes care of the direct mappings. */
487 	p = find_vm_area((void __force *)addr);
488 
489 	if (!p) {
490 		printk(KERN_ERR "iounmap: bad address %p\n", addr);
491 		dump_stack();
492 		return;
493 	}
494 
495 	kmsan_iounmap_page_range((unsigned long)addr,
496 		(unsigned long)addr + get_vm_area_size(p));
497 	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
498 
499 	/* Finally remove it */
500 	o = remove_vm_area((void __force *)addr);
501 	BUG_ON(p != o || o == NULL);
502 	kfree(p);
503 }
504 EXPORT_SYMBOL(iounmap);
505 
506 void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags)
507 {
508 	if ((flags & MEMREMAP_DEC) || cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
509 		return (void __force *)ioremap_cache(phys_addr, size);
510 
511 	return (void __force *)ioremap_encrypted(phys_addr, size);
512 }
513 
514 /*
515  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
516  * access
517  */
518 void *xlate_dev_mem_ptr(phys_addr_t phys)
519 {
520 	unsigned long start  = phys &  PAGE_MASK;
521 	unsigned long offset = phys & ~PAGE_MASK;
522 	void *vaddr;
523 
524 	/* memremap() maps if RAM, otherwise falls back to ioremap() */
525 	vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
526 
527 	/* Only add the offset on success and return NULL if memremap() failed */
528 	if (vaddr)
529 		vaddr += offset;
530 
531 	return vaddr;
532 }
533 
534 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
535 {
536 	memunmap((void *)((unsigned long)addr & PAGE_MASK));
537 }
538 
539 #ifdef CONFIG_AMD_MEM_ENCRYPT
540 /*
541  * Examine the physical address to determine if it is an area of memory
542  * that should be mapped decrypted.  If the memory is not part of the
543  * kernel usable area it was accessed and created decrypted, so these
544  * areas should be mapped decrypted. And since the encryption key can
545  * change across reboots, persistent memory should also be mapped
546  * decrypted.
547  *
548  * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
549  * only persistent memory should be mapped decrypted.
550  */
551 static bool memremap_should_map_decrypted(resource_size_t phys_addr,
552 					  unsigned long size)
553 {
554 	int is_pmem;
555 
556 	/*
557 	 * Check if the address is part of a persistent memory region.
558 	 * This check covers areas added by E820, EFI and ACPI.
559 	 */
560 	is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
561 				    IORES_DESC_PERSISTENT_MEMORY);
562 	if (is_pmem != REGION_DISJOINT)
563 		return true;
564 
565 	/*
566 	 * Check if the non-volatile attribute is set for an EFI
567 	 * reserved area.
568 	 */
569 	if (efi_enabled(EFI_BOOT)) {
570 		switch (efi_mem_type(phys_addr)) {
571 		case EFI_RESERVED_TYPE:
572 			if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
573 				return true;
574 			break;
575 		default:
576 			break;
577 		}
578 	}
579 
580 	/* Check if the address is outside kernel usable area */
581 	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
582 	case E820_TYPE_RESERVED:
583 	case E820_TYPE_ACPI:
584 	case E820_TYPE_NVS:
585 	case E820_TYPE_UNUSABLE:
586 		/* For SEV, these areas are encrypted */
587 		if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
588 			break;
589 		fallthrough;
590 
591 	case E820_TYPE_PRAM:
592 		return true;
593 	default:
594 		break;
595 	}
596 
597 	return false;
598 }
599 
600 /*
601  * Examine the physical address to determine if it is EFI data. Check
602  * it against the boot params structure and EFI tables and memory types.
603  */
604 static bool memremap_is_efi_data(resource_size_t phys_addr)
605 {
606 	u64 paddr;
607 
608 	/* Check if the address is part of EFI boot/runtime data */
609 	if (!efi_enabled(EFI_BOOT))
610 		return false;
611 
612 	paddr = boot_params.efi_info.efi_memmap_hi;
613 	paddr <<= 32;
614 	paddr |= boot_params.efi_info.efi_memmap;
615 	if (phys_addr == paddr)
616 		return true;
617 
618 	paddr = boot_params.efi_info.efi_systab_hi;
619 	paddr <<= 32;
620 	paddr |= boot_params.efi_info.efi_systab;
621 	if (phys_addr == paddr)
622 		return true;
623 
624 	if (efi_is_table_address(phys_addr))
625 		return true;
626 
627 	switch (efi_mem_type(phys_addr)) {
628 	case EFI_BOOT_SERVICES_DATA:
629 	case EFI_RUNTIME_SERVICES_DATA:
630 		return true;
631 	default:
632 		break;
633 	}
634 
635 	return false;
636 }
637 
638 /*
639  * Examine the physical address to determine if it is boot data by checking
640  * it against the boot params setup_data chain.
641  */
642 static bool __ref __memremap_is_setup_data(resource_size_t phys_addr, bool early)
643 {
644 	unsigned int setup_data_sz = sizeof(struct setup_data);
645 	struct setup_indirect *indirect;
646 	struct setup_data *data;
647 	u64 paddr, paddr_next;
648 
649 	paddr = boot_params.hdr.setup_data;
650 	while (paddr) {
651 		unsigned int len, size;
652 
653 		if (phys_addr == paddr)
654 			return true;
655 
656 		if (early)
657 			data = early_memremap_decrypted(paddr, setup_data_sz);
658 		else
659 			data = memremap(paddr, setup_data_sz, MEMREMAP_WB | MEMREMAP_DEC);
660 		if (!data) {
661 			pr_warn("failed to remap setup_data entry\n");
662 			return false;
663 		}
664 
665 		size = setup_data_sz;
666 
667 		paddr_next = data->next;
668 		len = data->len;
669 
670 		if ((phys_addr > paddr) &&
671 		    (phys_addr < (paddr + setup_data_sz + len))) {
672 			if (early)
673 				early_memunmap(data, setup_data_sz);
674 			else
675 				memunmap(data);
676 			return true;
677 		}
678 
679 		if (data->type == SETUP_INDIRECT) {
680 			size += len;
681 			if (early) {
682 				early_memunmap(data, setup_data_sz);
683 				data = early_memremap_decrypted(paddr, size);
684 			} else {
685 				memunmap(data);
686 				data = memremap(paddr, size, MEMREMAP_WB | MEMREMAP_DEC);
687 			}
688 			if (!data) {
689 				pr_warn("failed to remap indirect setup_data\n");
690 				return false;
691 			}
692 
693 			indirect = (struct setup_indirect *)data->data;
694 
695 			if (indirect->type != SETUP_INDIRECT) {
696 				paddr = indirect->addr;
697 				len = indirect->len;
698 			}
699 		}
700 
701 		if (early)
702 			early_memunmap(data, size);
703 		else
704 			memunmap(data);
705 
706 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
707 			return true;
708 
709 		paddr = paddr_next;
710 	}
711 
712 	return false;
713 }
714 
715 static bool memremap_is_setup_data(resource_size_t phys_addr)
716 {
717 	return __memremap_is_setup_data(phys_addr, false);
718 }
719 
720 static bool __init early_memremap_is_setup_data(resource_size_t phys_addr)
721 {
722 	return __memremap_is_setup_data(phys_addr, true);
723 }
724 
725 /*
726  * Architecture function to determine if RAM remap is allowed. By default, a
727  * RAM remap will map the data as encrypted. Determine if a RAM remap should
728  * not be done so that the data will be mapped decrypted.
729  */
730 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
731 				 unsigned long flags)
732 {
733 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
734 		return true;
735 
736 	if (flags & MEMREMAP_ENC)
737 		return true;
738 
739 	if (flags & MEMREMAP_DEC)
740 		return false;
741 
742 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
743 		if (memremap_is_setup_data(phys_addr) ||
744 		    memremap_is_efi_data(phys_addr))
745 			return false;
746 	}
747 
748 	return !memremap_should_map_decrypted(phys_addr, size);
749 }
750 
751 /*
752  * Architecture override of __weak function to adjust the protection attributes
753  * used when remapping memory. By default, early_memremap() will map the data
754  * as encrypted. Determine if an encrypted mapping should not be done and set
755  * the appropriate protection attributes.
756  */
757 pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
758 					     unsigned long size,
759 					     pgprot_t prot)
760 {
761 	bool encrypted_prot;
762 
763 	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
764 		return prot;
765 
766 	encrypted_prot = true;
767 
768 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
769 		if (early_memremap_is_setup_data(phys_addr) ||
770 		    memremap_is_efi_data(phys_addr))
771 			encrypted_prot = false;
772 	}
773 
774 	if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
775 		encrypted_prot = false;
776 
777 	return encrypted_prot ? pgprot_encrypted(prot)
778 			      : pgprot_decrypted(prot);
779 }
780 
781 bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
782 {
783 	return arch_memremap_can_ram_remap(phys_addr, size, 0);
784 }
785 
786 /* Remap memory with encryption */
787 void __init *early_memremap_encrypted(resource_size_t phys_addr,
788 				      unsigned long size)
789 {
790 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
791 }
792 
793 /*
794  * Remap memory with encryption and write-protected - cannot be called
795  * before pat_init() is called
796  */
797 void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
798 					 unsigned long size)
799 {
800 	if (!x86_has_pat_wp())
801 		return NULL;
802 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
803 }
804 
805 /* Remap memory without encryption */
806 void __init *early_memremap_decrypted(resource_size_t phys_addr,
807 				      unsigned long size)
808 {
809 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
810 }
811 
812 /*
813  * Remap memory without encryption and write-protected - cannot be called
814  * before pat_init() is called
815  */
816 void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
817 					 unsigned long size)
818 {
819 	if (!x86_has_pat_wp())
820 		return NULL;
821 	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
822 }
823 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
824 
/*
 * One page worth of PTEs backing the boot-time ioremap fixmap slots.
 * early_ioremap_init() clears it and installs it under the pmd covering
 * FIX_BTMAP_BEGIN; early_ioremap_pte() indexes into it.
 */
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
826 
827 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
828 {
829 	/* Don't assume we're using swapper_pg_dir at this point */
830 	pgd_t *base = __va(read_cr3_pa());
831 	pgd_t *pgd = &base[pgd_index(addr)];
832 	p4d_t *p4d = p4d_offset(pgd, addr);
833 	pud_t *pud = pud_offset(p4d, addr);
834 	pmd_t *pmd = pmd_offset(pud, addr);
835 
836 	return pmd;
837 }
838 
839 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
840 {
841 	return &bm_pte[pte_index(addr)];
842 }
843 
844 bool __init is_early_ioremap_ptep(pte_t *ptep)
845 {
846 	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
847 }
848 
849 void __init early_ioremap_init(void)
850 {
851 	pmd_t *pmd;
852 
853 #ifdef CONFIG_X86_64
854 	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
855 #else
856 	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
857 #endif
858 
859 	early_ioremap_setup();
860 
861 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
862 	memset(bm_pte, 0, sizeof(bm_pte));
863 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
864 
865 	/*
866 	 * The boot-ioremap range spans multiple pmds, for which
867 	 * we are not prepared:
868 	 */
869 #define __FIXADDR_TOP (-PAGE_SIZE)
870 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
871 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
872 #undef __FIXADDR_TOP
873 	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
874 		WARN_ON(1);
875 		printk(KERN_WARNING "pmd %p != %p\n",
876 		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
877 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
878 			fix_to_virt(FIX_BTMAP_BEGIN));
879 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
880 			fix_to_virt(FIX_BTMAP_END));
881 
882 		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
883 		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
884 		       FIX_BTMAP_BEGIN);
885 	}
886 }
887 
888 void __init __early_set_fixmap(enum fixed_addresses idx,
889 			       phys_addr_t phys, pgprot_t flags)
890 {
891 	unsigned long addr = __fix_to_virt(idx);
892 	pte_t *pte;
893 
894 	if (idx >= __end_of_fixed_addresses) {
895 		BUG();
896 		return;
897 	}
898 	pte = early_ioremap_pte(addr);
899 
900 	/* Sanitize 'prot' against any unsupported bits: */
901 	pgprot_val(flags) &= __supported_pte_mask;
902 
903 	if (pgprot_val(flags))
904 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
905 	else
906 		pte_clear(&init_mm, addr, pte);
907 	flush_tlb_one_kernel(addr);
908 }
909