/* arch/x86/mm/ioremap.c (revision 33c2b803edd13487518a2c7d5002d84d7e9c878f) */
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs.
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>

#include <asm/set_memory.h>
#include <asm/e820/api.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>

#include "physaddr.h"

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			enum page_cache_mode pcm)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WT:
		err = _set_memory_wt(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

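/*
 * walk_system_ram_range() callback: returns 1 if any page in the range is
 * usable (non-reserved) RAM, i.e. memory that must not be ioremapped.
 */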
static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
			       void *arg)
{
	unsigned long i;

	for (i = 0; i < nr_pages; ++i)
		if (pfn_valid(start_pfn + i) &&
		    !PageReserved(pfn_to_page(start_pfn + i)))
			return 1;

	return 0;
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates kernel huge I/O mappings when
 * the physical address is aligned to a huge page size (1GB or 2MB) and
 * the requested size is at least the huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with 4KB granularity.
 * Therefore, the mapping code falls back to smaller pages, down to 4KB,
 * when a mapping range is covered by non-WB MTRRs.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *__ioremap_caller(resource_size_t phys_addr,
		unsigned long size, enum page_cache_mode pcm, void *caller)
{
	unsigned long offset, vaddr;
	resource_size_t pfn, last_pfn, last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct vm_struct *area;
	enum page_cache_mode new_pcm;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	pfn      = phys_addr >> PAGE_SHIFT;
	last_pfn = last_addr >> PAGE_SHIFT;
	if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
					  __ioremap_check_ram) == 1) {
		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
			  &phys_addr, &last_addr);
		return NULL;
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PHYSICAL_PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;
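	/*
	 * Worked example of the alignment fixup above (hypothetical numbers):
	 * a request for phys_addr 0xfed01234 with size 0x100 yields
	 * last_addr = 0xfed01333 and offset = 0x234; the mapping itself is
	 * built for phys_addr 0xfed01000 with size 0x1000, and the caller
	 * gets back vaddr + 0x234.
	 */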

	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
						pcm, &new_pcm);
	if (retval) {
		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
		return NULL;
	}

	if (pcm != new_pcm) {
		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
			printk(KERN_ERR
		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				pcm, new_pcm);
			goto err_free_memtype;
		}
		pcm = new_pcm;
	}

	prot = PAGE_KERNEL_IO;
	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC));
		break;
	case _PAGE_CACHE_MODE_UC_MINUS:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
		break;
	case _PAGE_CACHE_MODE_WC:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WC));
		break;
	case _PAGE_CACHE_MODE_WT:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WT));
		break;
	case _PAGE_CACHE_MODE_WB:
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		goto err_free_memtype;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;

	if (kernel_map_sync_memtype(phys_addr, size, pcm))
		goto err_free_area;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
		goto err_free_area;

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	/*
	 * Warn if the request spans more than a single BAR in the iomem
	 * resource tree.
	 */
	if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
		pr_warn("caller %pS mapping multiple BARs\n", caller);

	return ret_addr;
err_free_area:
	free_vm_area(area);
err_free_memtype:
	free_memtype(phys_addr, phys_addr + size);
	return NULL;
}

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform-specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular, driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
	 *
	 * Until we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS. Drivers that are certain they need strong UC, or that
	 * can already be converted over to it, can use ioremap_uc().
	 */
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
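
/*
 * Illustrative sketch: a driver would typically wrap a device BAR like
 * this. The physical address and register offsets below are hypothetical,
 * chosen only to show the ioremap_nocache()/iounmap() pairing.
 */
static void __maybe_unused ioremap_nocache_example(void)
{
	void __iomem *regs;

	/* Hypothetical 4KB MMIO register window at 0xfebd0000 */
	regs = ioremap_nocache(0xfebd0000, 0x1000);
	if (!regs)
		return;

	/* 32-bit register accesses go through the uncached mapping */
	writel(0x1, regs + 0x04);
	(void)readl(regs + 0x08);

	iounmap(regs);
}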

/**
 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_uc performs a platform-specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncachable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC.  This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many busses. In particular, driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(ioremap_uc);

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
					__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wc);
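
/*
 * Illustrative sketch: write-combined mappings are typically used for frame
 * buffers or other large, write-mostly apertures. The address and size below
 * are hypothetical and only demonstrate the ioremap_wc() calling pattern.
 */
static void __maybe_unused ioremap_wc_example(void)
{
	void __iomem *fb;
	unsigned int i;

	/* Hypothetical 1MB frame buffer aperture at 0xd0000000 */
	fb = ioremap_wc(0xd0000000, 0x100000);
	if (!fb)
		return;

	/* Streaming writes like this benefit from write combining */
	for (i = 0; i < 0x100000; i += 4)
		writel(0, fb + i);

	iounmap(fb);
}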

/**
 * ioremap_wt	-	map memory into CPU space write through
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
					__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wt);

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);

void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size,
				pgprot2cachemode(__pgprot(prot_val)),
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * The PCI/ISA range special-casing was removed from __ioremap()
	 * so this check, in theory, can be removed. However, there are
	 * cases where iounmap() is called for addresses not obtained via
	 * ioremap() (vga16fb for example). Add a warning so that these
	 * cases can be caught and fixed.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
		WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
		return;
	}

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	mmiotrace_iounmap(addr);

	/*
	 * Use the vm area unlocked, assuming the caller ensures there isn't
	 * another iounmap for the same address in parallel. Reuse of the
	 * virtual address is prevented by leaving it in the global lists
	 * until we're done with it. cpa takes care of the direct mappings.
	 */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

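/*
 * Report to the generic ioremap code whether PUD (1GB) and PMD (2MB) sized
 * huge I/O mappings can be used on this CPU.
 */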
int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
	return boot_cpu_has(X86_FEATURE_GBPAGES);
#else
	return 0;
#endif
}

int __init arch_ioremap_pmd_supported(void)
{
	return boot_cpu_has(X86_FEATURE_PSE);
}

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
	unsigned long start  = phys &  PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr;

	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
	if (page_is_ram(start >> PAGE_SHIFT))
		return __va(phys);

	vaddr = ioremap_cache(start, PAGE_SIZE);
	/* Only add the offset on success and return NULL if the ioremap() failed: */
	if (vaddr)
		vaddr += offset;

	return vaddr;
}

void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
	if (page_is_ram(phys >> PAGE_SHIFT))
		return;

	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
}
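
/*
 * Illustrative sketch of how the two helpers above pair up, mirroring the
 * /dev/mem read path. The physical address parameter is a placeholder.
 */
static void __maybe_unused xlate_dev_mem_example(phys_addr_t phys)
{
	void *ptr;

	ptr = xlate_dev_mem_ptr(phys);
	if (!ptr)
		return;

	/* Plain memory access; non-RAM gets a temporary cacheable mapping */
	pr_debug("dev_mem example read: %#x\n", *(u32 *)ptr);

	unxlate_dev_mem_ptr(phys, ptr);
}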

static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3_pa());
	pgd_t *pgd = &base[pgd_index(addr)];
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

bool __init is_early_ioremap_ptep(pte_t *ptep)
{
	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

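/*
 * Install a single boot-time fixmap PTE, or clear it when pgprot_val(flags)
 * is zero.
 */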
void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	__flush_tlb_one(addr);
}
501