xref: /linux/arch/x86/mm/ioremap.c (revision cf2f33a4e54096f90652cca3511fd6a456ea5abe)
1 /*
2  * Re-map IO memory to kernel address space so that we can access it.
3  * This is needed for high PCI addresses that aren't mapped in the
4  * 640k-1MB IO memory area on PC's
5  *
6  * (C) Copyright 1995 1996 Linus Torvalds
7  */
8 
9 #include <linux/bootmem.h>
10 #include <linux/init.h>
11 #include <linux/io.h>
12 #include <linux/module.h>
13 #include <linux/slab.h>
14 #include <linux/vmalloc.h>
15 #include <linux/mmiotrace.h>
16 
17 #include <asm/cacheflush.h>
18 #include <asm/e820.h>
19 #include <asm/fixmap.h>
20 #include <asm/pgtable.h>
21 #include <asm/tlbflush.h>
22 #include <asm/pgalloc.h>
23 #include <asm/pat.h>
24 
25 #include "physaddr.h"
26 
27 /*
28  * Fix up the linear direct mapping of the kernel to avoid cache attribute
29  * conflicts.
30  */
31 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
32 			enum page_cache_mode pcm)
33 {
34 	unsigned long nrpages = size >> PAGE_SHIFT;
35 	int err;
36 
37 	switch (pcm) {
38 	case _PAGE_CACHE_MODE_UC:
39 	default:
40 		err = _set_memory_uc(vaddr, nrpages);
41 		break;
42 	case _PAGE_CACHE_MODE_WC:
43 		err = _set_memory_wc(vaddr, nrpages);
44 		break;
45 	case _PAGE_CACHE_MODE_WT:
46 		err = _set_memory_wt(vaddr, nrpages);
47 		break;
48 	case _PAGE_CACHE_MODE_WB:
49 		err = _set_memory_wb(vaddr, nrpages);
50 		break;
51 	}
52 
53 	return err;
54 }
55 
/*
 * Callback handed to walk_system_ram_range() by __ioremap_caller().
 *
 * Scans @nr_pages page frames starting at @start_pfn and returns 1 as soon
 * as it meets a valid pfn whose page is not marked reserved; returns 0 when
 * no such page exists in the range. @arg is not used.
 */
static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
			       void *arg)
{
	unsigned long pfn = start_pfn;
	unsigned long remaining;

	for (remaining = nr_pages; remaining; remaining--, pfn++) {
		if (!pfn_valid(pfn))
			continue;
		if (!PageReserved(pfn_to_page(pfn)))
			return 1;
	}

	return 0;
}
68 
69 /*
70  * Remap an arbitrary physical address space into the kernel virtual
71  * address space. It transparently creates kernel huge I/O mapping when
72  * the physical address is aligned by a huge page size (1GB or 2MB) and
73  * the requested size is at least the huge page size.
74  *
75  * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
76  * Therefore, the mapping code falls back to use a smaller page toward 4KB
77  * when a mapping range is covered by non-WB type of MTRRs.
78  *
79  * NOTE! We need to allow non-page-aligned mappings too: we will obviously
80  * have to convert them into an offset in a page-aligned mapping, but the
81  * caller shouldn't need to know that small detail.
82  */
83 static void __iomem *__ioremap_caller(resource_size_t phys_addr,
84 		unsigned long size, enum page_cache_mode pcm, void *caller)
85 {
86 	unsigned long offset, vaddr;
87 	resource_size_t pfn, last_pfn, last_addr;
88 	const resource_size_t unaligned_phys_addr = phys_addr;
89 	const unsigned long unaligned_size = size;
90 	struct vm_struct *area;
91 	enum page_cache_mode new_pcm;
92 	pgprot_t prot;
93 	int retval;
94 	void __iomem *ret_addr;
95 
96 	/* Don't allow wraparound or zero size */
97 	last_addr = phys_addr + size - 1;
98 	if (!size || last_addr < phys_addr)
99 		return NULL;
100 
101 	if (!phys_addr_valid(phys_addr)) {
102 		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
103 		       (unsigned long long)phys_addr);
104 		WARN_ON_ONCE(1);
105 		return NULL;
106 	}
107 
108 	/*
109 	 * Don't remap the low PCI/ISA area, it's always mapped..
110 	 */
111 	if (is_ISA_range(phys_addr, last_addr))
112 		return (__force void __iomem *)phys_to_virt(phys_addr);
113 
114 	/*
115 	 * Don't allow anybody to remap normal RAM that we're using..
116 	 */
117 	pfn      = phys_addr >> PAGE_SHIFT;
118 	last_pfn = last_addr >> PAGE_SHIFT;
119 	if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
120 					  __ioremap_check_ram) == 1) {
121 		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
122 			  &phys_addr, &last_addr);
123 		return NULL;
124 	}
125 
126 	/*
127 	 * Mappings have to be page-aligned
128 	 */
129 	offset = phys_addr & ~PAGE_MASK;
130 	phys_addr &= PHYSICAL_PAGE_MASK;
131 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
132 
133 	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
134 						pcm, &new_pcm);
135 	if (retval) {
136 		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
137 		return NULL;
138 	}
139 
140 	if (pcm != new_pcm) {
141 		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
142 			printk(KERN_ERR
143 		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
144 				(unsigned long long)phys_addr,
145 				(unsigned long long)(phys_addr + size),
146 				pcm, new_pcm);
147 			goto err_free_memtype;
148 		}
149 		pcm = new_pcm;
150 	}
151 
152 	prot = PAGE_KERNEL_IO;
153 	switch (pcm) {
154 	case _PAGE_CACHE_MODE_UC:
155 	default:
156 		prot = __pgprot(pgprot_val(prot) |
157 				cachemode2protval(_PAGE_CACHE_MODE_UC));
158 		break;
159 	case _PAGE_CACHE_MODE_UC_MINUS:
160 		prot = __pgprot(pgprot_val(prot) |
161 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
162 		break;
163 	case _PAGE_CACHE_MODE_WC:
164 		prot = __pgprot(pgprot_val(prot) |
165 				cachemode2protval(_PAGE_CACHE_MODE_WC));
166 		break;
167 	case _PAGE_CACHE_MODE_WT:
168 		prot = __pgprot(pgprot_val(prot) |
169 				cachemode2protval(_PAGE_CACHE_MODE_WT));
170 		break;
171 	case _PAGE_CACHE_MODE_WB:
172 		break;
173 	}
174 
175 	/*
176 	 * Ok, go for it..
177 	 */
178 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
179 	if (!area)
180 		goto err_free_memtype;
181 	area->phys_addr = phys_addr;
182 	vaddr = (unsigned long) area->addr;
183 
184 	if (kernel_map_sync_memtype(phys_addr, size, pcm))
185 		goto err_free_area;
186 
187 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
188 		goto err_free_area;
189 
190 	ret_addr = (void __iomem *) (vaddr + offset);
191 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
192 
193 	/*
194 	 * Check if the request spans more than any BAR in the iomem resource
195 	 * tree.
196 	 */
197 	WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
198 		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
199 
200 	return ret_addr;
201 err_free_area:
202 	free_vm_area(area);
203 err_free_memtype:
204 	free_memtype(phys_addr, phys_addr + size);
205 	return NULL;
206 }
207 
208 /**
209  * ioremap_nocache     -   map bus memory into CPU space
210  * @phys_addr:    bus address of the memory
211  * @size:      size of the resource to map
212  *
213  * ioremap_nocache performs a platform specific sequence of operations to
214  * make bus memory CPU accessible via the readb/readw/readl/writeb/
215  * writew/writel functions and the other mmio helpers. The returned
216  * address is not guaranteed to be usable directly as a virtual
217  * address.
218  *
219  * This version of ioremap ensures that the memory is marked uncachable
220  * on the CPU as well as honouring existing caching rules from things like
221  * the PCI bus. Note that there are other caches and buffers on many
222  * busses. In particular driver authors should read up on PCI writes
223  *
224  * It's useful if some control registers are in such an area and
225  * write combining or read caching is not desirable:
226  *
227  * Must be freed with iounmap.
228  */
229 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
230 {
231 	/*
232 	 * Ideally, this should be:
233 	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
234 	 *
235 	 * Till we fix all X drivers to use ioremap_wc(), we will use
236 	 * UC MINUS. Drivers that are certain they need or can already
237 	 * be converted over to strong UC can use ioremap_uc().
238 	 */
239 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
240 
241 	return __ioremap_caller(phys_addr, size, pcm,
242 				__builtin_return_address(0));
243 }
244 EXPORT_SYMBOL(ioremap_nocache);
245 
246 /**
247  * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
248  * @phys_addr:    bus address of the memory
249  * @size:      size of the resource to map
250  *
251  * ioremap_uc performs a platform specific sequence of operations to
252  * make bus memory CPU accessible via the readb/readw/readl/writeb/
253  * writew/writel functions and the other mmio helpers. The returned
254  * address is not guaranteed to be usable directly as a virtual
255  * address.
256  *
257  * This version of ioremap ensures that the memory is marked with a strong
258  * preference as completely uncachable on the CPU when possible. For non-PAT
259  * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
260  * systems this will set the PAT entry for the pages as strong UC.  This call
261  * will honor existing caching rules from things like the PCI bus. Note that
262  * there are other caches and buffers on many busses. In particular driver
263  * authors should read up on PCI writes.
264  *
265  * It's useful if some control registers are in such an area and
266  * write combining or read caching is not desirable:
267  *
268  * Must be freed with iounmap.
269  */
270 void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
271 {
272 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
273 
274 	return __ioremap_caller(phys_addr, size, pcm,
275 				__builtin_return_address(0));
276 }
277 EXPORT_SYMBOL_GPL(ioremap_uc);
278 
279 /**
280  * ioremap_wc	-	map memory into CPU space write combined
281  * @phys_addr:	bus address of the memory
282  * @size:	size of the resource to map
283  *
284  * This version of ioremap ensures that the memory is marked write combining.
285  * Write combining allows faster writes to some hardware devices.
286  *
287  * Must be freed with iounmap.
288  */
289 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
290 {
291 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
292 					__builtin_return_address(0));
293 }
294 EXPORT_SYMBOL(ioremap_wc);
295 
296 /**
297  * ioremap_wt	-	map memory into CPU space write through
298  * @phys_addr:	bus address of the memory
299  * @size:	size of the resource to map
300  *
301  * This version of ioremap ensures that the memory is marked write through.
302  * Write through stores data into memory while keeping the cache up-to-date.
303  *
304  * Must be freed with iounmap.
305  */
306 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
307 {
308 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
309 					__builtin_return_address(0));
310 }
311 EXPORT_SYMBOL(ioremap_wt);
312 
313 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
314 {
315 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
316 				__builtin_return_address(0));
317 }
318 EXPORT_SYMBOL(ioremap_cache);
319 
320 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
321 				unsigned long prot_val)
322 {
323 	return __ioremap_caller(phys_addr, size,
324 				pgprot2cachemode(__pgprot(prot_val)),
325 				__builtin_return_address(0));
326 }
327 EXPORT_SYMBOL(ioremap_prot);
328 
329 /**
330  * iounmap - Free a IO remapping
331  * @addr: virtual address from ioremap_*
332  *
333  * Caller must ensure there is only one unmapping for the same pointer.
334  */
335 void iounmap(volatile void __iomem *addr)
336 {
337 	struct vm_struct *p, *o;
338 
339 	if ((void __force *)addr <= high_memory)
340 		return;
341 
342 	/*
343 	 * __ioremap special-cases the PCI/ISA range by not instantiating a
344 	 * vm_area and by simply returning an address into the kernel mapping
345 	 * of ISA space.   So handle that here.
346 	 */
347 	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
348 	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
349 		return;
350 
351 	addr = (volatile void __iomem *)
352 		(PAGE_MASK & (unsigned long __force)addr);
353 
354 	mmiotrace_iounmap(addr);
355 
356 	/* Use the vm area unlocked, assuming the caller
357 	   ensures there isn't another iounmap for the same address
358 	   in parallel. Reuse of the virtual address is prevented by
359 	   leaving it in the global lists until we're done with it.
360 	   cpa takes care of the direct mappings. */
361 	p = find_vm_area((void __force *)addr);
362 
363 	if (!p) {
364 		printk(KERN_ERR "iounmap: bad address %p\n", addr);
365 		dump_stack();
366 		return;
367 	}
368 
369 	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
370 
371 	/* Finally remove it */
372 	o = remove_vm_area((void __force *)addr);
373 	BUG_ON(p != o || o == NULL);
374 	kfree(p);
375 }
376 EXPORT_SYMBOL(iounmap);
377 
378 int __init arch_ioremap_pud_supported(void)
379 {
380 #ifdef CONFIG_X86_64
381 	return cpu_has_gbpages;
382 #else
383 	return 0;
384 #endif
385 }
386 
387 int __init arch_ioremap_pmd_supported(void)
388 {
389 	return cpu_has_pse;
390 }
391 
392 /*
393  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
394  * access
395  */
396 void *xlate_dev_mem_ptr(phys_addr_t phys)
397 {
398 	unsigned long start  = phys &  PAGE_MASK;
399 	unsigned long offset = phys & ~PAGE_MASK;
400 	void *vaddr;
401 
402 	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
403 	if (page_is_ram(start >> PAGE_SHIFT))
404 		return __va(phys);
405 
406 	vaddr = ioremap_cache(start, PAGE_SIZE);
407 	/* Only add the offset on success and return NULL if the ioremap() failed: */
408 	if (vaddr)
409 		vaddr += offset;
410 
411 	return vaddr;
412 }
413 
414 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
415 {
416 	if (page_is_ram(phys >> PAGE_SHIFT))
417 		return;
418 
419 	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
420 }
421 
422 static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
423 
424 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
425 {
426 	/* Don't assume we're using swapper_pg_dir at this point */
427 	pgd_t *base = __va(read_cr3());
428 	pgd_t *pgd = &base[pgd_index(addr)];
429 	pud_t *pud = pud_offset(pgd, addr);
430 	pmd_t *pmd = pmd_offset(pud, addr);
431 
432 	return pmd;
433 }
434 
435 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
436 {
437 	return &bm_pte[pte_index(addr)];
438 }
439 
440 bool __init is_early_ioremap_ptep(pte_t *ptep)
441 {
442 	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
443 }
444 
445 void __init early_ioremap_init(void)
446 {
447 	pmd_t *pmd;
448 
449 #ifdef CONFIG_X86_64
450 	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
451 #else
452 	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
453 #endif
454 
455 	early_ioremap_setup();
456 
457 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
458 	memset(bm_pte, 0, sizeof(bm_pte));
459 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
460 
461 	/*
462 	 * The boot-ioremap range spans multiple pmds, for which
463 	 * we are not prepared:
464 	 */
465 #define __FIXADDR_TOP (-PAGE_SIZE)
466 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
467 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
468 #undef __FIXADDR_TOP
469 	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
470 		WARN_ON(1);
471 		printk(KERN_WARNING "pmd %p != %p\n",
472 		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
473 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
474 			fix_to_virt(FIX_BTMAP_BEGIN));
475 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
476 			fix_to_virt(FIX_BTMAP_END));
477 
478 		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
479 		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
480 		       FIX_BTMAP_BEGIN);
481 	}
482 }
483 
484 void __init __early_set_fixmap(enum fixed_addresses idx,
485 			       phys_addr_t phys, pgprot_t flags)
486 {
487 	unsigned long addr = __fix_to_virt(idx);
488 	pte_t *pte;
489 
490 	if (idx >= __end_of_fixed_addresses) {
491 		BUG();
492 		return;
493 	}
494 	pte = early_ioremap_pte(addr);
495 
496 	if (pgprot_val(flags))
497 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
498 	else
499 		pte_clear(&init_mm, addr, pte);
500 	__flush_tlb_one(addr);
501 }
502