xref: /linux/arch/x86/mm/ioremap.c (revision 005438a8eef063495ac059d128eea71b58de50e5)
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs.
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>

#include <asm/cacheflush.h>
#include <asm/e820.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>

#include "physaddr.h"

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			enum page_cache_mode pcm)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WT:
		err = _set_memory_wt(vaddr, nrpages);
		break;
	case _PAGE_CACHE_MODE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}
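
/*
 * Illustrative sketch (not from the original source): the PAT code uses this
 * helper, e.g. via kernel_map_sync_memtype(), to keep the direct mapping of
 * a range consistent with a new ioremap() alias.  A minimal call, assuming
 * 'vaddr' and 'size' describe a region in the direct map, would be:
 *
 *	err = ioremap_change_attr(vaddr, size, _PAGE_CACHE_MODE_WC);
 *	if (err)
 *		pr_warn("ioremap_change_attr failed: %d\n", err);
 */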

static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
			       void *arg)
{
	unsigned long i;

	for (i = 0; i < nr_pages; ++i)
		if (pfn_valid(start_pfn + i) &&
		    !PageReserved(pfn_to_page(start_pfn + i)))
			return 1;

	WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn);

	return 0;
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. It transparently creates kernel huge I/O mappings when
 * the physical address is aligned to a huge page size (1GB or 2MB) and
 * the requested size is at least that huge page size.
 *
 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
 * Therefore, the mapping code falls back to using smaller pages (down to
 * 4KB) when a mapping range is covered by non-WB MTRRs.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
static void __iomem *__ioremap_caller(resource_size_t phys_addr,
		unsigned long size, enum page_cache_mode pcm, void *caller)
{
	unsigned long offset, vaddr;
	resource_size_t pfn, last_pfn, last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct vm_struct *area;
	enum page_cache_mode new_pcm;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;
	int ram_region;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	/*
	 * Don't remap the low PCI/ISA area; it's always mapped.
	 */
	if (is_ISA_range(phys_addr, last_addr))
		return (__force void __iomem *)phys_to_virt(phys_addr);

	/*
	 * Don't allow anybody to remap normal RAM that we're using.
	 */
	/* First, check whether the whole region can be identified as RAM. */
	ram_region = region_is_ram(phys_addr, size);
	if (ram_region > 0) {
		WARN_ONCE(1, "ioremap on RAM at 0x%lx - 0x%lx\n",
				(unsigned long int)phys_addr,
				(unsigned long int)last_addr);
		return NULL;
	}

	/* If it could not be identified (-1), check page by page. */
	if (ram_region < 0) {
		pfn      = phys_addr >> PAGE_SHIFT;
		last_pfn = last_addr >> PAGE_SHIFT;
		if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
					  __ioremap_check_ram) == 1)
			return NULL;
	}
	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PHYSICAL_PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
						pcm, &new_pcm);
	if (retval) {
		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
		return NULL;
	}

	if (pcm != new_pcm) {
		if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
			printk(KERN_ERR
		"ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				pcm, new_pcm);
			goto err_free_memtype;
		}
		pcm = new_pcm;
	}

	prot = PAGE_KERNEL_IO;
	switch (pcm) {
	case _PAGE_CACHE_MODE_UC:
	default:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC));
		break;
	case _PAGE_CACHE_MODE_UC_MINUS:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
		break;
	case _PAGE_CACHE_MODE_WC:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WC));
		break;
	case _PAGE_CACHE_MODE_WT:
		prot = __pgprot(pgprot_val(prot) |
				cachemode2protval(_PAGE_CACHE_MODE_WT));
		break;
	case _PAGE_CACHE_MODE_WB:
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		goto err_free_memtype;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;

	if (kernel_map_sync_memtype(phys_addr, size, pcm))
		goto err_free_area;

	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
		goto err_free_area;

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	/*
	 * Check whether the request spans more than one BAR in the iomem
	 * resource tree.
	 */
	WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");

	return ret_addr;
err_free_area:
	free_vm_area(area);
err_free_memtype:
	free_memtype(phys_addr, phys_addr + size);
	return NULL;
}
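
/*
 * Worked example of the alignment handling above (illustrative numbers
 * only): a request for phys_addr = 0x10000010 and size = 0x20 gives
 *
 *	offset    = 0x10        (phys_addr & ~PAGE_MASK)
 *	phys_addr = 0x10000000  (page-aligned base)
 *	size      = 0x1000      (PAGE_ALIGN(last_addr + 1) - phys_addr)
 *
 * so one 4KB page is mapped and the caller receives vaddr + 0x10, which is
 * why non-page-aligned requests work without the caller noticing.
 */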

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform-specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular, driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
	 *
	 * Till we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS. Drivers that are certain they need or can already
	 * be converted over to strong UC can use ioremap_uc().
	 */
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
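
/*
 * Illustrative usage sketch (not part of the original file; 'pdev' is a
 * hypothetical struct pci_dev pointer and the register offsets are made up):
 *
 *	void __iomem *regs;
 *	u32 status;
 *
 *	regs = ioremap_nocache(pci_resource_start(pdev, 0),
 *			       pci_resource_len(pdev, 0));
 *	if (!regs)
 *		return -ENOMEM;
 *	writel(1, regs + 0x04);
 *	status = readl(regs + 0x08);
 *	iounmap(regs);
 *
 * The returned cookie must only be accessed through the MMIO accessors
 * (readl/writel and friends), never dereferenced as a plain pointer.
 */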

/**
 * ioremap_uc     -   map bus memory into CPU space as strongly uncachable
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_uc performs a platform-specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked with a strong
 * preference as completely uncachable on the CPU when possible. For non-PAT
 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
 * systems this will set the PAT entry for the pages as strong UC.  This call
 * will honor existing caching rules from things like the PCI bus. Note that
 * there are other caches and buffers on many busses. In particular, driver
 * authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;

	return __ioremap_caller(phys_addr, size, pcm,
				__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(ioremap_uc);

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
					__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wc);
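
/*
 * Illustrative sketch (hedged; 'info' is a hypothetical struct fb_info
 * pointer): write-combined mappings are typically used for large,
 * write-mostly apertures such as framebuffers, where WC lets the CPU
 * combine and burst the writes:
 *
 *	void __iomem *fb = ioremap_wc(info->fix.smem_start,
 *				      info->fix.smem_len);
 *	if (!fb)
 *		return -ENOMEM;
 *	...
 *	iounmap(fb);
 */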

/**
 * ioremap_wt	-	map memory into CPU space write through
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write through.
 * Write through stores data into memory while keeping the cache up-to-date.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
					__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wt);

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);

void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size,
				pgprot2cachemode(__pgprot(prot_val)),
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * The caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * __ioremap special-cases the PCI/ISA range by not instantiating a
	 * vm_area and by simply returning an address into the kernel mapping
	 * of ISA space. So handle that here.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
		return;

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	mmiotrace_iounmap(addr);

	/*
	 * Use the vm area unlocked, assuming the caller ensures there isn't
	 * another iounmap for the same address in parallel. Reuse of the
	 * virtual address is prevented by leaving it in the global lists
	 * until we're done with it. cpa takes care of the direct mappings.
	 */
	p = find_vm_area((void __force *)addr);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
	return cpu_has_gbpages;
#else
	return 0;
#endif
}

int __init arch_ioremap_pmd_supported(void)
{
	return cpu_has_pse;
}

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(phys_addr_t phys)
{
	unsigned long start  = phys &  PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr;

	/* If the page is RAM, we can use __va(). Otherwise ioremap and unmap. */
	if (page_is_ram(start >> PAGE_SHIFT))
		return __va(phys);

	vaddr = ioremap_cache(start, PAGE_SIZE);
	/* Only add the offset on success; return NULL if the ioremap() failed. */
	if (vaddr)
		vaddr += offset;

	return vaddr;
}

void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
	if (page_is_ram(phys >> PAGE_SHIFT))
		return;

	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
}
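
/*
 * Illustrative sketch (simplified; bounds and error checks omitted) of how
 * the two helpers above pair up in a /dev/mem-style read path, where 'p' is
 * the physical offset, 'buf' the user buffer and 'sz' the length:
 *
 *	ptr = xlate_dev_mem_ptr(p);
 *	if (!ptr)
 *		return -EFAULT;
 *	if (copy_to_user(buf, ptr, sz))
 *		err = -EFAULT;
 *	unxlate_dev_mem_ptr(p, ptr);
 */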

static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3());
	pgd_t *pgd = &base[pgd_index(addr)];
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

bool __init is_early_ioremap_ptep(pte_t *ptep)
{
	return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

#ifdef CONFIG_X86_64
	BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#else
	WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

	early_ioremap_setup();

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#undef __FIXADDR_TOP
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init __early_set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	__flush_tlb_one(addr);
}
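
/*
 * Illustrative note (hedged): callers normally reach this through the
 * early_set_fixmap()/early_clear_fixmap() wrappers, e.g. to peek at one
 * page of firmware data before the regular ioremap() machinery is up:
 *
 *	early_set_fixmap(FIX_EXAMPLE_IDX, phys, PAGE_KERNEL_RO);
 *	...read through (void *)fix_to_virt(FIX_EXAMPLE_IDX)...
 *	early_clear_fixmap(FIX_EXAMPLE_IDX);
 *
 * FIX_EXAMPLE_IDX is a placeholder for a real fixed_addresses slot, not an
 * existing constant.
 */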
513