/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs.
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>

#include <asm/cacheflush.h>
#include <asm/e820.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>

#ifdef CONFIG_X86_64

unsigned long __phys_addr(unsigned long x)
{
	if (x >= __START_KERNEL_map)
		return x - __START_KERNEL_map + phys_base;
	return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);

static inline int phys_addr_valid(unsigned long addr)
{
	return addr < (1UL << boot_cpu_data.x86_phys_bits);
}

#else

static inline int phys_addr_valid(unsigned long addr)
{
	return 1;
}

#endif

int page_is_ram(unsigned long pagenr)
{
	resource_size_t addr, end;
	int i;

	/*
	 * A special case is the first 4KB of memory; this is a BIOS-owned
	 * area, not kernel RAM, but it is generally not listed as such
	 * in the E820 table.
	 */
	if (pagenr == 0)
		return 0;

	/*
	 * Second special case: Some BIOSen report the PC BIOS
	 * area (640K->1MB) as RAM even though it is not.
	 */
	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
		    pagenr < (BIOS_END >> PAGE_SHIFT))
		return 0;

	for (i = 0; i < e820.nr_map; i++) {
		/*
		 * Not usable memory:
		 */
		if (e820.map[i].type != E820_RAM)
			continue;
		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;

		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}

/*
 * Fix up the linear direct mapping of the kernel to avoid cache attribute
 * conflicts.
 */
int ioremap_change_attr(unsigned long vaddr, unsigned long size,
			       unsigned long prot_val)
{
	unsigned long nrpages = size >> PAGE_SHIFT;
	int err;

	switch (prot_val) {
	case _PAGE_CACHE_UC:
	default:
		err = _set_memory_uc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_WC:
		err = _set_memory_wc(vaddr, nrpages);
		break;
	case _PAGE_CACHE_WB:
		err = _set_memory_wb(vaddr, nrpages);
		break;
	}

	return err;
}

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. Needed when the kernel wants to access high addresses
 * directly.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
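
/*
 * Illustrative example only (the addresses below are made up, assuming
 * 4KB pages): how an unaligned request is split into a page offset plus
 * a page-aligned mapping by the code below.
 *
 *	phys_addr = 0xfebc1004, size = 0x10
 *	last_addr = phys_addr + size - 1                  = 0xfebc1013
 *	offset    = phys_addr & ~PAGE_MASK                = 0x4
 *	phys_addr &= PAGE_MASK                            = 0xfebc1000
 *	size      = PAGE_ALIGN(last_addr + 1) - phys_addr = 0x1000
 *
 * One page gets mapped, and the caller receives the mapped virtual
 * address plus the 0x4 offset, so the unaligned request is transparent
 * to it.
 */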
static void __iomem *__ioremap_caller(resource_size_t phys_addr,
		unsigned long size, unsigned long prot_val, void *caller)
{
	unsigned long pfn, offset, vaddr;
	resource_size_t last_addr;
	const resource_size_t unaligned_phys_addr = phys_addr;
	const unsigned long unaligned_size = size;
	struct vm_struct *area;
	unsigned long new_prot_val;
	pgprot_t prot;
	int retval;
	void __iomem *ret_addr;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	if (!phys_addr_valid(phys_addr)) {
		printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
		       (unsigned long long)phys_addr);
		WARN_ON_ONCE(1);
		return NULL;
	}

	/*
	 * Don't remap the low PCI/ISA area, it's always mapped..
	 */
	if (is_ISA_range(phys_addr, last_addr))
		return (__force void __iomem *)phys_to_virt(phys_addr);

	/*
	 * Don't allow anybody to remap normal RAM that we're using..
	 */
	for (pfn = phys_addr >> PAGE_SHIFT;
				(pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK);
				pfn++) {

		int is_ram = page_is_ram(pfn);

		if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
			return NULL;
		WARN_ON_ONCE(is_ram);
	}

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
						prot_val, &new_prot_val);
	if (retval) {
		pr_debug("Warning: reserve_memtype returned %d\n", retval);
		return NULL;
	}

	if (prot_val != new_prot_val) {
		/*
		 * Do not fall back to certain memory types with certain
		 * requested type:
		 * - request is uc-, return cannot be write-back
		 * - request is uc-, return cannot be write-combine
		 * - request is write-combine, return cannot be write-back
		 */
		if ((prot_val == _PAGE_CACHE_UC_MINUS &&
		     (new_prot_val == _PAGE_CACHE_WB ||
		      new_prot_val == _PAGE_CACHE_WC)) ||
		    (prot_val == _PAGE_CACHE_WC &&
		     new_prot_val == _PAGE_CACHE_WB)) {
			pr_debug(
		"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
				(unsigned long long)phys_addr,
				(unsigned long long)(phys_addr + size),
				prot_val, new_prot_val);
			free_memtype(phys_addr, phys_addr + size);
			return NULL;
		}
		prot_val = new_prot_val;
	}

	switch (prot_val) {
	case _PAGE_CACHE_UC:
	default:
		prot = PAGE_KERNEL_NOCACHE;
		break;
	case _PAGE_CACHE_UC_MINUS:
		prot = PAGE_KERNEL_UC_MINUS;
		break;
	case _PAGE_CACHE_WC:
		prot = PAGE_KERNEL_WC;
		break;
	case _PAGE_CACHE_WB:
		prot = PAGE_KERNEL;
		break;
	}

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		return NULL;
	area->phys_addr = phys_addr;
	vaddr = (unsigned long) area->addr;
	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
		free_memtype(phys_addr, phys_addr + size);
		free_vm_area(area);
		return NULL;
	}

	if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
		free_memtype(phys_addr, phys_addr + size);
		vunmap(area->addr);
		return NULL;
	}

	ret_addr = (void __iomem *) (vaddr + offset);
	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);

	return ret_addr;
}

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @phys_addr:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncacheable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular, driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
	/*
	 * Ideally, this should be:
	 *	pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
	 *
	 * Until we fix all X drivers to use ioremap_wc(), we will use
	 * UC MINUS.
	 */
	unsigned long val = _PAGE_CACHE_UC_MINUS;

	return __ioremap_caller(phys_addr, size, val,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
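
/*
 * Minimal usage sketch (illustration only): a driver maps its MMIO
 * register window uncached, uses the mmio helpers on it, and tears the
 * mapping down again.  "pdev", BAR 0 and MY_CTRL_REG are assumptions made
 * up for the example, not names from this file; the readl() readback
 * simply flushes the posted write.
 *
 *	void __iomem *regs;
 *
 *	regs = ioremap_nocache(pci_resource_start(pdev, 0),
 *			       pci_resource_len(pdev, 0));
 *	if (!regs)
 *		return -ENOMEM;
 *
 *	writel(1, regs + MY_CTRL_REG);
 *	(void)readl(regs + MY_CTRL_REG);
 *
 *	iounmap(regs);
 */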

/**
 * ioremap_wc	-	map memory into CPU space write combined
 * @phys_addr:	bus address of the memory
 * @size:	size of the resource to map
 *
 * This version of ioremap ensures that the memory is marked write combining.
 * Write combining allows faster writes to some hardware devices.
 *
 * Must be freed with iounmap.
 */
void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
{
	if (pat_enabled)
		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
					__builtin_return_address(0));
	else
		return ioremap_nocache(phys_addr, size);
}
EXPORT_SYMBOL(ioremap_wc);
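
/*
 * Usage sketch (illustrative only): a frame buffer driver prefers a
 * write-combined mapping for streaming pixel writes; with PAT disabled,
 * ioremap_wc() above silently falls back to the uncached mapping.
 * "fb_phys" and "fb_len" are made-up names, not taken from this file.
 *
 *	void __iomem *fb;
 *
 *	fb = ioremap_wc(fb_phys, fb_len);
 *	if (!fb)
 *		return -ENOMEM;
 *	memset_io(fb, 0, fb_len);
 *	...
 *	iounmap(fb);
 */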

void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);

static void __iomem *ioremap_default(resource_size_t phys_addr,
					unsigned long size)
{
	unsigned long flags;
	void *ret;
	int err;

	/*
	 * - WB for WB-able memory and no other conflicting mappings
	 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
	 * - Inherit from conflicting mappings otherwise
	 */
	err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
	if (err < 0)
		return NULL;

	ret = (void *) __ioremap_caller(phys_addr, size, flags,
					__builtin_return_address(0));

	free_memtype(phys_addr, phys_addr + size);
	return (void __iomem *)ret;
}

void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				unsigned long prot_val)
{
	return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
				__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_prot);

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * __ioremap special-cases the PCI/ISA range by not instantiating a
	 * vm_area and by simply returning an address into the kernel mapping
	 * of ISA space.   So handle that here.
	 */
	if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
		return;

	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	mmiotrace_iounmap(addr);

	/* Use the vm area unlocked, assuming the caller
	   ensures there isn't another iounmap for the same address
	   in parallel. Reuse of the virtual address is prevented by
	   leaving it in the global lists until we're done with it.
	   cpa takes care of the direct mappings. */
	read_lock(&vmlist_lock);
	for (p = vmlist; p; p = p->next) {
		if (p->addr == (void __force *)addr)
			break;
	}
	read_unlock(&vmlist_lock);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void __force *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
void *xlate_dev_mem_ptr(unsigned long phys)
{
	void *addr;
	unsigned long start = phys & PAGE_MASK;

	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
	if (page_is_ram(start >> PAGE_SHIFT))
		return __va(phys);

	addr = (void __force *)ioremap_default(start, PAGE_SIZE);
	if (addr)
		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));

	return addr;
}

void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
{
	if (page_is_ram(phys >> PAGE_SHIFT))
		return;

	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
	return;
}
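
/*
 * Sketch of how the two helpers above pair up in a /dev/mem style read
 * path (illustration only; "buf", "count" and the error handling are
 * assumed, and the details of the real drivers/char/mem.c code are
 * elided):
 *
 *	void *ptr = xlate_dev_mem_ptr(phys);
 *	if (!ptr)
 *		return -EFAULT;
 *	if (copy_to_user(buf, ptr, count))
 *		rc = -EFAULT;
 *	unxlate_dev_mem_ptr(phys, ptr);
 */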

static int __initdata early_ioremap_debug;

static int __init early_ioremap_debug_setup(char *str)
{
	early_ioremap_debug = 1;

	return 0;
}
early_param("early_ioremap_debug", early_ioremap_debug_setup);

static __initdata int after_paging_init;
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	/* Don't assume we're using swapper_pg_dir at this point */
	pgd_t *base = __va(read_cr3());
	pgd_t *pgd = &base[pgd_index(addr)];
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

void __init early_ioremap_init(void)
{
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_init()\n");

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}

void __init early_ioremap_clear(void)
{
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_clear()\n");

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	pmd_clear(pmd);
	paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
	__flush_tlb_all();
}

void __init early_ioremap_reset(void)
{
	enum fixed_addresses idx;
	unsigned long addr, phys;
	pte_t *pte;

	after_paging_init = 1;
	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
		addr = fix_to_virt(idx);
		pte = early_ioremap_pte(addr);
		if (pte_present(*pte)) {
			phys = pte_val(*pte) & PAGE_MASK;
			set_fixmap(idx, phys);
		}
	}
}

static void __init __early_set_fixmap(enum fixed_addresses idx,
				   unsigned long phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);

	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(&init_mm, addr, pte);
	__flush_tlb_one(addr);
}

static inline void __init early_set_fixmap(enum fixed_addresses idx,
					unsigned long phys)
{
	if (after_paging_init)
		set_fixmap(idx, phys);
	else
		__early_set_fixmap(idx, phys, PAGE_KERNEL);
}

static inline void __init early_clear_fixmap(enum fixed_addresses idx)
{
	if (after_paging_init)
		clear_fixmap(idx);
	else
		__early_set_fixmap(idx, 0, __pgprot(0));
}

static int __initdata early_ioremap_nested;

static int __init check_early_ioremap_leak(void)
{
	if (!early_ioremap_nested)
		return 0;
	WARN(1, KERN_WARNING
	       "Debug warning: early ioremap leak of %d areas detected.\n",
		early_ioremap_nested);
	printk(KERN_WARNING
		"please boot with early_ioremap_debug and report the dmesg.\n");

	return 1;
}
late_initcall(check_early_ioremap_leak);

void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
{
	unsigned long offset, last_addr;
	unsigned int nrpages, nesting;
	enum fixed_addresses idx0, idx;

	WARN_ON(system_state != SYSTEM_BOOTING);

	nesting = early_ioremap_nested;
	if (early_ioremap_debug) {
		printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
		       phys_addr, size, nesting);
		dump_stack();
	}

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr) {
		WARN_ON(1);
		return NULL;
	}

	if (nesting >= FIX_BTMAPS_NESTING) {
		WARN_ON(1);
		return NULL;
	}
	early_ioremap_nested++;
	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr + 1) - phys_addr;

	/*
	 * Mappings have to fit in the FIX_BTMAP area.
	 */
	nrpages = size >> PAGE_SHIFT;
	if (nrpages > NR_FIX_BTMAPS) {
		WARN_ON(1);
		return NULL;
	}

	/*
	 * Ok, go for it..
	 */
	idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
	idx = idx0;
	while (nrpages > 0) {
		early_set_fixmap(idx, phys_addr);
		phys_addr += PAGE_SIZE;
		--idx;
		--nrpages;
	}
	if (early_ioremap_debug)
		printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));

	return (void *) (offset + fix_to_virt(idx0));
}

void __init early_iounmap(void *addr, unsigned long size)
{
	unsigned long virt_addr;
	unsigned long offset;
	unsigned int nrpages;
	enum fixed_addresses idx;
	int nesting;

	nesting = --early_ioremap_nested;
	if (WARN_ON(nesting < 0))
		return;

	if (early_ioremap_debug) {
		printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
		       size, nesting);
		dump_stack();
	}

	virt_addr = (unsigned long)addr;
	if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
		WARN_ON(1);
		return;
	}
	offset = virt_addr & ~PAGE_MASK;
	nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;

	idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
	while (nrpages > 0) {
		early_clear_fixmap(idx);
		--idx;
		--nrpages;
	}
}
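
/*
 * Typical boot-time usage sketch (illustrative; "table_phys", "table_len"
 * and "table_copy" are placeholder names, not from this file): map a
 * firmware table before the normal ioremap machinery is available, copy
 * out what is needed, then release the temporary fixmap slots.
 *
 *	void *map = early_ioremap(table_phys, table_len);
 *	if (map) {
 *		memcpy(&table_copy, map, sizeof(table_copy));
 *		early_iounmap(map, table_len);
 *	}
 */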

void __this_fixmap_does_not_exist(void)
{
	WARN_ON(1);
}
664