1f8f98a93SRussell King /* 2f8f98a93SRussell King * linux/arch/arm/lib/copypage-xscale.S 3f8f98a93SRussell King * 4f8f98a93SRussell King * Copyright (C) 1995-2005 Russell King 5f8f98a93SRussell King * 6f8f98a93SRussell King * This program is free software; you can redistribute it and/or modify 7f8f98a93SRussell King * it under the terms of the GNU General Public License version 2 as 8f8f98a93SRussell King * published by the Free Software Foundation. 9f8f98a93SRussell King * 10f8f98a93SRussell King * This handles the mini data cache, as found on SA11x0 and XScale 11f8f98a93SRussell King * processors. When we copy a user page page, we map it in such a way 12f8f98a93SRussell King * that accesses to this page will not touch the main data cache, but 13f8f98a93SRussell King * will be cached in the mini data cache. This prevents us thrashing 14f8f98a93SRussell King * the main data cache on page faults. 15f8f98a93SRussell King */ 16f8f98a93SRussell King #include <linux/init.h> 17f8f98a93SRussell King #include <linux/mm.h> 18063b0a42SRussell King #include <linux/highmem.h> 19f8f98a93SRussell King 20f8f98a93SRussell King #include <asm/pgtable.h> 21f8f98a93SRussell King #include <asm/tlbflush.h> 221c9d3df5SRichard Purdie #include <asm/cacheflush.h> 23f8f98a93SRussell King 241b2e2b73SRussell King #include "mm.h" 251b2e2b73SRussell King 26f8f98a93SRussell King /* 27f8f98a93SRussell King * 0xffff8000 to 0xffffffff is reserved for any ARM architecture 28f8f98a93SRussell King * specific hacks for copying pages efficiently. 29f8f98a93SRussell King */ 30f8f98a93SRussell King #define COPYPAGE_MINICACHE 0xffff8000 31f8f98a93SRussell King 32f8f98a93SRussell King #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ 33bb30f36fSRussell King L_PTE_MT_MINICACHE) 34f8f98a93SRussell King 35f8f98a93SRussell King static DEFINE_SPINLOCK(minicache_lock); 36f8f98a93SRussell King 37f8f98a93SRussell King /* 38063b0a42SRussell King * XScale mini-dcache optimised copy_user_highpage 39f8f98a93SRussell King * 40f8f98a93SRussell King * We flush the destination cache lines just before we write the data into the 41f8f98a93SRussell King * corresponding address. Since the Dcache is read-allocate, this removes the 42f8f98a93SRussell King * Dcache aliasing issue. The writes will be forwarded to the write buffer, 43f8f98a93SRussell King * and merged as appropriate. 44f8f98a93SRussell King */ 45f8f98a93SRussell King static void __attribute__((naked)) 46f8f98a93SRussell King mc_copy_user_page(void *from, void *to) 47f8f98a93SRussell King { 48f8f98a93SRussell King /* 49f8f98a93SRussell King * Strangely enough, best performance is achieved 50f8f98a93SRussell King * when prefetching destination as well. (NP) 51f8f98a93SRussell King */ 52f8f98a93SRussell King asm volatile( 53f8f98a93SRussell King "stmfd sp!, {r4, r5, lr} \n\ 54f8f98a93SRussell King mov lr, %2 \n\ 55f8f98a93SRussell King pld [r0, #0] \n\ 56f8f98a93SRussell King pld [r0, #32] \n\ 57f8f98a93SRussell King pld [r1, #0] \n\ 58f8f98a93SRussell King pld [r1, #32] \n\ 59f8f98a93SRussell King 1: pld [r0, #64] \n\ 60f8f98a93SRussell King pld [r0, #96] \n\ 61f8f98a93SRussell King pld [r1, #64] \n\ 62f8f98a93SRussell King pld [r1, #96] \n\ 63f8f98a93SRussell King 2: ldrd r2, [r0], #8 \n\ 64f8f98a93SRussell King ldrd r4, [r0], #8 \n\ 65f8f98a93SRussell King mov ip, r1 \n\ 66f8f98a93SRussell King strd r2, [r1], #8 \n\ 67f8f98a93SRussell King ldrd r2, [r0], #8 \n\ 68f8f98a93SRussell King strd r4, [r1], #8 \n\ 69f8f98a93SRussell King ldrd r4, [r0], #8 \n\ 70f8f98a93SRussell King strd r2, [r1], #8 \n\ 71f8f98a93SRussell King strd r4, [r1], #8 \n\ 72f8f98a93SRussell King mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ 73f8f98a93SRussell King ldrd r2, [r0], #8 \n\ 74f8f98a93SRussell King mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ 75f8f98a93SRussell King ldrd r4, [r0], #8 \n\ 76f8f98a93SRussell King mov ip, r1 \n\ 77f8f98a93SRussell King strd r2, [r1], #8 \n\ 78f8f98a93SRussell King ldrd r2, [r0], #8 \n\ 79f8f98a93SRussell King strd r4, [r1], #8 \n\ 80f8f98a93SRussell King ldrd r4, [r0], #8 \n\ 81f8f98a93SRussell King strd r2, [r1], #8 \n\ 82f8f98a93SRussell King strd r4, [r1], #8 \n\ 83f8f98a93SRussell King mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ 84f8f98a93SRussell King subs lr, lr, #1 \n\ 85f8f98a93SRussell King mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ 86f8f98a93SRussell King bgt 1b \n\ 87f8f98a93SRussell King beq 2b \n\ 88f8f98a93SRussell King ldmfd sp!, {r4, r5, pc} " 89f8f98a93SRussell King : 90f8f98a93SRussell King : "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1)); 91f8f98a93SRussell King } 92f8f98a93SRussell King 93063b0a42SRussell King void xscale_mc_copy_user_highpage(struct page *to, struct page *from, 94063b0a42SRussell King unsigned long vaddr) 95f8f98a93SRussell King { 96063b0a42SRussell King void *kto = kmap_atomic(to, KM_USER1); 971c9d3df5SRichard Purdie 98063b0a42SRussell King if (test_and_clear_bit(PG_dcache_dirty, &from->flags)) 99063b0a42SRussell King __flush_dcache_page(page_mapping(from), from); 1001c9d3df5SRichard Purdie 101f8f98a93SRussell King spin_lock(&minicache_lock); 102f8f98a93SRussell King 103063b0a42SRussell King set_pte_ext(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); 104f8f98a93SRussell King flush_tlb_kernel_page(COPYPAGE_MINICACHE); 105f8f98a93SRussell King 106f8f98a93SRussell King mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); 107f8f98a93SRussell King 108f8f98a93SRussell King spin_unlock(&minicache_lock); 109063b0a42SRussell King 110063b0a42SRussell King kunmap_atomic(kto, KM_USER1); 111f8f98a93SRussell King } 112f8f98a93SRussell King 113f8f98a93SRussell King /* 114f8f98a93SRussell King * XScale optimised clear_user_page 115f8f98a93SRussell King */ 116303c6443SRussell King void 117303c6443SRussell King xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr) 118f8f98a93SRussell King { 119*43ae286bSNicolas Pitre void *ptr, *kaddr = kmap_atomic(page, KM_USER0); 120f8f98a93SRussell King asm volatile( 121*43ae286bSNicolas Pitre "mov r1, %2 \n\ 122f8f98a93SRussell King mov r2, #0 \n\ 123f8f98a93SRussell King mov r3, #0 \n\ 124303c6443SRussell King 1: mov ip, %0 \n\ 125303c6443SRussell King strd r2, [%0], #8 \n\ 126303c6443SRussell King strd r2, [%0], #8 \n\ 127303c6443SRussell King strd r2, [%0], #8 \n\ 128303c6443SRussell King strd r2, [%0], #8 \n\ 129f8f98a93SRussell King mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ 130f8f98a93SRussell King subs r1, r1, #1 \n\ 131f8f98a93SRussell King mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ 132303c6443SRussell King bne 1b" 133*43ae286bSNicolas Pitre : "=r" (ptr) 134*43ae286bSNicolas Pitre : "0" (kaddr), "I" (PAGE_SIZE / 32) 135303c6443SRussell King : "r1", "r2", "r3", "ip"); 136303c6443SRussell King kunmap_atomic(kaddr, KM_USER0); 137f8f98a93SRussell King } 138f8f98a93SRussell King 139f8f98a93SRussell King struct cpu_user_fns xscale_mc_user_fns __initdata = { 140303c6443SRussell King .cpu_clear_user_highpage = xscale_mc_clear_user_highpage, 141063b0a42SRussell King .cpu_copy_user_highpage = xscale_mc_copy_user_highpage, 142f8f98a93SRussell King }; 143