1 /* 2 * linux/arch/arm/lib/uaccess_with_memcpy.c 3 * 4 * Written by: Lennert Buytenhek and Nicolas Pitre 5 * Copyright (C) 2009 Marvell Semiconductor 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/ctype.h> 14 #include <linux/uaccess.h> 15 #include <linux/rwsem.h> 16 #include <linux/mm.h> 17 #include <linux/sched.h> 18 #include <linux/hardirq.h> /* for in_atomic() */ 19 #include <linux/gfp.h> 20 #include <linux/highmem.h> 21 #include <linux/hugetlb.h> 22 #include <asm/current.h> 23 #include <asm/page.h> 24 25 static int 26 pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) 27 { 28 unsigned long addr = (unsigned long)_addr; 29 pgd_t *pgd; 30 pmd_t *pmd; 31 pte_t *pte; 32 pud_t *pud; 33 spinlock_t *ptl; 34 35 pgd = pgd_offset(current->mm, addr); 36 if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd))) 37 return 0; 38 39 pud = pud_offset(pgd, addr); 40 if (unlikely(pud_none(*pud) || pud_bad(*pud))) 41 return 0; 42 43 pmd = pmd_offset(pud, addr); 44 if (unlikely(pmd_none(*pmd))) 45 return 0; 46 47 /* 48 * A pmd can be bad if it refers to a HugeTLB or THP page. 49 * 50 * Both THP and HugeTLB pages have the same pmd layout 51 * and should not be manipulated by the pte functions. 52 * 53 * Lock the page table for the destination and check 54 * to see that it's still huge and whether or not we will 55 * need to fault on write. 56 */ 57 if (unlikely(pmd_thp_or_huge(*pmd))) { 58 ptl = ¤t->mm->page_table_lock; 59 spin_lock(ptl); 60 if (unlikely(!pmd_thp_or_huge(*pmd) 61 || pmd_hugewillfault(*pmd))) { 62 spin_unlock(ptl); 63 return 0; 64 } 65 66 *ptep = NULL; 67 *ptlp = ptl; 68 return 1; 69 } 70 71 if (unlikely(pmd_bad(*pmd))) 72 return 0; 73 74 pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); 75 if (unlikely(!pte_present(*pte) || !pte_young(*pte) || 76 !pte_write(*pte) || !pte_dirty(*pte))) { 77 pte_unmap_unlock(pte, ptl); 78 return 0; 79 } 80 81 *ptep = pte; 82 *ptlp = ptl; 83 84 return 1; 85 } 86 87 static unsigned long noinline 88 __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) 89 { 90 unsigned long ua_flags; 91 int atomic; 92 93 if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { 94 memcpy((void *)to, from, n); 95 return 0; 96 } 97 98 /* the mmap semaphore is taken only if not in an atomic context */ 99 atomic = faulthandler_disabled(); 100 101 if (!atomic) 102 down_read(¤t->mm->mmap_sem); 103 while (n) { 104 pte_t *pte; 105 spinlock_t *ptl; 106 int tocopy; 107 108 while (!pin_page_for_write(to, &pte, &ptl)) { 109 if (!atomic) 110 up_read(¤t->mm->mmap_sem); 111 if (__put_user(0, (char __user *)to)) 112 goto out; 113 if (!atomic) 114 down_read(¤t->mm->mmap_sem); 115 } 116 117 tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1; 118 if (tocopy > n) 119 tocopy = n; 120 121 ua_flags = uaccess_save_and_enable(); 122 memcpy((void *)to, from, tocopy); 123 uaccess_restore(ua_flags); 124 to += tocopy; 125 from += tocopy; 126 n -= tocopy; 127 128 if (pte) 129 pte_unmap_unlock(pte, ptl); 130 else 131 spin_unlock(ptl); 132 } 133 if (!atomic) 134 up_read(¤t->mm->mmap_sem); 135 136 out: 137 return n; 138 } 139 140 unsigned long 141 arm_copy_to_user(void __user *to, const void *from, unsigned long n) 142 { 143 /* 144 * This test is stubbed out of the main function above to keep 145 * the overhead for small copies low by avoiding a large 146 * register dump on the stack just to reload them right away. 147 * With frame pointer disabled, tail call optimization kicks in 148 * as well making this test almost invisible. 149 */ 150 if (n < 64) { 151 unsigned long ua_flags = uaccess_save_and_enable(); 152 n = __copy_to_user_std(to, from, n); 153 uaccess_restore(ua_flags); 154 } else { 155 n = __copy_to_user_memcpy(to, from, n); 156 } 157 return n; 158 } 159 160 static unsigned long noinline 161 __clear_user_memset(void __user *addr, unsigned long n) 162 { 163 unsigned long ua_flags; 164 165 if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { 166 memset((void *)addr, 0, n); 167 return 0; 168 } 169 170 down_read(¤t->mm->mmap_sem); 171 while (n) { 172 pte_t *pte; 173 spinlock_t *ptl; 174 int tocopy; 175 176 while (!pin_page_for_write(addr, &pte, &ptl)) { 177 up_read(¤t->mm->mmap_sem); 178 if (__put_user(0, (char __user *)addr)) 179 goto out; 180 down_read(¤t->mm->mmap_sem); 181 } 182 183 tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1; 184 if (tocopy > n) 185 tocopy = n; 186 187 ua_flags = uaccess_save_and_enable(); 188 memset((void *)addr, 0, tocopy); 189 uaccess_restore(ua_flags); 190 addr += tocopy; 191 n -= tocopy; 192 193 if (pte) 194 pte_unmap_unlock(pte, ptl); 195 else 196 spin_unlock(ptl); 197 } 198 up_read(¤t->mm->mmap_sem); 199 200 out: 201 return n; 202 } 203 204 unsigned long arm_clear_user(void __user *addr, unsigned long n) 205 { 206 /* See rational for this in __copy_to_user() above. */ 207 if (n < 64) { 208 unsigned long ua_flags = uaccess_save_and_enable(); 209 n = __clear_user_std(addr, n); 210 uaccess_restore(ua_flags); 211 } else { 212 n = __clear_user_memset(addr, n); 213 } 214 return n; 215 } 216 217 #if 0 218 219 /* 220 * This code is disabled by default, but kept around in case the chosen 221 * thresholds need to be revalidated. Some overhead (small but still) 222 * would be implied by a runtime determined variable threshold, and 223 * so far the measurement on concerned targets didn't show a worthwhile 224 * variation. 225 * 226 * Note that a fairly precise sched_clock() implementation is needed 227 * for results to make some sense. 228 */ 229 230 #include <linux/vmalloc.h> 231 232 static int __init test_size_treshold(void) 233 { 234 struct page *src_page, *dst_page; 235 void *user_ptr, *kernel_ptr; 236 unsigned long long t0, t1, t2; 237 int size, ret; 238 239 ret = -ENOMEM; 240 src_page = alloc_page(GFP_KERNEL); 241 if (!src_page) 242 goto no_src; 243 dst_page = alloc_page(GFP_KERNEL); 244 if (!dst_page) 245 goto no_dst; 246 kernel_ptr = page_address(src_page); 247 user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010)); 248 if (!user_ptr) 249 goto no_vmap; 250 251 /* warm up the src page dcache */ 252 ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE); 253 254 for (size = PAGE_SIZE; size >= 4; size /= 2) { 255 t0 = sched_clock(); 256 ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size); 257 t1 = sched_clock(); 258 ret |= __copy_to_user_std(user_ptr, kernel_ptr, size); 259 t2 = sched_clock(); 260 printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); 261 } 262 263 for (size = PAGE_SIZE; size >= 4; size /= 2) { 264 t0 = sched_clock(); 265 ret |= __clear_user_memset(user_ptr, size); 266 t1 = sched_clock(); 267 ret |= __clear_user_std(user_ptr, size); 268 t2 = sched_clock(); 269 printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); 270 } 271 272 if (ret) 273 ret = -EFAULT; 274 275 vunmap(user_ptr); 276 no_vmap: 277 put_page(dst_page); 278 no_dst: 279 put_page(src_page); 280 no_src: 281 return ret; 282 } 283 284 subsys_initcall(test_size_treshold); 285 286 #endif 287