#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>

#include <asm/uaccess.h>

#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

/**
 * kstrdup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrdup(const char *s, gfp_t gfp)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;

	len = strlen(s) + 1;
	buf = kmalloc_track_caller(len, gfp);
	if (buf)
		memcpy(buf, s, len);
	return buf;
}
EXPORT_SYMBOL(kstrdup);

/**
 * kstrndup - allocate space for and copy an existing string, up to @max
 * characters
 * @s: the string to duplicate
 * @max: read at most @max chars from @s
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrndup(const char *s, size_t max, gfp_t gfp)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;

	len = strnlen(s, max);
	buf = kmalloc_track_caller(len + 1, gfp);
	if (buf) {
		memcpy(buf, s, len);
		buf[len] = '\0';
	}
	return buf;
}
EXPORT_SYMBOL(kstrndup);

/**
 * kmemdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 * @gfp: GFP mask to use
 */
void *kmemdup(const void *src, size_t len, gfp_t gfp)
{
	void *p;

	p = kmalloc_track_caller(len, gfp);
	if (p)
		memcpy(p, src, len);
	return p;
}
EXPORT_SYMBOL(kmemdup);

/**
 * memdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Returns an ERR_PTR() on failure.
 */
void *memdup_user(const void __user *src, size_t len)
{
	void *p;

	/*
	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
	 * cause a page fault, which makes it pointless to use GFP_NOFS
	 * or GFP_ATOMIC.
	 */
	p = kmalloc_track_caller(len, GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}

	return p;
}
EXPORT_SYMBOL(memdup_user);

static __always_inline void *__do_krealloc(const void *p, size_t new_size,
					   gfp_t flags)
{
	void *ret;
	size_t ks = 0;

	if (p)
		ks = ksize(p);

	/* The existing allocation may already be big enough. */
	if (ks >= new_size)
		return (void *)p;

	ret = kmalloc_track_caller(new_size, flags);
	if (ret && p)
		memcpy(ret, p, ks);

	return ret;
}

/**
 * __krealloc - like krealloc() but don't free @p.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * This function is like krealloc() except it never frees the originally
 * allocated buffer. Use this if you don't want to free the buffer immediately
 * like, for example, with RCU.
 */
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
	if (unlikely(!new_size))
		return ZERO_SIZE_PTR;

	return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);
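/*
 * Illustrative sketch (not part of the original file): the typical caller
 * pattern for memdup_user(), e.g. copying an argument struct in an ioctl
 * handler. The names uarg and struct foo_args are hypothetical.
 *
 *	struct foo_args *args;
 *
 *	args = memdup_user(uarg, sizeof(*args));
 *	if (IS_ERR(args))
 *		return PTR_ERR(args);
 *	// ... use args ...
 *	kfree(args);
 */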
/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes. If @p is %NULL, krealloc()
 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
	void *ret;

	if (unlikely(!new_size)) {
		kfree(p);
		return ZERO_SIZE_PTR;
	}

	ret = __do_krealloc(p, new_size, flags);
	if (ret && p != ret)
		kfree(p);

	return ret;
}
EXPORT_SYMBOL(krealloc);

/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before it is freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer, which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance-sensitive code.
 */
void kzfree(const void *p)
{
	size_t ks;
	void *mem = (void *)p;

	if (unlikely(ZERO_OR_NULL_PTR(mem)))
		return;
	ks = ksize(mem);
	memset(mem, 0, ks);
	kfree(mem);
}
EXPORT_SYMBOL(kzfree);

/*
 * strndup_user - duplicate an existing string from user space
 * @s: The string to duplicate
 * @n: Maximum number of bytes to copy, including the trailing NUL.
 */
char *strndup_user(const char __user *s, long n)
{
	char *p;
	long length;

	length = strnlen_user(s, n);

	if (!length)
		return ERR_PTR(-EFAULT);

	if (length > n)
		return ERR_PTR(-EINVAL);

	p = memdup_user(s, length);

	if (IS_ERR(p))
		return p;

	p[length - 1] = '\0';

	return p;
}
EXPORT_SYMBOL(strndup_user);

void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node *rb_parent)
{
	struct vm_area_struct *next;

	vma->vm_prev = prev;
	if (prev) {
		next = prev->vm_next;
		prev->vm_next = vma;
	} else {
		mm->mmap = vma;
		if (rb_parent)
			next = rb_entry(rb_parent,
					struct vm_area_struct, vm_rb);
		else
			next = NULL;
	}
	vma->vm_next = next;
	if (next)
		next->vm_prev = vma;
}

/* Check if the vma is being used as a stack by this task */
static int vm_is_stack_for_task(struct task_struct *t,
				struct vm_area_struct *vma)
{
	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}
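/*
 * Illustrative sketch (not part of the original file): fetching a
 * NUL-terminated path from user space with strndup_user(). The name
 * upath is hypothetical.
 *
 *	char *path;
 *
 *	path = strndup_user(upath, PATH_MAX);
 *	if (IS_ERR(path))
 *		return PTR_ERR(path);
 *	// ... use path ...
 *	kfree(path);
 */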
/*
 * Check if the vma is being used as a stack.
 * If @in_group is non-zero, check in the entire thread group; otherwise
 * just check in the given task. Returns the pid of the task that the
 * vma is a stack for, or 0 if none.
 */
pid_t vm_is_stack(struct task_struct *task,
		  struct vm_area_struct *vma, int in_group)
{
	pid_t ret = 0;

	if (vm_is_stack_for_task(task, vma))
		return task->pid;

	if (in_group) {
		struct task_struct *t;
		rcu_read_lock();
		if (!pid_alive(task))
			goto done;

		t = task;
		do {
			if (vm_is_stack_for_task(t, vma)) {
				ret = t->pid;
				goto done;
			}
		} while_each_thread(task, t);
done:
		rcu_read_unlock();
	}

	return ret;
}

#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm)
{
	mm->mmap_base = TASK_UNMAPPED_BASE;
	mm->get_unmapped_area = arch_get_unmapped_area;
}
#endif

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 * If the architecture does not support this function, it simply returns
 * with no pages pinned.
 */
int __attribute__((weak)) __get_user_pages_fast(unsigned long start,
						int nr_pages, int write,
						struct page **pages)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__get_user_pages_fast);
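/*
 * Illustrative sketch (not part of the original file): how a /proc-style
 * consumer might use vm_is_stack() to label a thread-stack mapping. The
 * seq_file m is hypothetical here.
 *
 *	pid_t tid = vm_is_stack(task, vma, 1);
 *
 *	if (tid)
 *		seq_printf(m, "[stack:%d]", tid);
 */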
/**
 * get_user_pages_fast() - pin user pages in memory
 * @start: starting user address
 * @nr_pages: number of pages from start to pin
 * @write: whether pages will be written to
 * @pages: array that receives pointers to the pages pinned.
 *	Should be at least nr_pages long.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 *
 * get_user_pages_fast provides equivalent functionality to get_user_pages,
 * operating on current and current->mm, with force=0 and vma=NULL. However
 * unlike get_user_pages, it must be called without mmap_sem held.
 *
 * get_user_pages_fast may take mmap_sem and page table locks, so no
 * assumptions can be made about lack of locking. get_user_pages_fast is to be
 * implemented in a way that is advantageous (vs get_user_pages()) when the
 * user memory area is already faulted in and present in ptes. However if the
 * pages have to be faulted in, it may turn out to be slightly slower, so
 * callers need to carefully consider what to use. On many architectures,
 * get_user_pages_fast simply falls back to get_user_pages.
 */
int __attribute__((weak)) get_user_pages_fast(unsigned long start,
					      int nr_pages, int write,
					      struct page **pages)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages(current, mm, start, nr_pages,
			     write, 0, pages, NULL);
	up_read(&mm->mmap_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);

unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long pgoff)
{
	unsigned long ret;
	struct mm_struct *mm = current->mm;
	unsigned long populate;

	ret = security_mmap_file(file, prot, flag);
	if (!ret) {
		down_write(&mm->mmap_sem);
		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
				    &populate);
		up_write(&mm->mmap_sem);
		if (populate)
			mm_populate(ret, populate);
	}
	return ret;
}

unsigned long vm_mmap(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long offset)
{
	if (unlikely(offset + PAGE_ALIGN(len) < offset))
		return -EINVAL;
	if (unlikely(offset & ~PAGE_MASK))
		return -EINVAL;

	return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
EXPORT_SYMBOL(vm_mmap);

struct address_space *page_mapping(struct page *page)
{
	struct address_space *mapping = page->mapping;

	/* This happens if someone calls flush_dcache_page on a slab page */
	if (unlikely(PageSlab(page)))
		return NULL;

	if (unlikely(PageSwapCache(page))) {
		swp_entry_t entry;

		entry.val = page_private(page);
		mapping = swap_address_space(entry);
	} else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
		mapping = NULL;
	return mapping;
}

int overcommit_ratio_handler(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_kbytes = 0;
	return ret;
}

int overcommit_kbytes_handler(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp,
			      loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_ratio = 0;
	return ret;
}

/*
 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
 */
unsigned long vm_commit_limit(void)
{
	unsigned long allowed;

	if (sysctl_overcommit_kbytes)
		allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
	else
		allowed = ((totalram_pages - hugetlb_total_pages())
			   * sysctl_overcommit_ratio / 100);
	allowed += total_swap_pages;

	return allowed;
}

/* Tracepoint definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
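/*
 * Illustrative sketch (not part of the original file): the usual
 * pin/use/release pattern around get_user_pages_fast(). The names uaddr
 * and NR are hypothetical; each page actually pinned must be released
 * with put_page() once the caller is done with it.
 *
 *	struct page *pages[NR];
 *	int i, pinned;
 *
 *	pinned = get_user_pages_fast(uaddr, NR, 1, pages);
 *	if (pinned < 0)
 *		return pinned;
 *	// ... use the first 'pinned' pages; may be fewer than NR ...
 *	for (i = 0; i < pinned; i++)
 *		put_page(pages[i]);
 */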