xref: /linux/drivers/android/binder_alloc.c (revision 40ccd6aa3e2e05be93394e3cd560c718dedfcc77)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* binder_alloc.c
3  *
4  * Android IPC Subsystem
5  *
6  * Copyright (C) 2007-2017 Google, Inc.
7  */
8 
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 
11 #include <linux/list.h>
12 #include <linux/sched/mm.h>
13 #include <linux/module.h>
14 #include <linux/rtmutex.h>
15 #include <linux/rbtree.h>
16 #include <linux/seq_file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/slab.h>
19 #include <linux/sched.h>
20 #include <linux/list_lru.h>
21 #include <linux/ratelimit.h>
22 #include <asm/cacheflush.h>
23 #include <linux/uaccess.h>
24 #include <linux/highmem.h>
25 #include <linux/sizes.h>
26 #include "binder_alloc.h"
27 #include "binder_trace.h"
28 
29 struct list_lru binder_freelist;
30 
31 static DEFINE_MUTEX(binder_alloc_mmap_lock);
32 
33 enum {
34 	BINDER_DEBUG_USER_ERROR             = 1U << 0,
35 	BINDER_DEBUG_OPEN_CLOSE             = 1U << 1,
36 	BINDER_DEBUG_BUFFER_ALLOC           = 1U << 2,
37 	BINDER_DEBUG_BUFFER_ALLOC_ASYNC     = 1U << 3,
38 };
39 static uint32_t binder_alloc_debug_mask = BINDER_DEBUG_USER_ERROR;
40 
41 module_param_named(debug_mask, binder_alloc_debug_mask,
42 		   uint, 0644);
43 
44 #define binder_alloc_debug(mask, x...) \
45 	do { \
46 		if (binder_alloc_debug_mask & mask) \
47 			pr_info_ratelimited(x); \
48 	} while (0)
49 
50 static struct binder_buffer *binder_buffer_next(struct binder_buffer *buffer)
51 {
52 	return list_entry(buffer->entry.next, struct binder_buffer, entry);
53 }
54 
55 static struct binder_buffer *binder_buffer_prev(struct binder_buffer *buffer)
56 {
57 	return list_entry(buffer->entry.prev, struct binder_buffer, entry);
58 }
59 
60 static size_t binder_alloc_buffer_size(struct binder_alloc *alloc,
61 				       struct binder_buffer *buffer)
62 {
63 	if (list_is_last(&buffer->entry, &alloc->buffers))
64 		return alloc->buffer + alloc->buffer_size - buffer->user_data;
65 	return binder_buffer_next(buffer)->user_data - buffer->user_data;
66 }
67 
68 static void binder_insert_free_buffer(struct binder_alloc *alloc,
69 				      struct binder_buffer *new_buffer)
70 {
71 	struct rb_node **p = &alloc->free_buffers.rb_node;
72 	struct rb_node *parent = NULL;
73 	struct binder_buffer *buffer;
74 	size_t buffer_size;
75 	size_t new_buffer_size;
76 
77 	BUG_ON(!new_buffer->free);
78 
79 	new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer);
80 
81 	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
82 		     "%d: add free buffer, size %zd, at %pK\n",
83 		      alloc->pid, new_buffer_size, new_buffer);
84 
85 	while (*p) {
86 		parent = *p;
87 		buffer = rb_entry(parent, struct binder_buffer, rb_node);
88 		BUG_ON(!buffer->free);
89 
90 		buffer_size = binder_alloc_buffer_size(alloc, buffer);
91 
92 		if (new_buffer_size < buffer_size)
93 			p = &parent->rb_left;
94 		else
95 			p = &parent->rb_right;
96 	}
97 	rb_link_node(&new_buffer->rb_node, parent, p);
98 	rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers);
99 }
100 
101 static void binder_insert_allocated_buffer_locked(
102 		struct binder_alloc *alloc, struct binder_buffer *new_buffer)
103 {
104 	struct rb_node **p = &alloc->allocated_buffers.rb_node;
105 	struct rb_node *parent = NULL;
106 	struct binder_buffer *buffer;
107 
108 	BUG_ON(new_buffer->free);
109 
110 	while (*p) {
111 		parent = *p;
112 		buffer = rb_entry(parent, struct binder_buffer, rb_node);
113 		BUG_ON(buffer->free);
114 
115 		if (new_buffer->user_data < buffer->user_data)
116 			p = &parent->rb_left;
117 		else if (new_buffer->user_data > buffer->user_data)
118 			p = &parent->rb_right;
119 		else
120 			BUG();
121 	}
122 	rb_link_node(&new_buffer->rb_node, parent, p);
123 	rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers);
124 }
125 
126 static struct binder_buffer *binder_alloc_prepare_to_free_locked(
127 		struct binder_alloc *alloc,
128 		unsigned long user_ptr)
129 {
130 	struct rb_node *n = alloc->allocated_buffers.rb_node;
131 	struct binder_buffer *buffer;
132 
133 	while (n) {
134 		buffer = rb_entry(n, struct binder_buffer, rb_node);
135 		BUG_ON(buffer->free);
136 
137 		if (user_ptr < buffer->user_data) {
138 			n = n->rb_left;
139 		} else if (user_ptr > buffer->user_data) {
140 			n = n->rb_right;
141 		} else {
142 			/*
143 			 * Guard against user threads attempting to
144 			 * free the buffer when in use by kernel or
145 			 * after it's already been freed.
146 			 */
147 			if (!buffer->allow_user_free)
148 				return ERR_PTR(-EPERM);
149 			buffer->allow_user_free = 0;
150 			return buffer;
151 		}
152 	}
153 	return NULL;
154 }
155 
156 /**
157  * binder_alloc_prepare_to_free() - get buffer given user ptr
158  * @alloc:	binder_alloc for this proc
159  * @user_ptr:	User pointer to buffer data
160  *
161  * Validate userspace pointer to buffer data and return buffer corresponding to
162  * that user pointer. Search the rb tree for buffer that matches user data
163  * pointer.
164  *
165  * Return:	Pointer to buffer or NULL
166  */
167 struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc,
168 						   unsigned long user_ptr)
169 {
170 	struct binder_buffer *buffer;
171 
172 	spin_lock(&alloc->lock);
173 	buffer = binder_alloc_prepare_to_free_locked(alloc, user_ptr);
174 	spin_unlock(&alloc->lock);
175 	return buffer;
176 }
177 
178 static inline void
179 binder_set_installed_page(struct binder_lru_page *lru_page,
180 			  struct page *page)
181 {
182 	/* Pairs with acquire in binder_get_installed_page() */
183 	smp_store_release(&lru_page->page_ptr, page);
184 }
185 
186 static inline struct page *
187 binder_get_installed_page(struct binder_lru_page *lru_page)
188 {
189 	/* Pairs with release in binder_set_installed_page() */
190 	return smp_load_acquire(&lru_page->page_ptr);
191 }
192 
193 static void binder_lru_freelist_add(struct binder_alloc *alloc,
194 				    unsigned long start, unsigned long end)
195 {
196 	struct binder_lru_page *page;
197 	unsigned long page_addr;
198 
199 	trace_binder_update_page_range(alloc, false, start, end);
200 
201 	for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
202 		size_t index;
203 		int ret;
204 
205 		index = (page_addr - alloc->buffer) / PAGE_SIZE;
206 		page = &alloc->pages[index];
207 
208 		if (!binder_get_installed_page(page))
209 			continue;
210 
211 		trace_binder_free_lru_start(alloc, index);
212 
213 		ret = list_lru_add_obj(&binder_freelist, &page->lru);
214 		WARN_ON(!ret);
215 
216 		trace_binder_free_lru_end(alloc, index);
217 	}
218 }
219 
220 static int binder_install_single_page(struct binder_alloc *alloc,
221 				      struct binder_lru_page *lru_page,
222 				      unsigned long addr)
223 {
224 	struct page *page;
225 	int ret = 0;
226 
227 	if (!mmget_not_zero(alloc->mm))
228 		return -ESRCH;
229 
230 	/*
231 	 * Protected with mmap_sem in write mode as multiple tasks
232 	 * might race to install the same page.
233 	 */
234 	mmap_write_lock(alloc->mm);
235 	if (binder_get_installed_page(lru_page))
236 		goto out;
237 
238 	if (!alloc->vma) {
239 		pr_err("%d: %s failed, no vma\n", alloc->pid, __func__);
240 		ret = -ESRCH;
241 		goto out;
242 	}
243 
244 	page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
245 	if (!page) {
246 		pr_err("%d: failed to allocate page\n", alloc->pid);
247 		ret = -ENOMEM;
248 		goto out;
249 	}
250 
251 	ret = vm_insert_page(alloc->vma, addr, page);
252 	if (ret) {
253 		pr_err("%d: %s failed to insert page at offset %lx with %d\n",
254 		       alloc->pid, __func__, addr - alloc->buffer, ret);
255 		__free_page(page);
256 		ret = -ENOMEM;
257 		goto out;
258 	}
259 
260 	/* Mark page installation complete and safe to use */
261 	binder_set_installed_page(lru_page, page);
262 out:
263 	mmap_write_unlock(alloc->mm);
264 	mmput_async(alloc->mm);
265 	return ret;
266 }
267 
268 static int binder_install_buffer_pages(struct binder_alloc *alloc,
269 				       struct binder_buffer *buffer,
270 				       size_t size)
271 {
272 	struct binder_lru_page *page;
273 	unsigned long start, final;
274 	unsigned long page_addr;
275 
276 	start = buffer->user_data & PAGE_MASK;
277 	final = PAGE_ALIGN(buffer->user_data + size);
278 
279 	for (page_addr = start; page_addr < final; page_addr += PAGE_SIZE) {
280 		unsigned long index;
281 		int ret;
282 
283 		index = (page_addr - alloc->buffer) / PAGE_SIZE;
284 		page = &alloc->pages[index];
285 
286 		if (binder_get_installed_page(page))
287 			continue;
288 
289 		trace_binder_alloc_page_start(alloc, index);
290 
291 		ret = binder_install_single_page(alloc, page, page_addr);
292 		if (ret)
293 			return ret;
294 
295 		trace_binder_alloc_page_end(alloc, index);
296 	}
297 
298 	return 0;
299 }
300 
301 /* The range of pages should exclude those shared with other buffers */
302 static void binder_lru_freelist_del(struct binder_alloc *alloc,
303 				    unsigned long start, unsigned long end)
304 {
305 	struct binder_lru_page *page;
306 	unsigned long page_addr;
307 
308 	trace_binder_update_page_range(alloc, true, start, end);
309 
310 	for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
311 		unsigned long index;
312 		bool on_lru;
313 
314 		index = (page_addr - alloc->buffer) / PAGE_SIZE;
315 		page = &alloc->pages[index];
316 
317 		if (page->page_ptr) {
318 			trace_binder_alloc_lru_start(alloc, index);
319 
320 			on_lru = list_lru_del_obj(&binder_freelist, &page->lru);
321 			WARN_ON(!on_lru);
322 
323 			trace_binder_alloc_lru_end(alloc, index);
324 			continue;
325 		}
326 
327 		if (index + 1 > alloc->pages_high)
328 			alloc->pages_high = index + 1;
329 	}
330 }
331 
332 static inline void binder_alloc_set_vma(struct binder_alloc *alloc,
333 		struct vm_area_struct *vma)
334 {
335 	/* pairs with smp_load_acquire in binder_alloc_get_vma() */
336 	smp_store_release(&alloc->vma, vma);
337 }
338 
339 static inline struct vm_area_struct *binder_alloc_get_vma(
340 		struct binder_alloc *alloc)
341 {
342 	/* pairs with smp_store_release in binder_alloc_set_vma() */
343 	return smp_load_acquire(&alloc->vma);
344 }
345 
346 static void debug_no_space_locked(struct binder_alloc *alloc)
347 {
348 	size_t largest_alloc_size = 0;
349 	struct binder_buffer *buffer;
350 	size_t allocated_buffers = 0;
351 	size_t largest_free_size = 0;
352 	size_t total_alloc_size = 0;
353 	size_t total_free_size = 0;
354 	size_t free_buffers = 0;
355 	size_t buffer_size;
356 	struct rb_node *n;
357 
358 	for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) {
359 		buffer = rb_entry(n, struct binder_buffer, rb_node);
360 		buffer_size = binder_alloc_buffer_size(alloc, buffer);
361 		allocated_buffers++;
362 		total_alloc_size += buffer_size;
363 		if (buffer_size > largest_alloc_size)
364 			largest_alloc_size = buffer_size;
365 	}
366 
367 	for (n = rb_first(&alloc->free_buffers); n; n = rb_next(n)) {
368 		buffer = rb_entry(n, struct binder_buffer, rb_node);
369 		buffer_size = binder_alloc_buffer_size(alloc, buffer);
370 		free_buffers++;
371 		total_free_size += buffer_size;
372 		if (buffer_size > largest_free_size)
373 			largest_free_size = buffer_size;
374 	}
375 
376 	binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
377 			   "allocated: %zd (num: %zd largest: %zd), free: %zd (num: %zd largest: %zd)\n",
378 			   total_alloc_size, allocated_buffers,
379 			   largest_alloc_size, total_free_size,
380 			   free_buffers, largest_free_size);
381 }
382 
383 static bool debug_low_async_space_locked(struct binder_alloc *alloc)
384 {
385 	/*
386 	 * Find the amount and size of buffers allocated by the current caller;
387 	 * The idea is that once we cross the threshold, whoever is responsible
388 	 * for the low async space is likely to try to send another async txn,
389 	 * and at some point we'll catch them in the act. This is more efficient
390 	 * than keeping a map per pid.
391 	 */
392 	struct binder_buffer *buffer;
393 	size_t total_alloc_size = 0;
394 	int pid = current->tgid;
395 	size_t num_buffers = 0;
396 	struct rb_node *n;
397 
398 	/*
399 	 * Only start detecting spammers once we have less than 20% of async
400 	 * space left (which is less than 10% of total buffer size).
401 	 */
402 	if (alloc->free_async_space >= alloc->buffer_size / 10) {
403 		alloc->oneway_spam_detected = false;
404 		return false;
405 	}
406 
407 	for (n = rb_first(&alloc->allocated_buffers); n != NULL;
408 		 n = rb_next(n)) {
409 		buffer = rb_entry(n, struct binder_buffer, rb_node);
410 		if (buffer->pid != pid)
411 			continue;
412 		if (!buffer->async_transaction)
413 			continue;
414 		total_alloc_size += binder_alloc_buffer_size(alloc, buffer);
415 		num_buffers++;
416 	}
417 
418 	/*
419 	 * Warn if this pid has more than 50 transactions, or more than 50% of
420 	 * async space (which is 25% of total buffer size). Oneway spam is only
421 	 * detected when the threshold is exceeded.
422 	 */
423 	if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
424 		binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
425 			     "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
426 			      alloc->pid, pid, num_buffers, total_alloc_size);
427 		if (!alloc->oneway_spam_detected) {
428 			alloc->oneway_spam_detected = true;
429 			return true;
430 		}
431 	}
432 	return false;
433 }
434 
435 /* Callers preallocate @new_buffer, it is freed by this function if unused */
436 static struct binder_buffer *binder_alloc_new_buf_locked(
437 				struct binder_alloc *alloc,
438 				struct binder_buffer *new_buffer,
439 				size_t size,
440 				int is_async)
441 {
442 	struct rb_node *n = alloc->free_buffers.rb_node;
443 	struct rb_node *best_fit = NULL;
444 	struct binder_buffer *buffer;
445 	unsigned long next_used_page;
446 	unsigned long curr_last_page;
447 	size_t buffer_size;
448 
449 	if (is_async && alloc->free_async_space < size) {
450 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
451 			     "%d: binder_alloc_buf size %zd failed, no async space left\n",
452 			      alloc->pid, size);
453 		buffer = ERR_PTR(-ENOSPC);
454 		goto out;
455 	}
456 
457 	while (n) {
458 		buffer = rb_entry(n, struct binder_buffer, rb_node);
459 		BUG_ON(!buffer->free);
460 		buffer_size = binder_alloc_buffer_size(alloc, buffer);
461 
462 		if (size < buffer_size) {
463 			best_fit = n;
464 			n = n->rb_left;
465 		} else if (size > buffer_size) {
466 			n = n->rb_right;
467 		} else {
468 			best_fit = n;
469 			break;
470 		}
471 	}
472 
473 	if (unlikely(!best_fit)) {
474 		binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
475 				   "%d: binder_alloc_buf size %zd failed, no address space\n",
476 				   alloc->pid, size);
477 		debug_no_space_locked(alloc);
478 		buffer = ERR_PTR(-ENOSPC);
479 		goto out;
480 	}
481 
482 	if (buffer_size != size) {
483 		/* Found an oversized buffer and needs to be split */
484 		buffer = rb_entry(best_fit, struct binder_buffer, rb_node);
485 		buffer_size = binder_alloc_buffer_size(alloc, buffer);
486 
487 		WARN_ON(n || buffer_size == size);
488 		new_buffer->user_data = buffer->user_data + size;
489 		list_add(&new_buffer->entry, &buffer->entry);
490 		new_buffer->free = 1;
491 		binder_insert_free_buffer(alloc, new_buffer);
492 		new_buffer = NULL;
493 	}
494 
495 	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
496 		     "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n",
497 		      alloc->pid, size, buffer, buffer_size);
498 
499 	/*
500 	 * Now we remove the pages from the freelist. A clever calculation
501 	 * with buffer_size determines if the last page is shared with an
502 	 * adjacent in-use buffer. In such case, the page has been already
503 	 * removed from the freelist so we trim our range short.
504 	 */
505 	next_used_page = (buffer->user_data + buffer_size) & PAGE_MASK;
506 	curr_last_page = PAGE_ALIGN(buffer->user_data + size);
507 	binder_lru_freelist_del(alloc, PAGE_ALIGN(buffer->user_data),
508 				min(next_used_page, curr_last_page));
509 
510 	rb_erase(&buffer->rb_node, &alloc->free_buffers);
511 	buffer->free = 0;
512 	buffer->allow_user_free = 0;
513 	binder_insert_allocated_buffer_locked(alloc, buffer);
514 	buffer->async_transaction = is_async;
515 	buffer->oneway_spam_suspect = false;
516 	if (is_async) {
517 		alloc->free_async_space -= size;
518 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
519 			     "%d: binder_alloc_buf size %zd async free %zd\n",
520 			      alloc->pid, size, alloc->free_async_space);
521 		if (debug_low_async_space_locked(alloc))
522 			buffer->oneway_spam_suspect = true;
523 	}
524 
525 out:
526 	/* Discard possibly unused new_buffer */
527 	kfree(new_buffer);
528 	return buffer;
529 }
530 
/*
 * Compute the total buffer size for a request: each of the three sizes is
 * rounded up to pointer size and the results are summed.
 *
 * Returns 0 when the request is invalid (the sum overflows), otherwise the
 * total, which is never smaller than sizeof(void *).
 */
static inline size_t sanitized_size(size_t data_size,
				    size_t offsets_size,
				    size_t extra_buffers_size)
{
	size_t payload, total;

	/* data + offsets, aligned to pointer size; a wrapped sum is invalid */
	payload = ALIGN(data_size, sizeof(void *));
	payload += ALIGN(offsets_size, sizeof(void *));
	if (payload < data_size || payload < offsets_size)
		return 0;

	/* Same overflow check once the extra buffers are added */
	total = payload + ALIGN(extra_buffers_size, sizeof(void *));
	if (total < payload || total < extra_buffers_size)
		return 0;

	/* Pad 0-sized buffers so they get a unique address */
	return max(total, sizeof(void *));
}
552 
553 /**
554  * binder_alloc_new_buf() - Allocate a new binder buffer
555  * @alloc:              binder_alloc for this proc
556  * @data_size:          size of user data buffer
557  * @offsets_size:       user specified buffer offset
558  * @extra_buffers_size: size of extra space for meta-data (eg, security context)
559  * @is_async:           buffer for async transaction
560  *
561  * Allocate a new buffer given the requested sizes. Returns
562  * the kernel version of the buffer pointer. The size allocated
563  * is the sum of the three given sizes (each rounded up to
564  * pointer-sized boundary)
565  *
566  * Return:	The allocated buffer or %ERR_PTR(-errno) if error
567  */
568 struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
569 					   size_t data_size,
570 					   size_t offsets_size,
571 					   size_t extra_buffers_size,
572 					   int is_async)
573 {
574 	struct binder_buffer *buffer, *next;
575 	size_t size;
576 	int ret;
577 
578 	/* Check binder_alloc is fully initialized */
579 	if (!binder_alloc_get_vma(alloc)) {
580 		binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
581 				   "%d: binder_alloc_buf, no vma\n",
582 				   alloc->pid);
583 		return ERR_PTR(-ESRCH);
584 	}
585 
586 	size = sanitized_size(data_size, offsets_size, extra_buffers_size);
587 	if (unlikely(!size)) {
588 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
589 				   "%d: got transaction with invalid size %zd-%zd-%zd\n",
590 				   alloc->pid, data_size, offsets_size,
591 				   extra_buffers_size);
592 		return ERR_PTR(-EINVAL);
593 	}
594 
595 	/* Preallocate the next buffer */
596 	next = kzalloc(sizeof(*next), GFP_KERNEL);
597 	if (!next)
598 		return ERR_PTR(-ENOMEM);
599 
600 	spin_lock(&alloc->lock);
601 	buffer = binder_alloc_new_buf_locked(alloc, next, size, is_async);
602 	if (IS_ERR(buffer)) {
603 		spin_unlock(&alloc->lock);
604 		goto out;
605 	}
606 
607 	buffer->data_size = data_size;
608 	buffer->offsets_size = offsets_size;
609 	buffer->extra_buffers_size = extra_buffers_size;
610 	buffer->pid = current->tgid;
611 	spin_unlock(&alloc->lock);
612 
613 	ret = binder_install_buffer_pages(alloc, buffer, size);
614 	if (ret) {
615 		binder_alloc_free_buf(alloc, buffer);
616 		buffer = ERR_PTR(ret);
617 	}
618 out:
619 	return buffer;
620 }
621 
622 static unsigned long buffer_start_page(struct binder_buffer *buffer)
623 {
624 	return buffer->user_data & PAGE_MASK;
625 }
626 
627 static unsigned long prev_buffer_end_page(struct binder_buffer *buffer)
628 {
629 	return (buffer->user_data - 1) & PAGE_MASK;
630 }
631 
632 static void binder_delete_free_buffer(struct binder_alloc *alloc,
633 				      struct binder_buffer *buffer)
634 {
635 	struct binder_buffer *prev, *next;
636 
637 	if (PAGE_ALIGNED(buffer->user_data))
638 		goto skip_freelist;
639 
640 	BUG_ON(alloc->buffers.next == &buffer->entry);
641 	prev = binder_buffer_prev(buffer);
642 	BUG_ON(!prev->free);
643 	if (prev_buffer_end_page(prev) == buffer_start_page(buffer))
644 		goto skip_freelist;
645 
646 	if (!list_is_last(&buffer->entry, &alloc->buffers)) {
647 		next = binder_buffer_next(buffer);
648 		if (buffer_start_page(next) == buffer_start_page(buffer))
649 			goto skip_freelist;
650 	}
651 
652 	binder_lru_freelist_add(alloc, buffer_start_page(buffer),
653 				buffer_start_page(buffer) + PAGE_SIZE);
654 skip_freelist:
655 	list_del(&buffer->entry);
656 	kfree(buffer);
657 }
658 
659 static void binder_free_buf_locked(struct binder_alloc *alloc,
660 				   struct binder_buffer *buffer)
661 {
662 	size_t size, buffer_size;
663 
664 	buffer_size = binder_alloc_buffer_size(alloc, buffer);
665 
666 	size = ALIGN(buffer->data_size, sizeof(void *)) +
667 		ALIGN(buffer->offsets_size, sizeof(void *)) +
668 		ALIGN(buffer->extra_buffers_size, sizeof(void *));
669 
670 	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
671 		     "%d: binder_free_buf %pK size %zd buffer_size %zd\n",
672 		      alloc->pid, buffer, size, buffer_size);
673 
674 	BUG_ON(buffer->free);
675 	BUG_ON(size > buffer_size);
676 	BUG_ON(buffer->transaction != NULL);
677 	BUG_ON(buffer->user_data < alloc->buffer);
678 	BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size);
679 
680 	if (buffer->async_transaction) {
681 		alloc->free_async_space += buffer_size;
682 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
683 			     "%d: binder_free_buf size %zd async free %zd\n",
684 			      alloc->pid, size, alloc->free_async_space);
685 	}
686 
687 	binder_lru_freelist_add(alloc, PAGE_ALIGN(buffer->user_data),
688 				(buffer->user_data + buffer_size) & PAGE_MASK);
689 
690 	rb_erase(&buffer->rb_node, &alloc->allocated_buffers);
691 	buffer->free = 1;
692 	if (!list_is_last(&buffer->entry, &alloc->buffers)) {
693 		struct binder_buffer *next = binder_buffer_next(buffer);
694 
695 		if (next->free) {
696 			rb_erase(&next->rb_node, &alloc->free_buffers);
697 			binder_delete_free_buffer(alloc, next);
698 		}
699 	}
700 	if (alloc->buffers.next != &buffer->entry) {
701 		struct binder_buffer *prev = binder_buffer_prev(buffer);
702 
703 		if (prev->free) {
704 			binder_delete_free_buffer(alloc, buffer);
705 			rb_erase(&prev->rb_node, &alloc->free_buffers);
706 			buffer = prev;
707 		}
708 	}
709 	binder_insert_free_buffer(alloc, buffer);
710 }
711 
712 /**
713  * binder_alloc_get_page() - get kernel pointer for given buffer offset
714  * @alloc: binder_alloc for this proc
715  * @buffer: binder buffer to be accessed
716  * @buffer_offset: offset into @buffer data
717  * @pgoffp: address to copy final page offset to
718  *
719  * Lookup the struct page corresponding to the address
720  * at @buffer_offset into @buffer->user_data. If @pgoffp is not
721  * NULL, the byte-offset into the page is written there.
722  *
723  * The caller is responsible to ensure that the offset points
724  * to a valid address within the @buffer and that @buffer is
725  * not freeable by the user. Since it can't be freed, we are
726  * guaranteed that the corresponding elements of @alloc->pages[]
727  * cannot change.
728  *
729  * Return: struct page
730  */
731 static struct page *binder_alloc_get_page(struct binder_alloc *alloc,
732 					  struct binder_buffer *buffer,
733 					  binder_size_t buffer_offset,
734 					  pgoff_t *pgoffp)
735 {
736 	binder_size_t buffer_space_offset = buffer_offset +
737 		(buffer->user_data - alloc->buffer);
738 	pgoff_t pgoff = buffer_space_offset & ~PAGE_MASK;
739 	size_t index = buffer_space_offset >> PAGE_SHIFT;
740 	struct binder_lru_page *lru_page;
741 
742 	lru_page = &alloc->pages[index];
743 	*pgoffp = pgoff;
744 	return lru_page->page_ptr;
745 }
746 
747 /**
748  * binder_alloc_clear_buf() - zero out buffer
749  * @alloc: binder_alloc for this proc
750  * @buffer: binder buffer to be cleared
751  *
752  * memset the given buffer to 0
753  */
754 static void binder_alloc_clear_buf(struct binder_alloc *alloc,
755 				   struct binder_buffer *buffer)
756 {
757 	size_t bytes = binder_alloc_buffer_size(alloc, buffer);
758 	binder_size_t buffer_offset = 0;
759 
760 	while (bytes) {
761 		unsigned long size;
762 		struct page *page;
763 		pgoff_t pgoff;
764 
765 		page = binder_alloc_get_page(alloc, buffer,
766 					     buffer_offset, &pgoff);
767 		size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
768 		memset_page(page, pgoff, 0, size);
769 		bytes -= size;
770 		buffer_offset += size;
771 	}
772 }
773 
774 /**
775  * binder_alloc_free_buf() - free a binder buffer
776  * @alloc:	binder_alloc for this proc
777  * @buffer:	kernel pointer to buffer
778  *
779  * Free the buffer allocated via binder_alloc_new_buf()
780  */
781 void binder_alloc_free_buf(struct binder_alloc *alloc,
782 			    struct binder_buffer *buffer)
783 {
784 	/*
785 	 * We could eliminate the call to binder_alloc_clear_buf()
786 	 * from binder_alloc_deferred_release() by moving this to
787 	 * binder_free_buf_locked(). However, that could
788 	 * increase contention for the alloc->lock if clear_on_free
789 	 * is used frequently for large buffers. This lock is not
790 	 * needed for correctness here.
791 	 */
792 	if (buffer->clear_on_free) {
793 		binder_alloc_clear_buf(alloc, buffer);
794 		buffer->clear_on_free = false;
795 	}
796 	spin_lock(&alloc->lock);
797 	binder_free_buf_locked(alloc, buffer);
798 	spin_unlock(&alloc->lock);
799 }
800 
801 /**
802  * binder_alloc_mmap_handler() - map virtual address space for proc
803  * @alloc:	alloc structure for this proc
804  * @vma:	vma passed to mmap()
805  *
806  * Called by binder_mmap() to initialize the space specified in
807  * vma for allocating binder buffers
808  *
809  * Return:
810  *      0 = success
811  *      -EBUSY = address space already mapped
812  *      -ENOMEM = failed to map memory to given address space
813  */
814 int binder_alloc_mmap_handler(struct binder_alloc *alloc,
815 			      struct vm_area_struct *vma)
816 {
817 	struct binder_buffer *buffer;
818 	const char *failure_string;
819 	int ret, i;
820 
821 	if (unlikely(vma->vm_mm != alloc->mm)) {
822 		ret = -EINVAL;
823 		failure_string = "invalid vma->vm_mm";
824 		goto err_invalid_mm;
825 	}
826 
827 	mutex_lock(&binder_alloc_mmap_lock);
828 	if (alloc->buffer_size) {
829 		ret = -EBUSY;
830 		failure_string = "already mapped";
831 		goto err_already_mapped;
832 	}
833 	alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start,
834 				   SZ_4M);
835 	mutex_unlock(&binder_alloc_mmap_lock);
836 
837 	alloc->buffer = vma->vm_start;
838 
839 	alloc->pages = kcalloc(alloc->buffer_size / PAGE_SIZE,
840 			       sizeof(alloc->pages[0]),
841 			       GFP_KERNEL);
842 	if (alloc->pages == NULL) {
843 		ret = -ENOMEM;
844 		failure_string = "alloc page array";
845 		goto err_alloc_pages_failed;
846 	}
847 
848 	for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
849 		alloc->pages[i].alloc = alloc;
850 		INIT_LIST_HEAD(&alloc->pages[i].lru);
851 	}
852 
853 	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
854 	if (!buffer) {
855 		ret = -ENOMEM;
856 		failure_string = "alloc buffer struct";
857 		goto err_alloc_buf_struct_failed;
858 	}
859 
860 	buffer->user_data = alloc->buffer;
861 	list_add(&buffer->entry, &alloc->buffers);
862 	buffer->free = 1;
863 	binder_insert_free_buffer(alloc, buffer);
864 	alloc->free_async_space = alloc->buffer_size / 2;
865 
866 	/* Signal binder_alloc is fully initialized */
867 	binder_alloc_set_vma(alloc, vma);
868 
869 	return 0;
870 
871 err_alloc_buf_struct_failed:
872 	kfree(alloc->pages);
873 	alloc->pages = NULL;
874 err_alloc_pages_failed:
875 	alloc->buffer = 0;
876 	mutex_lock(&binder_alloc_mmap_lock);
877 	alloc->buffer_size = 0;
878 err_already_mapped:
879 	mutex_unlock(&binder_alloc_mmap_lock);
880 err_invalid_mm:
881 	binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
882 			   "%s: %d %lx-%lx %s failed %d\n", __func__,
883 			   alloc->pid, vma->vm_start, vma->vm_end,
884 			   failure_string, ret);
885 	return ret;
886 }
887 
888 
889 void binder_alloc_deferred_release(struct binder_alloc *alloc)
890 {
891 	struct rb_node *n;
892 	int buffers, page_count;
893 	struct binder_buffer *buffer;
894 
895 	buffers = 0;
896 	spin_lock(&alloc->lock);
897 	BUG_ON(alloc->vma);
898 
899 	while ((n = rb_first(&alloc->allocated_buffers))) {
900 		buffer = rb_entry(n, struct binder_buffer, rb_node);
901 
902 		/* Transaction should already have been freed */
903 		BUG_ON(buffer->transaction);
904 
905 		if (buffer->clear_on_free) {
906 			binder_alloc_clear_buf(alloc, buffer);
907 			buffer->clear_on_free = false;
908 		}
909 		binder_free_buf_locked(alloc, buffer);
910 		buffers++;
911 	}
912 
913 	while (!list_empty(&alloc->buffers)) {
914 		buffer = list_first_entry(&alloc->buffers,
915 					  struct binder_buffer, entry);
916 		WARN_ON(!buffer->free);
917 
918 		list_del(&buffer->entry);
919 		WARN_ON_ONCE(!list_empty(&alloc->buffers));
920 		kfree(buffer);
921 	}
922 
923 	page_count = 0;
924 	if (alloc->pages) {
925 		int i;
926 
927 		for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
928 			bool on_lru;
929 
930 			if (!alloc->pages[i].page_ptr)
931 				continue;
932 
933 			on_lru = list_lru_del_obj(&binder_freelist,
934 						  &alloc->pages[i].lru);
935 			binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
936 				     "%s: %d: page %d %s\n",
937 				     __func__, alloc->pid, i,
938 				     on_lru ? "on lru" : "active");
939 			__free_page(alloc->pages[i].page_ptr);
940 			page_count++;
941 		}
942 		kfree(alloc->pages);
943 	}
944 	spin_unlock(&alloc->lock);
945 	if (alloc->mm)
946 		mmdrop(alloc->mm);
947 
948 	binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE,
949 		     "%s: %d buffers %d, pages %d\n",
950 		     __func__, alloc->pid, buffers, page_count);
951 }
952 
953 /**
954  * binder_alloc_print_allocated() - print buffer info
955  * @m:     seq_file for output via seq_printf()
956  * @alloc: binder_alloc for this proc
957  *
958  * Prints information about every buffer associated with
959  * the binder_alloc state to the given seq_file
960  */
961 void binder_alloc_print_allocated(struct seq_file *m,
962 				  struct binder_alloc *alloc)
963 {
964 	struct binder_buffer *buffer;
965 	struct rb_node *n;
966 
967 	spin_lock(&alloc->lock);
968 	for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) {
969 		buffer = rb_entry(n, struct binder_buffer, rb_node);
970 		seq_printf(m, "  buffer %d: %lx size %zd:%zd:%zd %s\n",
971 			   buffer->debug_id,
972 			   buffer->user_data - alloc->buffer,
973 			   buffer->data_size, buffer->offsets_size,
974 			   buffer->extra_buffers_size,
975 			   buffer->transaction ? "active" : "delivered");
976 	}
977 	spin_unlock(&alloc->lock);
978 }
979 
980 /**
981  * binder_alloc_print_pages() - print page usage
982  * @m:     seq_file for output via seq_printf()
983  * @alloc: binder_alloc for this proc
984  */
985 void binder_alloc_print_pages(struct seq_file *m,
986 			      struct binder_alloc *alloc)
987 {
988 	struct binder_lru_page *page;
989 	int i;
990 	int active = 0;
991 	int lru = 0;
992 	int free = 0;
993 
994 	spin_lock(&alloc->lock);
995 	/*
996 	 * Make sure the binder_alloc is fully initialized, otherwise we might
997 	 * read inconsistent state.
998 	 */
999 	if (binder_alloc_get_vma(alloc) != NULL) {
1000 		for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
1001 			page = &alloc->pages[i];
1002 			if (!page->page_ptr)
1003 				free++;
1004 			else if (list_empty(&page->lru))
1005 				active++;
1006 			else
1007 				lru++;
1008 		}
1009 	}
1010 	spin_unlock(&alloc->lock);
1011 	seq_printf(m, "  pages: %d:%d:%d\n", active, lru, free);
1012 	seq_printf(m, "  pages high watermark: %zu\n", alloc->pages_high);
1013 }
1014 
1015 /**
1016  * binder_alloc_get_allocated_count() - return count of buffers
1017  * @alloc: binder_alloc for this proc
1018  *
1019  * Return: count of allocated buffers
1020  */
1021 int binder_alloc_get_allocated_count(struct binder_alloc *alloc)
1022 {
1023 	struct rb_node *n;
1024 	int count = 0;
1025 
1026 	spin_lock(&alloc->lock);
1027 	for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n))
1028 		count++;
1029 	spin_unlock(&alloc->lock);
1030 	return count;
1031 }
1032 
1033 
1034 /**
1035  * binder_alloc_vma_close() - invalidate address space
1036  * @alloc: binder_alloc for this proc
1037  *
1038  * Called from binder_vma_close() when releasing address space.
1039  * Clears alloc->vma to prevent new incoming transactions from
1040  * allocating more buffers.
1041  */
1042 void binder_alloc_vma_close(struct binder_alloc *alloc)
1043 {
1044 	binder_alloc_set_vma(alloc, NULL);
1045 }
1046 
/**
 * binder_alloc_free_page() - shrinker callback to free pages
 * @item:   item to free
 * @lru:    list_lru_one that @item is removed from via list_lru_isolate()
 * @lock:   lock protecting the item
 * @cb_arg: callback argument (not referenced by this function)
 *
 * Called from list_lru_walk() in binder_shrink_scan() to free
 * up pages when the system is under memory pressure.
 *
 * @lock is held on entry and on return; it is dropped and re-taken
 * internally on the path that actually frees a page.
 *
 * Return: LRU_REMOVED_RETRY when the page was detached and freed;
 * LRU_SKIP when the mm could not be pinned, one of the trylocks failed,
 * the page had already been freed, or the vma found at the page address
 * is not the one recorded for this binder_alloc.
 */
enum lru_status binder_alloc_free_page(struct list_head *item,
				       struct list_lru_one *lru,
				       spinlock_t *lock,
				       void *cb_arg)
	__must_hold(lock)
{
	struct binder_lru_page *page = container_of(item, typeof(*page), lru);
	struct binder_alloc *alloc = page->alloc;
	struct mm_struct *mm = alloc->mm;
	struct vm_area_struct *vma;
	struct page *page_to_free;
	unsigned long page_addr;
	size_t index;

	/*
	 * Every acquisition below is a non-blocking attempt; if any of
	 * them fails the item is simply skipped (LRU_SKIP).
	 */
	if (!mmget_not_zero(mm))
		goto err_mmget;
	if (!mmap_read_trylock(mm))
		goto err_mmap_read_lock_failed;
	if (!spin_trylock(&alloc->lock))
		goto err_get_alloc_lock_failed;
	if (!page->page_ptr)
		goto err_page_already_freed;

	/* Translate the array slot into its address inside the mapping */
	index = page - alloc->pages;
	page_addr = alloc->buffer + index * PAGE_SIZE;

	/*
	 * A vma at this address that is not the one recorded for the
	 * alloc is left untouched. No vma at all is acceptable: the page
	 * is still freed, only the user-space zap below is skipped.
	 */
	vma = vma_lookup(mm, page_addr);
	if (vma && vma != binder_alloc_get_vma(alloc))
		goto err_invalid_vma;

	trace_binder_unmap_kernel_start(alloc, index);

	/* Detach the page under alloc->lock; it is freed after unlocking */
	page_to_free = page->page_ptr;
	page->page_ptr = NULL;

	trace_binder_unmap_kernel_end(alloc, index);

	list_lru_isolate(lru, item);
	/*
	 * Both spinlocks are released before the user mapping is zapped.
	 * NOTE(review): presumably because the zap may sleep - confirm.
	 */
	spin_unlock(&alloc->lock);
	spin_unlock(lock);

	if (vma) {
		trace_binder_unmap_user_start(alloc, index);

		zap_page_range_single(vma, page_addr, PAGE_SIZE, NULL);

		trace_binder_unmap_user_end(alloc, index);
	}

	mmap_read_unlock(mm);
	mmput_async(mm);
	__free_page(page_to_free);

	/* Re-take @lock: the function is annotated __must_hold(lock) */
	spin_lock(lock);
	return LRU_REMOVED_RETRY;

	/* Unwind in reverse order of acquisition */
err_invalid_vma:
err_page_already_freed:
	spin_unlock(&alloc->lock);
err_get_alloc_lock_failed:
	mmap_read_unlock(mm);
err_mmap_read_lock_failed:
	mmput_async(mm);
err_mmget:
	return LRU_SKIP;
}
1122 
1123 static unsigned long
1124 binder_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1125 {
1126 	return list_lru_count(&binder_freelist);
1127 }
1128 
1129 static unsigned long
1130 binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1131 {
1132 	return list_lru_walk(&binder_freelist, binder_alloc_free_page,
1133 			    NULL, sc->nr_to_scan);
1134 }
1135 
/*
 * Shrinker handle for binder_freelist: allocated and registered in
 * binder_alloc_shrinker_init(), released in binder_alloc_shrinker_exit().
 */
static struct shrinker *binder_shrinker;
1137 
1138 /**
1139  * binder_alloc_init() - called by binder_open() for per-proc initialization
1140  * @alloc: binder_alloc for this proc
1141  *
1142  * Called from binder_open() to initialize binder_alloc fields for
1143  * new binder proc
1144  */
1145 void binder_alloc_init(struct binder_alloc *alloc)
1146 {
1147 	alloc->pid = current->group_leader->pid;
1148 	alloc->mm = current->mm;
1149 	mmgrab(alloc->mm);
1150 	spin_lock_init(&alloc->lock);
1151 	INIT_LIST_HEAD(&alloc->buffers);
1152 }
1153 
1154 int binder_alloc_shrinker_init(void)
1155 {
1156 	int ret;
1157 
1158 	ret = list_lru_init(&binder_freelist);
1159 	if (ret)
1160 		return ret;
1161 
1162 	binder_shrinker = shrinker_alloc(0, "android-binder");
1163 	if (!binder_shrinker) {
1164 		list_lru_destroy(&binder_freelist);
1165 		return -ENOMEM;
1166 	}
1167 
1168 	binder_shrinker->count_objects = binder_shrink_count;
1169 	binder_shrinker->scan_objects = binder_shrink_scan;
1170 
1171 	shrinker_register(binder_shrinker);
1172 
1173 	return 0;
1174 }
1175 
1176 void binder_alloc_shrinker_exit(void)
1177 {
1178 	shrinker_free(binder_shrinker);
1179 	list_lru_destroy(&binder_freelist);
1180 }
1181 
1182 /**
1183  * check_buffer() - verify that buffer/offset is safe to access
1184  * @alloc: binder_alloc for this proc
1185  * @buffer: binder buffer to be accessed
1186  * @offset: offset into @buffer data
1187  * @bytes: bytes to access from offset
1188  *
1189  * Check that the @offset/@bytes are within the size of the given
1190  * @buffer and that the buffer is currently active and not freeable.
1191  * Offsets must also be multiples of sizeof(u32). The kernel is
1192  * allowed to touch the buffer in two cases:
1193  *
1194  * 1) when the buffer is being created:
1195  *     (buffer->free == 0 && buffer->allow_user_free == 0)
1196  * 2) when the buffer is being torn down:
1197  *     (buffer->free == 0 && buffer->transaction == NULL).
1198  *
1199  * Return: true if the buffer is safe to access
1200  */
1201 static inline bool check_buffer(struct binder_alloc *alloc,
1202 				struct binder_buffer *buffer,
1203 				binder_size_t offset, size_t bytes)
1204 {
1205 	size_t buffer_size = binder_alloc_buffer_size(alloc, buffer);
1206 
1207 	return buffer_size >= bytes &&
1208 		offset <= buffer_size - bytes &&
1209 		IS_ALIGNED(offset, sizeof(u32)) &&
1210 		!buffer->free &&
1211 		(!buffer->allow_user_free || !buffer->transaction);
1212 }
1213 
1214 /**
1215  * binder_alloc_copy_user_to_buffer() - copy src user to tgt user
1216  * @alloc: binder_alloc for this proc
1217  * @buffer: binder buffer to be accessed
1218  * @buffer_offset: offset into @buffer data
1219  * @from: userspace pointer to source buffer
1220  * @bytes: bytes to copy
1221  *
1222  * Copy bytes from source userspace to target buffer.
1223  *
1224  * Return: bytes remaining to be copied
1225  */
1226 unsigned long
1227 binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc,
1228 				 struct binder_buffer *buffer,
1229 				 binder_size_t buffer_offset,
1230 				 const void __user *from,
1231 				 size_t bytes)
1232 {
1233 	if (!check_buffer(alloc, buffer, buffer_offset, bytes))
1234 		return bytes;
1235 
1236 	while (bytes) {
1237 		unsigned long size;
1238 		unsigned long ret;
1239 		struct page *page;
1240 		pgoff_t pgoff;
1241 		void *kptr;
1242 
1243 		page = binder_alloc_get_page(alloc, buffer,
1244 					     buffer_offset, &pgoff);
1245 		size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
1246 		kptr = kmap_local_page(page) + pgoff;
1247 		ret = copy_from_user(kptr, from, size);
1248 		kunmap_local(kptr);
1249 		if (ret)
1250 			return bytes - size + ret;
1251 		bytes -= size;
1252 		from += size;
1253 		buffer_offset += size;
1254 	}
1255 	return 0;
1256 }
1257 
1258 static int binder_alloc_do_buffer_copy(struct binder_alloc *alloc,
1259 				       bool to_buffer,
1260 				       struct binder_buffer *buffer,
1261 				       binder_size_t buffer_offset,
1262 				       void *ptr,
1263 				       size_t bytes)
1264 {
1265 	/* All copies must be 32-bit aligned and 32-bit size */
1266 	if (!check_buffer(alloc, buffer, buffer_offset, bytes))
1267 		return -EINVAL;
1268 
1269 	while (bytes) {
1270 		unsigned long size;
1271 		struct page *page;
1272 		pgoff_t pgoff;
1273 
1274 		page = binder_alloc_get_page(alloc, buffer,
1275 					     buffer_offset, &pgoff);
1276 		size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
1277 		if (to_buffer)
1278 			memcpy_to_page(page, pgoff, ptr, size);
1279 		else
1280 			memcpy_from_page(ptr, page, pgoff, size);
1281 		bytes -= size;
1282 		pgoff = 0;
1283 		ptr = ptr + size;
1284 		buffer_offset += size;
1285 	}
1286 	return 0;
1287 }
1288 
1289 int binder_alloc_copy_to_buffer(struct binder_alloc *alloc,
1290 				struct binder_buffer *buffer,
1291 				binder_size_t buffer_offset,
1292 				void *src,
1293 				size_t bytes)
1294 {
1295 	return binder_alloc_do_buffer_copy(alloc, true, buffer, buffer_offset,
1296 					   src, bytes);
1297 }
1298 
1299 int binder_alloc_copy_from_buffer(struct binder_alloc *alloc,
1300 				  void *dest,
1301 				  struct binder_buffer *buffer,
1302 				  binder_size_t buffer_offset,
1303 				  size_t bytes)
1304 {
1305 	return binder_alloc_do_buffer_copy(alloc, false, buffer, buffer_offset,
1306 					   dest, bytes);
1307 }
1308