// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

use kernel::{
    page::PAGE_SIZE,
    prelude::*,
    rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation},
    seq_file::SeqFile,
    seq_print,
    task::Pid,
};

use crate::range_alloc::{DescriptorState, FreedRange, Range};

/// Keeps track of allocations in a process' mmap.
///
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
/// has metadata related to the allocation. We also keep track of available free space.
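///
/// For example, a 128-byte mmap holding a single 32-byte allocation at offset 64 is
/// tracked as three descriptors in `tree`: a free range [0, 64), the allocated range
/// [64, 96), and a free range [96, 128). The two free ranges also appear in
/// `free_tree`, keyed by `(size, offset)`, i.e. as `(64, 0)` and `(32, 96)`.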
pub(super) struct TreeRangeAllocator<T> {
    /// This collection contains descriptors for *both* ranges containing an allocation, *and* free
    /// ranges between allocations. The free ranges get merged, so there are never two free ranges
    /// next to each other.
    tree: RBTree<usize, Descriptor<T>>,
    /// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size,
    /// letting us look up the smallest range whose size is at least some lower bound.
    free_tree: RBTree<FreeKey, ()>,
    size: usize,
    free_oneway_space: usize,
}

impl<T> TreeRangeAllocator<T> {
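    /// Builds a tree allocator from the offset-ordered ranges of an
    /// `ArrayRangeAllocator`, inserting a free descriptor for each gap between
    /// consecutive allocations and consuming the node reservations in `alloc`.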
    pub(crate) fn from_array(
        size: usize,
        ranges: &mut KVec<Range<T>>,
        alloc: &mut FromArrayAllocs<T>,
    ) -> Self {
        let mut tree = TreeRangeAllocator {
            tree: RBTree::new(),
            free_tree: RBTree::new(),
            size,
            free_oneway_space: size / 2,
        };

        let mut free_offset = 0;
        for range in ranges.drain_all() {
            let free_size = range.offset - free_offset;
            if free_size > 0 {
                let free_node = alloc.free_tree.pop().unwrap();
                tree.free_tree
                    .insert(free_node.into_node((free_size, free_offset), ()));
                let tree_node = alloc.tree.pop().unwrap();
                tree.tree.insert(
                    tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)),
                );
            }
            free_offset = range.endpoint();

            if range.state.is_oneway() {
                tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size);
            }

            let free_res = alloc.free_tree.pop().unwrap();
            let tree_node = alloc.tree.pop().unwrap();
            let mut desc = Descriptor::new(range.offset, range.size);
            desc.state = Some((range.state, free_res));
            tree.tree.insert(tree_node.into_node(range.offset, desc));
        }

        // After the last range, we may need a free range.
        if free_offset < size {
            let free_size = size - free_offset;
            let free_node = alloc.free_tree.pop().unwrap();
            tree.free_tree
                .insert(free_node.into_node((free_size, free_offset), ()));
            let tree_node = alloc.tree.pop().unwrap();
            tree.tree
                .insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)));
        }

        tree
    }

    pub(crate) fn is_empty(&self) -> bool {
        let mut tree_iter = self.tree.values();
        // There's always at least one range, because index zero is either the start of a free or
        // allocated range.
        let first_value = tree_iter.next().unwrap();
        if tree_iter.next().is_some() {
            // There are never two free ranges next to each other, so if there is more than one
            // descriptor, then at least one of them must hold an allocated range.
            return false;
        }
        // There is only one descriptor. Return true if it is for a free range.
        first_value.state.is_none()
    }

    pub(crate) fn total_size(&self) -> usize {
        self.size
    }

    pub(crate) fn free_oneway_space(&self) -> usize {
        self.free_oneway_space
    }

    pub(crate) fn count_buffers(&self) -> usize {
        self.tree
            .values()
            .filter(|desc| desc.state.is_some())
            .count()
    }

    pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
        for desc in self.tree.values() {
            let state = match &desc.state {
                Some(state) => &state.0,
                None => continue,
            };
            seq_print!(
                m,
                "  buffer: {} size {} pid {}",
                desc.offset,
                desc.size,
                state.pid(),
            );
            if state.is_oneway() {
                seq_print!(m, " oneway");
            }
            match state {
                DescriptorState::Reserved(_res) => {
                    seq_print!(m, " reserved\n");
                }
                DescriptorState::Allocated(_alloc) => {
                    seq_print!(m, " allocated\n");
                }
            }
        }
        Ok(())
    }

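    /// Finds the smallest free range that can hold at least `size` bytes
    /// (best-fit), using the `(size, offset)` ordering of `free_tree`.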
    fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> {
        let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?;
        let ((_, offset), ()) = free_cursor.current();
        self.tree.get_mut(offset)
    }

    /// Try to reserve a new buffer, using the provided allocation if necessary.
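    ///
    /// For example, reserving 16 bytes out of a free range [0, 64) shrinks the
    /// existing descriptor to the reserved range [0, 16) and inserts a new free
    /// descriptor for [16, 64), built from the nodes preallocated in `alloc`.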
    pub(crate) fn reserve_new(
        &mut self,
        debug_id: usize,
        size: usize,
        is_oneway: bool,
        pid: Pid,
        alloc: ReserveNewTreeAlloc<T>,
    ) -> Result<(usize, bool)> {
        // Compute new value of free_oneway_space, which is set only on success.
        let new_oneway_space = if is_oneway {
            match self.free_oneway_space.checked_sub(size) {
                Some(new_oneway_space) => new_oneway_space,
                None => return Err(ENOSPC),
            }
        } else {
            self.free_oneway_space
        };

        // Start detecting spammers once we have less than 20%
        // of async space left (which is less than 10% of total
        // buffer size).
        //
        // (This will short-circuit, so `low_oneway_space` is
        // only called when necessary.)
        let oneway_spam_detected =
            is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid);

        let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) {
            None => {
                pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size);
                return Err(ENOSPC);
            }
            Some(desc) => {
                let found_size = desc.size;
                let found_offset = desc.offset;

                // In case we need to break up the descriptor into a reserved
                // front part and a free remainder.
                let new_desc = Descriptor::new(found_offset + size, found_size - size);
                let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc);

                desc.state = Some((
                    DescriptorState::new(is_oneway, debug_id, pid),
                    desc_node_res,
                ));
                desc.size = size;

                (found_size, found_offset, tree_node, free_tree_node)
            }
        };
        self.free_oneway_space = new_oneway_space;
        self.free_tree.remove(&(found_size, found_off));

        if found_size != size {
            self.tree.insert(tree_node);
            self.free_tree.insert(free_tree_node);
        }

        Ok((found_off, oneway_spam_detected))
    }

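    /// Aborts the reservation at `offset`, turning it back into a free range.
    ///
    /// Adjacent free ranges are merged into the freed descriptor, and the
    /// returned [`FreedRange`] describes which pages became entirely free.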
    pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
        let mut cursor = self.tree.cursor_lower_bound(&offset).ok_or_else(|| {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            EINVAL
        })?;

        let (_, desc) = cursor.current_mut();

        if desc.offset != offset {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            return Err(EINVAL);
        }

        let (reservation, free_node_res) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => {
                (None, Ok((reservation, free_node_res)))
            }
            None => {
                pr_warn!(
                    "EINVAL from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (None, Err(EINVAL))
            }
            allocated => {
                pr_warn!(
                    "EPERM from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (allocated, Err(EPERM))
            }
        })?;

        let mut size = desc.size;
        let mut offset = desc.offset;
        let free_oneway_space_add = if reservation.is_oneway { size } else { 0 };

        self.free_oneway_space += free_oneway_space_add;

        let mut freed_range = FreedRange::interior_pages(offset, size);
        // Compute how large the next free region needs to be to include one more page in
        // the newly freed range.
        let add_next_page_needed = match (offset + size) % PAGE_SIZE {
            0 => usize::MAX,
            unalign => PAGE_SIZE - unalign,
        };
        // Compute how large the previous free region needs to be to include one more page
        // in the newly freed range.
        let add_prev_page_needed = match offset % PAGE_SIZE {
            0 => usize::MAX,
            unalign => unalign,
        };
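        // For example, if PAGE_SIZE is 0x1000 and the freed range is
        // [0x1800, 0x3800), only the page [0x2000, 0x3000) is interior. A next
        // free region of at least 0x800 bytes reaches 0x4000 and also frees the
        // page [0x3000, 0x4000); a previous free region of at least 0x800 bytes
        // reaches back to 0x1000 and also frees the page [0x1000, 0x2000).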

        // Merge next into current if next is free
        let remove_next = match cursor.peek_next() {
            Some((_, next)) if next.state.is_none() => {
                if next.size >= add_next_page_needed {
                    freed_range.end_page_idx += 1;
                }
                self.free_tree.remove(&(next.size, next.offset));
                size += next.size;
                true
            }
            _ => false,
        };

        if remove_next {
            let (_, desc) = cursor.current_mut();
            desc.size = size;
            cursor.remove_next();
        }

        // Merge current into prev if prev is free
        match cursor.peek_prev_mut() {
            Some((_, prev)) if prev.state.is_none() => {
                if prev.size >= add_prev_page_needed {
                    freed_range.start_page_idx -= 1;
                }
                // merge previous with current, remove current
                self.free_tree.remove(&(prev.size, prev.offset));
                offset = prev.offset;
                size += prev.size;
                prev.size = size;
                cursor.remove_current();
            }
            _ => {}
        };

        self.free_tree
            .insert(free_node_res.into_node((size, offset), ()));

        Ok(freed_range)
    }

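    /// Commits the reservation at `offset`, moving it from
    /// [`DescriptorState::Reserved`] to [`DescriptorState::Allocated`] and
    /// storing `data` with the allocation.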
    pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
        let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?;

        desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => (
                Some((
                    DescriptorState::Allocated(reservation.allocate(data.take())),
                    free_node_res,
                )),
                Ok(()),
            ),
            other => (other, Err(ENOENT)),
        })
    }

    /// Moves the entry at the given offset from [`DescriptorState::Allocated`] back to
    /// [`DescriptorState::Reserved`].
    ///
    /// Returns the size of the existing entry, its debug id, and the data associated with it.
    pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
        let desc = self.tree.get_mut(&offset).ok_or_else(|| {
            pr_warn!(
                "ENOENT from range_alloc.reserve_existing - offset: {}",
                offset
            );
            ENOENT
        })?;

        let (debug_id, data) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Allocated(allocation), free_node_res)) => {
                let (reservation, data) = allocation.deallocate();
                let debug_id = reservation.debug_id;
                (
                    Some((DescriptorState::Reserved(reservation), free_node_res)),
                    Ok((debug_id, data)),
                )
            }
            other => {
                pr_warn!(
                    "ENOENT from range_alloc.reserve_existing - offset: {}",
                    offset
                );
                (other, Err(ENOENT))
            }
        })?;

        Ok((desc.size, debug_id, data))
    }

    /// Call the provided callback at every allocated region.
    ///
    /// This destroys the range allocator. Used only during shutdown.
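    ///
    /// The callback receives `(offset, size, debug_id, data)` for each
    /// allocated range.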
    pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
        for (_, desc) in self.tree.iter_mut() {
            if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state {
                callback(
                    desc.offset,
                    desc.size,
                    allocation.debug_id(),
                    allocation.take(),
                );
            }
        }
    }

    /// Checks the number and total size of oneway buffers allocated by the calling pid.
    ///
    /// The idea is that once we cross the threshold, whoever is responsible
    /// for the low async space is likely to try to send another async transaction,
    /// and at some point we'll catch them in the act. This is more efficient
    /// than keeping a map per pid.
    fn low_oneway_space(&self, calling_pid: Pid) -> bool {
        let mut total_alloc_size = 0;
        let mut num_buffers = 0;
        for (_, desc) in self.tree.iter() {
            if let Some((state, _)) = &desc.state {
                if state.is_oneway() && state.pid() == calling_pid {
                    total_alloc_size += desc.size;
                    num_buffers += 1;
                }
            }
        }

        // Warn if this pid has more than 50 transactions, or more than 50% of
        // async space (which is 25% of total buffer size). Oneway spam is only
        // detected when the threshold is exceeded.
        num_buffers > 50 || total_alloc_size > self.size / 4
    }
}

type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes);
struct Descriptor<T> {
    size: usize,
    offset: usize,
    state: Option<TreeDescriptorState<T>>,
}

impl<T> Descriptor<T> {
    fn new(offset: usize, size: usize) -> Self {
        Self {
            size,
            offset,
            state: None,
        }
    }

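    /// Takes the current state out, passes it to `f`, and installs whatever
    /// state `f` returns, forwarding the `Result` that `f` produced.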
    fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data>
    where
        F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>),
    {
        let (new_state, result) = f(self.state.take());
        self.state = new_state;
        result
    }
}

// (Descriptor.size, Descriptor.offset)
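// Tuples compare lexicographically, so `cursor_lower_bound(&(size, 0))` in
// `find_best_match` yields the smallest free range of at least `size` bytes,
// preferring the lowest offset among ranges of equal size.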
type FreeKey = (usize, usize);
type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>;

/// An allocation for use by `reserve_new`.
pub(crate) struct ReserveNewTreeAlloc<T> {
    tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>,
    free_tree_node_res: FreeNodeRes,
    desc_node_res: FreeNodeRes,
}

impl<T> ReserveNewTreeAlloc<T> {
    pub(crate) fn try_new() -> Result<Self> {
        let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        Ok(Self {
            tree_node_res,
            free_tree_node_res,
            desc_node_res,
        })
    }

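    /// Converts the reservations into tree nodes for the new free descriptor
    /// `desc`, returning the leftover reservation that is stored with the
    /// reserved range and consumed when that range is later freed.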
    fn initialize(
        self,
        desc: Descriptor<T>,
    ) -> (
        RBTreeNode<usize, Descriptor<T>>,
        RBTreeNode<FreeKey, ()>,
        FreeNodeRes,
    ) {
        let size = desc.size;
        let offset = desc.offset;
        (
            self.tree_node_res.into_node(offset, desc),
            self.free_tree_node_res.into_node((size, offset), ()),
            self.desc_node_res,
        )
    }
}

/// An allocation for creating a tree from an `ArrayRangeAllocator`.
pub(crate) struct FromArrayAllocs<T> {
    tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>,
    free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>,
}

impl<T> FromArrayAllocs<T> {
    pub(crate) fn try_new(len: usize) -> Result<Self> {
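        // Worst case: `len` allocated ranges interleaved with `len + 1` free
        // ranges, so reserve a node for each descriptor.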
        let num_descriptors = 2 * len + 1;

        let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        Ok(Self { tree, free_tree })
    }
}
489