xref: /linux/drivers/android/binder/range_alloc/tree.rs (revision eafedbc7c050c44744fbdf80bdf3315e860b7513)
1*eafedbc7SAlice Ryhl // SPDX-License-Identifier: GPL-2.0
2*eafedbc7SAlice Ryhl 
3*eafedbc7SAlice Ryhl // Copyright (C) 2025 Google LLC.
4*eafedbc7SAlice Ryhl 
5*eafedbc7SAlice Ryhl use kernel::{
6*eafedbc7SAlice Ryhl     page::PAGE_SIZE,
7*eafedbc7SAlice Ryhl     prelude::*,
8*eafedbc7SAlice Ryhl     rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation},
9*eafedbc7SAlice Ryhl     seq_file::SeqFile,
10*eafedbc7SAlice Ryhl     seq_print,
11*eafedbc7SAlice Ryhl     task::Pid,
12*eafedbc7SAlice Ryhl };
13*eafedbc7SAlice Ryhl 
14*eafedbc7SAlice Ryhl use crate::range_alloc::{DescriptorState, FreedRange, Range};
15*eafedbc7SAlice Ryhl 
/// Keeps track of allocations in a process' mmap.
///
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
/// has metadata related to the allocation. We also keep track of available free space.
pub(super) struct TreeRangeAllocator<T> {
    /// This collection contains descriptors for *both* ranges containing an allocation, *and* free
    /// ranges between allocations. The free ranges get merged, so there are never two free ranges
    /// next to each other.
    tree: RBTree<usize, Descriptor<T>>,
    /// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size,
    /// letting us look up the smallest range whose size is at least some lower bound.
    free_tree: RBTree<FreeKey, ()>,
    /// Total size of the mmap, in bytes.
    size: usize,
    /// Remaining budget for oneway (async) buffers, in bytes. Initialized to `size / 2` and
    /// decreased/increased as oneway buffers are reserved and freed.
    free_oneway_space: usize,
}
32*eafedbc7SAlice Ryhl 
impl<T> TreeRangeAllocator<T> {
    /// Builds a tree allocator holding the same ranges as an array allocator.
    ///
    /// `ranges` is drained; a descriptor is inserted for each allocated range, and free
    /// descriptors are inserted for the gaps before, between, and after the allocated ranges,
    /// so free ranges are never adjacent to each other.
    ///
    /// NOTE(review): this assumes `ranges` is sorted by offset and non-overlapping — otherwise
    /// `range.offset - free_offset` underflows; confirm with the caller.
    pub(crate) fn from_array(
        size: usize,
        ranges: &mut KVec<Range<T>>,
        alloc: &mut FromArrayAllocs<T>,
    ) -> Self {
        let mut tree = TreeRangeAllocator {
            tree: RBTree::new(),
            free_tree: RBTree::new(),
            size,
            // The oneway budget starts at half of the mmap.
            free_oneway_space: size / 2,
        };

        // Endpoint of the previous range; anything between this and the next range's offset is
        // a free gap that needs its own descriptor.
        let mut free_offset = 0;
        for range in ranges.drain_all() {
            let free_size = range.offset - free_offset;
            if free_size > 0 {
                // The `unwrap`s on `alloc` cannot fail: `FromArrayAllocs::try_new` reserves
                // `2 * len + 1` nodes per tree, enough for `len` allocated ranges plus at
                // most `len + 1` free gaps.
                let free_node = alloc.free_tree.pop().unwrap();
                tree.free_tree
                    .insert(free_node.into_node((free_size, free_offset), ()));
                let tree_node = alloc.tree.pop().unwrap();
                tree.tree.insert(
                    tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)),
                );
            }
            free_offset = range.endpoint();

            // Oneway allocations consume the oneway budget.
            if range.state.is_oneway() {
                tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size);
            }

            // Insert a descriptor for the allocated range itself, carrying over its state and
            // stashing a free-tree node reservation for when it is eventually freed.
            let free_res = alloc.free_tree.pop().unwrap();
            let tree_node = alloc.tree.pop().unwrap();
            let mut desc = Descriptor::new(range.offset, range.size);
            desc.state = Some((range.state, free_res));
            tree.tree.insert(tree_node.into_node(range.offset, desc));
        }

        // After the last range, we may need a free range.
        if free_offset < size {
            let free_size = size - free_offset;
            let free_node = alloc.free_tree.pop().unwrap();
            tree.free_tree
                .insert(free_node.into_node((free_size, free_offset), ()));
            let tree_node = alloc.tree.pop().unwrap();
            tree.tree
                .insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)));
        }

        tree
    }

    /// Returns whether the mmap contains no reserved or allocated ranges.
    pub(crate) fn is_empty(&self) -> bool {
        let mut tree_iter = self.tree.values();
        // There's always at least one range, because index zero is either the start of a free or
        // allocated range.
        let first_value = tree_iter.next().unwrap();
        if tree_iter.next().is_some() {
            // There are never two free ranges next to each other, so if there is more than one
            // descriptor, then at least one of them must hold an allocated range.
            return false;
        }
        // There is only one descriptor. Return true if it is for a free range.
        first_value.state.is_none()
    }

    /// Returns the total size of the mmap, in bytes.
    pub(crate) fn total_size(&self) -> usize {
        self.size
    }

    /// Returns how much of the oneway budget is still available, in bytes.
    pub(crate) fn free_oneway_space(&self) -> usize {
        self.free_oneway_space
    }

    /// Returns the number of ranges that are in use (reserved or allocated).
    pub(crate) fn count_buffers(&self) -> usize {
        self.tree
            .values()
            .filter(|desc| desc.state.is_some())
            .count()
    }

    /// Prints one line per in-use buffer to the given seq file; free ranges are skipped.
    pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
        for desc in self.tree.values() {
            let state = match &desc.state {
                Some(state) => &state.0,
                // Free range; nothing to print.
                None => continue,
            };
            seq_print!(
                m,
                "  buffer: {} size {} pid {}",
                desc.offset,
                desc.size,
                state.pid(),
            );
            if state.is_oneway() {
                seq_print!(m, " oneway");
            }
            match state {
                DescriptorState::Reserved(_res) => {
                    seq_print!(m, " reserved\n");
                }
                DescriptorState::Allocated(_alloc) => {
                    seq_print!(m, " allocated\n");
                }
            }
        }
        Ok(())
    }

    /// Best-fit search: returns the smallest free range whose size is at least `size`.
    ///
    /// `free_tree` keys are `(size, offset)`, so the lower bound of `(size, 0)` is exactly the
    /// smallest adequate free range; its offset is then looked up in the main tree.
    fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> {
        let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?;
        let ((_, offset), ()) = free_cursor.current();
        self.tree.get_mut(offset)
    }

    /// Try to reserve a new buffer, using the provided allocation if necessary.
    ///
    /// On success, returns the offset of the reserved range and whether oneway spam was
    /// detected for `pid`.
    pub(crate) fn reserve_new(
        &mut self,
        debug_id: usize,
        size: usize,
        is_oneway: bool,
        pid: Pid,
        alloc: ReserveNewTreeAlloc<T>,
    ) -> Result<(usize, bool)> {
        // Compute new value of free_oneway_space, which is set only on success.
        let new_oneway_space = if is_oneway {
            match self.free_oneway_space.checked_sub(size) {
                Some(new_oneway_space) => new_oneway_space,
                None => return Err(ENOSPC),
            }
        } else {
            self.free_oneway_space
        };

        // Start detecting spammers once we have less than 20%
        // of async space left (which is less than 10% of total
        // buffer size).
        //
        // (This will short-circuit, so `low_oneway_space` is
        // only called when necessary.)
        let oneway_spam_detected =
            is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid);

        let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) {
            None => {
                pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size);
                return Err(ENOSPC);
            }
            Some(desc) => {
                let found_size = desc.size;
                let found_offset = desc.offset;

                // In case we need to break up the descriptor
                let new_desc = Descriptor::new(found_offset + size, found_size - size);
                let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc);

                // Turn the first `size` bytes of the free range into a reserved range; the
                // leftover node reservation is stashed in the state for the eventual free.
                desc.state = Some((
                    DescriptorState::new(is_oneway, debug_id, pid),
                    desc_node_res,
                ));
                desc.size = size;

                (found_size, found_offset, tree_node, free_tree_node)
            }
        };
        self.free_oneway_space = new_oneway_space;
        // The range is no longer free, so drop its entry from the free tree.
        self.free_tree.remove(&(found_size, found_off));

        // If the free range was larger than requested, insert the remainder as a new free
        // range using the preallocated nodes.
        if found_size != size {
            self.tree.insert(tree_node);
            self.free_tree.insert(free_tree_node);
        }

        Ok((found_off, oneway_spam_detected))
    }

    /// Aborts the reservation at `offset`, turning the range back into a free range.
    ///
    /// The freed range is merged with adjacent free ranges, and the returned [`FreedRange`]
    /// describes which whole pages became unused as a result.
    ///
    /// Fails with `EINVAL` if no reserved range starts exactly at `offset`, and with `EPERM`
    /// if the range is allocated rather than merely reserved.
    pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
        let mut cursor = self.tree.cursor_lower_bound(&offset).ok_or_else(|| {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            EINVAL
        })?;

        let (_, desc) = cursor.current_mut();

        // The lower bound may be a later range; only an exact match is a valid buffer start.
        if desc.offset != offset {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            return Err(EINVAL);
        }

        // Only `Reserved` ranges may be aborted. Take back the reservation and the free-tree
        // node that was stashed when the range was reserved.
        let (reservation, free_node_res) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => {
                (None, Ok((reservation, free_node_res)))
            }
            None => {
                pr_warn!(
                    "EINVAL from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (None, Err(EINVAL))
            }
            allocated => {
                pr_warn!(
                    "EPERM from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (allocated, Err(EPERM))
            }
        })?;

        let mut size = desc.size;
        let mut offset = desc.offset;
        // Freeing a oneway buffer returns its size to the oneway budget.
        let free_oneway_space_add = if reservation.is_oneway { size } else { 0 };

        self.free_oneway_space += free_oneway_space_add;

        let mut freed_range = FreedRange::interior_pages(offset, size);
        // Compute how large the next free region needs to be to include one more page in
        // the newly freed range.
        let add_next_page_needed = match (offset + size) % PAGE_SIZE {
            0 => usize::MAX,
            unalign => PAGE_SIZE - unalign,
        };
        // Compute how large the previous free region needs to be to include one more page
        // in the newly freed range.
        let add_prev_page_needed = match offset % PAGE_SIZE {
            0 => usize::MAX,
            unalign => unalign,
        };

        // Merge next into current if next is free
        let remove_next = match cursor.peek_next() {
            Some((_, next)) if next.state.is_none() => {
                if next.size >= add_next_page_needed {
                    freed_range.end_page_idx += 1;
                }
                self.free_tree.remove(&(next.size, next.offset));
                size += next.size;
                true
            }
            _ => false,
        };

        if remove_next {
            let (_, desc) = cursor.current_mut();
            desc.size = size;
            cursor.remove_next();
        }

        // Merge current into prev if prev is free
        match cursor.peek_prev_mut() {
            Some((_, prev)) if prev.state.is_none() => {
                if prev.size >= add_prev_page_needed {
                    freed_range.start_page_idx -= 1;
                }
                // merge previous with current, remove current
                self.free_tree.remove(&(prev.size, prev.offset));
                offset = prev.offset;
                size += prev.size;
                prev.size = size;
                cursor.remove_current();
            }
            _ => {}
        };

        // Insert the merged free range using the node reservation taken from the descriptor's
        // state above, so this insertion needs no allocation.
        self.free_tree
            .insert(free_node_res.into_node((size, offset), ()));

        Ok(freed_range)
    }

    /// Transitions the entry at `offset` from [`DescriptorState::Reserved`] to
    /// [`DescriptorState::Allocated`], storing `data` in the allocation.
    ///
    /// `data` is only taken out of the option when the transition succeeds.
    pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
        let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?;

        desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => (
                Some((
                    DescriptorState::Allocated(reservation.allocate(data.take())),
                    free_node_res,
                )),
                Ok(()),
            ),
            other => (other, Err(ENOENT)),
        })
    }

    /// Takes an entry at the given offset from [`DescriptorState::Allocated`] to
    /// [`DescriptorState::Reserved`].
    ///
    /// Returns the size of the existing entry, its debug id, and the data associated with it.
    pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
        let desc = self.tree.get_mut(&offset).ok_or_else(|| {
            pr_warn!(
                "ENOENT from range_alloc.reserve_existing - offset: {}",
                offset
            );
            ENOENT
        })?;

        let (debug_id, data) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Allocated(allocation), free_node_res)) => {
                let (reservation, data) = allocation.deallocate();
                let debug_id = reservation.debug_id;
                (
                    Some((DescriptorState::Reserved(reservation), free_node_res)),
                    Ok((debug_id, data)),
                )
            }
            other => {
                pr_warn!(
                    "ENOENT from range_alloc.reserve_existing - offset: {}",
                    offset
                );
                (other, Err(ENOENT))
            }
        })?;

        Ok((desc.size, debug_id, data))
    }

    /// Call the provided callback at every allocated region.
    ///
    /// The callback receives `(offset, size, debug_id, data)` for each allocated range, with
    /// the data taken out of the allocation.
    ///
    /// This destroys the range allocator. Used only during shutdown.
    pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
        for (_, desc) in self.tree.iter_mut() {
            if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state {
                callback(
                    desc.offset,
                    desc.size,
                    allocation.debug_id(),
                    allocation.take(),
                );
            }
        }
    }

    /// Find the amount and size of buffers allocated by the current caller.
    ///
    /// The idea is that once we cross the threshold, whoever is responsible
    /// for the low async space is likely to try to send another async transaction,
    /// and at some point we'll catch them in the act.  This is more efficient
    /// than keeping a map per pid.
    fn low_oneway_space(&self, calling_pid: Pid) -> bool {
        let mut total_alloc_size = 0;
        let mut num_buffers = 0;
        for (_, desc) in self.tree.iter() {
            if let Some((state, _)) = &desc.state {
                if state.is_oneway() && state.pid() == calling_pid {
                    total_alloc_size += desc.size;
                    num_buffers += 1;
                }
            }
        }

        // Warn if this pid has more than 50 transactions, or more than 50% of
        // async space (which is 25% of total buffer size). Oneway spam is only
        // detected when the threshold is exceeded.
        num_buffers > 50 || total_alloc_size > self.size / 4
    }
}
398*eafedbc7SAlice Ryhl 
/// State stored for an in-use range: the descriptor state plus a pre-reserved `free_tree` node
/// that is consumed when the range is eventually freed.
type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes);
struct Descriptor<T> {
    /// Size of this range, in bytes.
    size: usize,
    /// Offset of this range in the mmap.
    offset: usize,
    /// `None` if this descriptor represents a free range; otherwise the state of the
    /// reservation/allocation together with its stashed `free_tree` node reservation.
    state: Option<TreeDescriptorState<T>>,
}
405*eafedbc7SAlice Ryhl 
406*eafedbc7SAlice Ryhl impl<T> Descriptor<T> {
407*eafedbc7SAlice Ryhl     fn new(offset: usize, size: usize) -> Self {
408*eafedbc7SAlice Ryhl         Self {
409*eafedbc7SAlice Ryhl             size,
410*eafedbc7SAlice Ryhl             offset,
411*eafedbc7SAlice Ryhl             state: None,
412*eafedbc7SAlice Ryhl         }
413*eafedbc7SAlice Ryhl     }
414*eafedbc7SAlice Ryhl 
415*eafedbc7SAlice Ryhl     fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data>
416*eafedbc7SAlice Ryhl     where
417*eafedbc7SAlice Ryhl         F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>),
418*eafedbc7SAlice Ryhl     {
419*eafedbc7SAlice Ryhl         let (new_state, result) = f(self.state.take());
420*eafedbc7SAlice Ryhl         self.state = new_state;
421*eafedbc7SAlice Ryhl         result
422*eafedbc7SAlice Ryhl     }
423*eafedbc7SAlice Ryhl }
424*eafedbc7SAlice Ryhl 
/// Key type for `free_tree`: `(Descriptor.size, Descriptor.offset)`.
///
/// Sorting by size first lets `find_best_match` do a best-fit lookup with a single
/// `cursor_lower_bound`; the offset breaks ties between equal-sized free ranges.
type FreeKey = (usize, usize);
/// A pre-reserved node for `free_tree`, so inserting a free range needs no allocation.
type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>;
428*eafedbc7SAlice Ryhl 
/// An allocation for use by `reserve_new`.
///
/// Bundles the three rbtree node reservations that `reserve_new` needs when it splits a free
/// range, so node insertion inside `reserve_new` cannot fail.
pub(crate) struct ReserveNewTreeAlloc<T> {
    /// Node for inserting the remainder of a split free range into `tree`.
    tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>,
    /// Node for inserting the remainder of a split free range into `free_tree`.
    free_tree_node_res: FreeNodeRes,
    /// Reservation stored in the new descriptor's state; used when the range is later freed
    /// and must be re-inserted into `free_tree`.
    desc_node_res: FreeNodeRes,
}
435*eafedbc7SAlice Ryhl 
436*eafedbc7SAlice Ryhl impl<T> ReserveNewTreeAlloc<T> {
437*eafedbc7SAlice Ryhl     pub(crate) fn try_new() -> Result<Self> {
438*eafedbc7SAlice Ryhl         let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
439*eafedbc7SAlice Ryhl         let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
440*eafedbc7SAlice Ryhl         let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
441*eafedbc7SAlice Ryhl         Ok(Self {
442*eafedbc7SAlice Ryhl             tree_node_res,
443*eafedbc7SAlice Ryhl             free_tree_node_res,
444*eafedbc7SAlice Ryhl             desc_node_res,
445*eafedbc7SAlice Ryhl         })
446*eafedbc7SAlice Ryhl     }
447*eafedbc7SAlice Ryhl 
448*eafedbc7SAlice Ryhl     fn initialize(
449*eafedbc7SAlice Ryhl         self,
450*eafedbc7SAlice Ryhl         desc: Descriptor<T>,
451*eafedbc7SAlice Ryhl     ) -> (
452*eafedbc7SAlice Ryhl         RBTreeNode<usize, Descriptor<T>>,
453*eafedbc7SAlice Ryhl         RBTreeNode<FreeKey, ()>,
454*eafedbc7SAlice Ryhl         FreeNodeRes,
455*eafedbc7SAlice Ryhl     ) {
456*eafedbc7SAlice Ryhl         let size = desc.size;
457*eafedbc7SAlice Ryhl         let offset = desc.offset;
458*eafedbc7SAlice Ryhl         (
459*eafedbc7SAlice Ryhl             self.tree_node_res.into_node(offset, desc),
460*eafedbc7SAlice Ryhl             self.free_tree_node_res.into_node((size, offset), ()),
461*eafedbc7SAlice Ryhl             self.desc_node_res,
462*eafedbc7SAlice Ryhl         )
463*eafedbc7SAlice Ryhl     }
464*eafedbc7SAlice Ryhl }
465*eafedbc7SAlice Ryhl 
/// An allocation for creating a tree from an `ArrayRangeAllocator`.
pub(crate) struct FromArrayAllocs<T> {
    /// Node reservations for the main `tree` (both allocated ranges and free gaps).
    tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>,
    /// Node reservations for the size-sorted `free_tree`.
    free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>,
}
471*eafedbc7SAlice Ryhl 
472*eafedbc7SAlice Ryhl impl<T> FromArrayAllocs<T> {
473*eafedbc7SAlice Ryhl     pub(crate) fn try_new(len: usize) -> Result<Self> {
474*eafedbc7SAlice Ryhl         let num_descriptors = 2 * len + 1;
475*eafedbc7SAlice Ryhl 
476*eafedbc7SAlice Ryhl         let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
477*eafedbc7SAlice Ryhl         for _ in 0..num_descriptors {
478*eafedbc7SAlice Ryhl             tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
479*eafedbc7SAlice Ryhl         }
480*eafedbc7SAlice Ryhl 
481*eafedbc7SAlice Ryhl         let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
482*eafedbc7SAlice Ryhl         for _ in 0..num_descriptors {
483*eafedbc7SAlice Ryhl             free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
484*eafedbc7SAlice Ryhl         }
485*eafedbc7SAlice Ryhl 
486*eafedbc7SAlice Ryhl         Ok(Self { tree, free_tree })
487*eafedbc7SAlice Ryhl     }
488*eafedbc7SAlice Ryhl }
489