// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

use kernel::{
    page::PAGE_SIZE,
    prelude::*,
    rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation},
    seq_file::SeqFile,
    seq_print,
    task::Pid,
};

use crate::range_alloc::{DescriptorState, FreedRange, Range};

/// Keeps track of allocations in a process' mmap.
///
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
/// has metadata related to the allocation. We also keep track of available free space.
pub(super) struct TreeRangeAllocator<T> {
    /// This collection contains descriptors for *both* ranges containing an allocation, *and* free
    /// ranges between allocations. The free ranges get merged, so there are never two free ranges
    /// next to each other.
    tree: RBTree<usize, Descriptor<T>>,
    /// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size,
    /// letting us look up the smallest range whose size is at least some lower bound.
    free_tree: RBTree<FreeKey, ()>,
    size: usize,
    free_oneway_space: usize,
}
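
// Layout invariant, sketched (illustrative numbers) for a 128-byte mmap
// holding two allocations at offsets 0 and 96:
//
//     offset: 0         32                  96        128
//             [ alloc  ][ free             ][ alloc   ]
//
// `tree` holds all three descriptors keyed by offset (0, 32, and 96), while
// `free_tree` holds a single entry keyed by `(size, offset) = (64, 32)`. Free
// neighbours are always merged, so no two free descriptors are ever adjacent.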

impl<T> TreeRangeAllocator<T> {
    pub(crate) fn from_array(
        size: usize,
        ranges: &mut KVec<Range<T>>,
        alloc: &mut FromArrayAllocs<T>,
    ) -> Self {
        let mut tree = TreeRangeAllocator {
            tree: RBTree::new(),
            free_tree: RBTree::new(),
            size,
            free_oneway_space: size / 2,
        };

        let mut free_offset = 0;
        for range in ranges.drain_all() {
            let free_size = range.offset - free_offset;
            if free_size > 0 {
                let free_node = alloc.free_tree.pop().unwrap();
                tree.free_tree
                    .insert(free_node.into_node((free_size, free_offset), ()));
                let tree_node = alloc.tree.pop().unwrap();
                tree.tree.insert(
                    tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)),
                );
            }
            free_offset = range.endpoint();

            if range.state.is_oneway() {
                tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size);
            }

            let free_res = alloc.free_tree.pop().unwrap();
            let tree_node = alloc.tree.pop().unwrap();
            let mut desc = Descriptor::new(range.offset, range.size);
            desc.state = Some((range.state, free_res));
            tree.tree.insert(tree_node.into_node(range.offset, desc));
        }

        // After the last range, we may need a free range.
        if free_offset < size {
            let free_size = size - free_offset;
            let free_node = alloc.free_tree.pop().unwrap();
            tree.free_tree
                .insert(free_node.into_node((free_size, free_offset), ()));
            let tree_node = alloc.tree.pop().unwrap();
            tree.tree
                .insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)));
        }

        tree
    }

    pub(crate) fn is_empty(&self) -> bool {
        let mut tree_iter = self.tree.values();
        // There's always at least one range, because index zero is either the start of a free or
        // allocated range.
        let first_value = tree_iter.next().unwrap();
        if tree_iter.next().is_some() {
            // There are never two free ranges next to each other, so if there is more than one
            // descriptor, then at least one of them must hold an allocated range.
            return false;
        }
        // There is only one descriptor. Return true if it is for a free range.
        first_value.state.is_none()
    }

    pub(crate) fn total_size(&self) -> usize {
        self.size
    }

    pub(crate) fn free_oneway_space(&self) -> usize {
        self.free_oneway_space
    }

    pub(crate) fn count_buffers(&self) -> usize {
        self.tree
            .values()
            .filter(|desc| desc.state.is_some())
            .count()
    }

    pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
        for desc in self.tree.values() {
            let state = match &desc.state {
                Some(state) => &state.0,
                None => continue,
            };
            seq_print!(
                m,
                " buffer: {} size {} pid {}",
                desc.offset,
                desc.size,
                state.pid(),
            );
            if state.is_oneway() {
                seq_print!(m, " oneway");
            }
            match state {
                DescriptorState::Reserved(_res) => {
                    seq_print!(m, " reserved\n");
                }
                DescriptorState::Allocated(_alloc) => {
                    seq_print!(m, " allocated\n");
                }
            }
        }
        Ok(())
    }
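
    // Best-fit search: `free_tree` is keyed by `(size, offset)`, so looking up
    // the lower bound of `(size, 0)` yields the smallest free range that is at
    // least `size` bytes. A minimal sketch of the idea (illustrative only; the
    // `demo_` name is hypothetical and not part of this driver):
    //
    //     fn demo_best_fit(free_tree: &mut RBTree<FreeKey, ()>, want: usize) -> Option<usize> {
    //         let cursor = free_tree.cursor_lower_bound(&(want, 0))?;
    //         let ((size, offset), ()) = cursor.current();
    //         // The key ordering guarantees that `*size >= want`.
    //         debug_assert!(*size >= want);
    //         Some(*offset)
    //     }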

    fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> {
        let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?;
        let ((_, offset), ()) = free_cursor.current();
        self.tree.get_mut(offset)
    }

    /// Try to reserve a new buffer, using the provided allocation if necessary.
    pub(crate) fn reserve_new(
        &mut self,
        debug_id: usize,
        size: usize,
        is_oneway: bool,
        pid: Pid,
        alloc: ReserveNewTreeAlloc<T>,
    ) -> Result<(usize, bool)> {
        // Compute the new value of free_oneway_space, which is set only on success.
        let new_oneway_space = if is_oneway {
            match self.free_oneway_space.checked_sub(size) {
                Some(new_oneway_space) => new_oneway_space,
                None => return Err(ENOSPC),
            }
        } else {
            self.free_oneway_space
        };

        // Start detecting spammers once we have less than 20%
        // of async space left (which is less than 10% of total
        // buffer size).
        //
        // (This will short-circuit, so `low_oneway_space` is
        // only called when necessary.)
        let oneway_spam_detected =
            is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid);

        let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) {
            None => {
                pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size);
                return Err(ENOSPC);
            }
            Some(desc) => {
                let found_size = desc.size;
                let found_offset = desc.offset;

                // In case we need to break up the descriptor
                let new_desc = Descriptor::new(found_offset + size, found_size - size);
                let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc);

                desc.state = Some((
                    DescriptorState::new(is_oneway, debug_id, pid),
                    desc_node_res,
                ));
                desc.size = size;

                (found_size, found_offset, tree_node, free_tree_node)
            }
        };
        self.free_oneway_space = new_oneway_space;
        self.free_tree.remove(&(found_size, found_off));

        if found_size != size {
            self.tree.insert(tree_node);
            self.free_tree.insert(free_tree_node);
        }

        Ok((found_off, oneway_spam_detected))
    }
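
    // Splitting sketch (illustrative numbers): reserving 128 bytes out of a
    // 512-byte free range at offset 0 shrinks the existing descriptor to a
    // 128-byte reserved range at offset 0, and the preallocated nodes are used
    // to insert a new 384-byte free range at offset 128:
    //
    //     let alloc = ReserveNewTreeAlloc::try_new()?; // allocates, may sleep
    //     let (offset, spam) = tree.reserve_new(debug_id, 128, false, pid, alloc)?;
    //     assert_eq!(offset, 0);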

    pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
        let mut cursor = self.tree.cursor_lower_bound(&offset).ok_or_else(|| {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            EINVAL
        })?;

        let (_, desc) = cursor.current_mut();

        if desc.offset != offset {
            pr_warn!(
                "EINVAL from range_alloc.reservation_abort - offset: {}",
                offset
            );
            return Err(EINVAL);
        }

        let (reservation, free_node_res) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => {
                (None, Ok((reservation, free_node_res)))
            }
            None => {
                pr_warn!(
                    "EINVAL from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (None, Err(EINVAL))
            }
            allocated => {
                pr_warn!(
                    "EPERM from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (allocated, Err(EPERM))
            }
        })?;

        let mut size = desc.size;
        let mut offset = desc.offset;
        let free_oneway_space_add = if reservation.is_oneway { size } else { 0 };

        self.free_oneway_space += free_oneway_space_add;

        let mut freed_range = FreedRange::interior_pages(offset, size);
        // Compute how large the next free region needs to be to include one more page in
        // the newly freed range.
        let add_next_page_needed = match (offset + size) % PAGE_SIZE {
            0 => usize::MAX,
            unalign => PAGE_SIZE - unalign,
        };
        // Compute how large the previous free region needs to be to include one more page
        // in the newly freed range.
        let add_prev_page_needed = match offset % PAGE_SIZE {
            0 => usize::MAX,
            unalign => unalign,
        };
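
        // Worked example (illustrative, PAGE_SIZE = 0x1000): freeing
        // [0x1200, 0x2e00) fully covers no page on its own. If the next free
        // range contributes at least 0x1000 - 0xe00 = 0x200 bytes, the page at
        // 0x2000 becomes entirely free and `end_page_idx` grows by one; if the
        // previous free range contributes at least 0x1200 % 0x1000 = 0x200
        // bytes, the page at 0x1000 becomes entirely free and `start_page_idx`
        // shrinks by one.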

        // Merge next into current if next is free
        let remove_next = match cursor.peek_next() {
            Some((_, next)) if next.state.is_none() => {
                if next.size >= add_next_page_needed {
                    freed_range.end_page_idx += 1;
                }
                self.free_tree.remove(&(next.size, next.offset));
                size += next.size;
                true
            }
            _ => false,
        };

        if remove_next {
            let (_, desc) = cursor.current_mut();
            desc.size = size;
            cursor.remove_next();
        }

        // Merge current into prev if prev is free
        match cursor.peek_prev_mut() {
            Some((_, prev)) if prev.state.is_none() => {
                if prev.size >= add_prev_page_needed {
                    freed_range.start_page_idx -= 1;
                }
                // merge previous with current, remove current
                self.free_tree.remove(&(prev.size, prev.offset));
                offset = prev.offset;
                size += prev.size;
                prev.size = size;
                cursor.remove_current();
            }
            _ => {}
        };

        self.free_tree
            .insert(free_node_res.into_node((size, offset), ()));

        Ok(freed_range)
    }

    pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
        let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?;

        desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => (
                Some((
                    DescriptorState::Allocated(reservation.allocate(data.take())),
                    free_node_res,
                )),
                Ok(()),
            ),
            other => (other, Err(ENOENT)),
        })
    }
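
    // Descriptor lifecycle, as driven by the methods above and below (a free
    // descriptor has `state == None`):
    //
    //     free      --reserve_new--------->  Reserved
    //     Reserved  --reservation_commit-->  Allocated
    //     Allocated --reserve_existing---->  Reserved
    //     Reserved  --reservation_abort--->  free (merged with free neighbours)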

    /// Takes an entry at the given offset from [`DescriptorState::Allocated`] to
    /// [`DescriptorState::Reserved`].
    ///
    /// Returns the size of the existing entry, its debug id, and the data associated with it.
    pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
        let desc = self.tree.get_mut(&offset).ok_or_else(|| {
            pr_warn!(
                "ENOENT from range_alloc.reserve_existing - offset: {}",
                offset
            );
            ENOENT
        })?;

        let (debug_id, data) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Allocated(allocation), free_node_res)) => {
                let (reservation, data) = allocation.deallocate();
                let debug_id = reservation.debug_id;
                (
                    Some((DescriptorState::Reserved(reservation), free_node_res)),
                    Ok((debug_id, data)),
                )
            }
            other => {
                pr_warn!(
                    "ENOENT from range_alloc.reserve_existing - offset: {}",
                    offset
                );
                (other, Err(ENOENT))
            }
        })?;

        Ok((desc.size, debug_id, data))
    }

    /// Call the provided callback at every allocated region.
    ///
    /// This destroys the range allocator. Used only during shutdown.
    pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
        for (_, desc) in self.tree.iter_mut() {
            if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state {
                callback(
                    desc.offset,
                    desc.size,
                    allocation.debug_id(),
                    allocation.take(),
                );
            }
        }
    }
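
    // Shutdown sketch (illustrative; `release_buffer` is a hypothetical
    // helper): drain every allocated buffer so its data can be cleaned up,
    // after which the allocator is discarded.
    //
    //     tree.take_for_each(|offset, size, debug_id, data| {
    //         release_buffer(offset, size, debug_id, data);
    //     });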

    /// Find the number and total size of buffers allocated by the current caller.
    ///
    /// The idea is that once we cross the threshold, whoever is responsible
    /// for the low async space is likely to try to send another async transaction,
    /// and at some point we'll catch them in the act. This is more efficient
    /// than keeping a map per pid.
    fn low_oneway_space(&self, calling_pid: Pid) -> bool {
        let mut total_alloc_size = 0;
        let mut num_buffers = 0;
        for (_, desc) in self.tree.iter() {
            if let Some((state, _)) = &desc.state {
                if state.is_oneway() && state.pid() == calling_pid {
                    total_alloc_size += desc.size;
                    num_buffers += 1;
                }
            }
        }

        // Warn if this pid has more than 50 transactions, or more than 50% of
        // async space (which is 25% of total buffer size). Oneway spam is only
        // detected when the threshold is exceeded.
        num_buffers > 50 || total_alloc_size > self.size / 4
    }
}

type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes);

struct Descriptor<T> {
    size: usize,
    offset: usize,
    state: Option<TreeDescriptorState<T>>,
}

impl<T> Descriptor<T> {
    fn new(offset: usize, size: usize) -> Self {
        Self {
            size,
            offset,
            state: None,
        }
    }

    fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data>
    where
        F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>),
    {
        let (new_state, result) = f(self.state.take());
        self.state = new_state;
        result
    }
}

// (Descriptor.size, Descriptor.offset)
type FreeKey = (usize, usize);
type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>;

/// An allocation for use by `reserve_new`.
pub(crate) struct ReserveNewTreeAlloc<T> {
    tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>,
    free_tree_node_res: FreeNodeRes,
    desc_node_res: FreeNodeRes,
}

impl<T> ReserveNewTreeAlloc<T> {
    pub(crate) fn try_new() -> Result<Self> {
        let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        Ok(Self {
            tree_node_res,
            free_tree_node_res,
            desc_node_res,
        })
    }

    fn initialize(
        self,
        desc: Descriptor<T>,
    ) -> (
        RBTreeNode<usize, Descriptor<T>>,
        RBTreeNode<FreeKey, ()>,
        FreeNodeRes,
    ) {
        let size = desc.size;
        let offset = desc.offset;
        (
            self.tree_node_res.into_node(offset, desc),
            self.free_tree_node_res.into_node((size, offset), ()),
            self.desc_node_res,
        )
    }
}
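
// `ReserveNewTreeAlloc` exists so that every (potentially sleeping) GFP_KERNEL
// allocation happens before `reserve_new` runs; `reserve_new` itself only
// consumes the preallocated nodes and never allocates. Sketch of the intended
// call pattern (illustrative; the lock is hypothetical):
//
//     let alloc = ReserveNewTreeAlloc::try_new()?; // may sleep
//     let mut inner = process_inner.lock();        // no allocation past here
//     inner.tree.reserve_new(debug_id, size, is_oneway, pid, alloc)?;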

/// An allocation for creating a tree from an `ArrayRangeAllocator`.
pub(crate) struct FromArrayAllocs<T> {
    tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>,
    free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>,
}

impl<T> FromArrayAllocs<T> {
    pub(crate) fn try_new(len: usize) -> Result<Self> {
        // Each of the `len` allocated ranges may be preceded by a free range,
        // and one more free range may follow the last allocation, so at most
        // `len + (len + 1) = 2 * len + 1` descriptors are needed.
        let num_descriptors = 2 * len + 1;

        let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        Ok(Self { tree, free_tree })
    }
}
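
// Conversion sketch (illustrative): switching a process from the array-backed
// allocator to this tree-backed one first preallocates every node that could
// be needed, then rebuilds without any failure point:
//
//     let mut alloc = FromArrayAllocs::try_new(ranges.len())?; // may sleep
//     let tree = TreeRangeAllocator::from_array(size, &mut ranges, &mut alloc);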