// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

use kernel::{
    page::{PAGE_MASK, PAGE_SIZE},
    prelude::*,
    seq_file::SeqFile,
    seq_print,
    task::Pid,
};

use crate::range_alloc::{DescriptorState, FreedRange, Range};

/// Keeps track of allocations in a process' mmap.
///
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
/// has metadata related to the allocation. We also keep track of available free space.
pub(super) struct ArrayRangeAllocator<T> {
    /// This stores all ranges that are allocated. Unlike the tree based allocator, we do *not*
    /// store the free ranges.
    ///
    /// Sorted by offset.
    pub(super) ranges: KVec<Range<T>>,
    size: usize,
    free_oneway_space: usize,
}

struct FindEmptyRes {
    /// Which index in `ranges` should we insert the new range at?
    ///
    /// Inserting the new range at this index keeps `ranges` sorted.
    insert_at_idx: usize,
    /// Which offset should we insert the new range at?
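    ///
    /// This is a byte offset into the process' mmap.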
    insert_at_offset: usize,
}

impl<T> ArrayRangeAllocator<T> {
    pub(crate) fn new(size: usize, alloc: EmptyArrayAlloc<T>) -> Self {
        Self {
            ranges: alloc.ranges,
            size,
            free_oneway_space: size / 2,
        }
    }

    pub(crate) fn free_oneway_space(&self) -> usize {
        self.free_oneway_space
    }

    pub(crate) fn count_buffers(&self) -> usize {
        self.ranges.len()
    }

    pub(crate) fn total_size(&self) -> usize {
        self.size
    }

    pub(crate) fn is_full(&self) -> bool {
        self.ranges.len() == self.ranges.capacity()
    }

    pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
        for range in &self.ranges {
            seq_print!(
                m,
                "  buffer {}: {} size {} pid {} oneway {}",
                0,
                range.offset,
                range.size,
                range.state.pid(),
                range.state.is_oneway(),
            );
            if let DescriptorState::Reserved(_) = range.state {
                seq_print!(m, " reserved\n");
            } else {
                seq_print!(m, " allocated\n");
            }
        }
        Ok(())
    }

    /// Find somewhere to put a new range.
    ///
    /// Unlike the tree implementation, we do not bother to find the smallest gap. The idea is that
    /// fragmentation isn't a big issue when we don't have many ranges.
    ///
    /// Returns the index that the new range should have in `self.ranges` after insertion, together
    /// with the offset to place it at, or `None` if no gap is large enough.
    fn find_empty_range(&self, size: usize) -> Option<FindEmptyRes> {
        let after_last_range = self.ranges.last().map(Range::endpoint).unwrap_or(0);

        if size <= self.total_size() - after_last_range {
            // We can put the range at the end, so just do that.
            Some(FindEmptyRes {
                insert_at_idx: self.ranges.len(),
                insert_at_offset: after_last_range,
            })
        } else {
            let mut end_of_prev = 0;
            for (i, range) in self.ranges.iter().enumerate() {
                // Does it fit before the i'th range?
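                // A gap fits if its length, `range.offset - end_of_prev`, is at least `size`.
                // Illustrative example (hypothetical numbers): if the previous range ends at
                // 0x300 and this range starts at 0x1000, the gap is 0xd00 bytes, so any request
                // of up to 0xd00 bytes is placed at offset 0x300.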
                if size <= range.offset - end_of_prev {
                    return Some(FindEmptyRes {
                        insert_at_idx: i,
                        insert_at_offset: end_of_prev,
                    });
                }
                end_of_prev = range.endpoint();
            }
            None
        }
    }

    /// Reserve a new range of the given size, returning its offset in the mmap.
    ///
    /// The backing array has a fixed capacity, so callers are expected to check
    /// [`Self::is_full`] first; the insertion below assumes spare capacity is available.
    pub(crate) fn reserve_new(
        &mut self,
        debug_id: usize,
        size: usize,
        is_oneway: bool,
        pid: Pid,
    ) -> Result<usize> {
        // Compute new value of free_oneway_space, which is set only on success.
        let new_oneway_space = if is_oneway {
            match self.free_oneway_space.checked_sub(size) {
                Some(new_oneway_space) => new_oneway_space,
                None => return Err(ENOSPC),
            }
        } else {
            self.free_oneway_space
        };

        let FindEmptyRes {
            insert_at_idx,
            insert_at_offset,
        } = self.find_empty_range(size).ok_or(ENOSPC)?;
        self.free_oneway_space = new_oneway_space;

        let new_range = Range {
            offset: insert_at_offset,
            size,
            state: DescriptorState::new(is_oneway, debug_id, pid),
        };
        // Insert the value at the given index to keep the array sorted.
        self.ranges
            .insert_within_capacity(insert_at_idx, new_range)
            .ok()
            .unwrap();

        Ok(insert_at_offset)
    }

    /// Abort the reservation at `offset`, removing it and returning the pages that are no longer
    /// used by any range. Fails with `EPERM` if the range has already been allocated.
    pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
        // This could use a binary search, but linear scans are usually faster for small arrays.
        let i = self
            .ranges
            .iter()
            .position(|range| range.offset == offset)
            .ok_or(EINVAL)?;
        let range = &self.ranges[i];

        if let DescriptorState::Allocated(_) = range.state {
            return Err(EPERM);
        }

        let size = range.size;
        let offset = range.offset;

        if range.state.is_oneway() {
            self.free_oneway_space += size;
        }

        // This computes the range of pages that are no longer used by *any* allocated range. The
        // caller will mark them as unused, which means that they can be freed if the system comes
        // under memory pressure.
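        // Illustrative example (hypothetical numbers, assuming 0x1000-byte pages): for a range
        // covering [0x1800, 0x3800), only page 2 ([0x2000, 0x3000)) lies entirely inside the
        // range. Page 1 and page 3 are each only partially covered, so they are added below only
        // if the neighbouring range (if any) does not also use them.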
        let mut freed_range = FreedRange::interior_pages(offset, size);
        #[expect(clippy::collapsible_if)] // reads better like this
        if offset % PAGE_SIZE != 0 {
            if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) {
                freed_range.start_page_idx -= 1;
            }
        }
        if range.endpoint() % PAGE_SIZE != 0 {
            let page_after = (range.endpoint() & PAGE_MASK) + PAGE_SIZE;
            if i + 1 == self.ranges.len() || page_after <= self.ranges[i + 1].offset {
                freed_range.end_page_idx += 1;
            }
        }

        self.ranges.remove(i)?;
        Ok(freed_range)
    }

    /// Transition the reservation at `offset` from reserved to allocated, storing `data` in it.
    pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
        // This could use a binary search, but linear scans are usually faster for small arrays.
        let range = self
            .ranges
            .iter_mut()
            .find(|range| range.offset == offset)
            .ok_or(ENOENT)?;

        let DescriptorState::Reserved(reservation) = &range.state else {
            return Err(ENOENT);
        };

        range.state = DescriptorState::Allocated(reservation.clone().allocate(data.take()));
        Ok(())
    }

    /// Take back the allocated range at `offset`, returning its size, debug id, and stored data.
    /// The range goes back to the reserved state.
    pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
        // This could use a binary search, but linear scans are usually faster for small arrays.
        let range = self
            .ranges
            .iter_mut()
            .find(|range| range.offset == offset)
            .ok_or(ENOENT)?;

        let DescriptorState::Allocated(allocation) = &mut range.state else {
            return Err(ENOENT);
        };

        let data = allocation.take();
        let debug_id = allocation.reservation.debug_id;
        range.state = DescriptorState::Reserved(allocation.reservation.clone());
        Ok((range.size, debug_id, data))
    }

    /// Call `callback` on every allocated range, taking the stored data out of each one.
    pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
        for range in self.ranges.iter_mut() {
            if let DescriptorState::Allocated(allocation) = &mut range.state {
                callback(
                    range.offset,
                    range.size,
                    allocation.reservation.debug_id,
                    allocation.data.take(),
                );
            }
        }
    }
}

/// A pre-allocated, empty array of ranges.
///
/// This lets the caller perform the allocation up front, so that constructing an
/// [`ArrayRangeAllocator`] itself never allocates.
pub(crate) struct EmptyArrayAlloc<T> {
    ranges: KVec<Range<T>>,
}

impl<T> EmptyArrayAlloc<T> {
    pub(crate) fn try_new(capacity: usize) -> Result<Self> {
        Ok(Self {
            ranges: KVec::with_capacity(capacity, GFP_KERNEL)?,
        })
    }
}
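
// Illustrative call sequence for a single range (a sketch only; `alloc`, `debug_id`, `pid`,
// `size` and `data` are hypothetical caller-side values, and error handling is elided):
//
//     let offset = alloc.reserve_new(debug_id, size, /* is_oneway */ false, pid)?;
//     /* copy the incoming transaction into the mmap at `offset` */
//     alloc.reservation_commit(offset, &mut Some(data))?;
//     /* later, when the buffer is released: */
//     let (size, debug_id, data) = alloc.reserve_existing(offset)?;
//     let freed_range = alloc.reservation_abort(offset)?;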