// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2025 Google LLC.

use kernel::{
    page::{PAGE_MASK, PAGE_SIZE},
    prelude::*,
    seq_file::SeqFile,
    seq_print,
    task::Pid,
};

use crate::range_alloc::{DescriptorState, FreedRange, Range};

/// Keeps track of allocations in a process' mmap.
///
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
/// has metadata related to the allocation. We also keep track of available free space.
pub(super) struct ArrayRangeAllocator<T> {
    /// This stores all ranges that are allocated. Unlike the tree based allocator, we do *not*
    /// store the free ranges.
    ///
    /// Sorted by offset.
    pub(super) ranges: KVec<Range<T>>,
    /// Total size of the mmap, in bytes.
    size: usize,
    /// Bytes still available for oneway (async) transactions.
    ///
    /// Initialized to half of `size` in [`Self::new`].
    free_oneway_space: usize,
}

/// Result of [`ArrayRangeAllocator::find_empty_range`]: where a new range should go.
struct FindEmptyRes {
    /// Which index in `ranges` should we insert the new range at?
    ///
    /// Inserting the new range at this index keeps `ranges` sorted.
    insert_at_idx: usize,
    /// Which offset should we insert the new range at?
    insert_at_offset: usize,
}

impl<T> ArrayRangeAllocator<T> {
    /// Creates an allocator for an mmap of `size` bytes, taking ownership of the preallocated
    /// (empty) ranges array in `alloc`. Half of the space is budgeted for oneway transactions.
    pub(crate) fn new(size: usize, alloc: EmptyArrayAlloc<T>) -> Self {
        Self {
            ranges: alloc.ranges,
            size,
            free_oneway_space: size / 2,
        }
    }

    /// Returns the number of bytes still available for oneway transactions.
    pub(crate) fn free_oneway_space(&self) -> usize {
        self.free_oneway_space
    }

    /// Returns the number of currently allocated/reserved ranges.
    pub(crate) fn count_buffers(&self) -> usize {
        self.ranges.len()
    }

    /// Returns the total size of the mmap in bytes.
    pub(crate) fn total_size(&self) -> usize {
        self.size
    }

    /// Returns `true` when no further range can be inserted without reallocating.
    ///
    /// The ranges array is allocated up front (see [`EmptyArrayAlloc`]) and never grown, so
    /// "full" means length has reached capacity.
    pub(crate) fn is_full(&self) -> bool {
        self.ranges.len() == self.ranges.capacity()
    }

    /// Prints one line per range to the seq file, for debugfs-style inspection.
    pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
        for range in &self.ranges {
            seq_print!(
                m,
                "  buffer {}: {} size {} pid {} oneway {}",
                // NOTE(review): the buffer index is printed as a literal `0` for every range —
                // confirm whether a real identifier (e.g. a debug id) is intended here.
                0,
                range.offset,
                range.size,
                range.state.pid(),
                range.state.is_oneway(),
            );
            if let DescriptorState::Reserved(_) = range.state {
                seq_print!(m, " reserved\n");
            } else {
                seq_print!(m, " allocated\n");
            }
        }
        Ok(())
    }

    /// Find somewhere to put a new range.
    ///
    /// Unlike the tree implementation, we do not bother to find the smallest gap. The idea is that
    /// fragmentation isn't a big issue when we don't have many ranges.
    ///
    /// Returns the index that the new range should have in `self.ranges` after insertion.
    fn find_empty_range(&self, size: usize) -> Option<FindEmptyRes> {
        let after_last_range = self.ranges.last().map(Range::endpoint).unwrap_or(0);

        if size <= self.total_size() - after_last_range {
            // We can put the range at the end, so just do that.
            Some(FindEmptyRes {
                insert_at_idx: self.ranges.len(),
                insert_at_offset: after_last_range,
            })
        } else {
            // Otherwise scan the gaps between consecutive ranges, front to back.
            let mut end_of_prev = 0;
            for (i, range) in self.ranges.iter().enumerate() {
                // Does it fit before the i'th range?
103 if size <= range.offset - end_of_prev { 104 return Some(FindEmptyRes { 105 insert_at_idx: i, 106 insert_at_offset: end_of_prev, 107 }); 108 } 109 end_of_prev = range.endpoint(); 110 } 111 None 112 } 113 } 114 115 pub(crate) fn reserve_new( 116 &mut self, 117 debug_id: usize, 118 size: usize, 119 is_oneway: bool, 120 pid: Pid, 121 ) -> Result<(usize, bool)> { 122 // Compute new value of free_oneway_space, which is set only on success. 123 let new_oneway_space = if is_oneway { 124 match self.free_oneway_space.checked_sub(size) { 125 Some(new_oneway_space) => new_oneway_space, 126 None => return Err(ENOSPC), 127 } 128 } else { 129 self.free_oneway_space 130 }; 131 132 let FindEmptyRes { 133 insert_at_idx, 134 insert_at_offset, 135 } = self.find_empty_range(size).ok_or(ENOSPC)?; 136 self.free_oneway_space = new_oneway_space; 137 138 let new_range = Range { 139 offset: insert_at_offset, 140 size, 141 state: DescriptorState::new(is_oneway, debug_id, pid), 142 }; 143 // Insert the value at the given index to keep the array sorted. 144 self.ranges 145 .insert_within_capacity(insert_at_idx, new_range) 146 .ok() 147 .unwrap(); 148 149 // Start detecting spammers once we have less than 20% 150 // of async space left (which is less than 10% of total 151 // buffer size). 152 // 153 // (This will short-circuit, so `low_oneway_space` is 154 // only called when necessary.) 155 let oneway_spam_detected = 156 is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); 157 158 Ok((insert_at_offset, oneway_spam_detected)) 159 } 160 161 /// Find the amount and size of buffers allocated by the current caller. 162 /// 163 /// The idea is that once we cross the threshold, whoever is responsible 164 /// for the low async space is likely to try to send another async transaction, 165 /// and at some point we'll catch them in the act. This is more efficient 166 /// than keeping a map per pid. 
167 fn low_oneway_space(&self, calling_pid: Pid) -> bool { 168 let mut total_alloc_size = 0; 169 let mut num_buffers = 0; 170 171 // Warn if this pid has more than 50 transactions, or more than 50% of 172 // async space (which is 25% of total buffer size). Oneway spam is only 173 // detected when the threshold is exceeded. 174 for range in &self.ranges { 175 if range.state.is_oneway() && range.state.pid() == calling_pid { 176 total_alloc_size += range.size; 177 num_buffers += 1; 178 } 179 } 180 num_buffers > 50 || total_alloc_size > self.size / 4 181 } 182 183 pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> { 184 // This could use a binary search, but linear scans are usually faster for small arrays. 185 let i = self 186 .ranges 187 .iter() 188 .position(|range| range.offset == offset) 189 .ok_or(EINVAL)?; 190 let range = &self.ranges[i]; 191 192 if let DescriptorState::Allocated(_) = range.state { 193 return Err(EPERM); 194 } 195 196 let size = range.size; 197 let offset = range.offset; 198 199 if range.state.is_oneway() { 200 self.free_oneway_space += size; 201 } 202 203 // This computes the range of pages that are no longer used by *any* allocated range. The 204 // caller will mark them as unused, which means that they can be freed if the system comes 205 // under memory pressure. 
206 let mut freed_range = FreedRange::interior_pages(offset, size); 207 #[expect(clippy::collapsible_if)] // reads better like this 208 if offset % PAGE_SIZE != 0 { 209 if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) { 210 freed_range.start_page_idx -= 1; 211 } 212 } 213 if range.endpoint() % PAGE_SIZE != 0 { 214 let page_after = (range.endpoint() & PAGE_MASK) + PAGE_SIZE; 215 if i + 1 == self.ranges.len() || page_after <= self.ranges[i + 1].offset { 216 freed_range.end_page_idx += 1; 217 } 218 } 219 220 self.ranges.remove(i)?; 221 Ok(freed_range) 222 } 223 224 pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result { 225 // This could use a binary search, but linear scans are usually faster for small arrays. 226 let range = self 227 .ranges 228 .iter_mut() 229 .find(|range| range.offset == offset) 230 .ok_or(ENOENT)?; 231 232 let DescriptorState::Reserved(reservation) = &range.state else { 233 return Err(ENOENT); 234 }; 235 236 range.state = DescriptorState::Allocated(reservation.clone().allocate(data.take())); 237 Ok(()) 238 } 239 240 pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> { 241 // This could use a binary search, but linear scans are usually faster for small arrays. 
242 let range = self 243 .ranges 244 .iter_mut() 245 .find(|range| range.offset == offset) 246 .ok_or(ENOENT)?; 247 248 let DescriptorState::Allocated(allocation) = &mut range.state else { 249 return Err(ENOENT); 250 }; 251 252 let data = allocation.take(); 253 let debug_id = allocation.reservation.debug_id; 254 range.state = DescriptorState::Reserved(allocation.reservation.clone()); 255 Ok((range.size, debug_id, data)) 256 } 257 258 pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) { 259 for range in self.ranges.iter_mut() { 260 if let DescriptorState::Allocated(allocation) = &mut range.state { 261 callback( 262 range.offset, 263 range.size, 264 allocation.reservation.debug_id, 265 allocation.data.take(), 266 ); 267 } 268 } 269 } 270 } 271 272 pub(crate) struct EmptyArrayAlloc<T> { 273 ranges: KVec<Range<T>>, 274 } 275 276 impl<T> EmptyArrayAlloc<T> { 277 pub(crate) fn try_new(capacity: usize) -> Result<Self> { 278 Ok(Self { 279 ranges: KVec::with_capacity(capacity, GFP_KERNEL)?, 280 }) 281 } 282 } 283