1 // SPDX-License-Identifier: GPL-2.0 2 3 // Copyright (C) 2025 Google LLC. 4 5 use kernel::{ 6 page::{PAGE_MASK, PAGE_SIZE}, 7 prelude::*, 8 seq_file::SeqFile, 9 seq_print, 10 task::Pid, 11 }; 12 13 use crate::range_alloc::{DescriptorState, FreedRange, Range}; 14 15 /// Keeps track of allocations in a process' mmap. 16 /// 17 /// Each process has an mmap where the data for incoming transactions will be placed. This struct 18 /// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that 19 /// has metadata related to the allocation. We also keep track of available free space. 20 pub(super) struct ArrayRangeAllocator<T> { 21 /// This stores all ranges that are allocated. Unlike the tree based allocator, we do *not* 22 /// store the free ranges. 23 /// 24 /// Sorted by offset. 25 pub(super) ranges: KVec<Range<T>>, 26 size: usize, 27 free_oneway_space: usize, 28 } 29 30 struct FindEmptyRes { 31 /// Which index in `ranges` should we insert the new range at? 32 /// 33 /// Inserting the new range at this index keeps `ranges` sorted. 34 insert_at_idx: usize, 35 /// Which offset should we insert the new range at? 36 insert_at_offset: usize, 37 } 38 39 impl<T> ArrayRangeAllocator<T> { 40 pub(crate) fn new(size: usize, alloc: EmptyArrayAlloc<T>) -> Self { 41 Self { 42 ranges: alloc.ranges, 43 size, 44 free_oneway_space: size / 2, 45 } 46 } 47 48 pub(crate) fn free_oneway_space(&self) -> usize { 49 self.free_oneway_space 50 } 51 52 pub(crate) fn count_buffers(&self) -> usize { 53 self.ranges.len() 54 } 55 56 pub(crate) fn total_size(&self) -> usize { 57 self.size 58 } 59 60 pub(crate) fn is_full(&self) -> bool { 61 self.ranges.len() == self.ranges.capacity() 62 } 63 64 pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> { 65 for range in &self.ranges { 66 seq_print!( 67 m, 68 " buffer {}: {} size {} pid {} oneway {}", 69 0, 70 range.offset, 71 range.size, 72 range.state.pid(), 73 range.state.is_oneway(), 74 ); 75 if let DescriptorState::Reserved(_) = range.state { 76 seq_print!(m, " reserved\n"); 77 } else { 78 seq_print!(m, " allocated\n"); 79 } 80 } 81 Ok(()) 82 } 83 84 /// Find somewhere to put a new range. 85 /// 86 /// Unlike the tree implementation, we do not bother to find the smallest gap. The idea is that 87 /// fragmentation isn't a big issue when we don't have many ranges. 88 /// 89 /// Returns the index that the new range should have in `self.ranges` after insertion. 90 fn find_empty_range(&self, size: usize) -> Option<FindEmptyRes> { 91 let after_last_range = self.ranges.last().map(Range::endpoint).unwrap_or(0); 92 93 if size <= self.total_size() - after_last_range { 94 // We can put the range at the end, so just do that. 95 Some(FindEmptyRes { 96 insert_at_idx: self.ranges.len(), 97 insert_at_offset: after_last_range, 98 }) 99 } else { 100 let mut end_of_prev = 0; 101 for (i, range) in self.ranges.iter().enumerate() { 102 // Does it fit before the i'th range? 103 if size <= range.offset - end_of_prev { 104 return Some(FindEmptyRes { 105 insert_at_idx: i, 106 insert_at_offset: end_of_prev, 107 }); 108 } 109 end_of_prev = range.endpoint(); 110 } 111 None 112 } 113 } 114 115 pub(crate) fn reserve_new( 116 &mut self, 117 debug_id: usize, 118 size: usize, 119 is_oneway: bool, 120 pid: Pid, 121 ) -> Result<(usize, bool)> { 122 // Compute new value of free_oneway_space, which is set only on success. 123 let new_oneway_space = if is_oneway { 124 match self.free_oneway_space.checked_sub(size) { 125 Some(new_oneway_space) => new_oneway_space, 126 None => return Err(ENOSPC), 127 } 128 } else { 129 self.free_oneway_space 130 }; 131 132 let FindEmptyRes { 133 insert_at_idx, 134 insert_at_offset, 135 } = self.find_empty_range(size).ok_or(ENOSPC)?; 136 self.free_oneway_space = new_oneway_space; 137 138 let new_range = Range { 139 offset: insert_at_offset, 140 size, 141 state: DescriptorState::new(is_oneway, debug_id, pid), 142 }; 143 // Insert the value at the given index to keep the array sorted. 144 self.ranges 145 .insert_within_capacity(insert_at_idx, new_range) 146 .ok() 147 .unwrap(); 148 149 // Start detecting spammers once we have less than 20% 150 // of async space left (which is less than 10% of total 151 // buffer size). 152 // 153 // (This will short-circuit, so `low_oneway_space` is 154 // only called when necessary.) 155 let oneway_spam_detected = 156 is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); 157 158 Ok((insert_at_offset, oneway_spam_detected)) 159 } 160 161 /// Find the amount and size of buffers allocated by the current caller. 162 /// 163 /// The idea is that once we cross the threshold, whoever is responsible 164 /// for the low async space is likely to try to send another async transaction, 165 /// and at some point we'll catch them in the act. This is more efficient 166 /// than keeping a map per pid. 167 fn low_oneway_space(&self, calling_pid: Pid) -> bool { 168 let mut total_alloc_size = 0; 169 let mut num_buffers = 0; 170 171 // Warn if this pid has more than 50 transactions, or more than 50% of 172 // async space (which is 25% of total buffer size). Oneway spam is only 173 // detected when the threshold is exceeded. 174 for range in &self.ranges { 175 if range.state.is_oneway() && range.state.pid() == calling_pid { 176 total_alloc_size += range.size; 177 num_buffers += 1; 178 } 179 } 180 num_buffers > 50 || total_alloc_size > self.size / 4 181 } 182 183 pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> { 184 // This could use a binary search, but linear scans are usually faster for small arrays. 185 let i = self 186 .ranges 187 .iter() 188 .position(|range| range.offset == offset) 189 .ok_or(EINVAL)?; 190 let range = &self.ranges[i]; 191 192 if let DescriptorState::Allocated(_) = range.state { 193 return Err(EPERM); 194 } 195 196 let size = range.size; 197 let offset = range.offset; 198 199 if range.state.is_oneway() { 200 self.free_oneway_space += size; 201 } 202 203 // This computes the range of pages that are no longer used by *any* allocated range. The 204 // caller will mark them as unused, which means that they can be freed if the system comes 205 // under memory pressure. 206 let mut freed_range = FreedRange::interior_pages(offset, size); 207 if offset % PAGE_SIZE != 0 { 208 if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) { 209 freed_range.start_page_idx -= 1; 210 } 211 } 212 if range.endpoint() % PAGE_SIZE != 0 { 213 let page_after = (range.endpoint() & PAGE_MASK) + PAGE_SIZE; 214 if i + 1 == self.ranges.len() || page_after <= self.ranges[i + 1].offset { 215 freed_range.end_page_idx += 1; 216 } 217 } 218 219 self.ranges.remove(i)?; 220 Ok(freed_range) 221 } 222 223 pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result { 224 // This could use a binary search, but linear scans are usually faster for small arrays. 225 let range = self 226 .ranges 227 .iter_mut() 228 .find(|range| range.offset == offset) 229 .ok_or(ENOENT)?; 230 231 let DescriptorState::Reserved(reservation) = &range.state else { 232 return Err(ENOENT); 233 }; 234 235 range.state = DescriptorState::Allocated(reservation.clone().allocate(data.take())); 236 Ok(()) 237 } 238 239 pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> { 240 // This could use a binary search, but linear scans are usually faster for small arrays. 241 let range = self 242 .ranges 243 .iter_mut() 244 .find(|range| range.offset == offset) 245 .ok_or(ENOENT)?; 246 247 let DescriptorState::Allocated(allocation) = &mut range.state else { 248 return Err(ENOENT); 249 }; 250 251 let data = allocation.take(); 252 let debug_id = allocation.reservation.debug_id; 253 range.state = DescriptorState::Reserved(allocation.reservation.clone()); 254 Ok((range.size, debug_id, data)) 255 } 256 257 pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) { 258 for range in self.ranges.iter_mut() { 259 if let DescriptorState::Allocated(allocation) = &mut range.state { 260 callback( 261 range.offset, 262 range.size, 263 allocation.reservation.debug_id, 264 allocation.data.take(), 265 ); 266 } 267 } 268 } 269 } 270 271 pub(crate) struct EmptyArrayAlloc<T> { 272 ranges: KVec<Range<T>>, 273 } 274 275 impl<T> EmptyArrayAlloc<T> { 276 pub(crate) fn try_new(capacity: usize) -> Result<Self> { 277 Ok(Self { 278 ranges: KVec::with_capacity(capacity, GFP_KERNEL)?, 279 }) 280 } 281 } 282