// SPDX-License-Identifier: GPL-2.0

//! GPU buddy allocator bindings.
//!
//! C header: [`include/linux/gpu_buddy.h`](srctree/include/linux/gpu_buddy.h)
//!
//! This module provides Rust abstractions over the Linux kernel's GPU buddy
//! allocator, which implements a binary buddy memory allocator.
//!
//! The buddy allocator manages a contiguous address space and allocates blocks
//! in power-of-two sizes, useful for GPU physical memory management.
//!
//! # Examples
//!
//! Create a buddy allocator and perform a basic range allocation:
//!
//! ```
//! use kernel::{
//!     gpu::buddy::{
//!         GpuBuddy,
//!         GpuBuddyAllocFlags,
//!         GpuBuddyAllocMode,
//!         GpuBuddyParams, //
//!     },
//!     prelude::*,
//!     ptr::Alignment,
//!     sizes::*, //
//! };
//!
//! // Create a 1GB buddy allocator with 4KB minimum chunk size.
//! let buddy = GpuBuddy::new(GpuBuddyParams {
//!     base_offset: 0,
//!     size: SZ_1G as u64,
//!     chunk_size: Alignment::new::<SZ_4K>(),
//! })?;
//!
//! assert_eq!(buddy.size(), SZ_1G as u64);
//! assert_eq!(buddy.chunk_size(), Alignment::new::<SZ_4K>());
//! let initial_free = buddy.avail();
//!
//! // Allocate 16MB. Block lands at the top of the address range.
//! let allocated = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Simple,
//!         SZ_16M as u64,
//!         Alignment::new::<SZ_16M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let block = allocated.iter().next().expect("expected one block");
//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
//! assert_eq!(block.order(), 12); // 2^12 chunks of 4KB = 16MB
//! assert_eq!(block.size(), SZ_16M as u64);
//! assert_eq!(allocated.iter().count(), 1);
//!
//! // Dropping the allocation returns the range to the buddy allocator.
//! drop(allocated);
//! assert_eq!(buddy.avail(), initial_free);
//! # Ok::<(), Error>(())
//! ```
//!
//! Top-down allocation allocates from the highest addresses:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{GpuBuddy, GpuBuddyAllocMode, GpuBuddyAllocFlags, GpuBuddyParams},
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! # let buddy = GpuBuddy::new(GpuBuddyParams {
//! #     base_offset: 0,
//! #     size: SZ_1G as u64,
//! #     chunk_size: Alignment::new::<SZ_4K>(),
//! # })?;
//! # let initial_free = buddy.avail();
//! let topdown = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::TopDown,
//!         SZ_16M as u64,
//!         Alignment::new::<SZ_16M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let block = topdown.iter().next().expect("expected one block");
//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
//! assert_eq!(block.order(), 12);
//! assert_eq!(block.size(), SZ_16M as u64);
//!
//! // Dropping the allocation returns the range to the buddy allocator.
//! drop(topdown);
//! assert_eq!(buddy.avail(), initial_free);
//! # Ok::<(), Error>(())
//! ```
//!
//! Non-contiguous allocation can fill fragmented memory by returning multiple
//! blocks:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{
//! #         GpuBuddy, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
//! #     },
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! # let buddy = GpuBuddy::new(GpuBuddyParams {
//! #     base_offset: 0,
//! #     size: SZ_1G as u64,
//! #     chunk_size: Alignment::new::<SZ_4K>(),
//! # })?;
//! # let initial_free = buddy.avail();
//! // Create fragmentation by allocating 4MB blocks at [0,4M) and [8M,12M).
//! let frag1 = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_4M as u64);
//!
//! let frag2 = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_8M as u64);
//!
//! // Allocate 8MB; this returns two 4MB blocks from the free gaps.
//! let fragmented = KBox::pin_init(
//!     buddy.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_16M as u64),
//!         SZ_8M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//! assert_eq!(buddy.avail(), initial_free - SZ_16M as u64);
//!
//! let (mut count, mut total) = (0u32, 0u64);
//! for block in fragmented.iter() {
//!     assert_eq!(block.size(), SZ_4M as u64);
//!     total += block.size();
//!     count += 1;
//! }
//! assert_eq!(total, SZ_8M as u64);
//! assert_eq!(count, 2);
//! # Ok::<(), Error>(())
//! ```
//!
//! Contiguous allocation fails when only fragmented space is available:
//!
//! ```
//! # use kernel::{
//! #     gpu::buddy::{
//! #         GpuBuddy, GpuBuddyAllocFlag, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
//! #     },
//! #     prelude::*,
//! #     ptr::Alignment,
//! #     sizes::*, //
//! # };
//! // Create a small 16MB buddy allocator with fragmented memory.
//! let small = GpuBuddy::new(GpuBuddyParams {
//!     base_offset: 0,
//!     size: SZ_16M as u64,
//!     chunk_size: Alignment::new::<SZ_4K>(),
//! })?;
//!
//! let _hole1 = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Range(0..SZ_4M as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//!
//! let _hole2 = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Range(SZ_8M as u64..(SZ_8M + SZ_4M) as u64),
//!         SZ_4M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlags::default(),
//!     ),
//!     GFP_KERNEL,
//! )?;
//!
//! // 8MB contiguous should fail; only two non-contiguous 4MB holes exist.
//! let result = KBox::pin_init(
//!     small.alloc_blocks(
//!         GpuBuddyAllocMode::Simple,
//!         SZ_8M as u64,
//!         Alignment::new::<SZ_4M>(),
//!         GpuBuddyAllocFlag::Contiguous,
//!     ),
//!     GFP_KERNEL,
//! );
//! assert!(result.is_err());
//! # Ok::<(), Error>(())
//! ```

use core::ops::Range;

use crate::{
    bindings,
    clist_create,
    error::to_result,
    interop::list::CListHead,
    new_mutex,
    prelude::*,
    ptr::Alignment,
    sync::{
        lock::mutex::MutexGuard,
        Arc,
        Mutex, //
    },
    types::Opaque, //
};

/// Allocation mode for the GPU buddy allocator.
///
/// The mode determines the primary allocation strategy. Modes are mutually
/// exclusive: an allocation is either simple, range-constrained, or top-down.
///
/// Orthogonal modifier flags (e.g., contiguous, clear) are specified separately
/// via [`GpuBuddyAllocFlags`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum GpuBuddyAllocMode {
    /// Simple allocation without constraints.
    Simple,
    /// Range-based allocation within the given address range.
    Range(Range<u64>),
    /// Allocate from top of address space downward.
    TopDown,
}

impl GpuBuddyAllocMode {
    /// Returns the C flags corresponding to the allocation mode.
    ///
    /// [`Simple`](Self::Simple) maps to no flag bits; the other modes map to
    /// their dedicated C flag constants.
    fn as_flags(&self) -> usize {
        match self {
            Self::Simple => 0,
            Self::Range(_) => bindings::GPU_BUDDY_RANGE_ALLOCATION,
            Self::TopDown => bindings::GPU_BUDDY_TOPDOWN_ALLOCATION,
        }
    }

    /// Extracts the range start/end, defaulting to `(0, 0)` for non-range modes.
    ///
    /// The `(0, 0)` default is what gets passed through to the C allocator when
    /// no range constraint applies.
    fn range(&self) -> (u64, u64) {
        match self {
            Self::Range(range) => (range.start, range.end),
            _ => (0, 0),
        }
    }
}

crate::impl_flags!(
    /// Modifier flags for GPU buddy allocation.
    ///
    /// These flags can be combined with any [`GpuBuddyAllocMode`] to control
    /// additional allocation behavior.
    #[derive(Clone, Copy, Default, PartialEq, Eq)]
    pub struct GpuBuddyAllocFlags(usize);

    /// Individual modifier flag for GPU buddy allocation.
    #[derive(Clone, Copy, PartialEq, Eq)]
    pub enum GpuBuddyAllocFlag {
        /// Allocate physically contiguous blocks.
        Contiguous = bindings::GPU_BUDDY_CONTIGUOUS_ALLOCATION,

        /// Request allocation from cleared (zeroed) memory.
        Clear = bindings::GPU_BUDDY_CLEAR_ALLOCATION,

        /// Disable trimming of partially used blocks.
        TrimDisable = bindings::GPU_BUDDY_TRIM_DISABLE,
    }
);

/// Parameters for creating a GPU buddy allocator.
pub struct GpuBuddyParams {
    /// Base offset (in bytes) where the managed memory region starts.
    /// Allocations will be offset by this value.
    pub base_offset: u64,
    /// Total size (in bytes) of the address space managed by the allocator.
    pub size: u64,
    /// Minimum allocation unit / chunk size; must be >= 4KB.
    pub chunk_size: Alignment,
}

/// Inner structure holding the actual buddy allocator.
///
/// # Synchronization
///
/// The C `gpu_buddy` API requires synchronization (see `include/linux/gpu_buddy.h`).
/// Internal locking ensures all allocator and free operations are properly
/// synchronized, preventing races between concurrent allocations and the
/// freeing that occurs when [`AllocatedBlocks`] is dropped.
///
/// # Invariants
///
/// The inner [`Opaque`] contains an initialized buddy allocator.
#[pin_data(PinnedDrop)]
struct GpuBuddyInner {
    // The C allocator state; only ever accessed through `lock()`.
    #[pin]
    inner: Opaque<bindings::gpu_buddy>,

    // TODO: Replace `Mutex<()>` with `Mutex<Opaque<..>>` once `Mutex::new()`
    // accepts `impl PinInit<T>`.
    #[pin]
    lock: Mutex<()>,
    /// Cached creation parameters (do not change after init).
    params: GpuBuddyParams,
}

impl GpuBuddyInner {
    /// Create a pin-initializer for the buddy allocator.
    fn new(params: GpuBuddyParams) -> impl PinInit<Self, Error> {
        // Copied out up front because `params` is moved into the initializer below.
        let size = params.size;
        let chunk_size = params.chunk_size;

        // INVARIANT: `gpu_buddy_init` returns 0 on success, at which point the
        // `gpu_buddy` structure is initialized and ready for use with all
        // `gpu_buddy_*` APIs. `try_pin_init!` only completes if all fields succeed,
        // so the invariant holds when construction finishes.
        try_pin_init!(Self {
            inner <- Opaque::try_ffi_init(|ptr| {
                // SAFETY: `ptr` points to valid uninitialized memory from the pin-init
                // infrastructure. `gpu_buddy_init` will initialize the structure.
                to_result(unsafe {
                    bindings::gpu_buddy_init(ptr, size, chunk_size.as_usize() as u64)
                })
            }),
            lock <- new_mutex!(()),
            params,
        })
    }

    /// Lock the mutex and return a guard for accessing the allocator.
    ///
    /// All C allocator calls must go through the returned guard so that they
    /// are serialized against each other.
    fn lock(&self) -> GpuBuddyGuard<'_> {
        GpuBuddyGuard {
            inner: self,
            _guard: self.lock.lock(),
        }
    }
}

#[pinned_drop]
impl PinnedDrop for GpuBuddyInner {
    fn drop(self: Pin<&mut Self>) {
        let guard = self.lock();

        // SAFETY: Per the type invariant, `inner` contains an initialized
        // allocator. `guard` provides exclusive access.
        unsafe { bindings::gpu_buddy_fini(guard.as_raw()) };
    }
}

// SAFETY: `GpuBuddyInner` can be sent between threads.
unsafe impl Send for GpuBuddyInner {}

// SAFETY: `GpuBuddyInner` is `Sync` because `GpuBuddyInner::lock`
// serializes all access to the C allocator, preventing data races.
unsafe impl Sync for GpuBuddyInner {}

/// Guard that proves the lock is held, enabling access to the allocator.
///
/// The `_guard` holds the lock for the duration of this guard's lifetime.
struct GpuBuddyGuard<'a> {
    inner: &'a GpuBuddyInner,
    _guard: MutexGuard<'a, ()>,
}

impl GpuBuddyGuard<'_> {
    /// Get a raw pointer to the underlying C `gpu_buddy` structure.
    ///
    /// Only callable while the guard (and thus the lock) is alive.
    fn as_raw(&self) -> *mut bindings::gpu_buddy {
        self.inner.inner.get()
    }
}

/// GPU buddy allocator instance.
///
/// This structure wraps the C `gpu_buddy` allocator using reference counting.
/// The allocator is automatically cleaned up when all references are dropped.
///
/// Refer to the module-level documentation for usage examples.
pub struct GpuBuddy(Arc<GpuBuddyInner>);

impl GpuBuddy {
    /// Create a new buddy allocator.
    ///
    /// The allocator manages a contiguous address space of the given size, with the
    /// specified minimum allocation unit (chunk_size must be at least 4KB).
    pub fn new(params: GpuBuddyParams) -> Result<Self> {
        Arc::pin_init(GpuBuddyInner::new(params), GFP_KERNEL).map(Self)
    }

    /// Get the base offset for allocations.
    pub fn base_offset(&self) -> u64 {
        self.0.params.base_offset
    }

    /// Get the chunk size (minimum allocation unit).
    pub fn chunk_size(&self) -> Alignment {
        self.0.params.chunk_size
    }

    /// Get the total managed size.
    pub fn size(&self) -> u64 {
        self.0.params.size
    }

    /// Get the available (free) memory in bytes.
    pub fn avail(&self) -> u64 {
        // Taking the lock serializes this read against concurrent alloc/free.
        let guard = self.0.lock();

        // SAFETY: Per the type invariant, `inner` contains an initialized allocator.
        // `guard` provides exclusive access.
        unsafe { (*guard.as_raw()).avail }
    }

    /// Allocate blocks from the buddy allocator.
    ///
    /// Returns a pin-initializer for [`AllocatedBlocks`].
    pub fn alloc_blocks(
        &self,
        mode: GpuBuddyAllocMode,
        size: u64,
        min_block_size: Alignment,
        flags: impl Into<GpuBuddyAllocFlags>,
    ) -> impl PinInit<AllocatedBlocks, Error> {
        // The returned initializer must own a reference so the allocator
        // outlives the allocation it produces.
        let buddy_arc = Arc::clone(&self.0);
        let (start, end) = mode.range();
        let mode_flags = mode.as_flags();
        let modifier_flags = flags.into();

        // Create pin-initializer that initializes list and allocates blocks.
        // Inside the `_: { .. }` block, `buddy` and `list` name the struct
        // fields initialized just above by the macro.
        try_pin_init!(AllocatedBlocks {
            buddy: buddy_arc,
            list <- CListHead::new(),
            _: {
                // Reject zero-sized or inverted ranges.
                // (`Range::is_empty` is true for `start >= end`, covering both.)
                if let GpuBuddyAllocMode::Range(range) = &mode {
                    if range.is_empty() {
                        Err::<(), Error>(EINVAL)?;
                    }
                }

                // Lock while allocating to serialize with concurrent frees.
                let guard = buddy.lock();

                // SAFETY: Per the type invariant, `inner` contains an initialized
                // allocator. `guard` provides exclusive access.
                to_result(unsafe {
                    bindings::gpu_buddy_alloc_blocks(
                        guard.as_raw(),
                        start,
                        end,
                        size,
                        min_block_size.as_usize() as u64,
                        list.as_raw(),
                        mode_flags | usize::from(modifier_flags),
                    )
                })?
            }
        })
    }
}

/// Allocated blocks from the buddy allocator with automatic cleanup.
///
/// This structure owns a list of allocated blocks and ensures they are
/// automatically freed when dropped. Use `iter()` to iterate over all
/// allocated blocks.
///
/// # Invariants
///
/// - `list` is an initialized, valid list head containing allocated blocks.
#[pin_data(PinnedDrop)]
pub struct AllocatedBlocks {
    #[pin]
    list: CListHead,
    // Keeps the allocator alive for as long as any of its blocks are held.
    buddy: Arc<GpuBuddyInner>,
}

impl AllocatedBlocks {
    /// Check if the block list is empty.
    pub fn is_empty(&self) -> bool {
        // An empty list head points to itself.
        !self.list.is_linked()
    }

    /// Iterate over allocated blocks.
    ///
    /// Returns an iterator yielding [`AllocatedBlock`] values. Each [`AllocatedBlock`]
    /// borrows `self` and is only valid for the duration of that borrow.
    pub fn iter(&self) -> impl Iterator<Item = AllocatedBlock<'_>> + '_ {
        let head = self.list.as_raw();
        // SAFETY: Per the type invariant, `list` is an initialized sentinel `list_head`
        // and is not concurrently modified (we hold a `&self` borrow). The list contains
        // `gpu_buddy_block` items linked via `__bindgen_anon_1.link`. `Block` is
        // `#[repr(transparent)]` over `gpu_buddy_block`.
        let clist = unsafe {
            clist_create!(
                head,
                Block,
                bindings::gpu_buddy_block,
                __bindgen_anon_1.link
            )
        };

        clist
            .iter()
            .map(|this| AllocatedBlock { this, blocks: self })
    }
}

#[pinned_drop]
impl PinnedDrop for AllocatedBlocks {
    fn drop(self: Pin<&mut Self>) {
        let guard = self.buddy.lock();

        // SAFETY:
        // - list is valid per the type's invariants.
        // - guard provides exclusive access to the allocator.
        //
        // NOTE(review): the trailing `0` is presumably "no free flags" —
        // confirm against the `gpu_buddy_free_list` signature.
        unsafe {
            bindings::gpu_buddy_free_list(guard.as_raw(), self.list.as_raw(), 0);
        }
    }
}

/// A GPU buddy block.
///
/// Transparent wrapper over C `gpu_buddy_block` structure. This type is returned
/// as references during iteration over [`AllocatedBlocks`].
///
/// # Invariants
///
/// The inner [`Opaque`] contains a valid, allocated `gpu_buddy_block`.
#[repr(transparent)]
struct Block(Opaque<bindings::gpu_buddy_block>);

impl Block {
    /// Get a raw pointer to the underlying C block.
    fn as_raw(&self) -> *mut bindings::gpu_buddy_block {
        self.0.get()
    }

    /// Get the block's raw offset in the buddy address space (without base offset).
    fn offset(&self) -> u64 {
        // SAFETY: `self.as_raw()` is valid per the type's invariants.
        unsafe { bindings::gpu_buddy_block_offset(self.as_raw()) }
    }

    /// Get the block order.
    fn order(&self) -> u32 {
        // SAFETY: `self.as_raw()` is valid per the type's invariants.
        unsafe { bindings::gpu_buddy_block_order(self.as_raw()) }
    }
}

// SAFETY: `Block` is a wrapper around `gpu_buddy_block` which can be
// sent across threads safely.
unsafe impl Send for Block {}

// SAFETY: `Block` is only accessed through shared references after
// allocation, and thus safe to access concurrently across threads.
unsafe impl Sync for Block {}

/// A buddy block paired with its owning [`AllocatedBlocks`] context.
///
/// Unlike a raw block, which only knows its offset within the buddy address
/// space, an [`AllocatedBlock`] also has access to the allocator's `base_offset`
/// and `chunk_size`, enabling it to compute absolute offsets and byte sizes.
///
/// Returned by [`AllocatedBlocks::iter()`].
pub struct AllocatedBlock<'a> {
    this: &'a Block,
    blocks: &'a AllocatedBlocks,
}

impl AllocatedBlock<'_> {
    /// Get the block's offset in the address space.
    ///
    /// Returns the absolute offset including the allocator's base offset.
    /// This is the actual address to use for accessing the allocated memory.
    pub fn offset(&self) -> u64 {
        self.blocks.buddy.params.base_offset + self.this.offset()
    }

    /// Get the block order (size = chunk_size << order).
    pub fn order(&self) -> u32 {
        self.this.order()
    }

    /// Get the block's size in bytes.
    pub fn size(&self) -> u64 {
        (self.blocks.buddy.params.chunk_size.as_usize() as u64) << self.this.order()
    }
}