1 // SPDX-License-Identifier: GPL-2.0 2 3 use core::ops::Range; 4 5 use kernel::{ 6 device, 7 dma::Device, 8 fmt, 9 io::Io, 10 num::Bounded, 11 pci, 12 prelude::*, // 13 }; 14 15 use crate::{ 16 bounded_enum, 17 driver::Bar0, 18 falcon::{ 19 gsp::Gsp as GspFalcon, 20 sec2::Sec2 as Sec2Falcon, 21 Falcon, // 22 }, 23 fb::SysmemFlush, 24 gsp::{ 25 self, 26 Gsp, // 27 }, 28 regs, 29 }; 30 31 mod hal; 32 33 macro_rules! define_chipset { 34 ({ $($variant:ident = $value:expr),* $(,)* }) => 35 { 36 /// Enum representation of the GPU chipset. 37 #[derive(fmt::Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)] 38 pub(crate) enum Chipset { 39 $($variant = $value),*, 40 } 41 42 impl Chipset { 43 pub(crate) const ALL: &'static [Chipset] = &[ 44 $( Chipset::$variant, )* 45 ]; 46 47 ::kernel::macros::paste!( 48 /// Returns the name of this chipset, in lowercase. 49 /// 50 /// # Examples 51 /// 52 /// ``` 53 /// let chipset = Chipset::GA102; 54 /// assert_eq!(chipset.name(), "ga102"); 55 /// ``` 56 pub(crate) const fn name(&self) -> &'static str { 57 match *self { 58 $( 59 Chipset::$variant => stringify!([<$variant:lower>]), 60 )* 61 } 62 } 63 ); 64 } 65 66 // TODO[FPRI]: replace with something like derive(FromPrimitive) 67 impl TryFrom<u32> for Chipset { 68 type Error = kernel::error::Error; 69 70 fn try_from(value: u32) -> Result<Self, Self::Error> { 71 match value { 72 $( $value => Ok(Chipset::$variant), )* 73 _ => Err(ENODEV), 74 } 75 } 76 } 77 } 78 } 79 80 define_chipset!({ 81 // Turing 82 TU102 = 0x162, 83 TU104 = 0x164, 84 TU106 = 0x166, 85 TU117 = 0x167, 86 TU116 = 0x168, 87 // Ampere 88 GA100 = 0x170, 89 GA102 = 0x172, 90 GA103 = 0x173, 91 GA104 = 0x174, 92 GA106 = 0x176, 93 GA107 = 0x177, 94 // Hopper 95 GH100 = 0x180, 96 // Ada 97 AD102 = 0x192, 98 AD103 = 0x193, 99 AD104 = 0x194, 100 AD106 = 0x196, 101 AD107 = 0x197, 102 // Blackwell GB10x 103 GB100 = 0x1a0, 104 GB102 = 0x1a2, 105 // Blackwell GB20x 106 GB202 = 0x1b2, 107 GB203 = 0x1b3, 108 GB205 = 0x1b5, 109 GB206 = 0x1b6, 110 GB207 = 0x1b7, 111 }); 112 113 impl Chipset { 114 pub(crate) const fn arch(self) -> Architecture { 115 match self { 116 Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => { 117 Architecture::Turing 118 } 119 Self::GA100 | Self::GA102 | Self::GA103 | Self::GA104 | Self::GA106 | Self::GA107 => { 120 Architecture::Ampere 121 } 122 Self::GH100 => Architecture::Hopper, 123 Self::AD102 | Self::AD103 | Self::AD104 | Self::AD106 | Self::AD107 => { 124 Architecture::Ada 125 } 126 Self::GB100 | Self::GB102 => Architecture::BlackwellGB10x, 127 Self::GB202 | Self::GB203 | Self::GB205 | Self::GB206 | Self::GB207 => { 128 Architecture::BlackwellGB20x 129 } 130 } 131 } 132 133 /// Returns `true` if this chipset requires the PIO-loaded bootloader in order to boot FWSEC. 134 /// 135 /// This includes all chipsets < GA102. 136 pub(crate) const fn needs_fwsec_bootloader(self) -> bool { 137 matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100) 138 } 139 140 /// Returns `true` if this chipset boots via FSP (Hopper and later), which requires the FMC 141 /// firmware image. 142 pub(crate) const fn uses_fsp(self) -> bool { 143 matches!( 144 self.arch(), 145 Architecture::Hopper | Architecture::BlackwellGB10x | Architecture::BlackwellGB20x 146 ) 147 } 148 149 /// Returns the address range of the PCI config mirror space. 150 pub(crate) fn pci_config_mirror_range(self) -> Range<u32> { 151 hal::gpu_hal(self).pci_config_mirror_range() 152 } 153 } 154 155 // TODO 156 // 157 // The resulting strings are used to generate firmware paths, hence the 158 // generated strings have to be stable. 159 // 160 // Hence, replace with something like strum_macros derive(Display). 161 // 162 // For now, redirect to fmt::Debug for convenience. 163 impl fmt::Display for Chipset { 164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 165 write!(f, "{self:?}") 166 } 167 } 168 169 bounded_enum! { 170 /// Enum representation of the GPU generation. 171 #[derive(fmt::Debug, Copy, Clone)] 172 pub(crate) enum Architecture with TryFrom<Bounded<u32, 6>> { 173 Turing = 0x16, 174 Ampere = 0x17, 175 Hopper = 0x18, 176 Ada = 0x19, 177 BlackwellGB10x = 0x1a, 178 BlackwellGB20x = 0x1b, 179 } 180 } 181 182 #[derive(Clone, Copy)] 183 pub(crate) struct Revision { 184 major: Bounded<u8, 4>, 185 minor: Bounded<u8, 4>, 186 } 187 188 impl From<regs::NV_PMC_BOOT_42> for Revision { 189 fn from(boot0: regs::NV_PMC_BOOT_42) -> Self { 190 Self { 191 major: boot0.major_revision().cast(), 192 minor: boot0.minor_revision().cast(), 193 } 194 } 195 } 196 197 impl fmt::Display for Revision { 198 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 199 write!(f, "{:x}.{:x}", self.major, self.minor) 200 } 201 } 202 203 /// Structure holding a basic description of the GPU: `Chipset` and `Revision`. 204 #[derive(Clone, Copy)] 205 pub(crate) struct Spec { 206 chipset: Chipset, 207 revision: Revision, 208 } 209 210 impl Spec { 211 fn new(dev: &device::Device, bar: Bar0<'_>) -> Result<Spec> { 212 // Some brief notes about boot0 and boot42, in chronological order: 213 // 214 // NV04 through NV50: 215 // 216 // Not supported by Nova. boot0 is necessary and sufficient to identify these GPUs. 217 // boot42 may not even exist on some of these GPUs. 218 // 219 // Fermi through Volta: 220 // 221 // Not supported by Nova. boot0 is still sufficient to identify these GPUs, but boot42 222 // is also guaranteed to be both present and accurate. 223 // 224 // Turing and later: 225 // 226 // Supported by Nova. Identified by first checking boot0 to ensure that the GPU is not 227 // from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU. 228 // Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs. 229 230 let boot0 = bar.read(regs::NV_PMC_BOOT_0); 231 232 if boot0.is_older_than_fermi() { 233 return Err(ENODEV); 234 } 235 236 let boot42 = bar.read(regs::NV_PMC_BOOT_42); 237 Spec::try_from(boot42).inspect_err(|_| { 238 dev_err!(dev, "Unsupported chipset: {}\n", boot42); 239 }) 240 } 241 } 242 243 impl TryFrom<regs::NV_PMC_BOOT_42> for Spec { 244 type Error = Error; 245 246 fn try_from(boot42: regs::NV_PMC_BOOT_42) -> Result<Self> { 247 Ok(Self { 248 chipset: boot42.chipset()?, 249 revision: boot42.into(), 250 }) 251 } 252 } 253 254 impl fmt::Display for Spec { 255 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 256 f.write_fmt(fmt!( 257 "Chipset: {}, Architecture: {:?}, Revision: {}", 258 self.chipset, 259 self.chipset.arch(), 260 self.revision 261 )) 262 } 263 } 264 265 /// Structure holding the resources required to operate the GPU. 266 #[pin_data(PinnedDrop)] 267 pub(crate) struct Gpu<'gpu> { 268 /// Device owning the GPU. 269 device: &'gpu device::Device<device::Bound>, 270 spec: Spec, 271 /// MMIO mapping of PCI BAR 0. 272 bar: Bar0<'gpu>, 273 /// System memory page required for flushing all pending GPU-side memory writes done through 274 /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). 275 sysmem_flush: SysmemFlush<'gpu>, 276 /// GSP falcon instance, used for GSP boot up and cleanup. 277 gsp_falcon: Falcon<GspFalcon>, 278 /// SEC2 falcon instance, used for GSP boot up and cleanup. 279 sec2_falcon: Falcon<Sec2Falcon>, 280 /// GSP runtime data. Temporarily an empty placeholder. 281 #[pin] 282 gsp: Gsp, 283 /// GSP unload firmware bundle, if any. 284 unload_bundle: Option<gsp::UnloadBundle>, 285 } 286 287 impl<'gpu> Gpu<'gpu> { 288 pub(crate) fn new( 289 pdev: &'gpu pci::Device<device::Core<'_>>, 290 bar: Bar0<'gpu>, 291 ) -> impl PinInit<Self, Error> + 'gpu { 292 try_pin_init!(Self { 293 device: pdev.as_ref(), 294 spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| { 295 dev_info!(pdev,"NVIDIA ({})\n", spec); 296 })?, 297 298 // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. 299 _: { 300 let hal = hal::gpu_hal(spec.chipset); 301 let dma_mask = hal.dma_mask(); 302 303 // SAFETY: `Gpu` owns all DMA allocations for this device, and we are 304 // still constructing it, so no concurrent DMA allocations can exist. 305 unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? }; 306 307 hal.wait_gfw_boot_completion(bar) 308 .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?; 309 }, 310 311 sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, 312 313 gsp_falcon: Falcon::new( 314 pdev.as_ref(), 315 spec.chipset, 316 ) 317 .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, 318 319 sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?, 320 321 gsp <- Gsp::new(pdev), 322 323 // This member must be initialized last, so the `UnloadBundle` can never be dropped from 324 // outside of the constructed `Gpu`, ensuring that the unload sequence is properly run 325 // in case of failure. 326 unload_bundle: gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?, 327 bar, 328 }) 329 } 330 } 331 332 #[pinned_drop] 333 impl PinnedDrop for Gpu<'_> { 334 fn drop(self: Pin<&mut Self>) { 335 let this = self.project(); 336 let device = *this.device; 337 let bar = *this.bar; 338 let bundle = this.unload_bundle.take(); 339 340 let _ = this 341 .gsp 342 .as_ref() 343 .get_ref() 344 .unload(device, bar, &*this.gsp_falcon, &*this.sec2_falcon, bundle) 345 .inspect_err(|e| dev_err!(device, "failed to unload GSP: {:?}\n", e)); 346 } 347 } 348