xref: /linux/drivers/gpu/nova-core/gpu.rs (revision d639d9fa162aadec1ae9980c4dcf6e50bd2f8290)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 use core::ops::Range;
4 
5 use kernel::{
6     device,
7     dma::Device,
8     fmt,
9     io::Io,
10     num::Bounded,
11     pci,
12     prelude::*, //
13 };
14 
15 use crate::{
16     bounded_enum,
17     driver::Bar0,
18     falcon::{
19         gsp::Gsp as GspFalcon,
20         sec2::Sec2 as Sec2Falcon,
21         Falcon, //
22     },
23     fb::SysmemFlush,
24     gsp::{
25         self,
26         Gsp, //
27     },
28     regs,
29 };
30 
31 mod hal;
32 
33 macro_rules! define_chipset {
34     ({ $($variant:ident = $value:expr),* $(,)* }) =>
35     {
36         /// Enum representation of the GPU chipset.
37         #[derive(fmt::Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
38         pub(crate) enum Chipset {
39             $($variant = $value),*,
40         }
41 
42         impl Chipset {
43             pub(crate) const ALL: &'static [Chipset] = &[
44                 $( Chipset::$variant, )*
45             ];
46 
47             ::kernel::macros::paste!(
48             /// Returns the name of this chipset, in lowercase.
49             ///
50             /// # Examples
51             ///
52             /// ```
53             /// let chipset = Chipset::GA102;
54             /// assert_eq!(chipset.name(), "ga102");
55             /// ```
56             pub(crate) const fn name(&self) -> &'static str {
57                 match *self {
58                 $(
59                     Chipset::$variant => stringify!([<$variant:lower>]),
60                 )*
61                 }
62             }
63             );
64         }
65 
66         // TODO[FPRI]: replace with something like derive(FromPrimitive)
67         impl TryFrom<u32> for Chipset {
68             type Error = kernel::error::Error;
69 
70             fn try_from(value: u32) -> Result<Self, Self::Error> {
71                 match value {
72                     $( $value => Ok(Chipset::$variant), )*
73                     _ => Err(ENODEV),
74                 }
75             }
76         }
77     }
78 }
79 
80 define_chipset!({
81     // Turing
82     TU102 = 0x162,
83     TU104 = 0x164,
84     TU106 = 0x166,
85     TU117 = 0x167,
86     TU116 = 0x168,
87     // Ampere
88     GA100 = 0x170,
89     GA102 = 0x172,
90     GA103 = 0x173,
91     GA104 = 0x174,
92     GA106 = 0x176,
93     GA107 = 0x177,
94     // Hopper
95     GH100 = 0x180,
96     // Ada
97     AD102 = 0x192,
98     AD103 = 0x193,
99     AD104 = 0x194,
100     AD106 = 0x196,
101     AD107 = 0x197,
102     // Blackwell GB10x
103     GB100 = 0x1a0,
104     GB102 = 0x1a2,
105     // Blackwell GB20x
106     GB202 = 0x1b2,
107     GB203 = 0x1b3,
108     GB205 = 0x1b5,
109     GB206 = 0x1b6,
110     GB207 = 0x1b7,
111 });
112 
113 impl Chipset {
114     pub(crate) const fn arch(self) -> Architecture {
115         match self {
116             Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => {
117                 Architecture::Turing
118             }
119             Self::GA100 | Self::GA102 | Self::GA103 | Self::GA104 | Self::GA106 | Self::GA107 => {
120                 Architecture::Ampere
121             }
122             Self::GH100 => Architecture::Hopper,
123             Self::AD102 | Self::AD103 | Self::AD104 | Self::AD106 | Self::AD107 => {
124                 Architecture::Ada
125             }
126             Self::GB100 | Self::GB102 => Architecture::BlackwellGB10x,
127             Self::GB202 | Self::GB203 | Self::GB205 | Self::GB206 | Self::GB207 => {
128                 Architecture::BlackwellGB20x
129             }
130         }
131     }
132 
133     /// Returns `true` if this chipset requires the PIO-loaded bootloader in order to boot FWSEC.
134     ///
135     /// This includes all chipsets < GA102.
136     pub(crate) const fn needs_fwsec_bootloader(self) -> bool {
137         matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100)
138     }
139 
140     /// Returns `true` if this chipset boots via FSP (Hopper and later), which requires the FMC
141     /// firmware image.
142     pub(crate) const fn uses_fsp(self) -> bool {
143         matches!(
144             self.arch(),
145             Architecture::Hopper | Architecture::BlackwellGB10x | Architecture::BlackwellGB20x
146         )
147     }
148 
149     /// Returns the address range of the PCI config mirror space.
150     pub(crate) fn pci_config_mirror_range(self) -> Range<u32> {
151         hal::gpu_hal(self).pci_config_mirror_range()
152     }
153 }
154 
155 // TODO
156 //
157 // The resulting strings are used to generate firmware paths, hence the
158 // generated strings have to be stable.
159 //
160 // Hence, replace with something like strum_macros derive(Display).
161 //
162 // For now, redirect to fmt::Debug for convenience.
163 impl fmt::Display for Chipset {
164     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165         write!(f, "{self:?}")
166     }
167 }
168 
169 bounded_enum! {
170     /// Enum representation of the GPU generation.
171     #[derive(fmt::Debug, Copy, Clone)]
172     pub(crate) enum Architecture with TryFrom<Bounded<u32, 6>> {
173         Turing = 0x16,
174         Ampere = 0x17,
175         Hopper = 0x18,
176         Ada = 0x19,
177         BlackwellGB10x = 0x1a,
178         BlackwellGB20x = 0x1b,
179     }
180 }
181 
182 #[derive(Clone, Copy)]
183 pub(crate) struct Revision {
184     major: Bounded<u8, 4>,
185     minor: Bounded<u8, 4>,
186 }
187 
188 impl From<regs::NV_PMC_BOOT_42> for Revision {
189     fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
190         Self {
191             major: boot0.major_revision().cast(),
192             minor: boot0.minor_revision().cast(),
193         }
194     }
195 }
196 
197 impl fmt::Display for Revision {
198     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
199         write!(f, "{:x}.{:x}", self.major, self.minor)
200     }
201 }
202 
203 /// Structure holding a basic description of the GPU: `Chipset` and `Revision`.
204 #[derive(Clone, Copy)]
205 pub(crate) struct Spec {
206     chipset: Chipset,
207     revision: Revision,
208 }
209 
210 impl Spec {
211     fn new(dev: &device::Device, bar: Bar0<'_>) -> Result<Spec> {
212         // Some brief notes about boot0 and boot42, in chronological order:
213         //
214         // NV04 through NV50:
215         //
216         //    Not supported by Nova. boot0 is necessary and sufficient to identify these GPUs.
217         //    boot42 may not even exist on some of these GPUs.
218         //
219         // Fermi through Volta:
220         //
221         //     Not supported by Nova. boot0 is still sufficient to identify these GPUs, but boot42
222         //     is also guaranteed to be both present and accurate.
223         //
224         // Turing and later:
225         //
226         //     Supported by Nova. Identified by first checking boot0 to ensure that the GPU is not
227         //     from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
228         //     Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
229 
230         let boot0 = bar.read(regs::NV_PMC_BOOT_0);
231 
232         if boot0.is_older_than_fermi() {
233             return Err(ENODEV);
234         }
235 
236         let boot42 = bar.read(regs::NV_PMC_BOOT_42);
237         Spec::try_from(boot42).inspect_err(|_| {
238             dev_err!(dev, "Unsupported chipset: {}\n", boot42);
239         })
240     }
241 }
242 
243 impl TryFrom<regs::NV_PMC_BOOT_42> for Spec {
244     type Error = Error;
245 
246     fn try_from(boot42: regs::NV_PMC_BOOT_42) -> Result<Self> {
247         Ok(Self {
248             chipset: boot42.chipset()?,
249             revision: boot42.into(),
250         })
251     }
252 }
253 
254 impl fmt::Display for Spec {
255     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256         f.write_fmt(fmt!(
257             "Chipset: {}, Architecture: {:?}, Revision: {}",
258             self.chipset,
259             self.chipset.arch(),
260             self.revision
261         ))
262     }
263 }
264 
265 /// Structure holding the resources required to operate the GPU.
266 #[pin_data(PinnedDrop)]
267 pub(crate) struct Gpu<'gpu> {
268     /// Device owning the GPU.
269     device: &'gpu device::Device<device::Bound>,
270     spec: Spec,
271     /// MMIO mapping of PCI BAR 0.
272     bar: Bar0<'gpu>,
273     /// System memory page required for flushing all pending GPU-side memory writes done through
274     /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
275     sysmem_flush: SysmemFlush<'gpu>,
276     /// GSP falcon instance, used for GSP boot up and cleanup.
277     gsp_falcon: Falcon<GspFalcon>,
278     /// SEC2 falcon instance, used for GSP boot up and cleanup.
279     sec2_falcon: Falcon<Sec2Falcon>,
280     /// GSP runtime data. Temporarily an empty placeholder.
281     #[pin]
282     gsp: Gsp,
283     /// GSP unload firmware bundle, if any.
284     unload_bundle: Option<gsp::UnloadBundle>,
285 }
286 
287 impl<'gpu> Gpu<'gpu> {
288     pub(crate) fn new(
289         pdev: &'gpu pci::Device<device::Core<'_>>,
290         bar: Bar0<'gpu>,
291     ) -> impl PinInit<Self, Error> + 'gpu {
292         try_pin_init!(Self {
293             device: pdev.as_ref(),
294             spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| {
295                 dev_info!(pdev,"NVIDIA ({})\n", spec);
296             })?,
297 
298             // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
299             _: {
300                 let hal = hal::gpu_hal(spec.chipset);
301                 let dma_mask = hal.dma_mask();
302 
303                 // SAFETY: `Gpu` owns all DMA allocations for this device, and we are
304                 // still constructing it, so no concurrent DMA allocations can exist.
305                 unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
306 
307                 hal.wait_gfw_boot_completion(bar)
308                     .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
309             },
310 
311             sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,
312 
313             gsp_falcon: Falcon::new(
314                 pdev.as_ref(),
315                 spec.chipset,
316             )
317             .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
318 
319             sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
320 
321             gsp <- Gsp::new(pdev),
322 
323             // This member must be initialized last, so the `UnloadBundle` can never be dropped from
324             // outside of the constructed `Gpu`, ensuring that the unload sequence is properly run
325             // in case of failure.
326             unload_bundle: gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?,
327             bar,
328         })
329     }
330 }
331 
332 #[pinned_drop]
333 impl PinnedDrop for Gpu<'_> {
334     fn drop(self: Pin<&mut Self>) {
335         let this = self.project();
336         let device = *this.device;
337         let bar = *this.bar;
338         let bundle = this.unload_bundle.take();
339 
340         let _ = this
341             .gsp
342             .as_ref()
343             .get_ref()
344             .unload(device, bar, &*this.gsp_falcon, &*this.sec2_falcon, bundle)
345             .inspect_err(|e| dev_err!(device, "failed to unload GSP: {:?}\n", e));
346     }
347 }
348