/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __GENERIC_PT_IOMMU_H
#define __GENERIC_PT_IOMMU_H

#include <linux/generic_pt/common.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>

struct iommu_iotlb_gather;
struct pt_iommu_ops;
struct pt_iommu_driver_ops;
struct iommu_dirty_bitmap;

/**
 * DOC: IOMMU Radix Page Table
 *
 * The IOMMU implementation of the Generic Page Table provides an ops struct
 * that pairs with an iommu_domain to serve the DMA API, IOMMUFD and the
 * generic map/unmap interface.
 *
 * This interface uses a caller-provided locking approach. The caller must
 * have a VA range lock concept that prevents concurrent threads from calling
 * ops on the same VA. Generally the range lock must be at least as large as a
 * single map call.
 */
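
/*
 * For example, under this contract the caller serializes operations that
 * touch the same IOVA range before invoking the map/unmap interface.
 * range_lock() and range_unlock() below are hypothetical stand-ins for
 * whatever serialization the caller already provides; operations on disjoint
 * ranges may run concurrently:
 *
 *	range_lock(owner, iova, size);
 *	ret = iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
 *	range_unlock(owner, iova, size);
 */
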
29
30 /**
31 * struct pt_iommu - Base structure for IOMMU page tables
32 *
33 * The format-specific struct will include this as the first member.
34 */
35 struct pt_iommu {
36 /**
37 * @domain: The core IOMMU domain. The driver should use a union to
38 * overlay this memory with its previously existing domain struct to
39 * create an alias.
40 */
41 struct iommu_domain domain;
42
43 /**
44 * @ops: Function pointers to access the API
45 */
46 const struct pt_iommu_ops *ops;
47
48 /**
49 * @driver_ops: Function pointers provided by the HW driver to help
50 * manage HW details like caches.
51 */
52 const struct pt_iommu_driver_ops *driver_ops;
53
54 /**
55 * @nid: Node ID to use for table memory allocations. The IOMMU driver
56 * may want to set the NID to the device's NID, if there are multiple
57 * table walkers.
58 */
59 int nid;
60
61 /**
62 * @iommu_device: Device pointer used for any DMA cache flushing when
63 * PT_FEAT_DMA_INCOHERENT. This is the iommu device that created the
64 * page table which must have dma ops that perform cache flushing.
65 */
66 struct device *iommu_device;
67 };

/**
 * struct pt_iommu_info - Details about the IOMMU page table
 *
 * Returned from pt_iommu_ops->get_info()
 */
struct pt_iommu_info {
	/**
	 * @pgsize_bitmap: A bitmask where each set bit indicates
	 * a page size that can be natively stored in the page table.
	 */
	u64 pgsize_bitmap;
};

struct pt_iommu_ops {
	/**
	 * @set_dirty: Make the iova write dirty
	 * @iommu_table: Table to manipulate
	 * @iova: IO virtual address to start
	 *
	 * This is only used by iommufd testing. It makes the iova dirty so
	 * that read_and_clear_dirty() will see it as dirty. Unlike all the
	 * other ops this one is safe to call without holding any locks. It
	 * may return -EAGAIN if there is a race.
	 */
	int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);

	/**
	 * @get_info: Return the pt_iommu_info structure
	 * @iommu_table: Table to query
	 *
	 * Return some basic static information about the page table.
	 */
	void (*get_info)(struct pt_iommu *iommu_table,
			 struct pt_iommu_info *info);

	/**
	 * @deinit: Undo a format-specific init operation
	 * @iommu_table: Table to destroy
	 *
	 * Release all of the memory. The caller must have already removed the
	 * table from all HW access and all caches.
	 */
	void (*deinit)(struct pt_iommu *iommu_table);
};
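
/*
 * A minimal sketch of how a driver might consume get_info(), for example to
 * publish the supported page sizes on its iommu_domain. my_setup_domain() and
 * its locals are hypothetical, not part of this API:
 *
 *	static void my_setup_domain(struct pt_iommu *iommu_table)
 *	{
 *		struct pt_iommu_info info;
 *
 *		iommu_table->ops->get_info(iommu_table, &info);
 *		iommu_table->domain.pgsize_bitmap = info.pgsize_bitmap;
 *	}
 */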

/**
 * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations
 *
 * The IOMMU driver should implement these using container_of(iommu_table) to
 * get to its iommu_domain derived structure. All ops can be called in atomic
 * contexts as they are buried under DMA API calls.
 */
struct pt_iommu_driver_ops {
	/**
	 * @change_top: Update the top of table pointer
	 * @iommu_table: Table to operate on
	 * @top_paddr: New CPU physical address of the top pointer
	 * @top_level: IOMMU PT level of the new top
	 *
	 * Called under the get_top_lock() spinlock. The driver must update all
	 * HW references to this domain with a new top address and
	 * configuration. On return mappings placed in the new top must be
	 * reachable by the HW.
	 *
	 * top_level encodes the level in IOMMU PT format: level 0 is the
	 * smallest page size, increasing from there. This has to be translated
	 * to any HW specific format. During this call the new top will not be
	 * visible to any other API.
	 *
	 * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if that
	 * feature is enabled.
	 */
	void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
			   unsigned int top_level);

	/**
	 * @get_top_lock: Lock to hold when changing the table top
	 * @iommu_table: Table to operate on
	 *
	 * Return a lock to hold while the top of the page table stored in HW
	 * is being changed. The lock will be held prior to calling
	 * change_top() and released once the new top is fully visible.
	 *
	 * Typically this would be a lock that protects the iommu_domain's
	 * attachment list.
	 *
	 * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if that
	 * feature is enabled.
	 */
	spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
};
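
/*
 * A rough sketch of a driver wiring up these ops when PT_FEAT_DYNAMIC_TOP is
 * used. struct my_domain, its attach_lock, its pt member and my_sync_hw_top()
 * are hypothetical; the point is that change_top() runs under the lock
 * returned by get_top_lock() and must make the new top reachable by all
 * attached HW before returning:
 *
 *	static void my_change_top(struct pt_iommu *iommu_table,
 *				  phys_addr_t top_paddr, unsigned int top_level)
 *	{
 *		struct my_domain *md =
 *			container_of(iommu_table, struct my_domain, pt.iommu);
 *
 *		my_sync_hw_top(md, top_paddr, top_level);
 *	}
 *
 *	static spinlock_t *my_get_top_lock(struct pt_iommu *iommu_table)
 *	{
 *		struct my_domain *md =
 *			container_of(iommu_table, struct my_domain, pt.iommu);
 *
 *		return &md->attach_lock;
 *	}
 *
 *	static const struct pt_iommu_driver_ops my_driver_ops = {
 *		.change_top = my_change_top,
 *		.get_top_lock = my_get_top_lock,
 *	};
 */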

static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
{
	/*
	 * It is safe to call pt_iommu_deinit() before an init, or if init
	 * fails. The ops pointer will only become non-NULL if deinit needs to
	 * be run.
	 */
	if (iommu_table->ops)
		iommu_table->ops->deinit(iommu_table);
}
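
/*
 * For example, a driver would typically call this from its domain free path
 * once the table is no longer referenced by HW. my_domain_free() and struct
 * my_domain are hypothetical, assuming the union/alias layout described
 * further below:
 *
 *	static void my_domain_free(struct iommu_domain *domain)
 *	{
 *		struct my_domain *md =
 *			container_of(domain, struct my_domain, domain);
 *
 *		pt_iommu_deinit(&md->pt.iommu);
 *		kfree(md);
 *	}
 */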

/**
 * struct pt_iommu_cfg - Common configuration values for all formats
 */
struct pt_iommu_cfg {
	/**
	 * @features: Features required. Only these features will be turned on.
	 * The feature list should reflect what the IOMMU HW is capable of.
	 */
	unsigned int features;
	/**
	 * @hw_max_vasz_lg2: Maximum VA the IOMMU HW can support. This will
	 * imply the top level of the table.
	 */
	u8 hw_max_vasz_lg2;
	/**
	 * @hw_max_oasz_lg2: Maximum OA the IOMMU HW can support. The format
	 * might select a lower maximum OA.
	 */
	u8 hw_max_oasz_lg2;
};

/* Generate the exported function signatures from iommu_pt.h */
#define IOMMU_PROTOTYPES(fmt) \
	phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
						  dma_addr_t iova); \
	int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain, \
				       unsigned long iova, phys_addr_t paddr, \
				       size_t pgsize, size_t pgcount, \
				       int prot, gfp_t gfp, size_t *mapped); \
	size_t pt_iommu_##fmt##_unmap_pages( \
		struct iommu_domain *domain, unsigned long iova, \
		size_t pgsize, size_t pgcount, \
		struct iommu_iotlb_gather *iotlb_gather); \
	int pt_iommu_##fmt##_read_and_clear_dirty( \
		struct iommu_domain *domain, unsigned long iova, size_t size, \
		unsigned long flags, struct iommu_dirty_bitmap *dirty); \
	int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \
				  const struct pt_iommu_##fmt##_cfg *cfg, \
				  gfp_t gfp); \
	void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table, \
				      struct pt_iommu_##fmt##_hw_info *info)
#define IOMMU_FORMAT(fmt, member) \
	struct pt_iommu_##fmt { \
		struct pt_iommu iommu; \
		struct pt_##fmt member; \
	}; \
	IOMMU_PROTOTYPES(fmt)

/*
 * A driver uses IOMMU_PT_DOMAIN_OPS() to populate its iommu_domain_ops with
 * the functions generated for a given format.
 */
#define IOMMU_PT_DOMAIN_OPS(fmt) \
	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
	.map_pages = &pt_iommu_##fmt##_map_pages, \
	.unmap_pages = &pt_iommu_##fmt##_unmap_pages
#define IOMMU_PT_DIRTY_OPS(fmt) \
	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
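
/*
 * For instance, a driver supporting the amdv1 format declared below might
 * build its ops like this. my_attach_dev() and my_domain_free() are
 * hypothetical, and IOMMU_PT_DIRTY_OPS() is only added when the HW supports
 * dirty tracking:
 *
 *	static const struct iommu_domain_ops my_domain_ops = {
 *		IOMMU_PT_DOMAIN_OPS(amdv1),
 *		.attach_dev = my_attach_dev,
 *		.free = my_domain_free,
 *	};
 *
 *	static const struct iommu_dirty_ops my_dirty_ops = {
 *		IOMMU_PT_DIRTY_OPS(amdv1),
 *	};
 */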

/*
 * The driver should set up its domain struct like:
 *
 *	union {
 *		struct iommu_domain domain;
 *		struct pt_iommu_xxx xx;
 *	};
 *	PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);
 *
 * Which creates an alias between driver_domain.domain and
 * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing
 * driver_domain.domain users.
 */
#define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb) \
	static_assert(offsetof(s, pt_iommu_memb.domain) == \
		      offsetof(s, domain_memb))

struct pt_iommu_amdv1_cfg {
	struct pt_iommu_cfg common;
	unsigned int starting_level;
};

struct pt_iommu_amdv1_hw_info {
	u64 host_pt_root;
	u8 mode;
};

IOMMU_FORMAT(amdv1, amdpt);
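
/*
 * A minimal sketch of bringing up an amdv1 table. my_pt, my_cfg and the
 * concrete numbers are hypothetical; the feature bits and address widths
 * must come from what the driver's HW actually supports:
 *
 *	struct pt_iommu_amdv1 my_pt = {};
 *	struct pt_iommu_amdv1_hw_info hw;
 *	struct pt_iommu_amdv1_cfg my_cfg = {
 *		.common = {
 *			.features = 0,	// PT_FEAT_* bits from linux/generic_pt/common.h
 *			.hw_max_vasz_lg2 = 48,
 *			.hw_max_oasz_lg2 = 52,
 *		},
 *		.starting_level = 2,
 *	};
 *	int ret;
 *
 *	ret = pt_iommu_amdv1_init(&my_pt, &my_cfg, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	pt_iommu_amdv1_hw_info(&my_pt, &hw);	// e.g. hw.host_pt_root, hw.mode
 */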

/* amdv1_mock is used by the iommufd selftest */
#define pt_iommu_amdv1_mock pt_iommu_amdv1
#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
struct pt_iommu_amdv1_mock_hw_info;
IOMMU_PROTOTYPES(amdv1_mock);

struct pt_iommu_vtdss_cfg {
	struct pt_iommu_cfg common;
	/* 4 is a 57 bit 5 level table */
	unsigned int top_level;
};

struct pt_iommu_vtdss_hw_info {
	u64 ssptptr;
	u8 aw;
};

IOMMU_FORMAT(vtdss, vtdss_pt);

struct pt_iommu_x86_64_cfg {
	struct pt_iommu_cfg common;
	/* 4 is a 57 bit 5 level table */
	unsigned int top_level;
};

struct pt_iommu_x86_64_hw_info {
	u64 gcr3_pt;
	u8 levels;
};

IOMMU_FORMAT(x86_64, x86_64_pt);

#undef IOMMU_PROTOTYPES
#undef IOMMU_FORMAT
#endif