/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 *
 * x86 page table. Supports the 4 and 5 level variations.
 *
 * The 4 and 5 level version is described in:
 *   Section "4.4 4-Level Paging and 5-Level Paging" of the Intel Software
 *   Developer's Manual Volume 3
 *
 *   Section "9.7 First-Stage Paging Entries" of the "Intel Virtualization
 *   Technology for Directed I/O Architecture Specification"
 *
 *   Section "2.2.6 I/O Page Tables for Guest Translations" of the "AMD I/O
 *   Virtualization Technology (IOMMU) Specification"
 *
 * It is used by x86 CPUs and by AMD and VT-d IOMMU HW.
 *
 * Note the 3 level format is very similar and almost implemented here. The
 * reserved/ignored layout is different and there are functional bit
 * differences.
 *
 * This format uses PT_FEAT_SIGN_EXTEND to have an upper/non-canonical/lower
 * split. PT_FEAT_SIGN_EXTEND is optional as the AMD IOMMU sometimes uses
 * non-sign-extended addressing with this page table format.
 *
 * The named levels in the spec map to pts->level as:
 *   Table/PTE - 0
 *   Directory/PDE - 1
 *   Directory Ptr/PDPTE - 2
 *   PML4/PML4E - 3
 *   PML5/PML5E - 4
 */
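
/*
 * As an illustrative reading of the table above (worked from the level
 * map, not quoted from the specs): a leaf at level 0 (PTE) maps a
 * 2^12 = 4K page, at level 1 (PDE) a 2^(12 + 9) = 2M page, and at
 * level 2 (PDPTE) a 2^(12 + 18) = 1G page. PML4E/PML5E entries can only
 * point at lower tables, which is what x86_64_pt_can_have_leaf() below
 * encodes.
 */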
#ifndef __GENERIC_PT_FMT_X86_64_H
#define __GENERIC_PT_FMT_X86_64_H

#include "defs_x86_64.h"
#include "../pt_defs.h"

#include <linux/bitfield.h>
#include <linux/container_of.h>
#include <linux/log2.h>
#include <linux/mem_encrypt.h>

enum {
	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
	PT_MAX_VA_ADDRESS_LG2 = 57,
	PT_ITEM_WORD_SIZE = sizeof(u64),
	PT_MAX_TOP_LEVEL = 4,
	PT_GRANULE_LG2SZ = 12,
	PT_TABLEMEM_LG2SZ = 12,

	/*
	 * For AMD the GCR3 Base only has these bits. For VT-d the FSPTPTR is
	 * 4k aligned and is limited by the architected HAW.
	 */
	PT_TOP_PHYS_MASK = GENMASK_ULL(51, 12),
};
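
/*
 * A quick sanity check on the geometry above (illustrative arithmetic
 * only): a 2^PT_TABLEMEM_LG2SZ = 4K table holds 2^(12 - 3) = 512
 * eight-byte items, so each level translates 9 bits of VA. With
 * PT_MAX_TOP_LEVEL = 4 there are 5 table levels, so the largest VA is
 * 12 + 5 * 9 = 57 bits, matching PT_MAX_VA_ADDRESS_LG2.
 */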

/* Shared descriptor bits */
enum {
	X86_64_FMT_P = BIT(0),
	X86_64_FMT_RW = BIT(1),
	X86_64_FMT_U = BIT(2),
	X86_64_FMT_A = BIT(5),
	X86_64_FMT_D = BIT(6),
	X86_64_FMT_OA = GENMASK_ULL(51, 12),
	X86_64_FMT_XD = BIT_ULL(63),
};

/* PDPTE/PDE */
enum {
	X86_64_FMT_PS = BIT(7),
};

/* Physical address of the lower table this table entry points at */
static inline pt_oaddr_t x86_64_pt_table_pa(const struct pt_state *pts)
{
	u64 entry = pts->entry;

	if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
		entry = __sme_clr(entry);
	return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry),
			  PT_TABLEMEM_LG2SZ);
}
#define pt_table_pa x86_64_pt_table_pa

/* Output address encoded in a leaf entry */
static inline pt_oaddr_t x86_64_pt_entry_oa(const struct pt_state *pts)
{
	u64 entry = pts->entry;

	if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
		entry = __sme_clr(entry);
	return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry),
			  PT_GRANULE_LG2SZ);
}
#define pt_entry_oa x86_64_pt_entry_oa

/* Leaf (OA) entries can exist at levels 0 to 2 (4K/2M/1G pages) */
static inline bool x86_64_pt_can_have_leaf(const struct pt_state *pts)
{
	return pts->level <= 2;
}
#define pt_can_have_leaf x86_64_pt_can_have_leaf

/* Every table is one 4K page holding 512 64-bit items */
static inline unsigned int x86_64_pt_num_items_lg2(const struct pt_state *pts)
{
	return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
}
#define pt_num_items_lg2 x86_64_pt_num_items_lg2

static inline enum pt_entry_type x86_64_pt_load_entry_raw(struct pt_state *pts)
{
	const u64 *tablep = pt_cur_table(pts, u64);
	u64 entry;

	pts->entry = entry = READ_ONCE(tablep[pts->index]);
	if (!(entry & X86_64_FMT_P))
		return PT_ENTRY_EMPTY;
	if (pts->level == 0 ||
	    (x86_64_pt_can_have_leaf(pts) && (entry & X86_64_FMT_PS)))
		return PT_ENTRY_OA;
	return PT_ENTRY_TABLE;
}
#define pt_load_entry_raw x86_64_pt_load_entry_raw
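
/*
 * Decode sketch for the above (worked from the code, not a HW dump): an
 * entry with X86_64_FMT_P clear is PT_ENTRY_EMPTY no matter what its
 * other bits hold; P and PS set at level 1 is a 2M PT_ENTRY_OA leaf; P
 * set without PS at level 1 is a PT_ENTRY_TABLE pointing at a level 0
 * table. PS is never tested above level 2 since those levels cannot
 * hold leaves.
 */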

static inline void
x86_64_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
			     unsigned int oasz_lg2,
			     const struct pt_write_attrs *attrs)
{
	u64 *tablep = pt_cur_table(pts, u64);
	u64 entry;

	if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
		return;

	entry = X86_64_FMT_P |
		FIELD_PREP(X86_64_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) |
		attrs->descriptor_bits;
	/* Levels above the PTE need PS to mark the entry as a huge page */
	if (pts->level != 0)
		entry |= X86_64_FMT_PS;

	WRITE_ONCE(tablep[pts->index], entry);
	pts->entry = entry;
}
#define pt_install_leaf_entry x86_64_pt_install_leaf_entry

/*
 * Table entries are installed fully permissive (RW, U, A); access control
 * is done by the leaf entries.
 */
static inline bool x86_64_pt_install_table(struct pt_state *pts,
					   pt_oaddr_t table_pa,
					   const struct pt_write_attrs *attrs)
{
	u64 entry;

	entry = X86_64_FMT_P | X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A |
		FIELD_PREP(X86_64_FMT_OA, log2_div(table_pa, PT_GRANULE_LG2SZ));
	if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
		entry = __sme_set(entry);
	return pt_table_install64(pts, entry);
}
#define pt_install_table x86_64_pt_install_table

/* Recover the writable attribute bits from an existing entry */
static inline void x86_64_pt_attr_from_entry(const struct pt_state *pts,
					     struct pt_write_attrs *attrs)
{
	attrs->descriptor_bits = pts->entry &
				 (X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A |
				  X86_64_FMT_D | X86_64_FMT_XD);
}
#define pt_attr_from_entry x86_64_pt_attr_from_entry

static inline unsigned int x86_64_pt_max_sw_bit(struct pt_common *common)
{
	return 12;
}
#define pt_max_sw_bit x86_64_pt_max_sw_bit

static inline u64 x86_64_pt_sw_bit(unsigned int bitnr)
{
	if (__builtin_constant_p(bitnr) && bitnr > 12)
		BUILD_BUG();

	/* Bits marked Ignored/AVL in the specification */
	switch (bitnr) {
	case 0:
		return BIT(9);
	case 1:
		return BIT(11);
	case 2 ... 12:
		return BIT_ULL((bitnr - 2) + 52);
	/* Some bits in 8,6,4,3 are available in some entries */
	default:
		PT_WARN_ON(true);
		return 0;
	}
}
#define pt_sw_bit x86_64_pt_sw_bit
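
/*
 * The resulting software bit layout (restating the switch above):
 *   pt_sw_bit(0)      -> bit 9
 *   pt_sw_bit(1)      -> bit 11
 *   pt_sw_bit(2..12)  -> bits 52..62
 * Bit 63 is not usable as it is X86_64_FMT_XD.
 */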

/* --- iommu */
#include <linux/generic_pt/iommu.h>
#include <linux/iommu.h>

#define pt_iommu_table pt_iommu_x86_64

/* The common struct is in the per-format common struct */
static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
{
	return &container_of(iommu_table, struct pt_iommu_table, iommu)
			->x86_64_pt.common;
}

static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
{
	return &container_of(common, struct pt_iommu_table, x86_64_pt.common)
			->iommu;
}

static inline int x86_64_pt_iommu_set_prot(struct pt_common *common,
					   struct pt_write_attrs *attrs,
					   unsigned int iommu_prot)
{
	u64 pte;

	pte = X86_64_FMT_U | X86_64_FMT_A;
	if (iommu_prot & IOMMU_WRITE)
		pte |= X86_64_FMT_RW | X86_64_FMT_D;

	/*
	 * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
	 * control this. For now if the tables use sme_set then so do the ptes.
	 */
	if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
		pte = __sme_set(pte);

	attrs->descriptor_bits = pte;
	return 0;
}
#define pt_iommu_set_prot x86_64_pt_iommu_set_prot
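
/*
 * A sketch of the resulting mapping (derived from the code above):
 * IOMMU_READ alone produces U | A, while IOMMU_READ | IOMMU_WRITE
 * produces U | A | RW | D. There is no read-disable control in this
 * format; any present entry is readable. A and D are pre-set,
 * presumably so HW never needs to atomically update the entry on first
 * use.
 */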

static inline int
x86_64_pt_iommu_fmt_init(struct pt_iommu_x86_64 *iommu_table,
			 const struct pt_iommu_x86_64_cfg *cfg)
{
	struct pt_x86_64 *table = &iommu_table->x86_64_pt;

	/* Only the 4 and 5 level variants are implemented */
	if (cfg->top_level < 3 || cfg->top_level > 4)
		return -EOPNOTSUPP;

	pt_top_set_level(&table->common, cfg->top_level);

	table->common.max_oasz_lg2 =
		min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
	return 0;
}
#define pt_iommu_fmt_init x86_64_pt_iommu_fmt_init
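
/*
 * Configuration sketch (hypothetical caller): a 4-level table passes
 * .top_level = 3 (PML4 on top, 48-bit VA) and a 5-level table passes
 * .top_level = 4 (PML5, 57-bit VA); anything else is rejected with
 * -EOPNOTSUPP. The kunit cfgs at the end of this file use exactly these
 * two values.
 */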

static inline void
x86_64_pt_iommu_fmt_hw_info(struct pt_iommu_x86_64 *table,
			    const struct pt_range *top_range,
			    struct pt_iommu_x86_64_hw_info *info)
{
	info->gcr3_pt = virt_to_phys(top_range->top_table);
	PT_WARN_ON(info->gcr3_pt & ~PT_TOP_PHYS_MASK);
	info->levels = top_range->top_level + 1;
}
#define pt_iommu_fmt_hw_info x86_64_pt_iommu_fmt_hw_info

#if defined(GENERIC_PT_KUNIT)
static const struct pt_iommu_x86_64_cfg x86_64_kunit_fmt_cfgs[] = {
	[0] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND),
		.common.hw_max_vasz_lg2 = 48, .top_level = 3 },
	[1] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND),
		.common.hw_max_vasz_lg2 = 57, .top_level = 4 },
	/* AMD IOMMU PASID 0 formats with no SIGN_EXTEND */
	[2] = { .common.hw_max_vasz_lg2 = 47, .top_level = 3 },
	[3] = { .common.hw_max_vasz_lg2 = 56, .top_level = 4 },
};
#define kunit_fmt_cfgs x86_64_kunit_fmt_cfgs
enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_SIGN_EXTEND) };
#endif
#endif