1aef5de75SJason Gunthorpe /* SPDX-License-Identifier: GPL-2.0-only */
2aef5de75SJason Gunthorpe /*
3aef5de75SJason Gunthorpe * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
4aef5de75SJason Gunthorpe *
5aef5de75SJason Gunthorpe * x86 page table. Supports the 4 and 5 level variations.
6aef5de75SJason Gunthorpe *
7aef5de75SJason Gunthorpe * The 4 and 5 level version is described in:
8aef5de75SJason Gunthorpe * Section "4.4 4-Level Paging and 5-Level Paging" of the Intel Software
9aef5de75SJason Gunthorpe * Developer's Manual Volume 3
10aef5de75SJason Gunthorpe *
11aef5de75SJason Gunthorpe * Section "9.7 First-Stage Paging Entries" of the "Intel Virtualization
12aef5de75SJason Gunthorpe * Technology for Directed I/O Architecture Specification"
13aef5de75SJason Gunthorpe *
14aef5de75SJason Gunthorpe * Section "2.2.6 I/O Page Tables for Guest Translations" of the "AMD I/O
15aef5de75SJason Gunthorpe * Virtualization Technology (IOMMU) Specification"
16aef5de75SJason Gunthorpe *
17aef5de75SJason Gunthorpe * It is used by x86 CPUs, AMD and VT-d IOMMU HW.
18aef5de75SJason Gunthorpe *
19aef5de75SJason Gunthorpe * Note the 3 level format is very similar and almost implemented here. The
20aef5de75SJason Gunthorpe * reserved/ignored layout is different and there are functional bit
21aef5de75SJason Gunthorpe * differences.
22aef5de75SJason Gunthorpe *
 * This format uses PT_FEAT_SIGN_EXTEND to have an upper/non-canonical/lower
 * split. PT_FEAT_SIGN_EXTEND is optional, as the AMD IOMMU sometimes uses
 * non-sign extended addressing with this page table format.
26aef5de75SJason Gunthorpe *
27aef5de75SJason Gunthorpe * The named levels in the spec map to the pts->level as:
28aef5de75SJason Gunthorpe * Table/PTE - 0
29aef5de75SJason Gunthorpe * Directory/PDE - 1
30aef5de75SJason Gunthorpe * Directory Ptr/PDPTE - 2
31aef5de75SJason Gunthorpe * PML4/PML4E - 3
32aef5de75SJason Gunthorpe * PML5/PML5E - 4
33aef5de75SJason Gunthorpe */
34aef5de75SJason Gunthorpe #ifndef __GENERIC_PT_FMT_X86_64_H
35aef5de75SJason Gunthorpe #define __GENERIC_PT_FMT_X86_64_H
36aef5de75SJason Gunthorpe
37aef5de75SJason Gunthorpe #include "defs_x86_64.h"
38aef5de75SJason Gunthorpe #include "../pt_defs.h"
39aef5de75SJason Gunthorpe
40aef5de75SJason Gunthorpe #include <linux/bitfield.h>
41aef5de75SJason Gunthorpe #include <linux/container_of.h>
42aef5de75SJason Gunthorpe #include <linux/log2.h>
43aef5de75SJason Gunthorpe #include <linux/mem_encrypt.h>
44aef5de75SJason Gunthorpe
45aef5de75SJason Gunthorpe enum {
46aef5de75SJason Gunthorpe PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
47aef5de75SJason Gunthorpe PT_MAX_VA_ADDRESS_LG2 = 57,
48aef5de75SJason Gunthorpe PT_ITEM_WORD_SIZE = sizeof(u64),
49aef5de75SJason Gunthorpe PT_MAX_TOP_LEVEL = 4,
50aef5de75SJason Gunthorpe PT_GRANULE_LG2SZ = 12,
51aef5de75SJason Gunthorpe PT_TABLEMEM_LG2SZ = 12,
52aef5de75SJason Gunthorpe
53aef5de75SJason Gunthorpe /*
54aef5de75SJason Gunthorpe * For AMD the GCR3 Base only has these bits. For VT-d FSPTPTR is 4k
55aef5de75SJason Gunthorpe * aligned and is limited by the architected HAW
56aef5de75SJason Gunthorpe */
57aef5de75SJason Gunthorpe PT_TOP_PHYS_MASK = GENMASK_ULL(51, 12),
58aef5de75SJason Gunthorpe };
59aef5de75SJason Gunthorpe
60aef5de75SJason Gunthorpe /* Shared descriptor bits */
61aef5de75SJason Gunthorpe enum {
62aef5de75SJason Gunthorpe X86_64_FMT_P = BIT(0),
63aef5de75SJason Gunthorpe X86_64_FMT_RW = BIT(1),
64aef5de75SJason Gunthorpe X86_64_FMT_U = BIT(2),
65aef5de75SJason Gunthorpe X86_64_FMT_A = BIT(5),
66aef5de75SJason Gunthorpe X86_64_FMT_D = BIT(6),
67aef5de75SJason Gunthorpe X86_64_FMT_OA = GENMASK_ULL(51, 12),
68aef5de75SJason Gunthorpe X86_64_FMT_XD = BIT_ULL(63),
69aef5de75SJason Gunthorpe };
70aef5de75SJason Gunthorpe
71aef5de75SJason Gunthorpe /* PDPTE/PDE */
72aef5de75SJason Gunthorpe enum {
73aef5de75SJason Gunthorpe X86_64_FMT_PS = BIT(7),
74aef5de75SJason Gunthorpe };
75aef5de75SJason Gunthorpe
x86_64_pt_table_pa(const struct pt_state * pts)76aef5de75SJason Gunthorpe static inline pt_oaddr_t x86_64_pt_table_pa(const struct pt_state *pts)
77aef5de75SJason Gunthorpe {
78aef5de75SJason Gunthorpe u64 entry = pts->entry;
79aef5de75SJason Gunthorpe
80aef5de75SJason Gunthorpe if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
81aef5de75SJason Gunthorpe entry = __sme_clr(entry);
82aef5de75SJason Gunthorpe return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry),
83aef5de75SJason Gunthorpe PT_TABLEMEM_LG2SZ);
84aef5de75SJason Gunthorpe }
85aef5de75SJason Gunthorpe #define pt_table_pa x86_64_pt_table_pa
86aef5de75SJason Gunthorpe
x86_64_pt_entry_oa(const struct pt_state * pts)87aef5de75SJason Gunthorpe static inline pt_oaddr_t x86_64_pt_entry_oa(const struct pt_state *pts)
88aef5de75SJason Gunthorpe {
89aef5de75SJason Gunthorpe u64 entry = pts->entry;
90aef5de75SJason Gunthorpe
91aef5de75SJason Gunthorpe if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
92aef5de75SJason Gunthorpe entry = __sme_clr(entry);
93aef5de75SJason Gunthorpe return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry),
94aef5de75SJason Gunthorpe PT_GRANULE_LG2SZ);
95aef5de75SJason Gunthorpe }
96aef5de75SJason Gunthorpe #define pt_entry_oa x86_64_pt_entry_oa
97aef5de75SJason Gunthorpe
x86_64_pt_can_have_leaf(const struct pt_state * pts)98aef5de75SJason Gunthorpe static inline bool x86_64_pt_can_have_leaf(const struct pt_state *pts)
99aef5de75SJason Gunthorpe {
100aef5de75SJason Gunthorpe return pts->level <= 2;
101aef5de75SJason Gunthorpe }
102aef5de75SJason Gunthorpe #define pt_can_have_leaf x86_64_pt_can_have_leaf
103aef5de75SJason Gunthorpe
x86_64_pt_num_items_lg2(const struct pt_state * pts)104aef5de75SJason Gunthorpe static inline unsigned int x86_64_pt_num_items_lg2(const struct pt_state *pts)
105aef5de75SJason Gunthorpe {
106aef5de75SJason Gunthorpe return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
107aef5de75SJason Gunthorpe }
108aef5de75SJason Gunthorpe #define pt_num_items_lg2 x86_64_pt_num_items_lg2
109aef5de75SJason Gunthorpe
x86_64_pt_load_entry_raw(struct pt_state * pts)110aef5de75SJason Gunthorpe static inline enum pt_entry_type x86_64_pt_load_entry_raw(struct pt_state *pts)
111aef5de75SJason Gunthorpe {
112aef5de75SJason Gunthorpe const u64 *tablep = pt_cur_table(pts, u64);
113aef5de75SJason Gunthorpe u64 entry;
114aef5de75SJason Gunthorpe
115aef5de75SJason Gunthorpe pts->entry = entry = READ_ONCE(tablep[pts->index]);
116aef5de75SJason Gunthorpe if (!(entry & X86_64_FMT_P))
117aef5de75SJason Gunthorpe return PT_ENTRY_EMPTY;
118aef5de75SJason Gunthorpe if (pts->level == 0 ||
119aef5de75SJason Gunthorpe (x86_64_pt_can_have_leaf(pts) && (entry & X86_64_FMT_PS)))
120aef5de75SJason Gunthorpe return PT_ENTRY_OA;
121aef5de75SJason Gunthorpe return PT_ENTRY_TABLE;
122aef5de75SJason Gunthorpe }
123aef5de75SJason Gunthorpe #define pt_load_entry_raw x86_64_pt_load_entry_raw
124aef5de75SJason Gunthorpe
125aef5de75SJason Gunthorpe static inline void
x86_64_pt_install_leaf_entry(struct pt_state * pts,pt_oaddr_t oa,unsigned int oasz_lg2,const struct pt_write_attrs * attrs)126aef5de75SJason Gunthorpe x86_64_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
127aef5de75SJason Gunthorpe unsigned int oasz_lg2,
128aef5de75SJason Gunthorpe const struct pt_write_attrs *attrs)
129aef5de75SJason Gunthorpe {
130aef5de75SJason Gunthorpe u64 *tablep = pt_cur_table(pts, u64);
131aef5de75SJason Gunthorpe u64 entry;
132aef5de75SJason Gunthorpe
133aef5de75SJason Gunthorpe if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
134aef5de75SJason Gunthorpe return;
135aef5de75SJason Gunthorpe
136aef5de75SJason Gunthorpe entry = X86_64_FMT_P |
137aef5de75SJason Gunthorpe FIELD_PREP(X86_64_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) |
138aef5de75SJason Gunthorpe attrs->descriptor_bits;
139aef5de75SJason Gunthorpe if (pts->level != 0)
140aef5de75SJason Gunthorpe entry |= X86_64_FMT_PS;
141aef5de75SJason Gunthorpe
142aef5de75SJason Gunthorpe WRITE_ONCE(tablep[pts->index], entry);
143aef5de75SJason Gunthorpe pts->entry = entry;
144aef5de75SJason Gunthorpe }
145aef5de75SJason Gunthorpe #define pt_install_leaf_entry x86_64_pt_install_leaf_entry
146aef5de75SJason Gunthorpe
x86_64_pt_install_table(struct pt_state * pts,pt_oaddr_t table_pa,const struct pt_write_attrs * attrs)147aef5de75SJason Gunthorpe static inline bool x86_64_pt_install_table(struct pt_state *pts,
148aef5de75SJason Gunthorpe pt_oaddr_t table_pa,
149aef5de75SJason Gunthorpe const struct pt_write_attrs *attrs)
150aef5de75SJason Gunthorpe {
151aef5de75SJason Gunthorpe u64 entry;
152aef5de75SJason Gunthorpe
153aef5de75SJason Gunthorpe entry = X86_64_FMT_P | X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A |
154aef5de75SJason Gunthorpe FIELD_PREP(X86_64_FMT_OA, log2_div(table_pa, PT_GRANULE_LG2SZ));
155aef5de75SJason Gunthorpe if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
156aef5de75SJason Gunthorpe entry = __sme_set(entry);
157aef5de75SJason Gunthorpe return pt_table_install64(pts, entry);
158aef5de75SJason Gunthorpe }
159aef5de75SJason Gunthorpe #define pt_install_table x86_64_pt_install_table
160aef5de75SJason Gunthorpe
x86_64_pt_attr_from_entry(const struct pt_state * pts,struct pt_write_attrs * attrs)161aef5de75SJason Gunthorpe static inline void x86_64_pt_attr_from_entry(const struct pt_state *pts,
162aef5de75SJason Gunthorpe struct pt_write_attrs *attrs)
163aef5de75SJason Gunthorpe {
164aef5de75SJason Gunthorpe attrs->descriptor_bits = pts->entry &
165aef5de75SJason Gunthorpe (X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A |
166aef5de75SJason Gunthorpe X86_64_FMT_D | X86_64_FMT_XD);
167aef5de75SJason Gunthorpe }
168aef5de75SJason Gunthorpe #define pt_attr_from_entry x86_64_pt_attr_from_entry
169aef5de75SJason Gunthorpe
/*
 * Highest software bit number accepted by x86_64_pt_sw_bit(). The value does
 * not depend on the table, common is unused.
 */
static inline unsigned int x86_64_pt_max_sw_bit(struct pt_common *common)
{
	enum { last_sw_bitnr = 12 };

	return last_sw_bitnr;
}
#define pt_max_sw_bit x86_64_pt_max_sw_bit
175ef7bfe5bSJason Gunthorpe
x86_64_pt_sw_bit(unsigned int bitnr)176ef7bfe5bSJason Gunthorpe static inline u64 x86_64_pt_sw_bit(unsigned int bitnr)
177ef7bfe5bSJason Gunthorpe {
1785de863efSJason Gunthorpe if (__builtin_constant_p(bitnr) && bitnr > 12)
1795de863efSJason Gunthorpe BUILD_BUG();
1805de863efSJason Gunthorpe
181ef7bfe5bSJason Gunthorpe /* Bits marked Ignored/AVL in the specification */
182ef7bfe5bSJason Gunthorpe switch (bitnr) {
183ef7bfe5bSJason Gunthorpe case 0:
184ef7bfe5bSJason Gunthorpe return BIT(9);
185ef7bfe5bSJason Gunthorpe case 1:
186ef7bfe5bSJason Gunthorpe return BIT(11);
187ef7bfe5bSJason Gunthorpe case 2 ... 12:
188ef7bfe5bSJason Gunthorpe return BIT_ULL((bitnr - 2) + 52);
189ef7bfe5bSJason Gunthorpe /* Some bits in 8,6,4,3 are available in some entries */
190ef7bfe5bSJason Gunthorpe default:
191ef7bfe5bSJason Gunthorpe PT_WARN_ON(true);
192ef7bfe5bSJason Gunthorpe return 0;
193ef7bfe5bSJason Gunthorpe }
194ef7bfe5bSJason Gunthorpe }
195ef7bfe5bSJason Gunthorpe #define pt_sw_bit x86_64_pt_sw_bit
196ef7bfe5bSJason Gunthorpe
197aef5de75SJason Gunthorpe /* --- iommu */
198aef5de75SJason Gunthorpe #include <linux/generic_pt/iommu.h>
199aef5de75SJason Gunthorpe #include <linux/iommu.h>
200aef5de75SJason Gunthorpe
201aef5de75SJason Gunthorpe #define pt_iommu_table pt_iommu_x86_64
202aef5de75SJason Gunthorpe
203aef5de75SJason Gunthorpe /* The common struct is in the per-format common struct */
common_from_iommu(struct pt_iommu * iommu_table)204aef5de75SJason Gunthorpe static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
205aef5de75SJason Gunthorpe {
206aef5de75SJason Gunthorpe return &container_of(iommu_table, struct pt_iommu_table, iommu)
207aef5de75SJason Gunthorpe ->x86_64_pt.common;
208aef5de75SJason Gunthorpe }
209aef5de75SJason Gunthorpe
iommu_from_common(struct pt_common * common)210aef5de75SJason Gunthorpe static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
211aef5de75SJason Gunthorpe {
212aef5de75SJason Gunthorpe return &container_of(common, struct pt_iommu_table, x86_64_pt.common)
213aef5de75SJason Gunthorpe ->iommu;
214aef5de75SJason Gunthorpe }
215aef5de75SJason Gunthorpe
x86_64_pt_iommu_set_prot(struct pt_common * common,struct pt_write_attrs * attrs,unsigned int iommu_prot)216aef5de75SJason Gunthorpe static inline int x86_64_pt_iommu_set_prot(struct pt_common *common,
217aef5de75SJason Gunthorpe struct pt_write_attrs *attrs,
218aef5de75SJason Gunthorpe unsigned int iommu_prot)
219aef5de75SJason Gunthorpe {
220aef5de75SJason Gunthorpe u64 pte;
221aef5de75SJason Gunthorpe
2221978fac2SJason Gunthorpe pte = X86_64_FMT_U | X86_64_FMT_A;
223aef5de75SJason Gunthorpe if (iommu_prot & IOMMU_WRITE)
2241978fac2SJason Gunthorpe pte |= X86_64_FMT_RW | X86_64_FMT_D;
225aef5de75SJason Gunthorpe
226aef5de75SJason Gunthorpe /*
227aef5de75SJason Gunthorpe * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
228aef5de75SJason Gunthorpe * control this. For now if the tables use sme_set then so do the ptes.
229aef5de75SJason Gunthorpe */
230aef5de75SJason Gunthorpe if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
231aef5de75SJason Gunthorpe pte = __sme_set(pte);
232aef5de75SJason Gunthorpe
233aef5de75SJason Gunthorpe attrs->descriptor_bits = pte;
234aef5de75SJason Gunthorpe return 0;
235aef5de75SJason Gunthorpe }
236aef5de75SJason Gunthorpe #define pt_iommu_set_prot x86_64_pt_iommu_set_prot
237aef5de75SJason Gunthorpe
238aef5de75SJason Gunthorpe static inline int
x86_64_pt_iommu_fmt_init(struct pt_iommu_x86_64 * iommu_table,const struct pt_iommu_x86_64_cfg * cfg)239aef5de75SJason Gunthorpe x86_64_pt_iommu_fmt_init(struct pt_iommu_x86_64 *iommu_table,
240aef5de75SJason Gunthorpe const struct pt_iommu_x86_64_cfg *cfg)
241aef5de75SJason Gunthorpe {
242aef5de75SJason Gunthorpe struct pt_x86_64 *table = &iommu_table->x86_64_pt;
243aef5de75SJason Gunthorpe
244*1eb0ae6fSJason Gunthorpe if (cfg->top_level < 3 || cfg->top_level > 4)
245*1eb0ae6fSJason Gunthorpe return -EOPNOTSUPP;
246aef5de75SJason Gunthorpe
247*1eb0ae6fSJason Gunthorpe pt_top_set_level(&table->common, cfg->top_level);
248aef5de75SJason Gunthorpe
249aef5de75SJason Gunthorpe table->common.max_oasz_lg2 =
250aef5de75SJason Gunthorpe min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
251aef5de75SJason Gunthorpe return 0;
252aef5de75SJason Gunthorpe }
253aef5de75SJason Gunthorpe #define pt_iommu_fmt_init x86_64_pt_iommu_fmt_init
254aef5de75SJason Gunthorpe
255aef5de75SJason Gunthorpe static inline void
x86_64_pt_iommu_fmt_hw_info(struct pt_iommu_x86_64 * table,const struct pt_range * top_range,struct pt_iommu_x86_64_hw_info * info)256aef5de75SJason Gunthorpe x86_64_pt_iommu_fmt_hw_info(struct pt_iommu_x86_64 *table,
257aef5de75SJason Gunthorpe const struct pt_range *top_range,
258aef5de75SJason Gunthorpe struct pt_iommu_x86_64_hw_info *info)
259aef5de75SJason Gunthorpe {
260aef5de75SJason Gunthorpe info->gcr3_pt = virt_to_phys(top_range->top_table);
261aef5de75SJason Gunthorpe PT_WARN_ON(info->gcr3_pt & ~PT_TOP_PHYS_MASK);
262aef5de75SJason Gunthorpe info->levels = top_range->top_level + 1;
263aef5de75SJason Gunthorpe }
264aef5de75SJason Gunthorpe #define pt_iommu_fmt_hw_info x86_64_pt_iommu_fmt_hw_info
265aef5de75SJason Gunthorpe
#if defined(GENERIC_PT_KUNIT)
/*
 * Configurations exercised by the kunit tests: 4 and 5 level tables, each
 * with and without PT_FEAT_SIGN_EXTEND.
 */
static const struct pt_iommu_x86_64_cfg x86_64_kunit_fmt_cfgs[] = {
	[0] = {
		.common.features = BIT(PT_FEAT_SIGN_EXTEND),
		.common.hw_max_vasz_lg2 = 48,
		.top_level = 3,
	},
	[1] = {
		.common.features = BIT(PT_FEAT_SIGN_EXTEND),
		.common.hw_max_vasz_lg2 = 57,
		.top_level = 4,
	},
	/* AMD IOMMU PASID 0 formats with no SIGN_EXTEND */
	[2] = {
		.common.hw_max_vasz_lg2 = 47,
		.top_level = 3,
	},
	[3] = {
		.common.hw_max_vasz_lg2 = 56,
		.top_level = 4,
	},
};
#define kunit_fmt_cfgs x86_64_kunit_fmt_cfgs
enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_SIGN_EXTEND) };
#endif
279aef5de75SJason Gunthorpe #endif
280