/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __GENERIC_PT_COMMON_H
#define __GENERIC_PT_COMMON_H

#include <linux/types.h>
#include <linux/build_bug.h>
#include <linux/bits.h>

/**
 * DOC: Generic Radix Page Table
 *
 * Generic Radix Page Table is a set of functions and helpers to efficiently
 * parse radix-style page tables typically seen in HW implementations. The
 * interface is built to deliver similar code generation as the mm's pte/pmd/etc
 * system by fully inlining the exact code required to handle each table level.
 *
 * Like the mm subsystem, each format contributes its parsing implementation
 * under common names and the common code implements the required algorithms
 * (see the sketch after this comment).
 *
 * The system is divided into three logical levels:
 *
 *  - The page table format and its manipulation functions
 *  - Generic helpers to give a consistent API regardless of underlying format
 *  - An algorithm implementation (e.g. IOMMU/DRM/KVM/MM)
 *
 * Multiple implementations are supported. The intention is to have the generic
 * format code be reusable for whatever specialized implementation is required.
 * The generic code is solely about the format of the radix tree; memory
 * allocation and higher-level decisions are left to the implementation.
 *
 * The generic framework supports a superset of functions across many HW
 * implementations:
 *
 *  - Entries made up of contiguous blocks of IO PTEs for larger page sizes
 *  - Multi-level tables, up to 6 levels, with a runtime-selected top level
 *  - Runtime-variable table level size (ARM's concatenated tables)
 *  - Expandable top level allowing dynamic sizing of table levels
 *  - Optional leaf entries at any level
 *  - 32-bit/64-bit virtual and output addresses, using every address bit
 *  - Dirty tracking
 *  - Sign-extended addressing
 */
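
/*
 * Illustrative sketch of the "common names" idea described above. Everything
 * here is hypothetical and not part of this header: a format supplies small
 * inline helpers under agreed-upon names, the shared algorithm source is
 * compiled once per format, and each call then inlines down to the exact
 * per-level code, much like the mm's pte/pmd/etc helpers.
 */
static inline unsigned int example_fmt_item_lg2sz(unsigned int level)
{
	/* e.g. a format with 4k leaves and 512 entries per level: 12, 21, 30 */
	return 12 + level * 9;
}

static inline u64 example_pt_entry_size(unsigned int level)
{
	/* Generic code calls the format's helper without knowing the format */
	return 1ULL << example_fmt_item_lg2sz(level);
}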

/**
 * struct pt_common - struct for all page table implementations
 */
struct pt_common {
	/**
	 * @top_of_table: Encodes the table top pointer and the top level in a
	 * single value. Must use READ_ONCE/WRITE_ONCE to access it. The lower
	 * bits of the aligned table pointer are used for the level (see the
	 * packing sketch below, after the PT_TOP_LEVEL_* constants).
	 */
	uintptr_t top_of_table;
	/**
	 * @max_oasz_lg2: Maximum number of bits the OA (output address) can
	 * contain. Upper bits must be zero. This may be less than what the
	 * page table format supports, but must not be more.
	 */
	u8 max_oasz_lg2;
	/**
	 * @max_vasz_lg2: Maximum number of bits the VA (virtual address) can
	 * contain. Upper bits are 0 or 1 depending on pt_full_va_prefix().
	 * This may be less than what the page table format supports, but must
	 * not be more. When PT_FEAT_DYNAMIC_TOP is set this reflects the
	 * maximum VA capability.
	 */
	u8 max_vasz_lg2;
	/**
	 * @features: Bitmap of `enum pt_features`, tested bit by bit (see the
	 * sketch after this struct)
	 */
	unsigned int features;
};
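
/*
 * Illustrative sketch only (the helper name is hypothetical): since each
 * enum pt_features value is a bit position, testing whether a feature is
 * enabled is a plain bitmap check on pt_common::features.
 */
static inline bool example_pt_has_feature(const struct pt_common *common,
					  unsigned int feature_nr)
{
	return common->features & BIT(feature_nr);
}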

/* Encoding parameters for top_of_table */
enum {
	PT_TOP_LEVEL_BITS = 3,
	PT_TOP_LEVEL_MASK = GENMASK(PT_TOP_LEVEL_BITS - 1, 0),
};
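
/*
 * Illustrative sketch only (hypothetical helpers, not this header's API):
 * because table memory is aligned to at least 2^PT_TOP_LEVEL_BITS bytes, the
 * low bits of the pointer are free to carry the top level. Packing both into
 * a single uintptr_t is what lets one WRITE_ONCE atomically publish a new
 * top pointer together with its level, as PT_FEAT_DYNAMIC_TOP requires.
 */
static inline uintptr_t example_pt_top_pack(void *table, unsigned int level)
{
	return (uintptr_t)table | (level & PT_TOP_LEVEL_MASK);
}

static inline void *example_pt_top_table(uintptr_t top_of_table)
{
	return (void *)(top_of_table & ~(uintptr_t)PT_TOP_LEVEL_MASK);
}

static inline unsigned int example_pt_top_level(uintptr_t top_of_table)
{
	return top_of_table & PT_TOP_LEVEL_MASK;
}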

/**
 * enum pt_features - Features turned on in the table. Each symbol is a bit
 * position.
 */
enum pt_features {
	/**
	 * @PT_FEAT_DMA_INCOHERENT: Cache flush page table memory before
	 * assuming the HW can read it. Otherwise an SMP release is sufficient
	 * for HW to read it.
	 */
	PT_FEAT_DMA_INCOHERENT,
	/**
	 * @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to
	 * PT_VADDR_MAX.
	 */
	PT_FEAT_FULL_VA,
	/**
	 * @PT_FEAT_DYNAMIC_TOP: The table's top level can be increased
	 * dynamically during map. This requires HW support for atomically
	 * setting both the table top pointer and the starting table level.
	 */
	PT_FEAT_DYNAMIC_TOP,
	/**
	 * @PT_FEAT_SIGN_EXTEND: The topmost bit of the valid VA range sign
	 * extends up to the full pt_vaddr_t. This divides the page table into
	 * three VA ranges::
	 *
	 *   0             -> 2^N - 1        Lower
	 *   2^N           -> MAX - 2^N      Non-Canonical
	 *   MAX - 2^N + 1 -> MAX            Upper
	 *
	 * In this mode pt_common::max_vasz_lg2 includes the sign bit and the
	 * upper bits that don't fall within the translation are just validated
	 * (see the sketch after this enum).
	 *
	 * If not set there is no sign extension and valid VA goes from 0 to
	 * 2^N - 1.
	 */
	PT_FEAT_SIGN_EXTEND,
	/**
	 * @PT_FEAT_FLUSH_RANGE: IOTLB maintenance is done by flushing IOVA
	 * ranges, which will clean out any walk cache or any IOPTE fully
	 * contained by the range. The optimization objective is to minimize
	 * the number of flushes, even if the ranges include IOVA gaps that do
	 * not need to be flushed.
	 */
	PT_FEAT_FLUSH_RANGE,
	/**
	 * @PT_FEAT_FLUSH_RANGE_NO_GAPS: Like PT_FEAT_FLUSH_RANGE except that
	 * the optimization objective is to only flush IOVA that has been
	 * changed. This mode is suitable for cases like hypervisor shadowing,
	 * where flushing unchanged ranges may cause the hypervisor to reparse
	 * a significant amount of page table.
	 */
	PT_FEAT_FLUSH_RANGE_NO_GAPS,
	/* private: */
	PT_FEAT_FMT_START,
};
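
/*
 * Illustrative sketch only of the PT_FEAT_SIGN_EXTEND validation (the helper
 * is hypothetical and u64 stands in for pt_vaddr_t): a VA is canonical when
 * every bit above the sign bit equals the sign bit, which can be checked by
 * sign-extending from bit (max_vasz_lg2 - 1) and comparing the result.
 */
static inline bool example_pt_va_is_canonical(u64 va, unsigned int max_vasz_lg2)
{
	unsigned int shift = 64 - max_vasz_lg2;

	/* Move the sign bit to bit 63, then arithmetic-shift it back down */
	return (u64)(((s64)(va << shift)) >> shift) == va;
}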

struct pt_amdv1 {
	struct pt_common common;
};

enum {
	/*
	 * The memory backing the tables is encrypted. Use __sme_set() to
	 * adjust the page table pointers in the tree. This only works with
	 * CONFIG_AMD_MEM_ENCRYPT.
	 */
	PT_FEAT_AMDV1_ENCRYPT_TABLES = PT_FEAT_FMT_START,
	/*
	 * The PTEs are set to prevent cache-incoherent traffic, such as PCI
	 * no-snoop. This is set either at creation time or before the first
	 * map operation.
	 */
	PT_FEAT_AMDV1_FORCE_COHERENCE,
};

struct pt_vtdss {
	struct pt_common common;
};

enum {
	/*
	 * The PTEs are set to prevent cache-incoherent traffic, such as PCI
	 * no-snoop. This is set either at creation time or before the first
	 * map operation.
	 */
	PT_FEAT_VTDSS_FORCE_COHERENCE = PT_FEAT_FMT_START,
	/*
	 * Prevent creating read-only PTEs. Used to work around the HW erratum
	 * ERRATA_772415_SPR17.
	 */
	PT_FEAT_VTDSS_FORCE_WRITEABLE,
};

struct pt_x86_64 {
	struct pt_common common;
};

enum {
	/*
	 * The memory backing the tables is encrypted. Use __sme_set() to
	 * adjust the page table pointers in the tree. This only works with
	 * CONFIG_AMD_MEM_ENCRYPT.
	 */
	PT_FEAT_X86_64_AMD_ENCRYPT_TABLES = PT_FEAT_FMT_START,
};

#endif