/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __GENERIC_PT_COMMON_H
#define __GENERIC_PT_COMMON_H

#include <linux/types.h>
#include <linux/build_bug.h>
#include <linux/bits.h>

/**
 * DOC: Generic Radix Page Table
 *
 * Generic Radix Page Table is a set of functions and helpers to efficiently
 * parse radix style page tables typically seen in HW implementations. The
 * interface is built to deliver similar code generation as the mm's pte/pmd/etc
 * system by fully inlining the exact code required to handle each table level.
 *
 * Like the mm subsystem each format contributes its parsing implementation
 * under common names and the common code implements the required algorithms.
 *
 * The system is divided into three logical levels:
 *
 * - The page table format and its manipulation functions
 * - Generic helpers to give a consistent API regardless of underlying format
 * - An algorithm implementation (e.g. IOMMU/DRM/KVM/MM)
 *
 * Multiple implementations are supported. The intention is to have the generic
 * format code be re-usable for whatever specialized implementation is required.
 * The generic code is solely about the format of the radix tree; it does not
 * include memory allocation or higher level decisions that are left for the
 * implementation.
 *
 * The generic framework supports a superset of functions across many HW
 * implementations:
 *
 * - Entries comprised of contiguous blocks of IO PTEs for larger page sizes
 * - Multi-level tables, up to 6 levels. Runtime selected top level
 * - Runtime variable table level size (ARM's concatenated tables)
 * - Expandable top level allowing dynamic sizing of table levels
 * - Optional leaf entries at any level
 * - 32-bit/64-bit virtual and output addresses, using every address bit
 * - Dirty tracking
 * - Sign extended addressing
 */

/**
 * struct pt_common - struct for all page table implementations
 *
 * Embedded as the first member of each format-specific table top struct
 * (e.g. struct pt_amdv1) so the generic code can operate on any format.
 */
struct pt_common {
	/**
	 * @top_of_table: Encodes the table top pointer and the top level in a
	 * single value. Must use READ_ONCE/WRITE_ONCE to access it. The lower
	 * bits of the aligned table pointer are used for the level.
	 */
	uintptr_t top_of_table;
	/**
	 * @max_oasz_lg2: Maximum number of bits the OA can contain. Upper bits
	 * must be zero. This may be less than what the page table format
	 * supports, but must not be more.
	 */
	u8 max_oasz_lg2;
	/**
	 * @max_vasz_lg2: Maximum number of bits the VA can contain. Upper bits
	 * are 0 or 1 depending on pt_full_va_prefix(). This may be less than
	 * what the page table format supports, but must not be more. When
	 * PT_FEAT_DYNAMIC_TOP is set this reflects the maximum VA capability.
	 */
	u8 max_vasz_lg2;
	/**
	 * @features: Bitmap of `enum pt_features`
	 */
	unsigned int features;
};

/*
 * Encoding parameters for top_of_table: the low PT_TOP_LEVEL_BITS bits of the
 * (aligned) table pointer hold the top level.
 */
enum {
	PT_TOP_LEVEL_BITS = 3,
	PT_TOP_LEVEL_MASK = GENMASK(PT_TOP_LEVEL_BITS - 1, 0),
};

/**
 * enum pt_features - Features turned on in the table. Each symbol is a bit
 * position.
 */
enum pt_features {
	/**
	 * @PT_FEAT_DMA_INCOHERENT: Cache flush page table memory before
	 * assuming the HW can read it. Otherwise a SMP release is sufficient
	 * for HW to read it.
	 */
	PT_FEAT_DMA_INCOHERENT,
	/**
	 * @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to
	 * PT_VADDR_MAX.
	 */
	PT_FEAT_FULL_VA,
	/**
	 * @PT_FEAT_DYNAMIC_TOP: The table's top level can be increased
	 * dynamically during map. This requires HW support for atomically
	 * setting both the table top pointer and the starting table level.
	 */
	PT_FEAT_DYNAMIC_TOP,
	/**
	 * @PT_FEAT_SIGN_EXTEND: The top most bit of the valid VA range sign
	 * extends up to the full pt_vaddr_t. This divides the page table into
	 * three VA ranges::
	 *
	 *   0 -> 2^N - 1             Lower
	 *   2^N -> (MAX - 2^N - 1)   Non-Canonical
	 *   MAX - 2^N -> MAX         Upper
	 *
	 * In this mode pt_common::max_vasz_lg2 includes the sign bit and the
	 * upper bits that don't fall within the translation are just validated.
	 *
	 * If not set there is no sign extension and valid VA goes from 0 to 2^N
	 * - 1.
	 */
	PT_FEAT_SIGN_EXTEND,
	/**
	 * @PT_FEAT_FLUSH_RANGE: IOTLB maintenance is done by flushing IOVA
	 * ranges which will clean out any walk cache or any IOPTE fully
	 * contained by the range. The optimization objective is to minimize the
	 * number of flushes even if ranges include IOVA gaps that do not need
	 * to be flushed.
	 */
	PT_FEAT_FLUSH_RANGE,
	/**
	 * @PT_FEAT_FLUSH_RANGE_NO_GAPS: Like PT_FEAT_FLUSH_RANGE except that
	 * the optimization objective is to only flush IOVA that has been
	 * changed. This mode is suitable for cases like hypervisor shadowing
	 * where flushing unchanged ranges may cause the hypervisor to reparse
	 * significant amount of page table.
	 */
	PT_FEAT_FLUSH_RANGE_NO_GAPS,
	/* private: Format-specific feature bits are allocated from here up */
	PT_FEAT_FMT_START,
};

/* Table top for the AMD v1 page table format */
struct pt_amdv1 {
	struct pt_common common;
};

/* Format-specific features for the AMD v1 format */
enum {
	/*
	 * The memory backing the tables is encrypted. Use __sme_set() to adjust
	 * the page table pointers in the tree. This only works with
	 * CONFIG_AMD_MEM_ENCRYPT.
	 */
	PT_FEAT_AMDV1_ENCRYPT_TABLES = PT_FEAT_FMT_START,
	/*
	 * The PTEs are set to prevent cache incoherent traffic, such as PCI no
	 * snoop. This is set either at creation time or before the first map
	 * operation.
	 */
	PT_FEAT_AMDV1_FORCE_COHERENCE,
};

/* Table top for the VT-d second-stage page table format */
struct pt_vtdss {
	struct pt_common common;
};

/* Format-specific features for the VT-d second-stage format */
enum {
	/*
	 * The PTEs are set to prevent cache incoherent traffic, such as PCI no
	 * snoop. This is set either at creation time or before the first map
	 * operation.
	 */
	PT_FEAT_VTDSS_FORCE_COHERENCE = PT_FEAT_FMT_START,
	/*
	 * Prevent creating read-only PTEs. Used to work around HW errata
	 * ERRATA_772415_SPR17.
	 */
	PT_FEAT_VTDSS_FORCE_WRITEABLE,
};

/* Table top for the x86-64 page table format */
struct pt_x86_64 {
	struct pt_common common;
};

/* Format-specific features for the x86-64 format */
enum {
	/*
	 * The memory backing the tables is encrypted. Use __sme_set() to adjust
	 * the page table pointers in the tree. This only works with
	 * CONFIG_AMD_MEM_ENCRYPT.
	 */
	PT_FEAT_X86_64_AMD_ENCRYPT_TABLES = PT_FEAT_FMT_START,
};

#endif