1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES 4 * 5 * This header is included before the format. It contains definitions 6 * that are required to compile the format. The header order is: 7 * pt_defs.h 8 * fmt_XX.h 9 * pt_common.h 10 */ 11 #ifndef __GENERIC_PT_DEFS_H 12 #define __GENERIC_PT_DEFS_H 13 14 #include <linux/generic_pt/common.h> 15 16 #include <linux/types.h> 17 #include <linux/atomic.h> 18 #include <linux/bits.h> 19 #include <linux/limits.h> 20 #include <linux/bug.h> 21 #include <linux/kconfig.h> 22 #include "pt_log2.h" 23 24 /* Header self-compile default defines */ 25 #ifndef pt_write_attrs 26 typedef u64 pt_vaddr_t; 27 typedef u64 pt_oaddr_t; 28 #endif 29 30 struct pt_table_p; 31 32 enum { 33 PT_VADDR_MAX = sizeof(pt_vaddr_t) == 8 ? U64_MAX : U32_MAX, 34 PT_VADDR_MAX_LG2 = sizeof(pt_vaddr_t) == 8 ? 64 : 32, 35 PT_OADDR_MAX = sizeof(pt_oaddr_t) == 8 ? U64_MAX : U32_MAX, 36 PT_OADDR_MAX_LG2 = sizeof(pt_oaddr_t) == 8 ? 64 : 32, 37 }; 38 39 /* 40 * The format instantiation can have features wired off or on to optimize the 41 * code gen. Supported features are just a reflection of what the current set of 42 * kernel users want to use. 43 */ 44 #ifndef PT_SUPPORTED_FEATURES 45 #define PT_SUPPORTED_FEATURES 0 46 #endif 47 48 /* 49 * When in debug mode we compile all formats with all features. This allows the 50 * kunit to test the full matrix. SIGN_EXTEND can't co-exist with DYNAMIC_TOP or 51 * FULL_VA. DMA_INCOHERENT requires a SW bit that not all formats have 52 */ 53 #if IS_ENABLED(CONFIG_DEBUG_GENERIC_PT) 54 enum { 55 PT_ORIG_SUPPORTED_FEATURES = PT_SUPPORTED_FEATURES, 56 PT_DEBUG_SUPPORTED_FEATURES = 57 UINT_MAX & 58 ~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_DMA_INCOHERENT) ? 59 0 : 60 BIT(PT_FEAT_DMA_INCOHERENT))) & 61 ~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_SIGN_EXTEND)) ? 62 BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_FULL_VA) : 63 BIT(PT_FEAT_SIGN_EXTEND)), 64 }; 65 #undef PT_SUPPORTED_FEATURES 66 #define PT_SUPPORTED_FEATURES PT_DEBUG_SUPPORTED_FEATURES 67 #endif 68 69 #ifndef PT_FORCE_ENABLED_FEATURES 70 #define PT_FORCE_ENABLED_FEATURES 0 71 #endif 72 73 /** 74 * DOC: Generic Page Table Language 75 * 76 * Language used in Generic Page Table 77 * VA 78 * The input address to the page table, often the virtual address. 79 * OA 80 * The output address from the page table, often the physical address. 81 * leaf 82 * An entry that results in an output address. 83 * start/end 84 * An half-open range, e.g. [0,0) refers to no VA. 85 * start/last 86 * An inclusive closed range, e.g. [0,0] refers to the VA 0 87 * common 88 * The generic page table container struct pt_common 89 * level 90 * Level 0 is always a table of only leaves with no futher table pointers. 91 * Increasing levels increase the size of the table items. The least 92 * significant VA bits used to index page tables are used to index the Level 93 * 0 table. The various labels for table levels used by HW descriptions are 94 * not used. 95 * top_level 96 * The inclusive highest level of the table. A two-level table 97 * has a top level of 1. 98 * table 99 * A linear array of translation items for that level. 100 * index 101 * The position in a table of an element: item = table[index] 102 * item 103 * A single index in a table 104 * entry 105 * A single logical element in a table. If contiguous pages are not 106 * supported then item and entry are the same thing, otherwise entry refers 107 * to all the items that comprise a single contiguous translation. 108 * item/entry_size 109 * The number of bytes of VA the table index translates for. 110 * If the item is a table entry then the next table covers 111 * this size. If the entry translates to an output address then the 112 * full OA is: OA | (VA % entry_size) 113 * contig_count 114 * The number of consecutive items fused into a single entry. 115 * item_size * contig_count is the size of that entry's translation. 116 * lg2 117 * Indicates the value is encoded as log2, i.e. 1<<x is the actual value. 118 * Normally the compiler is fine to optimize divide and mod with log2 values 119 * automatically when inlining, however if the values are not constant 120 * expressions it can't. So we do it by hand; we want to avoid 64-bit 121 * divmod. 122 */ 123 124 /* Returned by pt_load_entry() and for_each_pt_level_entry() */ 125 enum pt_entry_type { 126 PT_ENTRY_EMPTY, 127 /* Entry is valid and points to a lower table level */ 128 PT_ENTRY_TABLE, 129 /* Entry is valid and returns an output address */ 130 PT_ENTRY_OA, 131 }; 132 133 struct pt_range { 134 struct pt_common *common; 135 struct pt_table_p *top_table; 136 pt_vaddr_t va; 137 pt_vaddr_t last_va; 138 u8 top_level; 139 u8 max_vasz_lg2; 140 }; 141 142 /* 143 * Similar to xa_state, this records information about an in-progress parse at a 144 * single level. 145 */ 146 struct pt_state { 147 struct pt_range *range; 148 struct pt_table_p *table; 149 struct pt_table_p *table_lower; 150 u64 entry; 151 enum pt_entry_type type; 152 unsigned short index; 153 unsigned short end_index; 154 u8 level; 155 }; 156 157 #define pt_cur_table(pts, type) ((type *)((pts)->table)) 158 159 /* 160 * Try to install a new table pointer. The locking methodology requires this to 161 * be atomic (multiple threads can race to install a pointer). The losing 162 * threads will fail the atomic and return false. They should free any memory 163 * and reparse the table level again. 164 */ 165 #if !IS_ENABLED(CONFIG_GENERIC_ATOMIC64) 166 static inline bool pt_table_install64(struct pt_state *pts, u64 table_entry) 167 { 168 u64 *entryp = pt_cur_table(pts, u64) + pts->index; 169 u64 old_entry = pts->entry; 170 bool ret; 171 172 /* 173 * Ensure the zero'd table content itself is visible before its PTE can 174 * be. release is a NOP on !SMP, but the HW is still doing an acquire. 175 */ 176 if (!IS_ENABLED(CONFIG_SMP)) 177 dma_wmb(); 178 ret = try_cmpxchg64_release(entryp, &old_entry, table_entry); 179 if (ret) 180 pts->entry = table_entry; 181 return ret; 182 } 183 #endif 184 185 static inline bool pt_table_install32(struct pt_state *pts, u32 table_entry) 186 { 187 u32 *entryp = pt_cur_table(pts, u32) + pts->index; 188 u32 old_entry = pts->entry; 189 bool ret; 190 191 /* 192 * Ensure the zero'd table content itself is visible before its PTE can 193 * be. release is a NOP on !SMP, but the HW is still doing an acquire. 194 */ 195 if (!IS_ENABLED(CONFIG_SMP)) 196 dma_wmb(); 197 ret = try_cmpxchg_release(entryp, &old_entry, table_entry); 198 if (ret) 199 pts->entry = table_entry; 200 return ret; 201 } 202 203 #define PT_SUPPORTED_FEATURE(feature_nr) (PT_SUPPORTED_FEATURES & BIT(feature_nr)) 204 205 static __always_inline bool pt_feature(const struct pt_common *common, 206 unsigned int feature_nr) 207 { 208 if (PT_FORCE_ENABLED_FEATURES & BIT(feature_nr)) 209 return true; 210 if (!PT_SUPPORTED_FEATURE(feature_nr)) 211 return false; 212 return common->features & BIT(feature_nr); 213 } 214 215 static __always_inline bool pts_feature(const struct pt_state *pts, 216 unsigned int feature_nr) 217 { 218 return pt_feature(pts->range->common, feature_nr); 219 } 220 221 /* 222 * PT_WARN_ON is used for invariants that the kunit should be checking can't 223 * happen. 224 */ 225 #if IS_ENABLED(CONFIG_DEBUG_GENERIC_PT) 226 #define PT_WARN_ON WARN_ON 227 #else 228 static inline bool PT_WARN_ON(bool condition) 229 { 230 return false; 231 } 232 #endif 233 234 /* These all work on the VA type */ 235 #define log2_to_int(a_lg2) log2_to_int_t(pt_vaddr_t, a_lg2) 236 #define log2_to_max_int(a_lg2) log2_to_max_int_t(pt_vaddr_t, a_lg2) 237 #define log2_div(a, b_lg2) log2_div_t(pt_vaddr_t, a, b_lg2) 238 #define log2_div_eq(a, b, c_lg2) log2_div_eq_t(pt_vaddr_t, a, b, c_lg2) 239 #define log2_mod(a, b_lg2) log2_mod_t(pt_vaddr_t, a, b_lg2) 240 #define log2_mod_eq_max(a, b_lg2) log2_mod_eq_max_t(pt_vaddr_t, a, b_lg2) 241 #define log2_set_mod(a, val, b_lg2) log2_set_mod_t(pt_vaddr_t, a, val, b_lg2) 242 #define log2_set_mod_max(a, b_lg2) log2_set_mod_max_t(pt_vaddr_t, a, b_lg2) 243 #define log2_mul(a, b_lg2) log2_mul_t(pt_vaddr_t, a, b_lg2) 244 #define vaffs(a) ffs_t(pt_vaddr_t, a) 245 #define vafls(a) fls_t(pt_vaddr_t, a) 246 #define vaffz(a) ffz_t(pt_vaddr_t, a) 247 248 /* 249 * The full VA (fva) versions permit the lg2 value to be == PT_VADDR_MAX_LG2 and 250 * generate a useful defined result. The non-fva versions will malfunction at 251 * this extreme. 252 */ 253 static inline pt_vaddr_t fvalog2_div(pt_vaddr_t a, unsigned int b_lg2) 254 { 255 if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && b_lg2 == PT_VADDR_MAX_LG2) 256 return 0; 257 return log2_div_t(pt_vaddr_t, a, b_lg2); 258 } 259 260 static inline pt_vaddr_t fvalog2_mod(pt_vaddr_t a, unsigned int b_lg2) 261 { 262 if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && b_lg2 == PT_VADDR_MAX_LG2) 263 return a; 264 return log2_mod_t(pt_vaddr_t, a, b_lg2); 265 } 266 267 static inline bool fvalog2_div_eq(pt_vaddr_t a, pt_vaddr_t b, 268 unsigned int c_lg2) 269 { 270 if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && c_lg2 == PT_VADDR_MAX_LG2) 271 return true; 272 return log2_div_eq_t(pt_vaddr_t, a, b, c_lg2); 273 } 274 275 static inline pt_vaddr_t fvalog2_set_mod(pt_vaddr_t a, pt_vaddr_t val, 276 unsigned int b_lg2) 277 { 278 if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && b_lg2 == PT_VADDR_MAX_LG2) 279 return val; 280 return log2_set_mod_t(pt_vaddr_t, a, val, b_lg2); 281 } 282 283 static inline pt_vaddr_t fvalog2_set_mod_max(pt_vaddr_t a, unsigned int b_lg2) 284 { 285 if (PT_SUPPORTED_FEATURE(PT_FEAT_FULL_VA) && b_lg2 == PT_VADDR_MAX_LG2) 286 return PT_VADDR_MAX; 287 return log2_set_mod_max_t(pt_vaddr_t, a, b_lg2); 288 } 289 290 /* These all work on the OA type */ 291 #define oalog2_to_int(a_lg2) log2_to_int_t(pt_oaddr_t, a_lg2) 292 #define oalog2_to_max_int(a_lg2) log2_to_max_int_t(pt_oaddr_t, a_lg2) 293 #define oalog2_div(a, b_lg2) log2_div_t(pt_oaddr_t, a, b_lg2) 294 #define oalog2_div_eq(a, b, c_lg2) log2_div_eq_t(pt_oaddr_t, a, b, c_lg2) 295 #define oalog2_mod(a, b_lg2) log2_mod_t(pt_oaddr_t, a, b_lg2) 296 #define oalog2_mod_eq_max(a, b_lg2) log2_mod_eq_max_t(pt_oaddr_t, a, b_lg2) 297 #define oalog2_set_mod(a, val, b_lg2) log2_set_mod_t(pt_oaddr_t, a, val, b_lg2) 298 #define oalog2_set_mod_max(a, b_lg2) log2_set_mod_max_t(pt_oaddr_t, a, b_lg2) 299 #define oalog2_mul(a, b_lg2) log2_mul_t(pt_oaddr_t, a, b_lg2) 300 #define oaffs(a) ffs_t(pt_oaddr_t, a) 301 #define oafls(a) fls_t(pt_oaddr_t, a) 302 #define oaffz(a) ffz_t(pt_oaddr_t, a) 303 304 static inline uintptr_t _pt_top_set(struct pt_table_p *table_mem, 305 unsigned int top_level) 306 { 307 return top_level | (uintptr_t)table_mem; 308 } 309 310 static inline void pt_top_set(struct pt_common *common, 311 struct pt_table_p *table_mem, 312 unsigned int top_level) 313 { 314 WRITE_ONCE(common->top_of_table, _pt_top_set(table_mem, top_level)); 315 } 316 317 static inline void pt_top_set_level(struct pt_common *common, 318 unsigned int top_level) 319 { 320 pt_top_set(common, NULL, top_level); 321 } 322 323 static inline unsigned int pt_top_get_level(const struct pt_common *common) 324 { 325 return READ_ONCE(common->top_of_table) % (1 << PT_TOP_LEVEL_BITS); 326 } 327 328 static inline bool pt_check_install_leaf_args(struct pt_state *pts, 329 pt_oaddr_t oa, 330 unsigned int oasz_lg2); 331 332 #endif 333