1aef5de75SJason Gunthorpe /* SPDX-License-Identifier: GPL-2.0-only */ 2aef5de75SJason Gunthorpe /* 3aef5de75SJason Gunthorpe * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES 4aef5de75SJason Gunthorpe * 5aef5de75SJason Gunthorpe * x86 page table. Supports the 4 and 5 level variations. 6aef5de75SJason Gunthorpe * 7aef5de75SJason Gunthorpe * The 4 and 5 level version is described in: 8aef5de75SJason Gunthorpe * Section "4.4 4-Level Paging and 5-Level Paging" of the Intel Software 9aef5de75SJason Gunthorpe * Developer's Manual Volume 3 10aef5de75SJason Gunthorpe * 11aef5de75SJason Gunthorpe * Section "9.7 First-Stage Paging Entries" of the "Intel Virtualization 12aef5de75SJason Gunthorpe * Technology for Directed I/O Architecture Specification" 13aef5de75SJason Gunthorpe * 14aef5de75SJason Gunthorpe * Section "2.2.6 I/O Page Tables for Guest Translations" of the "AMD I/O 15aef5de75SJason Gunthorpe * Virtualization Technology (IOMMU) Specification" 16aef5de75SJason Gunthorpe * 17aef5de75SJason Gunthorpe * It is used by x86 CPUs, AMD and VT-d IOMMU HW. 18aef5de75SJason Gunthorpe * 19aef5de75SJason Gunthorpe * Note the 3 level format is very similar and almost implemented here. The 20aef5de75SJason Gunthorpe * reserved/ignored layout is different and there are functional bit 21aef5de75SJason Gunthorpe * differences. 22aef5de75SJason Gunthorpe * 23aef5de75SJason Gunthorpe * This format uses PT_FEAT_SIGN_EXTEND to have a upper/non-canonical/lower 24aef5de75SJason Gunthorpe * split. PT_FEAT_SIGN_EXTEND is optional as AMD IOMMU sometimes uses non-sign 25aef5de75SJason Gunthorpe * extended addressing with this page table format. 26aef5de75SJason Gunthorpe * 27aef5de75SJason Gunthorpe * The named levels in the spec map to the pts->level as: 28aef5de75SJason Gunthorpe * Table/PTE - 0 29aef5de75SJason Gunthorpe * Directory/PDE - 1 30aef5de75SJason Gunthorpe * Directory Ptr/PDPTE - 2 31aef5de75SJason Gunthorpe * PML4/PML4E - 3 32aef5de75SJason Gunthorpe * PML5/PML5E - 4 33aef5de75SJason Gunthorpe */ 34aef5de75SJason Gunthorpe #ifndef __GENERIC_PT_FMT_X86_64_H 35aef5de75SJason Gunthorpe #define __GENERIC_PT_FMT_X86_64_H 36aef5de75SJason Gunthorpe 37aef5de75SJason Gunthorpe #include "defs_x86_64.h" 38aef5de75SJason Gunthorpe #include "../pt_defs.h" 39aef5de75SJason Gunthorpe 40aef5de75SJason Gunthorpe #include <linux/bitfield.h> 41aef5de75SJason Gunthorpe #include <linux/container_of.h> 42aef5de75SJason Gunthorpe #include <linux/log2.h> 43aef5de75SJason Gunthorpe #include <linux/mem_encrypt.h> 44aef5de75SJason Gunthorpe 45aef5de75SJason Gunthorpe enum { 46aef5de75SJason Gunthorpe PT_MAX_OUTPUT_ADDRESS_LG2 = 52, 47aef5de75SJason Gunthorpe PT_MAX_VA_ADDRESS_LG2 = 57, 48aef5de75SJason Gunthorpe PT_ITEM_WORD_SIZE = sizeof(u64), 49aef5de75SJason Gunthorpe PT_MAX_TOP_LEVEL = 4, 50aef5de75SJason Gunthorpe PT_GRANULE_LG2SZ = 12, 51aef5de75SJason Gunthorpe PT_TABLEMEM_LG2SZ = 12, 52aef5de75SJason Gunthorpe 53aef5de75SJason Gunthorpe /* 54aef5de75SJason Gunthorpe * For AMD the GCR3 Base only has these bits. For VT-d FSPTPTR is 4k 55aef5de75SJason Gunthorpe * aligned and is limited by the architected HAW 56aef5de75SJason Gunthorpe */ 57aef5de75SJason Gunthorpe PT_TOP_PHYS_MASK = GENMASK_ULL(51, 12), 58aef5de75SJason Gunthorpe }; 59aef5de75SJason Gunthorpe 60aef5de75SJason Gunthorpe /* Shared descriptor bits */ 61aef5de75SJason Gunthorpe enum { 62aef5de75SJason Gunthorpe X86_64_FMT_P = BIT(0), 63aef5de75SJason Gunthorpe X86_64_FMT_RW = BIT(1), 64aef5de75SJason Gunthorpe X86_64_FMT_U = BIT(2), 65aef5de75SJason Gunthorpe X86_64_FMT_A = BIT(5), 66aef5de75SJason Gunthorpe X86_64_FMT_D = BIT(6), 67aef5de75SJason Gunthorpe X86_64_FMT_OA = GENMASK_ULL(51, 12), 68aef5de75SJason Gunthorpe X86_64_FMT_XD = BIT_ULL(63), 69aef5de75SJason Gunthorpe }; 70aef5de75SJason Gunthorpe 71aef5de75SJason Gunthorpe /* PDPTE/PDE */ 72aef5de75SJason Gunthorpe enum { 73aef5de75SJason Gunthorpe X86_64_FMT_PS = BIT(7), 74aef5de75SJason Gunthorpe }; 75aef5de75SJason Gunthorpe 76aef5de75SJason Gunthorpe static inline pt_oaddr_t x86_64_pt_table_pa(const struct pt_state *pts) 77aef5de75SJason Gunthorpe { 78aef5de75SJason Gunthorpe u64 entry = pts->entry; 79aef5de75SJason Gunthorpe 80aef5de75SJason Gunthorpe if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES)) 81aef5de75SJason Gunthorpe entry = __sme_clr(entry); 82aef5de75SJason Gunthorpe return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry), 83aef5de75SJason Gunthorpe PT_TABLEMEM_LG2SZ); 84aef5de75SJason Gunthorpe } 85aef5de75SJason Gunthorpe #define pt_table_pa x86_64_pt_table_pa 86aef5de75SJason Gunthorpe 87aef5de75SJason Gunthorpe static inline pt_oaddr_t x86_64_pt_entry_oa(const struct pt_state *pts) 88aef5de75SJason Gunthorpe { 89aef5de75SJason Gunthorpe u64 entry = pts->entry; 90aef5de75SJason Gunthorpe 91aef5de75SJason Gunthorpe if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES)) 92aef5de75SJason Gunthorpe entry = __sme_clr(entry); 93aef5de75SJason Gunthorpe return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry), 94aef5de75SJason Gunthorpe PT_GRANULE_LG2SZ); 95aef5de75SJason Gunthorpe } 96aef5de75SJason Gunthorpe #define pt_entry_oa x86_64_pt_entry_oa 97aef5de75SJason Gunthorpe 98aef5de75SJason Gunthorpe static inline bool x86_64_pt_can_have_leaf(const struct pt_state *pts) 99aef5de75SJason Gunthorpe { 100aef5de75SJason Gunthorpe return pts->level <= 2; 101aef5de75SJason Gunthorpe } 102aef5de75SJason Gunthorpe #define pt_can_have_leaf x86_64_pt_can_have_leaf 103aef5de75SJason Gunthorpe 104aef5de75SJason Gunthorpe static inline unsigned int x86_64_pt_num_items_lg2(const struct pt_state *pts) 105aef5de75SJason Gunthorpe { 106aef5de75SJason Gunthorpe return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64)); 107aef5de75SJason Gunthorpe } 108aef5de75SJason Gunthorpe #define pt_num_items_lg2 x86_64_pt_num_items_lg2 109aef5de75SJason Gunthorpe 110aef5de75SJason Gunthorpe static inline enum pt_entry_type x86_64_pt_load_entry_raw(struct pt_state *pts) 111aef5de75SJason Gunthorpe { 112aef5de75SJason Gunthorpe const u64 *tablep = pt_cur_table(pts, u64); 113aef5de75SJason Gunthorpe u64 entry; 114aef5de75SJason Gunthorpe 115aef5de75SJason Gunthorpe pts->entry = entry = READ_ONCE(tablep[pts->index]); 116aef5de75SJason Gunthorpe if (!(entry & X86_64_FMT_P)) 117aef5de75SJason Gunthorpe return PT_ENTRY_EMPTY; 118aef5de75SJason Gunthorpe if (pts->level == 0 || 119aef5de75SJason Gunthorpe (x86_64_pt_can_have_leaf(pts) && (entry & X86_64_FMT_PS))) 120aef5de75SJason Gunthorpe return PT_ENTRY_OA; 121aef5de75SJason Gunthorpe return PT_ENTRY_TABLE; 122aef5de75SJason Gunthorpe } 123aef5de75SJason Gunthorpe #define pt_load_entry_raw x86_64_pt_load_entry_raw 124aef5de75SJason Gunthorpe 125aef5de75SJason Gunthorpe static inline void 126aef5de75SJason Gunthorpe x86_64_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa, 127aef5de75SJason Gunthorpe unsigned int oasz_lg2, 128aef5de75SJason Gunthorpe const struct pt_write_attrs *attrs) 129aef5de75SJason Gunthorpe { 130aef5de75SJason Gunthorpe u64 *tablep = pt_cur_table(pts, u64); 131aef5de75SJason Gunthorpe u64 entry; 132aef5de75SJason Gunthorpe 133aef5de75SJason Gunthorpe if (!pt_check_install_leaf_args(pts, oa, oasz_lg2)) 134aef5de75SJason Gunthorpe return; 135aef5de75SJason Gunthorpe 136aef5de75SJason Gunthorpe entry = X86_64_FMT_P | 137aef5de75SJason Gunthorpe FIELD_PREP(X86_64_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) | 138aef5de75SJason Gunthorpe attrs->descriptor_bits; 139aef5de75SJason Gunthorpe if (pts->level != 0) 140aef5de75SJason Gunthorpe entry |= X86_64_FMT_PS; 141aef5de75SJason Gunthorpe 142aef5de75SJason Gunthorpe WRITE_ONCE(tablep[pts->index], entry); 143aef5de75SJason Gunthorpe pts->entry = entry; 144aef5de75SJason Gunthorpe } 145aef5de75SJason Gunthorpe #define pt_install_leaf_entry x86_64_pt_install_leaf_entry 146aef5de75SJason Gunthorpe 147aef5de75SJason Gunthorpe static inline bool x86_64_pt_install_table(struct pt_state *pts, 148aef5de75SJason Gunthorpe pt_oaddr_t table_pa, 149aef5de75SJason Gunthorpe const struct pt_write_attrs *attrs) 150aef5de75SJason Gunthorpe { 151aef5de75SJason Gunthorpe u64 entry; 152aef5de75SJason Gunthorpe 153aef5de75SJason Gunthorpe entry = X86_64_FMT_P | X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A | 154aef5de75SJason Gunthorpe FIELD_PREP(X86_64_FMT_OA, log2_div(table_pa, PT_GRANULE_LG2SZ)); 155aef5de75SJason Gunthorpe if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES)) 156aef5de75SJason Gunthorpe entry = __sme_set(entry); 157aef5de75SJason Gunthorpe return pt_table_install64(pts, entry); 158aef5de75SJason Gunthorpe } 159aef5de75SJason Gunthorpe #define pt_install_table x86_64_pt_install_table 160aef5de75SJason Gunthorpe 161aef5de75SJason Gunthorpe static inline void x86_64_pt_attr_from_entry(const struct pt_state *pts, 162aef5de75SJason Gunthorpe struct pt_write_attrs *attrs) 163aef5de75SJason Gunthorpe { 164aef5de75SJason Gunthorpe attrs->descriptor_bits = pts->entry & 165aef5de75SJason Gunthorpe (X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A | 166aef5de75SJason Gunthorpe X86_64_FMT_D | X86_64_FMT_XD); 167aef5de75SJason Gunthorpe } 168aef5de75SJason Gunthorpe #define pt_attr_from_entry x86_64_pt_attr_from_entry 169aef5de75SJason Gunthorpe 170aef5de75SJason Gunthorpe /* --- iommu */ 171aef5de75SJason Gunthorpe #include <linux/generic_pt/iommu.h> 172aef5de75SJason Gunthorpe #include <linux/iommu.h> 173aef5de75SJason Gunthorpe 174aef5de75SJason Gunthorpe #define pt_iommu_table pt_iommu_x86_64 175aef5de75SJason Gunthorpe 176aef5de75SJason Gunthorpe /* The common struct is in the per-format common struct */ 177aef5de75SJason Gunthorpe static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table) 178aef5de75SJason Gunthorpe { 179aef5de75SJason Gunthorpe return &container_of(iommu_table, struct pt_iommu_table, iommu) 180aef5de75SJason Gunthorpe ->x86_64_pt.common; 181aef5de75SJason Gunthorpe } 182aef5de75SJason Gunthorpe 183aef5de75SJason Gunthorpe static inline struct pt_iommu *iommu_from_common(struct pt_common *common) 184aef5de75SJason Gunthorpe { 185aef5de75SJason Gunthorpe return &container_of(common, struct pt_iommu_table, x86_64_pt.common) 186aef5de75SJason Gunthorpe ->iommu; 187aef5de75SJason Gunthorpe } 188aef5de75SJason Gunthorpe 189aef5de75SJason Gunthorpe static inline int x86_64_pt_iommu_set_prot(struct pt_common *common, 190aef5de75SJason Gunthorpe struct pt_write_attrs *attrs, 191aef5de75SJason Gunthorpe unsigned int iommu_prot) 192aef5de75SJason Gunthorpe { 193aef5de75SJason Gunthorpe u64 pte; 194aef5de75SJason Gunthorpe 195*1978fac2SJason Gunthorpe pte = X86_64_FMT_U | X86_64_FMT_A; 196aef5de75SJason Gunthorpe if (iommu_prot & IOMMU_WRITE) 197*1978fac2SJason Gunthorpe pte |= X86_64_FMT_RW | X86_64_FMT_D; 198aef5de75SJason Gunthorpe 199aef5de75SJason Gunthorpe /* 200aef5de75SJason Gunthorpe * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to 201aef5de75SJason Gunthorpe * control this. For now if the tables use sme_set then so do the ptes. 202aef5de75SJason Gunthorpe */ 203aef5de75SJason Gunthorpe if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES)) 204aef5de75SJason Gunthorpe pte = __sme_set(pte); 205aef5de75SJason Gunthorpe 206aef5de75SJason Gunthorpe attrs->descriptor_bits = pte; 207aef5de75SJason Gunthorpe return 0; 208aef5de75SJason Gunthorpe } 209aef5de75SJason Gunthorpe #define pt_iommu_set_prot x86_64_pt_iommu_set_prot 210aef5de75SJason Gunthorpe 211aef5de75SJason Gunthorpe static inline int 212aef5de75SJason Gunthorpe x86_64_pt_iommu_fmt_init(struct pt_iommu_x86_64 *iommu_table, 213aef5de75SJason Gunthorpe const struct pt_iommu_x86_64_cfg *cfg) 214aef5de75SJason Gunthorpe { 215aef5de75SJason Gunthorpe struct pt_x86_64 *table = &iommu_table->x86_64_pt; 216aef5de75SJason Gunthorpe 217aef5de75SJason Gunthorpe if (cfg->common.hw_max_vasz_lg2 < 31 || 218aef5de75SJason Gunthorpe cfg->common.hw_max_vasz_lg2 > 57) 219aef5de75SJason Gunthorpe return -EINVAL; 220aef5de75SJason Gunthorpe 221aef5de75SJason Gunthorpe /* Top of 2, 3, 4 */ 222aef5de75SJason Gunthorpe pt_top_set_level(&table->common, 223aef5de75SJason Gunthorpe (cfg->common.hw_max_vasz_lg2 - 31) / 9 + 2); 224aef5de75SJason Gunthorpe 225aef5de75SJason Gunthorpe table->common.max_oasz_lg2 = 226aef5de75SJason Gunthorpe min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2); 227aef5de75SJason Gunthorpe return 0; 228aef5de75SJason Gunthorpe } 229aef5de75SJason Gunthorpe #define pt_iommu_fmt_init x86_64_pt_iommu_fmt_init 230aef5de75SJason Gunthorpe 231aef5de75SJason Gunthorpe static inline void 232aef5de75SJason Gunthorpe x86_64_pt_iommu_fmt_hw_info(struct pt_iommu_x86_64 *table, 233aef5de75SJason Gunthorpe const struct pt_range *top_range, 234aef5de75SJason Gunthorpe struct pt_iommu_x86_64_hw_info *info) 235aef5de75SJason Gunthorpe { 236aef5de75SJason Gunthorpe info->gcr3_pt = virt_to_phys(top_range->top_table); 237aef5de75SJason Gunthorpe PT_WARN_ON(info->gcr3_pt & ~PT_TOP_PHYS_MASK); 238aef5de75SJason Gunthorpe info->levels = top_range->top_level + 1; 239aef5de75SJason Gunthorpe } 240aef5de75SJason Gunthorpe #define pt_iommu_fmt_hw_info x86_64_pt_iommu_fmt_hw_info 241aef5de75SJason Gunthorpe 242aef5de75SJason Gunthorpe #if defined(GENERIC_PT_KUNIT) 243aef5de75SJason Gunthorpe static const struct pt_iommu_x86_64_cfg x86_64_kunit_fmt_cfgs[] = { 244aef5de75SJason Gunthorpe [0] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND), 245aef5de75SJason Gunthorpe .common.hw_max_vasz_lg2 = 48 }, 246aef5de75SJason Gunthorpe [1] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND), 247aef5de75SJason Gunthorpe .common.hw_max_vasz_lg2 = 57 }, 248aef5de75SJason Gunthorpe /* AMD IOMMU PASID 0 formats with no SIGN_EXTEND */ 249aef5de75SJason Gunthorpe [2] = { .common.hw_max_vasz_lg2 = 47 }, 250aef5de75SJason Gunthorpe [3] = { .common.hw_max_vasz_lg2 = 56 }, 251aef5de75SJason Gunthorpe }; 252aef5de75SJason Gunthorpe #define kunit_fmt_cfgs x86_64_kunit_fmt_cfgs 253aef5de75SJason Gunthorpe enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_SIGN_EXTEND)}; 254aef5de75SJason Gunthorpe #endif 255aef5de75SJason Gunthorpe #endif 256