/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 *
 * x86 page table. Supports the 4 and 5 level variations.
 *
 * The 4 and 5 level versions are described in:
 *
 *  Section "4.4 4-Level Paging and 5-Level Paging" of the Intel Software
 *  Developer's Manual Volume 3
 *
 *  Section "9.7 First-Stage Paging Entries" of the "Intel Virtualization
 *  Technology for Directed I/O Architecture Specification"
 *
 *  Section "2.2.6 I/O Page Tables for Guest Translations" of the "AMD I/O
 *  Virtualization Technology (IOMMU) Specification"
 *
 * It is used by x86 CPUs and by the AMD and VT-d IOMMU HW.
 *
 * Note the 3 level format is very similar and is almost implemented here,
 * but the reserved/ignored layout is different and there are functional bit
 * differences.
 *
 * This format uses PT_FEAT_SIGN_EXTEND to have an upper/non-canonical/lower
 * split. PT_FEAT_SIGN_EXTEND is optional as the AMD IOMMU sometimes uses
 * non-sign-extended addressing with this page table format.
 *
 * The named levels in the spec map to pts->level as:
 *  Table/PTE - 0
 *  Directory/PDE - 1
 *  Directory Ptr/PDPTE - 2
 *  PML4/PML4E - 3
 *  PML5/PML5E - 4
 */
#ifndef __GENERIC_PT_FMT_X86_64_H
#define __GENERIC_PT_FMT_X86_64_H

#include "defs_x86_64.h"
#include "../pt_defs.h"

#include <linux/bitfield.h>
#include <linux/container_of.h>
#include <linux/log2.h>
#include <linux/mem_encrypt.h>

enum {
	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
	PT_MAX_VA_ADDRESS_LG2 = 57,
	PT_ITEM_WORD_SIZE = sizeof(u64),
	PT_MAX_TOP_LEVEL = 4,
	PT_GRANULE_LG2SZ = 12,
	PT_TABLEMEM_LG2SZ = 12,

	/*
	 * For AMD the GCR3 Base only has these bits. For VT-d the FSPTPTR is
	 * 4k aligned and is limited by the architected HAW.
	 */
	PT_TOP_PHYS_MASK = GENMASK_ULL(51, 12),
};
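
/*
 * Geometry sketch (derived from the constants above, not from the specs):
 * each table is 2^PT_TABLEMEM_LG2SZ = 4 KiB of u64 items, so every level
 * decodes 9 VA bits on top of the 12 bit page offset. A top level of 3
 * (PML4) thus covers 4 * 9 + 12 = 48 bits of VA and a top level of 4
 * (PML5) covers 5 * 9 + 12 = 57, matching PT_MAX_VA_ADDRESS_LG2. The index
 * into a level's table is, schematically:
 *
 *	index = (va >> (PT_GRANULE_LG2SZ + 9 * level)) & 511;
 */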

/* Shared descriptor bits */
enum {
	X86_64_FMT_P = BIT(0),
	X86_64_FMT_RW = BIT(1),
	X86_64_FMT_U = BIT(2),
	X86_64_FMT_A = BIT(5),
	X86_64_FMT_D = BIT(6),
	X86_64_FMT_OA = GENMASK_ULL(51, 12),
	X86_64_FMT_XD = BIT_ULL(63),
};

/* PDPTE/PDE */
enum {
	X86_64_FMT_PS = BIT(7),
};

static inline pt_oaddr_t x86_64_pt_table_pa(const struct pt_state *pts)
{
	u64 entry = pts->entry;

	if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
		entry = __sme_clr(entry);
	return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry),
			  PT_TABLEMEM_LG2SZ);
}
#define pt_table_pa x86_64_pt_table_pa

static inline pt_oaddr_t x86_64_pt_entry_oa(const struct pt_state *pts)
{
	u64 entry = pts->entry;

	if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
		entry = __sme_clr(entry);
	return oalog2_mul(FIELD_GET(X86_64_FMT_OA, entry),
			  PT_GRANULE_LG2SZ);
}
#define pt_entry_oa x86_64_pt_entry_oa

static inline bool x86_64_pt_can_have_leaf(const struct pt_state *pts)
{
	return pts->level <= 2;
}
#define pt_can_have_leaf x86_64_pt_can_have_leaf

static inline unsigned int x86_64_pt_num_items_lg2(const struct pt_state *pts)
{
	return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
}
#define pt_num_items_lg2 x86_64_pt_num_items_lg2

static inline enum pt_entry_type x86_64_pt_load_entry_raw(struct pt_state *pts)
{
	const u64 *tablep = pt_cur_table(pts, u64);
	u64 entry;

	pts->entry = entry = READ_ONCE(tablep[pts->index]);
	if (!(entry & X86_64_FMT_P))
		return PT_ENTRY_EMPTY;
	if (pts->level == 0 ||
	    (x86_64_pt_can_have_leaf(pts) && (entry & X86_64_FMT_PS)))
		return PT_ENTRY_OA;
	return PT_ENTRY_TABLE;
}
#define pt_load_entry_raw x86_64_pt_load_entry_raw
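
/*
 * Read-side sketch (hypothetical caller, not part of the format hooks): a
 * walker loads each entry once and dispatches on the returned type:
 *
 *	switch (x86_64_pt_load_entry_raw(pts)) {
 *	case PT_ENTRY_EMPTY:	// not present, stop
 *	case PT_ENTRY_OA:	// leaf, use x86_64_pt_entry_oa(pts)
 *	case PT_ENTRY_TABLE:	// descend via x86_64_pt_table_pa(pts)
 *	}
 *
 * The READ_ONCE() pairs with the WRITE_ONCE()/pt_table_install64() on the
 * write side so a concurrent walker never sees a torn 64-bit entry.
 */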

static inline void
x86_64_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
			     unsigned int oasz_lg2,
			     const struct pt_write_attrs *attrs)
{
	u64 *tablep = pt_cur_table(pts, u64);
	u64 entry;

	if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
		return;

	entry = X86_64_FMT_P |
		FIELD_PREP(X86_64_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) |
		attrs->descriptor_bits;
	if (pts->level != 0)
		entry |= X86_64_FMT_PS;

	WRITE_ONCE(tablep[pts->index], entry);
	pts->entry = entry;
}
#define pt_install_leaf_entry x86_64_pt_install_leaf_entry

static inline bool x86_64_pt_install_table(struct pt_state *pts,
					   pt_oaddr_t table_pa,
					   const struct pt_write_attrs *attrs)
{
	u64 entry;

	entry = X86_64_FMT_P | X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A |
		FIELD_PREP(X86_64_FMT_OA, log2_div(table_pa, PT_GRANULE_LG2SZ));
	if (pts_feature(pts, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
		entry = __sme_set(entry);
	return pt_table_install64(pts, entry);
}
#define pt_install_table x86_64_pt_install_table

static inline void x86_64_pt_attr_from_entry(const struct pt_state *pts,
					     struct pt_write_attrs *attrs)
{
	attrs->descriptor_bits = pts->entry &
				 (X86_64_FMT_RW | X86_64_FMT_U | X86_64_FMT_A |
				  X86_64_FMT_D | X86_64_FMT_XD);
}
#define pt_attr_from_entry x86_64_pt_attr_from_entry

/* --- iommu */
#include <linux/generic_pt/iommu.h>
#include <linux/iommu.h>

#define pt_iommu_table pt_iommu_x86_64

/* struct pt_common is embedded in the per-format struct */
static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
{
	return &container_of(iommu_table, struct pt_iommu_table, iommu)
			->x86_64_pt.common;
}

static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
{
	return &container_of(common, struct pt_iommu_table, x86_64_pt.common)
			->iommu;
}
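
/*
 * Layout sketch (as implied by the container_of() pairs above; the real
 * definition lives in the generic_pt iommu header): both conversions rely
 * on struct pt_iommu_x86_64 embedding the two objects, roughly:
 *
 *	struct pt_iommu_x86_64 {
 *		struct pt_iommu iommu;
 *		struct pt_x86_64 x86_64_pt;	// embeds struct pt_common
 *	};
 *
 * which makes common_from_iommu() and iommu_from_common() exact inverses.
 */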

static inline int x86_64_pt_iommu_set_prot(struct pt_common *common,
					   struct pt_write_attrs *attrs,
					   unsigned int iommu_prot)
{
	u64 pte;

	pte = X86_64_FMT_U | X86_64_FMT_A | X86_64_FMT_D;
	if (iommu_prot & IOMMU_WRITE)
		pte |= X86_64_FMT_RW;

	/*
	 * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
	 * control this. For now if the tables use sme_set then so do the
	 * ptes.
	 */
	if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
		pte = __sme_set(pte);

	attrs->descriptor_bits = pte;
	return 0;
}
#define pt_iommu_set_prot x86_64_pt_iommu_set_prot

static inline int
x86_64_pt_iommu_fmt_init(struct pt_iommu_x86_64 *iommu_table,
			 const struct pt_iommu_x86_64_cfg *cfg)
{
	struct pt_x86_64 *table = &iommu_table->x86_64_pt;

	if (cfg->common.hw_max_vasz_lg2 < 31 ||
	    cfg->common.hw_max_vasz_lg2 > 57)
		return -EINVAL;

	/*
	 * Pick a top level of 2, 3 or 4. Each additional level decodes 9
	 * more VA bits, e.g. hw_max_vasz_lg2 = 48 gives (48 - 31) / 9 + 2 = 3.
	 */
	pt_top_set_level(&table->common,
			 (cfg->common.hw_max_vasz_lg2 - 31) / 9 + 2);

	table->common.max_oasz_lg2 =
		min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
	return 0;
}
#define pt_iommu_fmt_init x86_64_pt_iommu_fmt_init

static inline void
x86_64_pt_iommu_fmt_hw_info(struct pt_iommu_x86_64 *table,
			    const struct pt_range *top_range,
			    struct pt_iommu_x86_64_hw_info *info)
{
	info->gcr3_pt = virt_to_phys(top_range->top_table);
	PT_WARN_ON(info->gcr3_pt & ~PT_TOP_PHYS_MASK);
	info->levels = top_range->top_level + 1;
}
#define pt_iommu_fmt_hw_info x86_64_pt_iommu_fmt_hw_info
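
/*
 * Usage note (summary, not from the specs): gcr3_pt is the 4k aligned top
 * table PA that the driver programs into the AMD GCR3 Base or VT-d FSPTPTR
 * (hence the PT_TOP_PHYS_MASK check), and levels is the walk depth, 3 to 5
 * given the top level range allowed by x86_64_pt_iommu_fmt_init().
 */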

#if defined(GENERIC_PT_KUNIT)
static const struct pt_iommu_x86_64_cfg x86_64_kunit_fmt_cfgs[] = {
	[0] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND),
		.common.hw_max_vasz_lg2 = 48 },
	[1] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND),
		.common.hw_max_vasz_lg2 = 57 },
	/* AMD IOMMU PASID 0 formats with no SIGN_EXTEND */
	[2] = { .common.hw_max_vasz_lg2 = 47 },
	[3] = { .common.hw_max_vasz_lg2 = 56 },
};
#define kunit_fmt_cfgs x86_64_kunit_fmt_cfgs
enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_SIGN_EXTEND) };
#endif
#endif /* __GENERIC_PT_FMT_X86_64_H */