/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 *
 * Iterators for Generic Page Table
 */
#ifndef __GENERIC_PT_PT_ITER_H
#define __GENERIC_PT_PT_ITER_H

#include "pt_common.h"

#include <linux/errno.h>

/*
 * Used to mangle symbols so that backtraces and the symbol table are
 * understandable. Any non-inlined function should get mangled like this.
 */
#define NS(fn) CONCATENATE(PTPFX, fn)

/**
 * pt_check_range() - Validate the range can be iterated
 * @range: Range to validate
 *
 * Check that va and last_va fall within the permitted range of VAs. If the
 * format is using PT_FEAT_SIGN_EXTEND then this also checks that the sign
 * extension is correct.
 */
static inline int pt_check_range(struct pt_range *range)
{
	pt_vaddr_t prefix;

	PT_WARN_ON(!range->max_vasz_lg2);

	if (pt_feature(range->common, PT_FEAT_SIGN_EXTEND)) {
		PT_WARN_ON(range->common->max_vasz_lg2 != range->max_vasz_lg2);
		prefix = fvalog2_div(range->va, range->max_vasz_lg2 - 1) ?
				 PT_VADDR_MAX :
				 0;
	} else {
		prefix = pt_full_va_prefix(range->common);
	}

	if (!fvalog2_div_eq(range->va, prefix, range->max_vasz_lg2) ||
	    !fvalog2_div_eq(range->last_va, prefix, range->max_vasz_lg2))
		return -ERANGE;
	return 0;
}
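/*
 * Example (illustrative sketch, not part of this header): a typical caller
 * builds a range with pt_make_range() below and validates it before walking.
 * The walker function and argument names here are hypothetical.
 *
 *	struct pt_range range = pt_make_range(common, va, last_va);
 *	int ret = pt_check_range(&range);
 *
 *	if (ret)
 *		return ret;
 *	return pt_walk_range(&range, example_walk_fn, &example_args);
 */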
/**
 * pt_index_to_va() - Update range->va to the current pts->index
 * @pts: Iteration State
 *
 * Adjust range->va to match the current index. This is done in a lazy manner
 * since computing the VA takes several instructions and is rarely required.
 */
static inline void pt_index_to_va(struct pt_state *pts)
{
	pt_vaddr_t lower_va;

	lower_va = log2_mul(pts->index, pt_table_item_lg2sz(pts));
	pts->range->va = fvalog2_set_mod(pts->range->va, lower_va,
					 pt_table_oa_lg2sz(pts));
}

/*
 * Advance pts's index by 2^index_count_lg2 entries. If the index is currently
 * in the middle of a contiguous block the result is adjusted to the first
 * index past the end of that block.
 */
static inline void _pt_advance(struct pt_state *pts,
			       unsigned int index_count_lg2)
{
	pts->index = log2_set_mod(pts->index + log2_to_int(index_count_lg2), 0,
				  index_count_lg2);
}

/**
 * pt_entry_fully_covered() - Check if the item or entry is entirely contained
 *                            within pts->range
 * @pts: Iteration State
 * @oasz_lg2: The size of the item to check, pt_table_item_lg2sz() or
 *            pt_entry_oa_lg2sz()
 *
 * Returns: true if the item is fully enclosed by the pts->range.
 */
static inline bool pt_entry_fully_covered(const struct pt_state *pts,
					  unsigned int oasz_lg2)
{
	struct pt_range *range = pts->range;

	/* Range must begin at the start of the entry */
	if (log2_mod(range->va, oasz_lg2))
		return false;

	/* Range ends past the end of the entry */
	if (!log2_div_eq(range->va, range->last_va, oasz_lg2))
		return true;

	/* Range ends at the end of the entry */
	return log2_mod_eq_max(range->last_va, oasz_lg2);
}
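/*
 * Worked example (illustrative): with oasz_lg2 == 21 (a 2MiB entry), a range
 * of va = 0x200000 and last_va = 0x3fffff starts on the entry boundary and
 * ends on the entry's final byte, so pt_entry_fully_covered() returns true.
 * Shrinking last_va to 0x3ffffe leaves the tail of the entry uncovered and it
 * returns false.
 */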
/**
 * pt_range_to_index() - Starting index for an iteration
 * @pts: Iteration State
 *
 * Return: the starting index for the iteration in pts.
 */
static inline unsigned int pt_range_to_index(const struct pt_state *pts)
{
	unsigned int isz_lg2 = pt_table_item_lg2sz(pts);

	PT_WARN_ON(pts->level > pts->range->top_level);
	if (pts->range->top_level == pts->level)
		return log2_div(fvalog2_mod(pts->range->va,
					    pts->range->max_vasz_lg2),
				isz_lg2);
	return log2_mod(log2_div(pts->range->va, isz_lg2),
			pt_num_items_lg2(pts));
}

/**
 * pt_range_to_end_index() - Ending index for an iteration
 * @pts: Iteration State
 *
 * Return: one past the last index for the iteration in pts.
 */
static inline unsigned int pt_range_to_end_index(const struct pt_state *pts)
{
	unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
	struct pt_range *range = pts->range;
	unsigned int num_entries_lg2;

	if (range->va == range->last_va)
		return pts->index + 1;

	if (range->top_level == pts->level)
		return log2_div(fvalog2_mod(range->last_va,
					    range->max_vasz_lg2),
				isz_lg2) + 1;

	num_entries_lg2 = pt_num_items_lg2(pts);

	/* last_va falls within this table */
	if (log2_div_eq(range->va, range->last_va, num_entries_lg2 + isz_lg2))
		return log2_mod(log2_div(range->last_va, isz_lg2),
				num_entries_lg2) + 1;

	return log2_to_int(num_entries_lg2);
}

static inline void _pt_iter_first(struct pt_state *pts)
{
	pts->index = pt_range_to_index(pts);
	pts->end_index = pt_range_to_end_index(pts);
	PT_WARN_ON(pts->index > pts->end_index);
}

static inline bool _pt_iter_load(struct pt_state *pts)
{
	if (pts->index >= pts->end_index)
		return false;
	pt_load_entry(pts);
	return true;
}
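/*
 * Worked example (illustrative): for a format with 4KiB items (isz_lg2 == 12)
 * and 512-entry tables (pt_num_items_lg2() == 9), a non-top level with
 * range->va = 0x201000 gives log2_mod(log2_div(0x201000, 12), 9) == 1, so the
 * iteration starts at index 1 of that table.
 */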
/**
 * pt_next_entry() - Advance pts to the next entry
 * @pts: Iteration State
 *
 * Update pts to go to the next index at this level. If pts is pointing at a
 * contiguous entry then the index may advance by more than one.
 */
static inline void pt_next_entry(struct pt_state *pts)
{
	if (pts->type == PT_ENTRY_OA &&
	    !__builtin_constant_p(pt_entry_num_contig_lg2(pts) == 0))
		_pt_advance(pts, pt_entry_num_contig_lg2(pts));
	else
		pts->index++;
	pt_index_to_va(pts);
}

/**
 * for_each_pt_level_entry() - For loop wrapper over entries in the range
 * @pts: Iteration State
 *
 * This is the basic iteration primitive. It iterates over all the entries in
 * pts->range that fall within the pts's current table level. Each step does
 * pt_load_entry(pts).
 */
#define for_each_pt_level_entry(pts) \
	for (_pt_iter_first(pts); _pt_iter_load(pts); pt_next_entry(pts))

/**
 * pt_load_single_entry() - Version of pt_load_entry() usable within a walker
 * @pts: Iteration State
 *
 * Alternative to for_each_pt_level_entry() if the walker function uses only a
 * single entry.
 */
static inline enum pt_entry_type pt_load_single_entry(struct pt_state *pts)
{
	pts->index = pt_range_to_index(pts);
	pt_load_entry(pts);
	return pts->type;
}
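/*
 * Example (illustrative sketch): a minimal recursive walker built on
 * for_each_pt_level_entry(). The function name is hypothetical, the entry
 * type names are assumed from pt_common.h, and a real walker would normally
 * be unrolled with PT_MAKE_LEVELS() at the end of this header rather than
 * calling itself directly.
 *
 *	static int example_count_oas(struct pt_range *range, void *arg,
 *				     unsigned int level,
 *				     struct pt_table_p *table)
 *	{
 *		struct pt_state pts = pt_init(range, level, table);
 *		u64 *count = arg;
 *		int ret;
 *
 *		for_each_pt_level_entry(&pts) {
 *			if (pts.type == PT_ENTRY_TABLE) {
 *				ret = pt_descend(&pts, arg, example_count_oas);
 *				if (ret)
 *					return ret;
 *			} else if (pts.type == PT_ENTRY_OA) {
 *				(*count)++;
 *			}
 *		}
 *		return 0;
 *	}
 */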
static __always_inline struct pt_range _pt_top_range(struct pt_common *common,
						     uintptr_t top_of_table)
{
	struct pt_range range = {
		.common = common,
		.top_table =
			(struct pt_table_p *)(top_of_table &
					      ~(uintptr_t)PT_TOP_LEVEL_MASK),
		.top_level = top_of_table % (1 << PT_TOP_LEVEL_BITS),
	};
	struct pt_state pts = { .range = &range, .level = range.top_level };
	unsigned int max_vasz_lg2;

	max_vasz_lg2 = common->max_vasz_lg2;
	if (pt_feature(common, PT_FEAT_DYNAMIC_TOP) &&
	    pts.level != PT_MAX_TOP_LEVEL)
		max_vasz_lg2 = min_t(unsigned int, common->max_vasz_lg2,
				     pt_num_items_lg2(&pts) +
					     pt_table_item_lg2sz(&pts));

	/*
	 * With PT_FEAT_SIGN_EXTEND the top range defaults to the lower
	 * region only.
	 */
	range.max_vasz_lg2 = max_vasz_lg2;
	if (pt_feature(common, PT_FEAT_SIGN_EXTEND))
		max_vasz_lg2--;

	range.va = fvalog2_set_mod(pt_full_va_prefix(common), 0, max_vasz_lg2);
	range.last_va =
		fvalog2_set_mod_max(pt_full_va_prefix(common), max_vasz_lg2);
	return range;
}

/**
 * pt_top_range() - Return a range that spans part of the top level
 * @common: Table
 *
 * For PT_FEAT_SIGN_EXTEND this will return the lower range, and cover half the
 * total page table. Otherwise it returns the entire page table.
 */
static __always_inline struct pt_range pt_top_range(struct pt_common *common)
{
	/*
	 * The top pointer can change without locking. We capture the value and
	 * its level here and are safe to walk it so long as both values are
	 * captured without tearing.
	 */
	return _pt_top_range(common, READ_ONCE(common->top_of_table));
}

/**
 * pt_all_range() - Return a range that spans the entire page table
 * @common: Table
 *
 * The returned range spans the whole page table. Due to how PT_FEAT_SIGN_EXTEND
 * is supported range->va and range->last_va will be incorrect during the
 * iteration and must not be accessed.
 */
static inline struct pt_range pt_all_range(struct pt_common *common)
{
	struct pt_range range = pt_top_range(common);

	if (!pt_feature(common, PT_FEAT_SIGN_EXTEND))
		return range;

	/*
	 * Pretend the table is linear from 0 without a sign extension. This
	 * generates the correct indexes for iteration.
	 */
	range.last_va = fvalog2_set_mod_max(0, range.max_vasz_lg2);
	return range;
}
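/*
 * Example (illustrative sketch): when correct VAs are needed during the walk
 * of a PT_FEAT_SIGN_EXTEND table, the two halves can be walked separately
 * instead of using pt_all_range(). The walker fn and arg are hypothetical.
 *
 *	struct pt_range lower = pt_top_range(common);
 *	struct pt_range upper = pt_upper_range(common);
 *	int ret;
 *
 *	ret = pt_walk_range(&lower, fn, arg);
 *	if (!ret && pt_feature(common, PT_FEAT_SIGN_EXTEND))
 *		ret = pt_walk_range(&upper, fn, arg);
 */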
/**
 * pt_upper_range() - Return a range that spans part of the top level
 * @common: Table
 *
 * For PT_FEAT_SIGN_EXTEND this will return the upper range, and cover half the
 * total page table. Otherwise it returns the entire page table.
 */
static inline struct pt_range pt_upper_range(struct pt_common *common)
{
	struct pt_range range = pt_top_range(common);

	if (!pt_feature(common, PT_FEAT_SIGN_EXTEND))
		return range;

	range.va = fvalog2_set_mod(PT_VADDR_MAX, 0, range.max_vasz_lg2 - 1);
	range.last_va = PT_VADDR_MAX;
	return range;
}

/**
 * pt_make_range() - Return a range that spans part of the table
 * @common: Table
 * @va: Start address
 * @last_va: Last address
 *
 * The caller must validate the range with pt_check_range() before using it.
 */
static __always_inline struct pt_range
pt_make_range(struct pt_common *common, pt_vaddr_t va, pt_vaddr_t last_va)
{
	struct pt_range range =
		_pt_top_range(common, READ_ONCE(common->top_of_table));

	range.va = va;
	range.last_va = last_va;

	return range;
}

/*
 * Span a slice of the table starting at a lower table level from an active
 * walk.
 */
static __always_inline struct pt_range
pt_make_child_range(const struct pt_range *parent, pt_vaddr_t va,
		    pt_vaddr_t last_va)
{
	struct pt_range range = *parent;

	range.va = va;
	range.last_va = last_va;

	PT_WARN_ON(last_va < va);
	PT_WARN_ON(pt_check_range(&range));

	return range;
}
/**
 * pt_init() - Initialize a pt_state on the stack
 * @range: Range pointer to embed in the state
 * @level: Table level for the state
 * @table: Pointer to the table memory at level
 *
 * Helper to initialize the on-stack pt_state from walker arguments.
 */
static __always_inline struct pt_state
pt_init(struct pt_range *range, unsigned int level, struct pt_table_p *table)
{
	struct pt_state pts = {
		.range = range,
		.table = table,
		.level = level,
	};
	return pts;
}

/**
 * pt_init_top() - Initialize a pt_state on the stack
 * @range: Range pointer to embed in the state
 *
 * The pt_state points to the top most level.
 */
static __always_inline struct pt_state pt_init_top(struct pt_range *range)
{
	return pt_init(range, range->top_level, range->top_table);
}

typedef int (*pt_level_fn_t)(struct pt_range *range, void *arg,
			     unsigned int level, struct pt_table_p *table);

/**
 * pt_descend() - Recursively invoke the walker for the lower level
 * @pts: Iteration State
 * @arg: Value to pass to the function
 * @fn: Walker function to call
 *
 * pts must point to a table item. Invoke fn as a walker on the table
 * pts points to.
 */
static __always_inline int pt_descend(struct pt_state *pts, void *arg,
				      pt_level_fn_t fn)
{
	int ret;

	if (PT_WARN_ON(!pts->table_lower))
		return -EINVAL;

	ret = (*fn)(pts->range, arg, pts->level - 1, pts->table_lower);
	return ret;
}
/**
 * pt_walk_range() - Walk over a VA range
 * @range: Range pointer
 * @fn: Walker function to call
 * @arg: Value to pass to the function
 *
 * Walk over a VA range. The caller should have done a validity check, at
 * least calling pt_check_range(), when building range. The walk will
 * start at the top most table.
 */
static __always_inline int pt_walk_range(struct pt_range *range,
					 pt_level_fn_t fn, void *arg)
{
	return fn(range, arg, range->top_level, range->top_table);
}

/*
 * pt_walk_descend() - Recursively invoke the walker for a slice of a lower
 *                     level
 * @pts: Iteration State
 * @va: Start address
 * @last_va: Last address
 * @fn: Walker function to call
 * @arg: Value to pass to the function
 *
 * With pts pointing at a table item this will descend and walk over a slice of
 * the lower table. The caller must ensure that va/last_va are within the table
 * item. This creates a new walk and does not alter pts or pts->range.
 */
static __always_inline int pt_walk_descend(const struct pt_state *pts,
					   pt_vaddr_t va, pt_vaddr_t last_va,
					   pt_level_fn_t fn, void *arg)
{
	struct pt_range range = pt_make_child_range(pts->range, va, last_va);

	if (PT_WARN_ON(!pt_can_have_table(pts)) ||
	    PT_WARN_ON(!pts->table_lower))
		return -EINVAL;

	return fn(&range, arg, pts->level - 1, pts->table_lower);
}
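/*
 * Example (illustrative sketch): starting a walk from the top of the table,
 * reusing the hypothetical example_count_oas() walker sketched earlier.
 *
 *	struct pt_range range = pt_top_range(common);
 *	u64 count = 0;
 *	int ret;
 *
 *	ret = pt_walk_range(&range, example_count_oas, &count);
 */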
/*
 * pt_walk_descend_all() - Recursively invoke the walker for a table item
 * @parent_pts: Iteration State
 * @fn: Walker function to call
 * @arg: Value to pass to the function
 *
 * With parent_pts pointing at a table item this will descend and walk over the
 * entire lower table. This creates a new walk and does not alter parent_pts or
 * parent_pts->range.
 */
static __always_inline int
pt_walk_descend_all(const struct pt_state *parent_pts, pt_level_fn_t fn,
		    void *arg)
{
	unsigned int isz_lg2 = pt_table_item_lg2sz(parent_pts);

	return pt_walk_descend(parent_pts,
			       log2_set_mod(parent_pts->range->va, 0, isz_lg2),
			       log2_set_mod_max(parent_pts->range->va, isz_lg2),
			       fn, arg);
}

/**
 * pt_range_slice() - Return a range that spans indexes
 * @pts: Iteration State
 * @start_index: Starting index within pts
 * @end_index: Ending index within pts, exclusive
 *
 * Create a range that spans an index range of the current table level
 * pt_state points at.
 */
static inline struct pt_range pt_range_slice(const struct pt_state *pts,
					     unsigned int start_index,
					     unsigned int end_index)
{
	unsigned int table_lg2sz = pt_table_oa_lg2sz(pts);
	pt_vaddr_t last_va;
	pt_vaddr_t va;

	va = fvalog2_set_mod(pts->range->va,
			     log2_mul(start_index, pt_table_item_lg2sz(pts)),
			     table_lg2sz);
	last_va = fvalog2_set_mod(
		pts->range->va,
		log2_mul(end_index, pt_table_item_lg2sz(pts)) - 1, table_lg2sz);
	return pt_make_child_range(pts->range, va, last_va);
}
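/*
 * Worked example (illustrative): with 4KiB items (isz_lg2 == 12) and
 * 512-entry tables, a table whose VA span starts at 0x400000 gives
 * pt_range_slice(pts, 2, 4) a result of va = 0x402000 and
 * last_va = 0x403fff, i.e. items 2 and 3.
 */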
/**
 * pt_top_memsize_lg2() - Size of the top table allocation
 * @common: Table
 * @top_of_table: Top of table value from _pt_top_set()
 *
 * Compute the allocation size of the top table. For PT_FEAT_DYNAMIC_TOP this
 * will compute the top size assuming the table will grow.
 */
static inline unsigned int pt_top_memsize_lg2(struct pt_common *common,
					      uintptr_t top_of_table)
{
	struct pt_range range = _pt_top_range(common, top_of_table);
	struct pt_state pts = pt_init_top(&range);
	unsigned int num_items_lg2;

	num_items_lg2 = common->max_vasz_lg2 - pt_table_item_lg2sz(&pts);
	if (range.top_level != PT_MAX_TOP_LEVEL &&
	    pt_feature(common, PT_FEAT_DYNAMIC_TOP))
		num_items_lg2 = min(num_items_lg2, pt_num_items_lg2(&pts));

	/* Round up the allocation size to the minimum alignment */
	return max(ffs_t(u64, PT_TOP_PHYS_MASK),
		   num_items_lg2 + ilog2(PT_ITEM_WORD_SIZE));
}
/**
 * pt_compute_best_pgsize() - Determine the best page size for leaf entries
 * @pgsz_bitmap: Permitted page sizes
 * @va: Starting virtual address for the leaf entry
 * @last_va: Last virtual address for the leaf entry, sets the max page size
 * @oa: Starting output address for the leaf entry
 *
 * Compute the largest page size for va, last_va, and oa together and return it
 * in lg2. The largest page size depends on the format's supported page sizes at
 * this level, and the relative alignment of the VA and OA addresses. 0 means
 * the OA cannot be stored with the provided pgsz_bitmap.
 */
static inline unsigned int pt_compute_best_pgsize(pt_vaddr_t pgsz_bitmap,
						  pt_vaddr_t va,
						  pt_vaddr_t last_va,
						  pt_oaddr_t oa)
{
	unsigned int best_pgsz_lg2;
	unsigned int pgsz_lg2;
	pt_vaddr_t len = last_va - va + 1;
	pt_vaddr_t mask;

	if (PT_WARN_ON(va >= last_va))
		return 0;

	/*
	 * Given a VA/OA pair the best page size is the largest page size
	 * where:
	 *
	 * 1) VA and OA start at the page. Bitwise this is the count of least
	 *    significant 0 bits.
	 *    This also implies that last_va/oa has the same prefix as va/oa.
	 */
	mask = va | oa;

	/*
	 * 2) The page size is not larger than the last_va (length). Since page
	 *    sizes are always power of two this can't be larger than the
	 *    largest power of two factor of the length.
	 */
	mask |= log2_to_int(vafls(len) - 1);

	best_pgsz_lg2 = vaffs(mask);

	/* Choose the highest bit <= best_pgsz_lg2 */
	if (best_pgsz_lg2 < PT_VADDR_MAX_LG2 - 1)
		pgsz_bitmap = log2_mod(pgsz_bitmap, best_pgsz_lg2 + 1);

	pgsz_lg2 = vafls(pgsz_bitmap);
	if (!pgsz_lg2)
		return 0;

	pgsz_lg2--;

	PT_WARN_ON(log2_mod(va, pgsz_lg2) != 0);
	PT_WARN_ON(oalog2_mod(oa, pgsz_lg2) != 0);
	PT_WARN_ON(va + log2_to_int(pgsz_lg2) - 1 > last_va);
	PT_WARN_ON(!log2_div_eq(va, va + log2_to_int(pgsz_lg2) - 1, pgsz_lg2));
	PT_WARN_ON(
		!oalog2_div_eq(oa, oa + log2_to_int(pgsz_lg2) - 1, pgsz_lg2));
	return pgsz_lg2;
}
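/*
 * Worked example (illustrative): with pgsz_bitmap permitting 4KiB and 2MiB
 * pages (bits 12 and 21 set), va = 0x200000, last_va = 0x5fffff and
 * oa = 0x40200000 are all 2MiB aligned and span at least 2MiB, so the result
 * is 21. Moving oa to 0x40201000 breaks the 2MiB alignment of the OA and the
 * result drops to 12.
 */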
#define _PT_MAKE_CALL_LEVEL(fn) \
	static __always_inline int fn(struct pt_range *range, void *arg, \
				      unsigned int level, \
				      struct pt_table_p *table) \
	{ \
		static_assert(PT_MAX_TOP_LEVEL <= 5); \
		if (level == 0) \
			return CONCATENATE(fn, 0)(range, arg, 0, table); \
		if (level == 1 || PT_MAX_TOP_LEVEL == 1) \
			return CONCATENATE(fn, 1)(range, arg, 1, table); \
		if (level == 2 || PT_MAX_TOP_LEVEL == 2) \
			return CONCATENATE(fn, 2)(range, arg, 2, table); \
		if (level == 3 || PT_MAX_TOP_LEVEL == 3) \
			return CONCATENATE(fn, 3)(range, arg, 3, table); \
		if (level == 4 || PT_MAX_TOP_LEVEL == 4) \
			return CONCATENATE(fn, 4)(range, arg, 4, table); \
		return CONCATENATE(fn, 5)(range, arg, 5, table); \
	}

static inline int __pt_make_level_fn_err(struct pt_range *range, void *arg,
					 unsigned int unused_level,
					 struct pt_table_p *table)
{
	static_assert(PT_MAX_TOP_LEVEL <= 5);
	return -EPROTOTYPE;
}

#define __PT_MAKE_LEVEL_FN(fn, level, descend_fn, do_fn) \
	static inline int fn(struct pt_range *range, void *arg, \
			     unsigned int unused_level, \
			     struct pt_table_p *table) \
	{ \
		return do_fn(range, arg, level, table, descend_fn); \
	}

/**
 * PT_MAKE_LEVELS() - Build an unwound walker
 * @fn: Name of the walker function
 * @do_fn: Function to call at each level
 *
 * This builds a function call tree that can be fully inlined.
 * The caller must provide a function body in an __always_inline function::
 *
 *	static __always_inline int do(struct pt_range *range, void *arg,
 *				      unsigned int level,
 *				      struct pt_table_p *table,
 *				      pt_level_fn_t descend_fn)
 *
 * An inline function will be created for each table level that calls do_fn with
 * a compile time constant for level and a pointer to the next lower function.
 * This generates an optimally inlined walk where each of the functions sees a
 * constant level and can codegen the exact constants/etc for that level.
 *
 * Note this can produce a lot of code!
 */
#define PT_MAKE_LEVELS(fn, do_fn) \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 0), 0, __pt_make_level_fn_err, \
			   do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 1), 1, CONCATENATE(fn, 0), do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 2), 2, CONCATENATE(fn, 1), do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 3), 3, CONCATENATE(fn, 2), do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 4), 4, CONCATENATE(fn, 3), do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 5), 5, CONCATENATE(fn, 4), do_fn); \
	_PT_MAKE_CALL_LEVEL(fn)

#endif