/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 *
 * Iterators for Generic Page Table
 */
#ifndef __GENERIC_PT_PT_ITER_H
#define __GENERIC_PT_PT_ITER_H

#include "pt_common.h"

#include <linux/errno.h>

/*
 * Use to mangle symbols so that backtraces and the symbol table are
 * understandable. Any non-inlined function should get mangled like this.
 */
#define NS(fn) CONCATENATE(PTPFX, fn)

/**
 * pt_check_range() - Validate the range can be iterated
 * @range: Range to validate
 *
 * Check that VA and last_va fall within the permitted range of VAs. If the
 * format is using PT_FEAT_SIGN_EXTEND then this also checks the sign extension
 * is correct.
 *
 * Return: 0 on success, -ERANGE if either end of the range falls outside the
 * valid VA space.
 */
static inline int pt_check_range(struct pt_range *range)
{
	pt_vaddr_t prefix;

	PT_WARN_ON(!range->max_vasz_lg2);

	if (pt_feature(range->common, PT_FEAT_SIGN_EXTEND)) {
		PT_WARN_ON(range->common->max_vasz_lg2 != range->max_vasz_lg2);
		/*
		 * With sign extension the required prefix is all ones or all
		 * zeros, selected by the sign bit of va (bit max_vasz_lg2 - 1).
		 */
		prefix = fvalog2_div(range->va, range->max_vasz_lg2 - 1) ?
				 PT_VADDR_MAX :
				 0;
	} else {
		prefix = pt_full_va_prefix(range->common);
	}

	/* Both ends must carry the expected prefix above max_vasz_lg2 */
	if (!fvalog2_div_eq(range->va, prefix, range->max_vasz_lg2) ||
	    !fvalog2_div_eq(range->last_va, prefix, range->max_vasz_lg2))
		return -ERANGE;
	return 0;
}

/**
 * pt_index_to_va() - Update range->va to the current pts->index
 * @pts: Iteration State
 *
 * Adjust range->va to match the current index. This is done in a lazy manner
 * since computing the VA takes several instructions and is rarely required.
 */
static inline void pt_index_to_va(struct pt_state *pts)
{
	pt_vaddr_t lower_va;

	/* Byte offset of the current index within this level's table span */
	lower_va = log2_mul(pts->index, pt_table_item_lg2sz(pts));
	pts->range->va = fvalog2_set_mod(pts->range->va, lower_va,
					 pt_table_oa_lg2sz(pts));
}

/*
 * Add index_count_lg2 number of entries to pts's VA and index. The VA will be
 * adjusted to the end of the contiguous block if it is currently in the middle.
 */
static inline void _pt_advance(struct pt_state *pts,
			       unsigned int index_count_lg2)
{
	/*
	 * Step forward then round down to the index_count_lg2 boundary, so an
	 * index starting in the middle of a contiguous block lands on the
	 * first entry after the block.
	 */
	pts->index = log2_set_mod(pts->index + log2_to_int(index_count_lg2), 0,
				  index_count_lg2);
}

/**
 * pt_entry_fully_covered() - Check if the item or entry is entirely contained
 *                            within pts->range
 * @pts: Iteration State
 * @oasz_lg2: The size of the item to check, pt_table_item_lg2sz() or
 *            pt_entry_oa_lg2sz()
 *
 * Returns: true if the item is fully enclosed by the pts->range.
 */
static inline bool pt_entry_fully_covered(const struct pt_state *pts,
					  unsigned int oasz_lg2)
{
	struct pt_range *range = pts->range;

	/* Range must begin at the start of the entry */
	if (log2_mod(pts->range->va, oasz_lg2))
		return false;

	/* Range ends past the end of the entry */
	if (!log2_div_eq(range->va, range->last_va, oasz_lg2))
		return true;

	/* Range ends exactly at the end of the entry */
	return log2_mod_eq_max(range->last_va, oasz_lg2);
}

/**
 * pt_range_to_index() - Starting index for an iteration
 * @pts: Iteration State
 *
 * Return: the starting index for the iteration in pts.
 */
static inline unsigned int pt_range_to_index(const struct pt_state *pts)
{
	unsigned int isz_lg2 = pt_table_item_lg2sz(pts);

	PT_WARN_ON(pts->level > pts->range->top_level);
	/*
	 * At the top level the index is relative to the start of the valid VA
	 * space (max_vasz_lg2); below it the index is the VA divided by the
	 * item size, modulo the number of items in one table.
	 */
	if (pts->range->top_level == pts->level)
		return log2_div(fvalog2_mod(pts->range->va,
					    pts->range->max_vasz_lg2),
				isz_lg2);
	return log2_mod(log2_div(pts->range->va, isz_lg2),
			pt_num_items_lg2(pts));
}

/**
 * pt_range_to_end_index() - Ending index iteration
 * @pts: Iteration State
 *
 * Return: the last index for the iteration in pts.
 */
static inline unsigned int pt_range_to_end_index(const struct pt_state *pts)
{
	unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
	struct pt_range *range = pts->range;
	unsigned int num_entries_lg2;

	/* Single-address range: iterate exactly one entry past the start */
	if (range->va == range->last_va)
		return pts->index + 1;

	/* Top level indexes are relative to the start of the VA space */
	if (pts->range->top_level == pts->level)
		return log2_div(fvalog2_mod(pts->range->last_va,
					    pts->range->max_vasz_lg2),
				isz_lg2) +
		       1;

	num_entries_lg2 = pt_num_items_lg2(pts);

	/* last_va falls within this table */
	if (log2_div_eq(range->va, range->last_va, num_entries_lg2 + isz_lg2))
		return log2_mod(log2_div(pts->range->last_va, isz_lg2),
				num_entries_lg2) +
		       1;

	/* Otherwise the iteration runs to the end of this table */
	return log2_to_int(num_entries_lg2);
}

/* Compute the index window [index, end_index) for the current level */
static inline void _pt_iter_first(struct pt_state *pts)
{
	pts->index = pt_range_to_index(pts);
	pts->end_index = pt_range_to_end_index(pts);
	PT_WARN_ON(pts->index > pts->end_index);
}

/* Load the current entry, or return false when the window is exhausted */
static inline bool _pt_iter_load(struct pt_state *pts)
{
	if (pts->index >= pts->end_index)
		return false;
	pt_load_entry(pts);
	return true;
}

/**
 * pt_next_entry() - Advance pts to the next entry
 * @pts: Iteration State
 *
 * Update pts to go to the next index at this level. If pts is pointing at a
 * contiguous entry then the index may advance by more than one.
 */
static inline void pt_next_entry(struct pt_state *pts)
{
	/*
	 * If the format's pt_entry_num_contig_lg2() is a compile-time constant
	 * (i.e. the format has no contiguous entries) this collapses to the
	 * simple increment. NOTE(review): this assumes formats supporting
	 * contiguous entries never return a compile-time constant here —
	 * confirm against the format definitions.
	 */
	if (pts->type == PT_ENTRY_OA &&
	    !__builtin_constant_p(pt_entry_num_contig_lg2(pts) == 0))
		_pt_advance(pts, pt_entry_num_contig_lg2(pts));
	else
		pts->index++;
	pt_index_to_va(pts);
}

/**
 * for_each_pt_level_entry() - For loop wrapper over entries in the range
 * @pts: Iteration State
 *
 * This is the basic iteration primitive. It iterates over all the entries in
 * pts->range that fall within the pts's current table level. Each step does
 * pt_load_entry(pts).
 */
#define for_each_pt_level_entry(pts) \
	for (_pt_iter_first(pts); _pt_iter_load(pts); pt_next_entry(pts))

/**
 * pt_load_single_entry() - Version of pt_load_entry() usable within a walker
 * @pts: Iteration State
 *
 * Alternative to for_each_pt_level_entry() if the walker function uses only a
 * single entry.
 *
 * Return: the type of the loaded entry.
 */
static inline enum pt_entry_type pt_load_single_entry(struct pt_state *pts)
{
	pts->index = pt_range_to_index(pts);
	pt_load_entry(pts);
	return pts->type;
}

/*
 * Build the full-span range from a captured top_of_table value, which encodes
 * both the top table pointer and the top level in one word.
 */
static __always_inline struct pt_range _pt_top_range(struct pt_common *common,
						     uintptr_t top_of_table)
{
	struct pt_range range = {
		.common = common,
		/* The low PT_TOP_LEVEL_BITS of top_of_table hold the level */
		.top_table =
			(struct pt_table_p *)(top_of_table &
					      ~(uintptr_t)PT_TOP_LEVEL_MASK),
		.top_level = top_of_table % (1 << PT_TOP_LEVEL_BITS),
	};
	struct pt_state pts = { .range = &range, .level = range.top_level };
	unsigned int max_vasz_lg2;

	max_vasz_lg2 = common->max_vasz_lg2;
	/*
	 * With a dynamic top that has not yet grown to the maximum level the
	 * usable VA space is limited to what the current top table can span.
	 */
	if (pt_feature(common, PT_FEAT_DYNAMIC_TOP) &&
	    pts.level != PT_MAX_TOP_LEVEL)
		max_vasz_lg2 = min_t(unsigned int, common->max_vasz_lg2,
				     pt_num_items_lg2(&pts) +
					     pt_table_item_lg2sz(&pts));

	/*
	 * The top range will default to the lower region only with sign extend.
	 */
	range.max_vasz_lg2 = max_vasz_lg2;
	if (pt_feature(common, PT_FEAT_SIGN_EXTEND))
		max_vasz_lg2--;

	range.va = fvalog2_set_mod(pt_full_va_prefix(common), 0, max_vasz_lg2);
	range.last_va =
		fvalog2_set_mod_max(pt_full_va_prefix(common), max_vasz_lg2);
	return range;
}

/**
 * pt_top_range() - Return a range that spans part of the top level
 * @common: Table
 *
 * For PT_FEAT_SIGN_EXTEND this will return the lower range, and cover half the
 * total page table. Otherwise it returns the entire page table.
 */
static __always_inline struct pt_range pt_top_range(struct pt_common *common)
{
	/*
	 * The top pointer can change without locking. We capture the value and
	 * its level here and are safe to walk it so long as both values are
	 * captured without tearing.
	 */
	return _pt_top_range(common, READ_ONCE(common->top_of_table));
}

/**
 * pt_all_range() - Return a range that spans the entire page table
 * @common: Table
 *
 * The returned range spans the whole page table. Due to how PT_FEAT_SIGN_EXTEND
 * is supported range->va and range->last_va will be incorrect during the
 * iteration and must not be accessed.
 */
static inline struct pt_range pt_all_range(struct pt_common *common)
{
	struct pt_range range = pt_top_range(common);

	if (!pt_feature(common, PT_FEAT_SIGN_EXTEND))
		return range;

	/*
	 * Pretend the table is linear from 0 without a sign extension. This
	 * generates the correct indexes for iteration.
	 */
	range.last_va = fvalog2_set_mod_max(0, range.max_vasz_lg2);
	return range;
}

/**
 * pt_upper_range() - Return a range that spans part of the top level
 * @common: Table
 *
 * For PT_FEAT_SIGN_EXTEND this will return the upper range, and cover half the
 * total page table. Otherwise it returns the entire page table.
 */
static inline struct pt_range pt_upper_range(struct pt_common *common)
{
	struct pt_range range = pt_top_range(common);

	if (!pt_feature(common, PT_FEAT_SIGN_EXTEND))
		return range;

	/* The upper half runs from the sign-extended base up to PT_VADDR_MAX */
	range.va = fvalog2_set_mod(PT_VADDR_MAX, 0, range.max_vasz_lg2 - 1);
	range.last_va = PT_VADDR_MAX;
	return range;
}

/**
 * pt_make_range() - Return a range that spans part of the table
 * @common: Table
 * @va: Start address
 * @last_va: Last address
 *
 * The caller must validate the range with pt_check_range() before using it.
 * No validation is performed here.
 */
static __always_inline struct pt_range
pt_make_range(struct pt_common *common, pt_vaddr_t va, pt_vaddr_t last_va)
{
	struct pt_range range =
		_pt_top_range(common, READ_ONCE(common->top_of_table));

	range.va = va;
	range.last_va = last_va;

	return range;
}

/*
 * Span a slice of the table starting at a lower table level from an active
 * walk. Unlike pt_make_range() the new range is checked (via PT_WARN_ON)
 * since the caller is required to stay inside the parent's bounds.
 */
static __always_inline struct pt_range
pt_make_child_range(const struct pt_range *parent, pt_vaddr_t va,
		    pt_vaddr_t last_va)
{
	struct pt_range range = *parent;

	range.va = va;
	range.last_va = last_va;

	PT_WARN_ON(last_va < va);
	PT_WARN_ON(pt_check_range(&range));

	return range;
}

/**
 * pt_init() - Initialize a pt_state on the stack
 * @range: Range pointer to embed in the state
 * @level: Table level for the state
 * @table: Pointer to the table memory at level
 *
 * Helper to initialize the on-stack pt_state from walker arguments.
3487c5b184dSJason Gunthorpe */ 3497c5b184dSJason Gunthorpe static __always_inline struct pt_state 3507c5b184dSJason Gunthorpe pt_init(struct pt_range *range, unsigned int level, struct pt_table_p *table) 3517c5b184dSJason Gunthorpe { 3527c5b184dSJason Gunthorpe struct pt_state pts = { 3537c5b184dSJason Gunthorpe .range = range, 3547c5b184dSJason Gunthorpe .table = table, 3557c5b184dSJason Gunthorpe .level = level, 3567c5b184dSJason Gunthorpe }; 3577c5b184dSJason Gunthorpe return pts; 3587c5b184dSJason Gunthorpe } 3597c5b184dSJason Gunthorpe 3607c5b184dSJason Gunthorpe /** 3617c5b184dSJason Gunthorpe * pt_init_top() - Initialize a pt_state on the stack 3627c5b184dSJason Gunthorpe * @range: Range pointer to embed in the state 3637c5b184dSJason Gunthorpe * 3647c5b184dSJason Gunthorpe * The pt_state points to the top most level. 3657c5b184dSJason Gunthorpe */ 3667c5b184dSJason Gunthorpe static __always_inline struct pt_state pt_init_top(struct pt_range *range) 3677c5b184dSJason Gunthorpe { 3687c5b184dSJason Gunthorpe return pt_init(range, range->top_level, range->top_table); 3697c5b184dSJason Gunthorpe } 3707c5b184dSJason Gunthorpe 3717c5b184dSJason Gunthorpe typedef int (*pt_level_fn_t)(struct pt_range *range, void *arg, 3727c5b184dSJason Gunthorpe unsigned int level, struct pt_table_p *table); 3737c5b184dSJason Gunthorpe 3747c5b184dSJason Gunthorpe /** 3757c5b184dSJason Gunthorpe * pt_descend() - Recursively invoke the walker for the lower level 3767c5b184dSJason Gunthorpe * @pts: Iteration State 3777c5b184dSJason Gunthorpe * @arg: Value to pass to the function 3787c5b184dSJason Gunthorpe * @fn: Walker function to call 3797c5b184dSJason Gunthorpe * 3807c5b184dSJason Gunthorpe * pts must point to a table item. Invoke fn as a walker on the table 3817c5b184dSJason Gunthorpe * pts points to. 
3827c5b184dSJason Gunthorpe */ 3837c5b184dSJason Gunthorpe static __always_inline int pt_descend(struct pt_state *pts, void *arg, 3847c5b184dSJason Gunthorpe pt_level_fn_t fn) 3857c5b184dSJason Gunthorpe { 3867c5b184dSJason Gunthorpe int ret; 3877c5b184dSJason Gunthorpe 3887c5b184dSJason Gunthorpe if (PT_WARN_ON(!pts->table_lower)) 3897c5b184dSJason Gunthorpe return -EINVAL; 3907c5b184dSJason Gunthorpe 3917c5b184dSJason Gunthorpe ret = (*fn)(pts->range, arg, pts->level - 1, pts->table_lower); 3927c5b184dSJason Gunthorpe return ret; 3937c5b184dSJason Gunthorpe } 3947c5b184dSJason Gunthorpe 3957c5b184dSJason Gunthorpe /** 3967c5b184dSJason Gunthorpe * pt_walk_range() - Walk over a VA range 3977c5b184dSJason Gunthorpe * @range: Range pointer 3987c5b184dSJason Gunthorpe * @fn: Walker function to call 3997c5b184dSJason Gunthorpe * @arg: Value to pass to the function 4007c5b184dSJason Gunthorpe * 4017c5b184dSJason Gunthorpe * Walk over a VA range. The caller should have done a validity check, at 4027c5b184dSJason Gunthorpe * least calling pt_check_range(), when building range. The walk will 4037c5b184dSJason Gunthorpe * start at the top most table. 
 */
static __always_inline int pt_walk_range(struct pt_range *range,
					 pt_level_fn_t fn, void *arg)
{
	return fn(range, arg, range->top_level, range->top_table);
}

/*
 * pt_walk_descend() - Recursively invoke the walker for a slice of a lower
 * level
 * @pts: Iteration State
 * @va: Start address
 * @last_va: Last address
 * @fn: Walker function to call
 * @arg: Value to pass to the function
 *
 * With pts pointing at a table item this will descend and iterate over a slice
 * of the lower table. The caller must ensure that va/last_va are within the
 * table item. This creates a new walk and does not alter pts or pts->range.
 */
static __always_inline int pt_walk_descend(const struct pt_state *pts,
					   pt_vaddr_t va, pt_vaddr_t last_va,
					   pt_level_fn_t fn, void *arg)
{
	struct pt_range range = pt_make_child_range(pts->range, va, last_va);

	/* Only a table item with a valid lower table can be descended */
	if (PT_WARN_ON(!pt_can_have_table(pts)) ||
	    PT_WARN_ON(!pts->table_lower))
		return -EINVAL;

	return fn(&range, arg, pts->level - 1, pts->table_lower);
}

/*
 * pt_walk_descend_all() - Recursively invoke the walker for a table item
 * @parent_pts: Iteration State
 * @fn: Walker function to call
 * @arg: Value to pass to the function
 *
 * With pts pointing at a table item this will descend and iterate over the
 * entire lower table. This creates a new walk and does not alter pts or
 * pts->range.
 */
static __always_inline int
pt_walk_descend_all(const struct pt_state *parent_pts, pt_level_fn_t fn,
		    void *arg)
{
	unsigned int isz_lg2 = pt_table_item_lg2sz(parent_pts);

	/* Span the full item: VA rounded down/up to the item boundaries */
	return pt_walk_descend(parent_pts,
			       log2_set_mod(parent_pts->range->va, 0, isz_lg2),
			       log2_set_mod_max(parent_pts->range->va, isz_lg2),
			       fn, arg);
}

/**
 * pt_range_slice() - Return a range that spans indexes
 * @pts: Iteration State
 * @start_index: Starting index within pts
 * @end_index: Ending index within pts
 *
 * Create a range that spans an index range of the current table level
 * pt_state points at.
 */
static inline struct pt_range pt_range_slice(const struct pt_state *pts,
					     unsigned int start_index,
					     unsigned int end_index)
{
	unsigned int table_lg2sz = pt_table_oa_lg2sz(pts);
	pt_vaddr_t last_va;
	pt_vaddr_t va;

	/* First VA covered by start_index within this table's span */
	va = fvalog2_set_mod(pts->range->va,
			     log2_mul(start_index, pt_table_item_lg2sz(pts)),
			     table_lg2sz);
	/* end_index is exclusive: last VA is one byte before its start */
	last_va = fvalog2_set_mod(
		pts->range->va,
		log2_mul(end_index, pt_table_item_lg2sz(pts)) - 1, table_lg2sz);
	return pt_make_child_range(pts->range, va, last_va);
}

/**
 * pt_top_memsize_lg2() - Allocation size of the top table
 * @common: Table
 * @top_of_table: Top of table value from _pt_top_set()
 *
 * Compute the allocation size of the top table. For PT_FEAT_DYNAMIC_TOP this
 * will compute the top size assuming the table will grow.
 *
 * Return: log2 of the allocation size in bytes.
 */
static inline unsigned int pt_top_memsize_lg2(struct pt_common *common,
					      uintptr_t top_of_table)
{
	struct pt_range range = _pt_top_range(common, top_of_table);
	struct pt_state pts = pt_init_top(&range);
	unsigned int num_items_lg2;

	num_items_lg2 = common->max_vasz_lg2 - pt_table_item_lg2sz(&pts);
	/* A growable top is never larger than a single full table */
	if (range.top_level != PT_MAX_TOP_LEVEL &&
	    pt_feature(common, PT_FEAT_DYNAMIC_TOP))
		num_items_lg2 = min(num_items_lg2, pt_num_items_lg2(&pts));

	/* Round up the allocation size to the minimum alignment */
	return max(ffs_t(u64, PT_TOP_PHYS_MASK),
		   num_items_lg2 + ilog2(PT_ITEM_WORD_SIZE));
}

/**
 * pt_compute_best_pgsize() - Determine the best page size for leaf entries
 * @pgsz_bitmap: Permitted page sizes
 * @va: Starting virtual address for the leaf entry
 * @last_va: Last virtual address for the leaf entry, sets the max page size
 * @oa: Starting output address for the leaf entry
 *
 * Compute the largest page size for va, last_va, and oa together and return it
 * in lg2. The largest page size depends on the format's supported page sizes at
 * this level, and the relative alignment of the VA and OA addresses. 0 means
 * the OA cannot be stored with the provided pgsz_bitmap.
 */
static inline unsigned int pt_compute_best_pgsize(pt_vaddr_t pgsz_bitmap,
						  pt_vaddr_t va,
						  pt_vaddr_t last_va,
						  pt_oaddr_t oa)
{
	unsigned int best_pgsz_lg2;
	unsigned int pgsz_lg2;
	pt_vaddr_t len = last_va - va + 1;
	pt_vaddr_t mask;

	/* A degenerate or inverted range cannot hold a leaf entry */
	if (PT_WARN_ON(va >= last_va))
		return 0;

	/*
	 * Given a VA/OA pair the best page size is the largest page size
	 * where:
	 *
	 * 1) VA and OA start at the page. Bitwise this is the count of least
	 *    significant 0 bits.
	 *    This also implies that last_va/oa has the same prefix as va/oa.
	 */
	mask = va | oa;

	/*
	 * 2) The page size is not larger than the last_va (length). Since page
	 *    sizes are always power of two this can't be larger than the
	 *    largest power of two factor of the length.
	 */
	mask |= log2_to_int(vafls(len) - 1);

	best_pgsz_lg2 = vaffs(mask);

	/* Choose the highest bit <= best_pgsz_lg2 */
	if (best_pgsz_lg2 < PT_VADDR_MAX_LG2 - 1)
		pgsz_bitmap = log2_mod(pgsz_bitmap, best_pgsz_lg2 + 1);

	pgsz_lg2 = vafls(pgsz_bitmap);
	if (!pgsz_lg2)
		return 0;

	pgsz_lg2--;

	/* Sanity check the chosen size against all three constraints */
	PT_WARN_ON(log2_mod(va, pgsz_lg2) != 0);
	PT_WARN_ON(oalog2_mod(oa, pgsz_lg2) != 0);
	PT_WARN_ON(va + log2_to_int(pgsz_lg2) - 1 > last_va);
	PT_WARN_ON(!log2_div_eq(va, va + log2_to_int(pgsz_lg2) - 1, pgsz_lg2));
	PT_WARN_ON(
		!oalog2_div_eq(oa, oa + log2_to_int(pgsz_lg2) - 1, pgsz_lg2));
	return pgsz_lg2;
}

/*
 * Build the runtime dispatcher that selects the per-level unwound function
 * from a non-constant level argument. Levels above PT_MAX_TOP_LEVEL are
 * unreachable so their branches fold away at compile time.
 */
#define _PT_MAKE_CALL_LEVEL(fn)                                          \
	static __always_inline int fn(struct pt_range *range, void *arg, \
				      unsigned int level,                \
				      struct pt_table_p *table)          \
	{                                                                \
		static_assert(PT_MAX_TOP_LEVEL <= 5);                    \
		if (level == 0)                                          \
			return CONCATENATE(fn, 0)(range, arg, 0, table); \
		if (level == 1 || PT_MAX_TOP_LEVEL == 1)                 \
			return CONCATENATE(fn, 1)(range, arg, 1, table); \
		if (level == 2 || PT_MAX_TOP_LEVEL == 2)                 \
			return CONCATENATE(fn, 2)(range, arg, 2, table); \
		if (level == 3 || PT_MAX_TOP_LEVEL == 3)                 \
			return CONCATENATE(fn, 3)(range, arg, 3, table); \
		if (level == 4 || PT_MAX_TOP_LEVEL == 4)                 \
			return CONCATENATE(fn, 4)(range, arg, 4, table); \
		return CONCATENATE(fn, 5)(range, arg, 5, table);         \
	}

/* Terminator for the descend chain: descending below level 0 is a bug */
static inline int __pt_make_level_fn_err(struct pt_range *range, void *arg,
					 unsigned int unused_level,
					 struct pt_table_p *table)
{
	static_assert(PT_MAX_TOP_LEVEL <= 5);
	return -EPROTOTYPE;
}

/* Instantiate do_fn for one compile-time-constant level */
#define __PT_MAKE_LEVEL_FN(fn, level, descend_fn, do_fn)            \
	static inline int fn(struct pt_range *range, void *arg,     \
			     unsigned int unused_level,             \
			     struct pt_table_p *table)              \
	{                                                           \
		return do_fn(range, arg, level, table, descend_fn); \
	}

/**
 * PT_MAKE_LEVELS() - Build an unwound walker
 * @fn: Name of the walker function
 * @do_fn: Function to call at each level
 *
 * This builds a function call tree that can be fully inlined.
 * The caller must provide a function body in an __always_inline function::
 *
 * static __always_inline int do_fn(struct pt_range *range, void *arg,
 *                                  unsigned int level, struct pt_table_p *table,
 *                                  pt_level_fn_t descend_fn)
 *
 * An inline function will be created for each table level that calls do_fn with
 * a compile time constant for level and a pointer to the next lower function.
 * This generates an optimally inlined walk where each of the functions sees a
 * constant level and can codegen the exact constants/etc for that level.
 *
 * Note this can produce a lot of code!
 */
#define PT_MAKE_LEVELS(fn, do_fn)                                          \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 0), 0, __pt_make_level_fn_err,  \
			   do_fn);                                         \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 1), 1, CONCATENATE(fn, 0), do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 2), 2, CONCATENATE(fn, 1), do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 3), 3, CONCATENATE(fn, 2), do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 4), 4, CONCATENATE(fn, 3), do_fn); \
	__PT_MAKE_LEVEL_FN(CONCATENATE(fn, 5), 5, CONCATENATE(fn, 4), do_fn); \
	_PT_MAKE_CALL_LEVEL(fn)

#endif