11c6fdbd8SKent Overstreet /* SPDX-License-Identifier: GPL-2.0 */ 21c6fdbd8SKent Overstreet #ifndef _EYTZINGER_H 31c6fdbd8SKent Overstreet #define _EYTZINGER_H 41c6fdbd8SKent Overstreet 51c6fdbd8SKent Overstreet #include <linux/bitops.h> 61c6fdbd8SKent Overstreet #include <linux/log2.h> 71c6fdbd8SKent Overstreet 8ca1e02f7SKent Overstreet #ifdef EYTZINGER_DEBUG 9ca1e02f7SKent Overstreet #define EYTZINGER_BUG_ON(cond) BUG_ON(cond) 10ca1e02f7SKent Overstreet #else 11ca1e02f7SKent Overstreet #define EYTZINGER_BUG_ON(cond) 12ca1e02f7SKent Overstreet #endif 131c6fdbd8SKent Overstreet 141c6fdbd8SKent Overstreet /* 151c6fdbd8SKent Overstreet * Traversal for trees in eytzinger layout - a full binary tree layed out in an 16ca1e02f7SKent Overstreet * array. 171c6fdbd8SKent Overstreet * 18ca1e02f7SKent Overstreet * Consider using an eytzinger tree any time you would otherwise be doing binary 19ca1e02f7SKent Overstreet * search over an array. Binary search is a worst case scenario for branch 20ca1e02f7SKent Overstreet * prediction and prefetching, but in an eytzinger tree every node's children 21ca1e02f7SKent Overstreet * are adjacent in memory, thus we can prefetch children before knowing the 22ca1e02f7SKent Overstreet * result of the comparison, assuming multiple nodes fit on a cacheline. 23ca1e02f7SKent Overstreet * 24ca1e02f7SKent Overstreet * Two variants are provided, for one based indexing and zero based indexing. 25ca1e02f7SKent Overstreet * 26ca1e02f7SKent Overstreet * Zero based indexing is more convenient, but one based indexing has better 27ca1e02f7SKent Overstreet * alignment and thus better performance because each new level of the tree 28ca1e02f7SKent Overstreet * starts at a power of two, and thus if element 0 was cacheline aligned, each 29ca1e02f7SKent Overstreet * new level will be as well. 301c6fdbd8SKent Overstreet */ 311c6fdbd8SKent Overstreet 321c6fdbd8SKent Overstreet static inline unsigned eytzinger1_child(unsigned i, unsigned child) 331c6fdbd8SKent Overstreet { 34ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(child > 1); 351c6fdbd8SKent Overstreet 361c6fdbd8SKent Overstreet return (i << 1) + child; 371c6fdbd8SKent Overstreet } 381c6fdbd8SKent Overstreet 391c6fdbd8SKent Overstreet static inline unsigned eytzinger1_left_child(unsigned i) 401c6fdbd8SKent Overstreet { 411c6fdbd8SKent Overstreet return eytzinger1_child(i, 0); 421c6fdbd8SKent Overstreet } 431c6fdbd8SKent Overstreet 441c6fdbd8SKent Overstreet static inline unsigned eytzinger1_right_child(unsigned i) 451c6fdbd8SKent Overstreet { 461c6fdbd8SKent Overstreet return eytzinger1_child(i, 1); 471c6fdbd8SKent Overstreet } 481c6fdbd8SKent Overstreet 491c6fdbd8SKent Overstreet static inline unsigned eytzinger1_first(unsigned size) 501c6fdbd8SKent Overstreet { 5172492d55SKent Overstreet return rounddown_pow_of_two(size); 521c6fdbd8SKent Overstreet } 531c6fdbd8SKent Overstreet 541c6fdbd8SKent Overstreet static inline unsigned eytzinger1_last(unsigned size) 551c6fdbd8SKent Overstreet { 5672492d55SKent Overstreet return rounddown_pow_of_two(size + 1) - 1; 571c6fdbd8SKent Overstreet } 581c6fdbd8SKent Overstreet 591c6fdbd8SKent Overstreet /* 601c6fdbd8SKent Overstreet * eytzinger1_next() and eytzinger1_prev() have the nice properties that 611c6fdbd8SKent Overstreet * 621c6fdbd8SKent Overstreet * eytzinger1_next(0) == eytzinger1_first()) 631c6fdbd8SKent Overstreet * eytzinger1_prev(0) == eytzinger1_last()) 641c6fdbd8SKent Overstreet * 651c6fdbd8SKent Overstreet * eytzinger1_prev(eytzinger1_first()) == 0 661c6fdbd8SKent Overstreet * eytzinger1_next(eytzinger1_last()) == 0 671c6fdbd8SKent Overstreet */ 681c6fdbd8SKent Overstreet 691c6fdbd8SKent Overstreet static inline unsigned eytzinger1_next(unsigned i, unsigned size) 701c6fdbd8SKent Overstreet { 71ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(i > size); 721c6fdbd8SKent Overstreet 7372492d55SKent Overstreet if (eytzinger1_right_child(i) <= size) { 741c6fdbd8SKent Overstreet i = eytzinger1_right_child(i); 751c6fdbd8SKent Overstreet 7672492d55SKent Overstreet i <<= __fls(size + 1) - __fls(i); 7772492d55SKent Overstreet i >>= i > size; 781c6fdbd8SKent Overstreet } else { 791c6fdbd8SKent Overstreet i >>= ffz(i) + 1; 801c6fdbd8SKent Overstreet } 811c6fdbd8SKent Overstreet 821c6fdbd8SKent Overstreet return i; 831c6fdbd8SKent Overstreet } 841c6fdbd8SKent Overstreet 851c6fdbd8SKent Overstreet static inline unsigned eytzinger1_prev(unsigned i, unsigned size) 861c6fdbd8SKent Overstreet { 87ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(i > size); 881c6fdbd8SKent Overstreet 8972492d55SKent Overstreet if (eytzinger1_left_child(i) <= size) { 901c6fdbd8SKent Overstreet i = eytzinger1_left_child(i) + 1; 911c6fdbd8SKent Overstreet 9272492d55SKent Overstreet i <<= __fls(size + 1) - __fls(i); 931c6fdbd8SKent Overstreet i -= 1; 9472492d55SKent Overstreet i >>= i > size; 951c6fdbd8SKent Overstreet } else { 961c6fdbd8SKent Overstreet i >>= __ffs(i) + 1; 971c6fdbd8SKent Overstreet } 981c6fdbd8SKent Overstreet 991c6fdbd8SKent Overstreet return i; 1001c6fdbd8SKent Overstreet } 1011c6fdbd8SKent Overstreet 1021c6fdbd8SKent Overstreet static inline unsigned eytzinger1_extra(unsigned size) 1031c6fdbd8SKent Overstreet { 10472492d55SKent Overstreet return (size + 1 - rounddown_pow_of_two(size)) << 1; 1051c6fdbd8SKent Overstreet } 1061c6fdbd8SKent Overstreet 1071c6fdbd8SKent Overstreet static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, 1081c6fdbd8SKent Overstreet unsigned extra) 1091c6fdbd8SKent Overstreet { 1101c6fdbd8SKent Overstreet unsigned b = __fls(i); 11172492d55SKent Overstreet unsigned shift = __fls(size) - b; 1121c6fdbd8SKent Overstreet int s; 1131c6fdbd8SKent Overstreet 114ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(!i || i > size); 1151c6fdbd8SKent Overstreet 1161c6fdbd8SKent Overstreet i ^= 1U << b; 1171c6fdbd8SKent Overstreet i <<= 1; 1181c6fdbd8SKent Overstreet i |= 1; 1191c6fdbd8SKent Overstreet i <<= shift; 1201c6fdbd8SKent Overstreet 1211c6fdbd8SKent Overstreet /* 1221c6fdbd8SKent Overstreet * sign bit trick: 1231c6fdbd8SKent Overstreet * 1241c6fdbd8SKent Overstreet * if (i > extra) 1251c6fdbd8SKent Overstreet * i -= (i - extra) >> 1; 1261c6fdbd8SKent Overstreet */ 1271c6fdbd8SKent Overstreet s = extra - i; 1281c6fdbd8SKent Overstreet i += (s >> 1) & (s >> 31); 1291c6fdbd8SKent Overstreet 1301c6fdbd8SKent Overstreet return i; 1311c6fdbd8SKent Overstreet } 1321c6fdbd8SKent Overstreet 1331c6fdbd8SKent Overstreet static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, 1341c6fdbd8SKent Overstreet unsigned extra) 1351c6fdbd8SKent Overstreet { 1361c6fdbd8SKent Overstreet unsigned shift; 1371c6fdbd8SKent Overstreet int s; 1381c6fdbd8SKent Overstreet 139ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(!i || i > size); 1401c6fdbd8SKent Overstreet 1411c6fdbd8SKent Overstreet /* 1421c6fdbd8SKent Overstreet * sign bit trick: 1431c6fdbd8SKent Overstreet * 1441c6fdbd8SKent Overstreet * if (i > extra) 1451c6fdbd8SKent Overstreet * i += i - extra; 1461c6fdbd8SKent Overstreet */ 1471c6fdbd8SKent Overstreet s = extra - i; 1481c6fdbd8SKent Overstreet i -= s & (s >> 31); 1491c6fdbd8SKent Overstreet 1501c6fdbd8SKent Overstreet shift = __ffs(i); 1511c6fdbd8SKent Overstreet 1521c6fdbd8SKent Overstreet i >>= shift + 1; 15372492d55SKent Overstreet i |= 1U << (__fls(size) - shift); 1541c6fdbd8SKent Overstreet 1551c6fdbd8SKent Overstreet return i; 1561c6fdbd8SKent Overstreet } 1571c6fdbd8SKent Overstreet 1581c6fdbd8SKent Overstreet static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size) 1591c6fdbd8SKent Overstreet { 1601c6fdbd8SKent Overstreet return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size)); 1611c6fdbd8SKent Overstreet } 1621c6fdbd8SKent Overstreet 1631c6fdbd8SKent Overstreet static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) 1641c6fdbd8SKent Overstreet { 1651c6fdbd8SKent Overstreet return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size)); 1661c6fdbd8SKent Overstreet } 1671c6fdbd8SKent Overstreet 1681c6fdbd8SKent Overstreet #define eytzinger1_for_each(_i, _size) \ 1693fe8a186SKent Overstreet for (unsigned (_i) = eytzinger1_first((_size)); \ 1701c6fdbd8SKent Overstreet (_i) != 0; \ 1711c6fdbd8SKent Overstreet (_i) = eytzinger1_next((_i), (_size))) 1721c6fdbd8SKent Overstreet 1731c6fdbd8SKent Overstreet /* Zero based indexing version: */ 1741c6fdbd8SKent Overstreet 1751c6fdbd8SKent Overstreet static inline unsigned eytzinger0_child(unsigned i, unsigned child) 1761c6fdbd8SKent Overstreet { 177ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(child > 1); 1781c6fdbd8SKent Overstreet 1791c6fdbd8SKent Overstreet return (i << 1) + 1 + child; 1801c6fdbd8SKent Overstreet } 1811c6fdbd8SKent Overstreet 1821c6fdbd8SKent Overstreet static inline unsigned eytzinger0_left_child(unsigned i) 1831c6fdbd8SKent Overstreet { 1841c6fdbd8SKent Overstreet return eytzinger0_child(i, 0); 1851c6fdbd8SKent Overstreet } 1861c6fdbd8SKent Overstreet 1871c6fdbd8SKent Overstreet static inline unsigned eytzinger0_right_child(unsigned i) 1881c6fdbd8SKent Overstreet { 1891c6fdbd8SKent Overstreet return eytzinger0_child(i, 1); 1901c6fdbd8SKent Overstreet } 1911c6fdbd8SKent Overstreet 1921c6fdbd8SKent Overstreet static inline unsigned eytzinger0_first(unsigned size) 1931c6fdbd8SKent Overstreet { 19472492d55SKent Overstreet return eytzinger1_first(size) - 1; 1951c6fdbd8SKent Overstreet } 1961c6fdbd8SKent Overstreet 1971c6fdbd8SKent Overstreet static inline unsigned eytzinger0_last(unsigned size) 1981c6fdbd8SKent Overstreet { 19972492d55SKent Overstreet return eytzinger1_last(size) - 1; 2001c6fdbd8SKent Overstreet } 2011c6fdbd8SKent Overstreet 2021c6fdbd8SKent Overstreet static inline unsigned eytzinger0_next(unsigned i, unsigned size) 2031c6fdbd8SKent Overstreet { 20472492d55SKent Overstreet return eytzinger1_next(i + 1, size) - 1; 2051c6fdbd8SKent Overstreet } 2061c6fdbd8SKent Overstreet 2071c6fdbd8SKent Overstreet static inline unsigned eytzinger0_prev(unsigned i, unsigned size) 2081c6fdbd8SKent Overstreet { 20972492d55SKent Overstreet return eytzinger1_prev(i + 1, size) - 1; 2101c6fdbd8SKent Overstreet } 2111c6fdbd8SKent Overstreet 2121c6fdbd8SKent Overstreet static inline unsigned eytzinger0_extra(unsigned size) 2131c6fdbd8SKent Overstreet { 21472492d55SKent Overstreet return eytzinger1_extra(size); 2151c6fdbd8SKent Overstreet } 2161c6fdbd8SKent Overstreet 2171c6fdbd8SKent Overstreet static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, 2181c6fdbd8SKent Overstreet unsigned extra) 2191c6fdbd8SKent Overstreet { 22072492d55SKent Overstreet return __eytzinger1_to_inorder(i + 1, size, extra) - 1; 2211c6fdbd8SKent Overstreet } 2221c6fdbd8SKent Overstreet 2231c6fdbd8SKent Overstreet static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, 2241c6fdbd8SKent Overstreet unsigned extra) 2251c6fdbd8SKent Overstreet { 22672492d55SKent Overstreet return __inorder_to_eytzinger1(i + 1, size, extra) - 1; 2271c6fdbd8SKent Overstreet } 2281c6fdbd8SKent Overstreet 2291c6fdbd8SKent Overstreet static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) 2301c6fdbd8SKent Overstreet { 2311c6fdbd8SKent Overstreet return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size)); 2321c6fdbd8SKent Overstreet } 2331c6fdbd8SKent Overstreet 2341c6fdbd8SKent Overstreet static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) 2351c6fdbd8SKent Overstreet { 2361c6fdbd8SKent Overstreet return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size)); 2371c6fdbd8SKent Overstreet } 2381c6fdbd8SKent Overstreet 2391c6fdbd8SKent Overstreet #define eytzinger0_for_each(_i, _size) \ 2403fe8a186SKent Overstreet for (unsigned (_i) = eytzinger0_first((_size)); \ 2411c6fdbd8SKent Overstreet (_i) != -1; \ 2421c6fdbd8SKent Overstreet (_i) = eytzinger0_next((_i), (_size))) 2431c6fdbd8SKent Overstreet 2441c6fdbd8SKent Overstreet /* return greatest node <= @search, or -1 if not found */ 245*9c432404SKent Overstreet static inline int eytzinger0_find_le(void *base, size_t nr, size_t size, 246ca1e02f7SKent Overstreet cmp_func_t cmp, const void *search) 2471c6fdbd8SKent Overstreet { 2481c6fdbd8SKent Overstreet unsigned i, n = 0; 2491c6fdbd8SKent Overstreet 2501c6fdbd8SKent Overstreet if (!nr) 2511c6fdbd8SKent Overstreet return -1; 2521c6fdbd8SKent Overstreet 2531c6fdbd8SKent Overstreet do { 2541c6fdbd8SKent Overstreet i = n; 255ca1e02f7SKent Overstreet n = eytzinger0_child(i, cmp(base + i * size, search) <= 0); 2561c6fdbd8SKent Overstreet } while (n < nr); 2571c6fdbd8SKent Overstreet 2581c6fdbd8SKent Overstreet if (n & 1) { 259*9c432404SKent Overstreet /* 260*9c432404SKent Overstreet * @i was greater than @search, return previous node: 261*9c432404SKent Overstreet * 262*9c432404SKent Overstreet * if @i was leftmost/smallest element, 263*9c432404SKent Overstreet * eytzinger0_prev(eytzinger0_first())) returns -1, as expected 264*9c432404SKent Overstreet */ 2651c6fdbd8SKent Overstreet return eytzinger0_prev(i, nr); 2661c6fdbd8SKent Overstreet } else { 2671c6fdbd8SKent Overstreet return i; 2681c6fdbd8SKent Overstreet } 2691c6fdbd8SKent Overstreet } 2701c6fdbd8SKent Overstreet 271*9c432404SKent Overstreet static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size, 272ca1e02f7SKent Overstreet cmp_func_t cmp, const void *search) 273ca1e02f7SKent Overstreet { 274ca1e02f7SKent Overstreet ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search); 275*9c432404SKent Overstreet 276*9c432404SKent Overstreet /* 277*9c432404SKent Overstreet * if eytitzinger0_find_le() returned -1 - no element was <= search - we 278*9c432404SKent Overstreet * want to return the first element; next/prev identities mean this work 279*9c432404SKent Overstreet * as expected 280*9c432404SKent Overstreet * 281*9c432404SKent Overstreet * similarly if find_le() returns last element, we should return -1; 282*9c432404SKent Overstreet * identities mean this all works out: 283*9c432404SKent Overstreet */ 284*9c432404SKent Overstreet return eytzinger0_next(idx, nr); 285ca1e02f7SKent Overstreet } 286ca1e02f7SKent Overstreet 2877ef2a73aSKent Overstreet #define eytzinger0_find(base, nr, size, _cmp, search) \ 2887ef2a73aSKent Overstreet ({ \ 2897ef2a73aSKent Overstreet void *_base = (base); \ 290169de419SKent Overstreet const void *_search = (search); \ 2917ef2a73aSKent Overstreet size_t _nr = (nr); \ 2927ef2a73aSKent Overstreet size_t _size = (size); \ 2937ef2a73aSKent Overstreet size_t _i = 0; \ 2947ef2a73aSKent Overstreet int _res; \ 2957ef2a73aSKent Overstreet \ 2967ef2a73aSKent Overstreet while (_i < _nr && \ 297ca1e02f7SKent Overstreet (_res = _cmp(_search, _base + _i * _size))) \ 2987ef2a73aSKent Overstreet _i = eytzinger0_child(_i, _res > 0); \ 2997ef2a73aSKent Overstreet _i; \ 3007ef2a73aSKent Overstreet }) 3011c6fdbd8SKent Overstreet 302ca1e02f7SKent Overstreet void eytzinger0_sort_r(void *, size_t, size_t, 303ca1e02f7SKent Overstreet cmp_r_func_t, swap_r_func_t, const void *); 304ca1e02f7SKent Overstreet void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); 3051c6fdbd8SKent Overstreet 3061c6fdbd8SKent Overstreet #endif /* _EYTZINGER_H */ 307