11c6fdbd8SKent Overstreet /* SPDX-License-Identifier: GPL-2.0 */
21c6fdbd8SKent Overstreet #ifndef _EYTZINGER_H
31c6fdbd8SKent Overstreet #define _EYTZINGER_H
41c6fdbd8SKent Overstreet
51c6fdbd8SKent Overstreet #include <linux/bitops.h>
61c6fdbd8SKent Overstreet #include <linux/log2.h>
71c6fdbd8SKent Overstreet
8ca1e02f7SKent Overstreet #ifdef EYTZINGER_DEBUG
9ca1e02f7SKent Overstreet #define EYTZINGER_BUG_ON(cond) BUG_ON(cond)
10ca1e02f7SKent Overstreet #else
11ca1e02f7SKent Overstreet #define EYTZINGER_BUG_ON(cond)
12ca1e02f7SKent Overstreet #endif
131c6fdbd8SKent Overstreet
141c6fdbd8SKent Overstreet /*
151c6fdbd8SKent Overstreet * Traversal for trees in eytzinger layout - a full binary tree layed out in an
16ca1e02f7SKent Overstreet * array.
171c6fdbd8SKent Overstreet *
18ca1e02f7SKent Overstreet * Consider using an eytzinger tree any time you would otherwise be doing binary
19ca1e02f7SKent Overstreet * search over an array. Binary search is a worst case scenario for branch
20ca1e02f7SKent Overstreet * prediction and prefetching, but in an eytzinger tree every node's children
21ca1e02f7SKent Overstreet * are adjacent in memory, thus we can prefetch children before knowing the
22ca1e02f7SKent Overstreet * result of the comparison, assuming multiple nodes fit on a cacheline.
23ca1e02f7SKent Overstreet *
24ca1e02f7SKent Overstreet * Two variants are provided, for one based indexing and zero based indexing.
25ca1e02f7SKent Overstreet *
26ca1e02f7SKent Overstreet * Zero based indexing is more convenient, but one based indexing has better
27ca1e02f7SKent Overstreet * alignment and thus better performance because each new level of the tree
28ca1e02f7SKent Overstreet * starts at a power of two, and thus if element 0 was cacheline aligned, each
29ca1e02f7SKent Overstreet * new level will be as well.
301c6fdbd8SKent Overstreet */
311c6fdbd8SKent Overstreet
eytzinger1_child(unsigned i,unsigned child)321c6fdbd8SKent Overstreet static inline unsigned eytzinger1_child(unsigned i, unsigned child)
331c6fdbd8SKent Overstreet {
34ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(child > 1);
351c6fdbd8SKent Overstreet
361c6fdbd8SKent Overstreet return (i << 1) + child;
371c6fdbd8SKent Overstreet }
381c6fdbd8SKent Overstreet
eytzinger1_left_child(unsigned i)391c6fdbd8SKent Overstreet static inline unsigned eytzinger1_left_child(unsigned i)
401c6fdbd8SKent Overstreet {
411c6fdbd8SKent Overstreet return eytzinger1_child(i, 0);
421c6fdbd8SKent Overstreet }
431c6fdbd8SKent Overstreet
eytzinger1_right_child(unsigned i)441c6fdbd8SKent Overstreet static inline unsigned eytzinger1_right_child(unsigned i)
451c6fdbd8SKent Overstreet {
461c6fdbd8SKent Overstreet return eytzinger1_child(i, 1);
471c6fdbd8SKent Overstreet }
481c6fdbd8SKent Overstreet
eytzinger1_first(unsigned size)491c6fdbd8SKent Overstreet static inline unsigned eytzinger1_first(unsigned size)
501c6fdbd8SKent Overstreet {
518ed58789SKent Overstreet return size ? rounddown_pow_of_two(size) : 0;
521c6fdbd8SKent Overstreet }
531c6fdbd8SKent Overstreet
eytzinger1_last(unsigned size)541c6fdbd8SKent Overstreet static inline unsigned eytzinger1_last(unsigned size)
551c6fdbd8SKent Overstreet {
5672492d55SKent Overstreet return rounddown_pow_of_two(size + 1) - 1;
571c6fdbd8SKent Overstreet }
581c6fdbd8SKent Overstreet
591c6fdbd8SKent Overstreet /*
601c6fdbd8SKent Overstreet * eytzinger1_next() and eytzinger1_prev() have the nice properties that
611c6fdbd8SKent Overstreet *
621c6fdbd8SKent Overstreet * eytzinger1_next(0) == eytzinger1_first())
631c6fdbd8SKent Overstreet * eytzinger1_prev(0) == eytzinger1_last())
641c6fdbd8SKent Overstreet *
651c6fdbd8SKent Overstreet * eytzinger1_prev(eytzinger1_first()) == 0
661c6fdbd8SKent Overstreet * eytzinger1_next(eytzinger1_last()) == 0
671c6fdbd8SKent Overstreet */
681c6fdbd8SKent Overstreet
eytzinger1_next(unsigned i,unsigned size)691c6fdbd8SKent Overstreet static inline unsigned eytzinger1_next(unsigned i, unsigned size)
701c6fdbd8SKent Overstreet {
71ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(i > size);
721c6fdbd8SKent Overstreet
7372492d55SKent Overstreet if (eytzinger1_right_child(i) <= size) {
741c6fdbd8SKent Overstreet i = eytzinger1_right_child(i);
751c6fdbd8SKent Overstreet
7672492d55SKent Overstreet i <<= __fls(size + 1) - __fls(i);
7772492d55SKent Overstreet i >>= i > size;
781c6fdbd8SKent Overstreet } else {
791c6fdbd8SKent Overstreet i >>= ffz(i) + 1;
801c6fdbd8SKent Overstreet }
811c6fdbd8SKent Overstreet
821c6fdbd8SKent Overstreet return i;
831c6fdbd8SKent Overstreet }
841c6fdbd8SKent Overstreet
eytzinger1_prev(unsigned i,unsigned size)851c6fdbd8SKent Overstreet static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
861c6fdbd8SKent Overstreet {
87ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(i > size);
881c6fdbd8SKent Overstreet
8972492d55SKent Overstreet if (eytzinger1_left_child(i) <= size) {
901c6fdbd8SKent Overstreet i = eytzinger1_left_child(i) + 1;
911c6fdbd8SKent Overstreet
9272492d55SKent Overstreet i <<= __fls(size + 1) - __fls(i);
931c6fdbd8SKent Overstreet i -= 1;
9472492d55SKent Overstreet i >>= i > size;
951c6fdbd8SKent Overstreet } else {
961c6fdbd8SKent Overstreet i >>= __ffs(i) + 1;
971c6fdbd8SKent Overstreet }
981c6fdbd8SKent Overstreet
991c6fdbd8SKent Overstreet return i;
1001c6fdbd8SKent Overstreet }
1011c6fdbd8SKent Overstreet
eytzinger1_extra(unsigned size)1021c6fdbd8SKent Overstreet static inline unsigned eytzinger1_extra(unsigned size)
1031c6fdbd8SKent Overstreet {
1048ed58789SKent Overstreet return size
1058ed58789SKent Overstreet ? (size + 1 - rounddown_pow_of_two(size)) << 1
1068ed58789SKent Overstreet : 0;
1071c6fdbd8SKent Overstreet }
1081c6fdbd8SKent Overstreet
__eytzinger1_to_inorder(unsigned i,unsigned size,unsigned extra)1091c6fdbd8SKent Overstreet static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
1101c6fdbd8SKent Overstreet unsigned extra)
1111c6fdbd8SKent Overstreet {
1121c6fdbd8SKent Overstreet unsigned b = __fls(i);
11372492d55SKent Overstreet unsigned shift = __fls(size) - b;
1141c6fdbd8SKent Overstreet int s;
1151c6fdbd8SKent Overstreet
116ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(!i || i > size);
1171c6fdbd8SKent Overstreet
1181c6fdbd8SKent Overstreet i ^= 1U << b;
1191c6fdbd8SKent Overstreet i <<= 1;
1201c6fdbd8SKent Overstreet i |= 1;
1211c6fdbd8SKent Overstreet i <<= shift;
1221c6fdbd8SKent Overstreet
1231c6fdbd8SKent Overstreet /*
1241c6fdbd8SKent Overstreet * sign bit trick:
1251c6fdbd8SKent Overstreet *
1261c6fdbd8SKent Overstreet * if (i > extra)
1271c6fdbd8SKent Overstreet * i -= (i - extra) >> 1;
1281c6fdbd8SKent Overstreet */
1291c6fdbd8SKent Overstreet s = extra - i;
1301c6fdbd8SKent Overstreet i += (s >> 1) & (s >> 31);
1311c6fdbd8SKent Overstreet
1321c6fdbd8SKent Overstreet return i;
1331c6fdbd8SKent Overstreet }
1341c6fdbd8SKent Overstreet
__inorder_to_eytzinger1(unsigned i,unsigned size,unsigned extra)1351c6fdbd8SKent Overstreet static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
1361c6fdbd8SKent Overstreet unsigned extra)
1371c6fdbd8SKent Overstreet {
1381c6fdbd8SKent Overstreet unsigned shift;
1391c6fdbd8SKent Overstreet int s;
1401c6fdbd8SKent Overstreet
141ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(!i || i > size);
1421c6fdbd8SKent Overstreet
1431c6fdbd8SKent Overstreet /*
1441c6fdbd8SKent Overstreet * sign bit trick:
1451c6fdbd8SKent Overstreet *
1461c6fdbd8SKent Overstreet * if (i > extra)
1471c6fdbd8SKent Overstreet * i += i - extra;
1481c6fdbd8SKent Overstreet */
1491c6fdbd8SKent Overstreet s = extra - i;
1501c6fdbd8SKent Overstreet i -= s & (s >> 31);
1511c6fdbd8SKent Overstreet
1521c6fdbd8SKent Overstreet shift = __ffs(i);
1531c6fdbd8SKent Overstreet
1541c6fdbd8SKent Overstreet i >>= shift + 1;
15572492d55SKent Overstreet i |= 1U << (__fls(size) - shift);
1561c6fdbd8SKent Overstreet
1571c6fdbd8SKent Overstreet return i;
1581c6fdbd8SKent Overstreet }
1591c6fdbd8SKent Overstreet
eytzinger1_to_inorder(unsigned i,unsigned size)1601c6fdbd8SKent Overstreet static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
1611c6fdbd8SKent Overstreet {
1621c6fdbd8SKent Overstreet return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size));
1631c6fdbd8SKent Overstreet }
1641c6fdbd8SKent Overstreet
inorder_to_eytzinger1(unsigned i,unsigned size)1651c6fdbd8SKent Overstreet static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
1661c6fdbd8SKent Overstreet {
1671c6fdbd8SKent Overstreet return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size));
1681c6fdbd8SKent Overstreet }
1691c6fdbd8SKent Overstreet
1701c6fdbd8SKent Overstreet #define eytzinger1_for_each(_i, _size) \
1713fe8a186SKent Overstreet for (unsigned (_i) = eytzinger1_first((_size)); \
1721c6fdbd8SKent Overstreet (_i) != 0; \
1731c6fdbd8SKent Overstreet (_i) = eytzinger1_next((_i), (_size)))
1741c6fdbd8SKent Overstreet
1751c6fdbd8SKent Overstreet /* Zero based indexing version: */
1761c6fdbd8SKent Overstreet
eytzinger0_child(unsigned i,unsigned child)1771c6fdbd8SKent Overstreet static inline unsigned eytzinger0_child(unsigned i, unsigned child)
1781c6fdbd8SKent Overstreet {
179ca1e02f7SKent Overstreet EYTZINGER_BUG_ON(child > 1);
1801c6fdbd8SKent Overstreet
1811c6fdbd8SKent Overstreet return (i << 1) + 1 + child;
1821c6fdbd8SKent Overstreet }
1831c6fdbd8SKent Overstreet
eytzinger0_left_child(unsigned i)1841c6fdbd8SKent Overstreet static inline unsigned eytzinger0_left_child(unsigned i)
1851c6fdbd8SKent Overstreet {
1861c6fdbd8SKent Overstreet return eytzinger0_child(i, 0);
1871c6fdbd8SKent Overstreet }
1881c6fdbd8SKent Overstreet
eytzinger0_right_child(unsigned i)1891c6fdbd8SKent Overstreet static inline unsigned eytzinger0_right_child(unsigned i)
1901c6fdbd8SKent Overstreet {
1911c6fdbd8SKent Overstreet return eytzinger0_child(i, 1);
1921c6fdbd8SKent Overstreet }
1931c6fdbd8SKent Overstreet
eytzinger0_first(unsigned size)1941c6fdbd8SKent Overstreet static inline unsigned eytzinger0_first(unsigned size)
1951c6fdbd8SKent Overstreet {
19672492d55SKent Overstreet return eytzinger1_first(size) - 1;
1971c6fdbd8SKent Overstreet }
1981c6fdbd8SKent Overstreet
eytzinger0_last(unsigned size)1991c6fdbd8SKent Overstreet static inline unsigned eytzinger0_last(unsigned size)
2001c6fdbd8SKent Overstreet {
20172492d55SKent Overstreet return eytzinger1_last(size) - 1;
2021c6fdbd8SKent Overstreet }
2031c6fdbd8SKent Overstreet
eytzinger0_next(unsigned i,unsigned size)2041c6fdbd8SKent Overstreet static inline unsigned eytzinger0_next(unsigned i, unsigned size)
2051c6fdbd8SKent Overstreet {
20672492d55SKent Overstreet return eytzinger1_next(i + 1, size) - 1;
2071c6fdbd8SKent Overstreet }
2081c6fdbd8SKent Overstreet
eytzinger0_prev(unsigned i,unsigned size)2091c6fdbd8SKent Overstreet static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
2101c6fdbd8SKent Overstreet {
21172492d55SKent Overstreet return eytzinger1_prev(i + 1, size) - 1;
2121c6fdbd8SKent Overstreet }
2131c6fdbd8SKent Overstreet
eytzinger0_extra(unsigned size)2141c6fdbd8SKent Overstreet static inline unsigned eytzinger0_extra(unsigned size)
2151c6fdbd8SKent Overstreet {
21672492d55SKent Overstreet return eytzinger1_extra(size);
2171c6fdbd8SKent Overstreet }
2181c6fdbd8SKent Overstreet
__eytzinger0_to_inorder(unsigned i,unsigned size,unsigned extra)2191c6fdbd8SKent Overstreet static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
2201c6fdbd8SKent Overstreet unsigned extra)
2211c6fdbd8SKent Overstreet {
22272492d55SKent Overstreet return __eytzinger1_to_inorder(i + 1, size, extra) - 1;
2231c6fdbd8SKent Overstreet }
2241c6fdbd8SKent Overstreet
__inorder_to_eytzinger0(unsigned i,unsigned size,unsigned extra)2251c6fdbd8SKent Overstreet static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
2261c6fdbd8SKent Overstreet unsigned extra)
2271c6fdbd8SKent Overstreet {
22872492d55SKent Overstreet return __inorder_to_eytzinger1(i + 1, size, extra) - 1;
2291c6fdbd8SKent Overstreet }
2301c6fdbd8SKent Overstreet
eytzinger0_to_inorder(unsigned i,unsigned size)2311c6fdbd8SKent Overstreet static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
2321c6fdbd8SKent Overstreet {
2331c6fdbd8SKent Overstreet return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size));
2341c6fdbd8SKent Overstreet }
2351c6fdbd8SKent Overstreet
inorder_to_eytzinger0(unsigned i,unsigned size)2361c6fdbd8SKent Overstreet static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
2371c6fdbd8SKent Overstreet {
2381c6fdbd8SKent Overstreet return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size));
2391c6fdbd8SKent Overstreet }
2401c6fdbd8SKent Overstreet
2411c6fdbd8SKent Overstreet #define eytzinger0_for_each(_i, _size) \
2423fe8a186SKent Overstreet for (unsigned (_i) = eytzinger0_first((_size)); \
2431c6fdbd8SKent Overstreet (_i) != -1; \
2441c6fdbd8SKent Overstreet (_i) = eytzinger0_next((_i), (_size)))
2451c6fdbd8SKent Overstreet
2461c6fdbd8SKent Overstreet /* return greatest node <= @search, or -1 if not found */
eytzinger0_find_le(void * base,size_t nr,size_t size,cmp_func_t cmp,const void * search)2479c432404SKent Overstreet static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
248ca1e02f7SKent Overstreet cmp_func_t cmp, const void *search)
2491c6fdbd8SKent Overstreet {
2501c6fdbd8SKent Overstreet unsigned i, n = 0;
2511c6fdbd8SKent Overstreet
2521c6fdbd8SKent Overstreet if (!nr)
2531c6fdbd8SKent Overstreet return -1;
2541c6fdbd8SKent Overstreet
2551c6fdbd8SKent Overstreet do {
2561c6fdbd8SKent Overstreet i = n;
257ca1e02f7SKent Overstreet n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
2581c6fdbd8SKent Overstreet } while (n < nr);
2591c6fdbd8SKent Overstreet
2601c6fdbd8SKent Overstreet if (n & 1) {
2619c432404SKent Overstreet /*
2629c432404SKent Overstreet * @i was greater than @search, return previous node:
2639c432404SKent Overstreet *
2649c432404SKent Overstreet * if @i was leftmost/smallest element,
2659c432404SKent Overstreet * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
2669c432404SKent Overstreet */
2671c6fdbd8SKent Overstreet return eytzinger0_prev(i, nr);
2681c6fdbd8SKent Overstreet } else {
2691c6fdbd8SKent Overstreet return i;
2701c6fdbd8SKent Overstreet }
2711c6fdbd8SKent Overstreet }
2721c6fdbd8SKent Overstreet
eytzinger0_find_gt(void * base,size_t nr,size_t size,cmp_func_t cmp,const void * search)2739c432404SKent Overstreet static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
274ca1e02f7SKent Overstreet cmp_func_t cmp, const void *search)
275ca1e02f7SKent Overstreet {
276ca1e02f7SKent Overstreet ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
2779c432404SKent Overstreet
2789c432404SKent Overstreet /*
2799c432404SKent Overstreet * if eytitzinger0_find_le() returned -1 - no element was <= search - we
2809c432404SKent Overstreet * want to return the first element; next/prev identities mean this work
2819c432404SKent Overstreet * as expected
2829c432404SKent Overstreet *
2839c432404SKent Overstreet * similarly if find_le() returns last element, we should return -1;
2849c432404SKent Overstreet * identities mean this all works out:
2859c432404SKent Overstreet */
2869c432404SKent Overstreet return eytzinger0_next(idx, nr);
287ca1e02f7SKent Overstreet }
288ca1e02f7SKent Overstreet
eytzinger0_find_ge(void * base,size_t nr,size_t size,cmp_func_t cmp,const void * search)289*820b9efeSKent Overstreet static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size,
290*820b9efeSKent Overstreet cmp_func_t cmp, const void *search)
291*820b9efeSKent Overstreet {
292*820b9efeSKent Overstreet ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
293*820b9efeSKent Overstreet
294*820b9efeSKent Overstreet if (idx < nr && !cmp(base + idx * size, search))
295*820b9efeSKent Overstreet return idx;
296*820b9efeSKent Overstreet
297*820b9efeSKent Overstreet return eytzinger0_next(idx, nr);
298*820b9efeSKent Overstreet }
299*820b9efeSKent Overstreet
3007ef2a73aSKent Overstreet #define eytzinger0_find(base, nr, size, _cmp, search) \
3017ef2a73aSKent Overstreet ({ \
3027ef2a73aSKent Overstreet void *_base = (base); \
303169de419SKent Overstreet const void *_search = (search); \
3047ef2a73aSKent Overstreet size_t _nr = (nr); \
3057ef2a73aSKent Overstreet size_t _size = (size); \
3067ef2a73aSKent Overstreet size_t _i = 0; \
3077ef2a73aSKent Overstreet int _res; \
3087ef2a73aSKent Overstreet \
3097ef2a73aSKent Overstreet while (_i < _nr && \
310ca1e02f7SKent Overstreet (_res = _cmp(_search, _base + _i * _size))) \
3117ef2a73aSKent Overstreet _i = eytzinger0_child(_i, _res > 0); \
3127ef2a73aSKent Overstreet _i; \
3137ef2a73aSKent Overstreet })
3141c6fdbd8SKent Overstreet
315ca1e02f7SKent Overstreet void eytzinger0_sort_r(void *, size_t, size_t,
316ca1e02f7SKent Overstreet cmp_r_func_t, swap_r_func_t, const void *);
317ca1e02f7SKent Overstreet void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
3181c6fdbd8SKent Overstreet
3191c6fdbd8SKent Overstreet #endif /* _EYTZINGER_H */
320