xref: /linux/fs/bcachefs/eytzinger.h (revision a1ff5a7d78a036d6c2178ee5acd6ba4946243800)
11c6fdbd8SKent Overstreet /* SPDX-License-Identifier: GPL-2.0 */
21c6fdbd8SKent Overstreet #ifndef _EYTZINGER_H
31c6fdbd8SKent Overstreet #define _EYTZINGER_H
41c6fdbd8SKent Overstreet 
51c6fdbd8SKent Overstreet #include <linux/bitops.h>
61c6fdbd8SKent Overstreet #include <linux/log2.h>
71c6fdbd8SKent Overstreet 
8ca1e02f7SKent Overstreet #ifdef EYTZINGER_DEBUG
9ca1e02f7SKent Overstreet #define EYTZINGER_BUG_ON(cond)		BUG_ON(cond)
10ca1e02f7SKent Overstreet #else
11ca1e02f7SKent Overstreet #define EYTZINGER_BUG_ON(cond)
12ca1e02f7SKent Overstreet #endif
131c6fdbd8SKent Overstreet 
141c6fdbd8SKent Overstreet /*
151c6fdbd8SKent Overstreet  * Traversal for trees in eytzinger layout - a full binary tree layed out in an
16ca1e02f7SKent Overstreet  * array.
171c6fdbd8SKent Overstreet  *
18ca1e02f7SKent Overstreet  * Consider using an eytzinger tree any time you would otherwise be doing binary
19ca1e02f7SKent Overstreet  * search over an array. Binary search is a worst case scenario for branch
20ca1e02f7SKent Overstreet  * prediction and prefetching, but in an eytzinger tree every node's children
21ca1e02f7SKent Overstreet  * are adjacent in memory, thus we can prefetch children before knowing the
22ca1e02f7SKent Overstreet  * result of the comparison, assuming multiple nodes fit on a cacheline.
23ca1e02f7SKent Overstreet  *
24ca1e02f7SKent Overstreet  * Two variants are provided, for one based indexing and zero based indexing.
25ca1e02f7SKent Overstreet  *
26ca1e02f7SKent Overstreet  * Zero based indexing is more convenient, but one based indexing has better
27ca1e02f7SKent Overstreet  * alignment and thus better performance because each new level of the tree
28ca1e02f7SKent Overstreet  * starts at a power of two, and thus if element 0 was cacheline aligned, each
29ca1e02f7SKent Overstreet  * new level will be as well.
301c6fdbd8SKent Overstreet  */
311c6fdbd8SKent Overstreet 
eytzinger1_child(unsigned i,unsigned child)321c6fdbd8SKent Overstreet static inline unsigned eytzinger1_child(unsigned i, unsigned child)
331c6fdbd8SKent Overstreet {
34ca1e02f7SKent Overstreet 	EYTZINGER_BUG_ON(child > 1);
351c6fdbd8SKent Overstreet 
361c6fdbd8SKent Overstreet 	return (i << 1) + child;
371c6fdbd8SKent Overstreet }
381c6fdbd8SKent Overstreet 
eytzinger1_left_child(unsigned i)391c6fdbd8SKent Overstreet static inline unsigned eytzinger1_left_child(unsigned i)
401c6fdbd8SKent Overstreet {
411c6fdbd8SKent Overstreet 	return eytzinger1_child(i, 0);
421c6fdbd8SKent Overstreet }
431c6fdbd8SKent Overstreet 
eytzinger1_right_child(unsigned i)441c6fdbd8SKent Overstreet static inline unsigned eytzinger1_right_child(unsigned i)
451c6fdbd8SKent Overstreet {
461c6fdbd8SKent Overstreet 	return eytzinger1_child(i, 1);
471c6fdbd8SKent Overstreet }
481c6fdbd8SKent Overstreet 
eytzinger1_first(unsigned size)491c6fdbd8SKent Overstreet static inline unsigned eytzinger1_first(unsigned size)
501c6fdbd8SKent Overstreet {
518ed58789SKent Overstreet 	return size ? rounddown_pow_of_two(size) : 0;
521c6fdbd8SKent Overstreet }
531c6fdbd8SKent Overstreet 
eytzinger1_last(unsigned size)541c6fdbd8SKent Overstreet static inline unsigned eytzinger1_last(unsigned size)
551c6fdbd8SKent Overstreet {
5672492d55SKent Overstreet 	return rounddown_pow_of_two(size + 1) - 1;
571c6fdbd8SKent Overstreet }
581c6fdbd8SKent Overstreet 
591c6fdbd8SKent Overstreet /*
601c6fdbd8SKent Overstreet  * eytzinger1_next() and eytzinger1_prev() have the nice properties that
611c6fdbd8SKent Overstreet  *
621c6fdbd8SKent Overstreet  * eytzinger1_next(0) == eytzinger1_first())
631c6fdbd8SKent Overstreet  * eytzinger1_prev(0) == eytzinger1_last())
641c6fdbd8SKent Overstreet  *
651c6fdbd8SKent Overstreet  * eytzinger1_prev(eytzinger1_first()) == 0
661c6fdbd8SKent Overstreet  * eytzinger1_next(eytzinger1_last()) == 0
671c6fdbd8SKent Overstreet  */
681c6fdbd8SKent Overstreet 
eytzinger1_next(unsigned i,unsigned size)691c6fdbd8SKent Overstreet static inline unsigned eytzinger1_next(unsigned i, unsigned size)
701c6fdbd8SKent Overstreet {
71ca1e02f7SKent Overstreet 	EYTZINGER_BUG_ON(i > size);
721c6fdbd8SKent Overstreet 
7372492d55SKent Overstreet 	if (eytzinger1_right_child(i) <= size) {
741c6fdbd8SKent Overstreet 		i = eytzinger1_right_child(i);
751c6fdbd8SKent Overstreet 
7672492d55SKent Overstreet 		i <<= __fls(size + 1) - __fls(i);
7772492d55SKent Overstreet 		i >>= i > size;
781c6fdbd8SKent Overstreet 	} else {
791c6fdbd8SKent Overstreet 		i >>= ffz(i) + 1;
801c6fdbd8SKent Overstreet 	}
811c6fdbd8SKent Overstreet 
821c6fdbd8SKent Overstreet 	return i;
831c6fdbd8SKent Overstreet }
841c6fdbd8SKent Overstreet 
eytzinger1_prev(unsigned i,unsigned size)851c6fdbd8SKent Overstreet static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
861c6fdbd8SKent Overstreet {
87ca1e02f7SKent Overstreet 	EYTZINGER_BUG_ON(i > size);
881c6fdbd8SKent Overstreet 
8972492d55SKent Overstreet 	if (eytzinger1_left_child(i) <= size) {
901c6fdbd8SKent Overstreet 		i = eytzinger1_left_child(i) + 1;
911c6fdbd8SKent Overstreet 
9272492d55SKent Overstreet 		i <<= __fls(size + 1) - __fls(i);
931c6fdbd8SKent Overstreet 		i -= 1;
9472492d55SKent Overstreet 		i >>= i > size;
951c6fdbd8SKent Overstreet 	} else {
961c6fdbd8SKent Overstreet 		i >>= __ffs(i) + 1;
971c6fdbd8SKent Overstreet 	}
981c6fdbd8SKent Overstreet 
991c6fdbd8SKent Overstreet 	return i;
1001c6fdbd8SKent Overstreet }
1011c6fdbd8SKent Overstreet 
eytzinger1_extra(unsigned size)1021c6fdbd8SKent Overstreet static inline unsigned eytzinger1_extra(unsigned size)
1031c6fdbd8SKent Overstreet {
1048ed58789SKent Overstreet 	return size
1058ed58789SKent Overstreet 		? (size + 1 - rounddown_pow_of_two(size)) << 1
1068ed58789SKent Overstreet 		: 0;
1071c6fdbd8SKent Overstreet }
1081c6fdbd8SKent Overstreet 
__eytzinger1_to_inorder(unsigned i,unsigned size,unsigned extra)1091c6fdbd8SKent Overstreet static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
1101c6fdbd8SKent Overstreet 					      unsigned extra)
1111c6fdbd8SKent Overstreet {
1121c6fdbd8SKent Overstreet 	unsigned b = __fls(i);
11372492d55SKent Overstreet 	unsigned shift = __fls(size) - b;
1141c6fdbd8SKent Overstreet 	int s;
1151c6fdbd8SKent Overstreet 
116ca1e02f7SKent Overstreet 	EYTZINGER_BUG_ON(!i || i > size);
1171c6fdbd8SKent Overstreet 
1181c6fdbd8SKent Overstreet 	i  ^= 1U << b;
1191c6fdbd8SKent Overstreet 	i <<= 1;
1201c6fdbd8SKent Overstreet 	i  |= 1;
1211c6fdbd8SKent Overstreet 	i <<= shift;
1221c6fdbd8SKent Overstreet 
1231c6fdbd8SKent Overstreet 	/*
1241c6fdbd8SKent Overstreet 	 * sign bit trick:
1251c6fdbd8SKent Overstreet 	 *
1261c6fdbd8SKent Overstreet 	 * if (i > extra)
1271c6fdbd8SKent Overstreet 	 *	i -= (i - extra) >> 1;
1281c6fdbd8SKent Overstreet 	 */
1291c6fdbd8SKent Overstreet 	s = extra - i;
1301c6fdbd8SKent Overstreet 	i += (s >> 1) & (s >> 31);
1311c6fdbd8SKent Overstreet 
1321c6fdbd8SKent Overstreet 	return i;
1331c6fdbd8SKent Overstreet }
1341c6fdbd8SKent Overstreet 
__inorder_to_eytzinger1(unsigned i,unsigned size,unsigned extra)1351c6fdbd8SKent Overstreet static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
1361c6fdbd8SKent Overstreet 					       unsigned extra)
1371c6fdbd8SKent Overstreet {
1381c6fdbd8SKent Overstreet 	unsigned shift;
1391c6fdbd8SKent Overstreet 	int s;
1401c6fdbd8SKent Overstreet 
141ca1e02f7SKent Overstreet 	EYTZINGER_BUG_ON(!i || i > size);
1421c6fdbd8SKent Overstreet 
1431c6fdbd8SKent Overstreet 	/*
1441c6fdbd8SKent Overstreet 	 * sign bit trick:
1451c6fdbd8SKent Overstreet 	 *
1461c6fdbd8SKent Overstreet 	 * if (i > extra)
1471c6fdbd8SKent Overstreet 	 *	i += i - extra;
1481c6fdbd8SKent Overstreet 	 */
1491c6fdbd8SKent Overstreet 	s = extra - i;
1501c6fdbd8SKent Overstreet 	i -= s & (s >> 31);
1511c6fdbd8SKent Overstreet 
1521c6fdbd8SKent Overstreet 	shift = __ffs(i);
1531c6fdbd8SKent Overstreet 
1541c6fdbd8SKent Overstreet 	i >>= shift + 1;
15572492d55SKent Overstreet 	i  |= 1U << (__fls(size) - shift);
1561c6fdbd8SKent Overstreet 
1571c6fdbd8SKent Overstreet 	return i;
1581c6fdbd8SKent Overstreet }
1591c6fdbd8SKent Overstreet 
eytzinger1_to_inorder(unsigned i,unsigned size)1601c6fdbd8SKent Overstreet static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
1611c6fdbd8SKent Overstreet {
1621c6fdbd8SKent Overstreet 	return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size));
1631c6fdbd8SKent Overstreet }
1641c6fdbd8SKent Overstreet 
inorder_to_eytzinger1(unsigned i,unsigned size)1651c6fdbd8SKent Overstreet static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
1661c6fdbd8SKent Overstreet {
1671c6fdbd8SKent Overstreet 	return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size));
1681c6fdbd8SKent Overstreet }
1691c6fdbd8SKent Overstreet 
1701c6fdbd8SKent Overstreet #define eytzinger1_for_each(_i, _size)			\
1713fe8a186SKent Overstreet 	for (unsigned (_i) = eytzinger1_first((_size));	\
1721c6fdbd8SKent Overstreet 	     (_i) != 0;					\
1731c6fdbd8SKent Overstreet 	     (_i) = eytzinger1_next((_i), (_size)))
1741c6fdbd8SKent Overstreet 
1751c6fdbd8SKent Overstreet /* Zero based indexing version: */
1761c6fdbd8SKent Overstreet 
eytzinger0_child(unsigned i,unsigned child)1771c6fdbd8SKent Overstreet static inline unsigned eytzinger0_child(unsigned i, unsigned child)
1781c6fdbd8SKent Overstreet {
179ca1e02f7SKent Overstreet 	EYTZINGER_BUG_ON(child > 1);
1801c6fdbd8SKent Overstreet 
1811c6fdbd8SKent Overstreet 	return (i << 1) + 1 + child;
1821c6fdbd8SKent Overstreet }
1831c6fdbd8SKent Overstreet 
eytzinger0_left_child(unsigned i)1841c6fdbd8SKent Overstreet static inline unsigned eytzinger0_left_child(unsigned i)
1851c6fdbd8SKent Overstreet {
1861c6fdbd8SKent Overstreet 	return eytzinger0_child(i, 0);
1871c6fdbd8SKent Overstreet }
1881c6fdbd8SKent Overstreet 
eytzinger0_right_child(unsigned i)1891c6fdbd8SKent Overstreet static inline unsigned eytzinger0_right_child(unsigned i)
1901c6fdbd8SKent Overstreet {
1911c6fdbd8SKent Overstreet 	return eytzinger0_child(i, 1);
1921c6fdbd8SKent Overstreet }
1931c6fdbd8SKent Overstreet 
eytzinger0_first(unsigned size)1941c6fdbd8SKent Overstreet static inline unsigned eytzinger0_first(unsigned size)
1951c6fdbd8SKent Overstreet {
19672492d55SKent Overstreet 	return eytzinger1_first(size) - 1;
1971c6fdbd8SKent Overstreet }
1981c6fdbd8SKent Overstreet 
eytzinger0_last(unsigned size)1991c6fdbd8SKent Overstreet static inline unsigned eytzinger0_last(unsigned size)
2001c6fdbd8SKent Overstreet {
20172492d55SKent Overstreet 	return eytzinger1_last(size) - 1;
2021c6fdbd8SKent Overstreet }
2031c6fdbd8SKent Overstreet 
eytzinger0_next(unsigned i,unsigned size)2041c6fdbd8SKent Overstreet static inline unsigned eytzinger0_next(unsigned i, unsigned size)
2051c6fdbd8SKent Overstreet {
20672492d55SKent Overstreet 	return eytzinger1_next(i + 1, size) - 1;
2071c6fdbd8SKent Overstreet }
2081c6fdbd8SKent Overstreet 
eytzinger0_prev(unsigned i,unsigned size)2091c6fdbd8SKent Overstreet static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
2101c6fdbd8SKent Overstreet {
21172492d55SKent Overstreet 	return eytzinger1_prev(i + 1, size) - 1;
2121c6fdbd8SKent Overstreet }
2131c6fdbd8SKent Overstreet 
eytzinger0_extra(unsigned size)2141c6fdbd8SKent Overstreet static inline unsigned eytzinger0_extra(unsigned size)
2151c6fdbd8SKent Overstreet {
21672492d55SKent Overstreet 	return eytzinger1_extra(size);
2171c6fdbd8SKent Overstreet }
2181c6fdbd8SKent Overstreet 
__eytzinger0_to_inorder(unsigned i,unsigned size,unsigned extra)2191c6fdbd8SKent Overstreet static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
2201c6fdbd8SKent Overstreet 					       unsigned extra)
2211c6fdbd8SKent Overstreet {
22272492d55SKent Overstreet 	return __eytzinger1_to_inorder(i + 1, size, extra) - 1;
2231c6fdbd8SKent Overstreet }
2241c6fdbd8SKent Overstreet 
__inorder_to_eytzinger0(unsigned i,unsigned size,unsigned extra)2251c6fdbd8SKent Overstreet static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
2261c6fdbd8SKent Overstreet 					       unsigned extra)
2271c6fdbd8SKent Overstreet {
22872492d55SKent Overstreet 	return __inorder_to_eytzinger1(i + 1, size, extra) - 1;
2291c6fdbd8SKent Overstreet }
2301c6fdbd8SKent Overstreet 
eytzinger0_to_inorder(unsigned i,unsigned size)2311c6fdbd8SKent Overstreet static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
2321c6fdbd8SKent Overstreet {
2331c6fdbd8SKent Overstreet 	return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size));
2341c6fdbd8SKent Overstreet }
2351c6fdbd8SKent Overstreet 
inorder_to_eytzinger0(unsigned i,unsigned size)2361c6fdbd8SKent Overstreet static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
2371c6fdbd8SKent Overstreet {
2381c6fdbd8SKent Overstreet 	return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size));
2391c6fdbd8SKent Overstreet }
2401c6fdbd8SKent Overstreet 
2411c6fdbd8SKent Overstreet #define eytzinger0_for_each(_i, _size)			\
2423fe8a186SKent Overstreet 	for (unsigned (_i) = eytzinger0_first((_size));	\
2431c6fdbd8SKent Overstreet 	     (_i) != -1;				\
2441c6fdbd8SKent Overstreet 	     (_i) = eytzinger0_next((_i), (_size)))
2451c6fdbd8SKent Overstreet 
2461c6fdbd8SKent Overstreet /* return greatest node <= @search, or -1 if not found */
eytzinger0_find_le(void * base,size_t nr,size_t size,cmp_func_t cmp,const void * search)2479c432404SKent Overstreet static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
248ca1e02f7SKent Overstreet 				     cmp_func_t cmp, const void *search)
2491c6fdbd8SKent Overstreet {
2501c6fdbd8SKent Overstreet 	unsigned i, n = 0;
2511c6fdbd8SKent Overstreet 
2521c6fdbd8SKent Overstreet 	if (!nr)
2531c6fdbd8SKent Overstreet 		return -1;
2541c6fdbd8SKent Overstreet 
2551c6fdbd8SKent Overstreet 	do {
2561c6fdbd8SKent Overstreet 		i = n;
257ca1e02f7SKent Overstreet 		n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
2581c6fdbd8SKent Overstreet 	} while (n < nr);
2591c6fdbd8SKent Overstreet 
2601c6fdbd8SKent Overstreet 	if (n & 1) {
2619c432404SKent Overstreet 		/*
2629c432404SKent Overstreet 		 * @i was greater than @search, return previous node:
2639c432404SKent Overstreet 		 *
2649c432404SKent Overstreet 		 * if @i was leftmost/smallest element,
2659c432404SKent Overstreet 		 * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
2669c432404SKent Overstreet 		 */
2671c6fdbd8SKent Overstreet 		return eytzinger0_prev(i, nr);
2681c6fdbd8SKent Overstreet 	} else {
2691c6fdbd8SKent Overstreet 		return i;
2701c6fdbd8SKent Overstreet 	}
2711c6fdbd8SKent Overstreet }
2721c6fdbd8SKent Overstreet 
eytzinger0_find_gt(void * base,size_t nr,size_t size,cmp_func_t cmp,const void * search)2739c432404SKent Overstreet static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
274ca1e02f7SKent Overstreet 				     cmp_func_t cmp, const void *search)
275ca1e02f7SKent Overstreet {
276ca1e02f7SKent Overstreet 	ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
2779c432404SKent Overstreet 
2789c432404SKent Overstreet 	/*
2799c432404SKent Overstreet 	 * if eytitzinger0_find_le() returned -1 - no element was <= search - we
2809c432404SKent Overstreet 	 * want to return the first element; next/prev identities mean this work
2819c432404SKent Overstreet 	 * as expected
2829c432404SKent Overstreet 	 *
2839c432404SKent Overstreet 	 * similarly if find_le() returns last element, we should return -1;
2849c432404SKent Overstreet 	 * identities mean this all works out:
2859c432404SKent Overstreet 	 */
2869c432404SKent Overstreet 	return eytzinger0_next(idx, nr);
287ca1e02f7SKent Overstreet }
288ca1e02f7SKent Overstreet 
eytzinger0_find_ge(void * base,size_t nr,size_t size,cmp_func_t cmp,const void * search)289*820b9efeSKent Overstreet static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size,
290*820b9efeSKent Overstreet 				     cmp_func_t cmp, const void *search)
291*820b9efeSKent Overstreet {
292*820b9efeSKent Overstreet 	ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
293*820b9efeSKent Overstreet 
294*820b9efeSKent Overstreet 	if (idx < nr && !cmp(base + idx * size, search))
295*820b9efeSKent Overstreet 		return idx;
296*820b9efeSKent Overstreet 
297*820b9efeSKent Overstreet 	return eytzinger0_next(idx, nr);
298*820b9efeSKent Overstreet }
299*820b9efeSKent Overstreet 
3007ef2a73aSKent Overstreet #define eytzinger0_find(base, nr, size, _cmp, search)			\
3017ef2a73aSKent Overstreet ({									\
3027ef2a73aSKent Overstreet 	void *_base		= (base);				\
303169de419SKent Overstreet 	const void *_search	= (search);				\
3047ef2a73aSKent Overstreet 	size_t _nr		= (nr);					\
3057ef2a73aSKent Overstreet 	size_t _size		= (size);				\
3067ef2a73aSKent Overstreet 	size_t _i		= 0;					\
3077ef2a73aSKent Overstreet 	int _res;							\
3087ef2a73aSKent Overstreet 									\
3097ef2a73aSKent Overstreet 	while (_i < _nr &&						\
310ca1e02f7SKent Overstreet 	       (_res = _cmp(_search, _base + _i * _size)))		\
3117ef2a73aSKent Overstreet 		_i = eytzinger0_child(_i, _res > 0);			\
3127ef2a73aSKent Overstreet 	_i;								\
3137ef2a73aSKent Overstreet })
3141c6fdbd8SKent Overstreet 
315ca1e02f7SKent Overstreet void eytzinger0_sort_r(void *, size_t, size_t,
316ca1e02f7SKent Overstreet 		       cmp_r_func_t, swap_r_func_t, const void *);
317ca1e02f7SKent Overstreet void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
3181c6fdbd8SKent Overstreet 
3191c6fdbd8SKent Overstreet #endif /* _EYTZINGER_H */
320