xref: /linux/lib/maple_tree.c (revision 2c1ed907520c50326b8f604907a8478b27881a2e)
1  // SPDX-License-Identifier: GPL-2.0+
2  /*
3   * Maple Tree implementation
4   * Copyright (c) 2018-2022 Oracle Corporation
5   * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
6   *	    Matthew Wilcox <willy@infradead.org>
7   * Copyright (c) 2023 ByteDance
8   * Author: Peng Zhang <zhangpeng.00@bytedance.com>
9   */
10  
11  /*
12   * DOC: Interesting implementation details of the Maple Tree
13   *
14   * Each node type has a number of slots for entries and a number of slots for
15   * pivots.  In the case of dense nodes, the pivots are implied by the position
16   * and are simply the slot index + the minimum of the node.
17   *
18   * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
19   * indicate that the tree is specifying ranges.  Pivots may appear in the
20   * subtree with an entry attached to the value whereas keys are unique to a
21   * specific position of a B-tree.  Pivot values are inclusive of the slot with
22   * the same index.
23   *
24   *
25   * The following illustrates the layout of a range64 node's slots and pivots.
26   *
27   *
28   *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
29   *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
30   *           │   │   │   │     │    │    │    │    └─ Implied maximum
31   *           │   │   │   │     │    │    │    └─ Pivot 14
32   *           │   │   │   │     │    │    └─ Pivot 13
33   *           │   │   │   │     │    └─ Pivot 12
34   *           │   │   │   │     └─ Pivot 11
35   *           │   │   │   └─ Pivot 2
36   *           │   │   └─ Pivot 1
37   *           │   └─ Pivot 0
38   *           └─  Implied minimum
39   *
40   * Slot contents:
41   *  Internal (non-leaf) nodes contain pointers to other nodes.
42   *  Leaf nodes contain entries.
43   *
44   * The location of interest is often referred to as an offset.  All offsets have
45   * a slot, but the last offset has an implied pivot from the node above (or
46   * UINT_MAX for the root node.
47   *
48   * Ranges complicate certain write activities.  When modifying any of
49   * the B-tree variants, it is known that one entry will either be added or
50   * deleted.  When modifying the Maple Tree, one store operation may overwrite
51   * the entire data set, or one half of the tree, or the middle half of the tree.
52   *
53   */
54  
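/*
 * Illustrative example (values invented for this comment, not taken from a
 * real tree): a leaf with minimum 100 and maximum 500 might contain
 *
 *  Slots  -> |  A  |  B  | NULL |  C  |
 *  Pivots -> | 199 | 250 | 399  | 500 |
 *
 * so entry A covers the range 100-199, B covers 200-250, the range 251-399 is
 * empty, and C covers 400-500.  The minimum of a slot is the previous pivot
 * plus one (or the node minimum for slot 0), and each pivot is inclusive of
 * the slot with the same index.
 */
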
55  
56  #include <linux/maple_tree.h>
57  #include <linux/xarray.h>
58  #include <linux/types.h>
59  #include <linux/export.h>
60  #include <linux/slab.h>
61  #include <linux/limits.h>
62  #include <asm/barrier.h>
63  
64  #define CREATE_TRACE_POINTS
65  #include <trace/events/maple_tree.h>
66  
67  /*
68   * Kernel pointer hashing renders much of the maple tree dump useless as tagged
69   * pointers get hashed to arbitrary values.
70   *
71   * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
72   * permissible to bypass this. Otherwise remain cautious and retain the hashing.
73   *
74   * Userland doesn't know about %px so also use %p there.
75   */
76  #if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE)
77  #define PTR_FMT "%px"
78  #else
79  #define PTR_FMT "%p"
80  #endif
81  
82  #define MA_ROOT_PARENT 1
83  
84  /*
85   * Maple state flags
86   * * MA_STATE_BULK		- Bulk insert mode
87   * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
88   * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
89   */
90  #define MA_STATE_BULK		1
91  #define MA_STATE_REBALANCE	2
92  #define MA_STATE_PREALLOC	4
93  
94  #define ma_parent_ptr(x) ((struct maple_pnode *)(x))
95  #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
96  #define ma_mnode_ptr(x) ((struct maple_node *)(x))
97  #define ma_enode_ptr(x) ((struct maple_enode *)(x))
98  static struct kmem_cache *maple_node_cache;
99  
100  #ifdef CONFIG_DEBUG_MAPLE_TREE
101  static const unsigned long mt_max[] = {
102  	[maple_dense]		= MAPLE_NODE_SLOTS,
103  	[maple_leaf_64]		= ULONG_MAX,
104  	[maple_range_64]	= ULONG_MAX,
105  	[maple_arange_64]	= ULONG_MAX,
106  };
107  #define mt_node_max(x) mt_max[mte_node_type(x)]
108  #endif
109  
110  static const unsigned char mt_slots[] = {
111  	[maple_dense]		= MAPLE_NODE_SLOTS,
112  	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
113  	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
114  	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
115  };
116  #define mt_slot_count(x) mt_slots[mte_node_type(x)]
117  
118  static const unsigned char mt_pivots[] = {
119  	[maple_dense]		= 0,
120  	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
121  	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
122  	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
123  };
124  #define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
125  
126  static const unsigned char mt_min_slots[] = {
127  	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
128  	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
129  	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
130  	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
131  };
132  #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
133  
134  #define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
135  #define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)
136  
137  struct maple_big_node {
138  	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
139  	union {
140  		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
141  		struct {
142  			unsigned long padding[MAPLE_BIG_NODE_GAPS];
143  			unsigned long gap[MAPLE_BIG_NODE_GAPS];
144  		};
145  	};
146  	unsigned char b_end;
147  	enum maple_type type;
148  };
149  
150  /*
151   * The maple_subtree_state is used to build a tree to replace a segment of an
152   * existing tree in a more atomic way.  Any walkers of the older tree will hit a
153   * dead node and restart on updates.
154   */
155  struct maple_subtree_state {
156  	struct ma_state *orig_l;	/* Original left side of subtree */
157  	struct ma_state *orig_r;	/* Original right side of subtree */
158  	struct ma_state *l;		/* New left side of subtree */
159  	struct ma_state *m;		/* New middle of subtree (rare) */
160  	struct ma_state *r;		/* New right side of subtree */
161  	struct ma_topiary *free;	/* nodes to be freed */
162  	struct ma_topiary *destroy;	/* Nodes to be destroyed (walked and freed) */
163  	struct maple_big_node *bn;
164  };
165  
166  #ifdef CONFIG_KASAN_STACK
167  /* Prevent mas_wr_bnode() from exceeding the stack frame limit */
168  #define noinline_for_kasan noinline_for_stack
169  #else
170  #define noinline_for_kasan inline
171  #endif
172  
173  /* Functions */
174  static inline struct maple_node *mt_alloc_one(gfp_t gfp)
175  {
176  	return kmem_cache_alloc(maple_node_cache, gfp);
177  }
178  
179  static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
180  {
181  	return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
182  }
183  
184  static inline void mt_free_one(struct maple_node *node)
185  {
186  	kmem_cache_free(maple_node_cache, node);
187  }
188  
189  static inline void mt_free_bulk(size_t size, void __rcu **nodes)
190  {
191  	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
192  }
193  
194  static void mt_free_rcu(struct rcu_head *head)
195  {
196  	struct maple_node *node = container_of(head, struct maple_node, rcu);
197  
198  	kmem_cache_free(maple_node_cache, node);
199  }
200  
201  /*
202   * ma_free_rcu() - Use rcu callback to free a maple node
203   * @node: The node to free
204   *
205   * The maple tree uses the parent pointer to indicate this node is no longer in
206   * use and will be freed.
207   */
208  static void ma_free_rcu(struct maple_node *node)
209  {
210  	WARN_ON(node->parent != ma_parent_ptr(node));
211  	call_rcu(&node->rcu, mt_free_rcu);
212  }
213  
214  static void mas_set_height(struct ma_state *mas)
215  {
216  	unsigned int new_flags = mas->tree->ma_flags;
217  
218  	new_flags &= ~MT_FLAGS_HEIGHT_MASK;
219  	MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX);
220  	new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
221  	mas->tree->ma_flags = new_flags;
222  }
223  
224  static unsigned int mas_mt_height(struct ma_state *mas)
225  {
226  	return mt_height(mas->tree);
227  }
228  
229  static inline unsigned int mt_attr(struct maple_tree *mt)
230  {
231  	return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
232  }
233  
234  static __always_inline enum maple_type mte_node_type(
235  		const struct maple_enode *entry)
236  {
237  	return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
238  		MAPLE_NODE_TYPE_MASK;
239  }
240  
241  static __always_inline bool ma_is_dense(const enum maple_type type)
242  {
243  	return type < maple_leaf_64;
244  }
245  
246  static __always_inline bool ma_is_leaf(const enum maple_type type)
247  {
248  	return type < maple_range_64;
249  }
250  
251  static __always_inline bool mte_is_leaf(const struct maple_enode *entry)
252  {
253  	return ma_is_leaf(mte_node_type(entry));
254  }
255  
256  /*
257   * We also reserve values with the bottom two bits set to '10' which are
258   * below 4096
259   */
260  static __always_inline bool mt_is_reserved(const void *entry)
261  {
262  	return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
263  		xa_is_internal(entry);
264  }
265  
266  static __always_inline void mas_set_err(struct ma_state *mas, long err)
267  {
268  	mas->node = MA_ERROR(err);
269  	mas->status = ma_error;
270  }
271  
272  static __always_inline bool mas_is_ptr(const struct ma_state *mas)
273  {
274  	return mas->status == ma_root;
275  }
276  
277  static __always_inline bool mas_is_start(const struct ma_state *mas)
278  {
279  	return mas->status == ma_start;
280  }
281  
282  static __always_inline bool mas_is_none(const struct ma_state *mas)
283  {
284  	return mas->status == ma_none;
285  }
286  
287  static __always_inline bool mas_is_paused(const struct ma_state *mas)
288  {
289  	return mas->status == ma_pause;
290  }
291  
292  static __always_inline bool mas_is_overflow(struct ma_state *mas)
293  {
294  	return mas->status == ma_overflow;
295  }
296  
297  static inline bool mas_is_underflow(struct ma_state *mas)
298  {
299  	return mas->status == ma_underflow;
300  }
301  
302  static __always_inline struct maple_node *mte_to_node(
303  		const struct maple_enode *entry)
304  {
305  	return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
306  }
307  
308  /*
309   * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
310   * @entry: The maple encoded node
311   *
312   * Return: a maple topiary pointer
313   */
314  static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
315  {
316  	return (struct maple_topiary *)
317  		((unsigned long)entry & ~MAPLE_NODE_MASK);
318  }
319  
320  /*
321   * mas_mn() - Get the maple state node.
322   * @mas: The maple state
323   *
324   * Return: the maple node (not encoded - bare pointer).
325   */
326  static inline struct maple_node *mas_mn(const struct ma_state *mas)
327  {
328  	return mte_to_node(mas->node);
329  }
330  
331  /*
332   * mte_set_node_dead() - Set a maple encoded node as dead.
333   * @mn: The maple encoded node.
334   */
335  static inline void mte_set_node_dead(struct maple_enode *mn)
336  {
337  	mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
338  	smp_wmb(); /* Needed for RCU */
339  }
340  
341  /* Bit 1 indicates the root is a node */
342  #define MAPLE_ROOT_NODE			0x02
343  /* The maple_type is stored in bits 3-6 */
344  #define MAPLE_ENODE_TYPE_SHIFT		0x03
345  /* Bit 2 means a NULL somewhere below */
346  #define MAPLE_ENODE_NULL		0x04
347  
348  static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
349  					     enum maple_type type)
350  {
351  	return (void *)((unsigned long)node |
352  			(type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
353  }
354  
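/*
 * Worked example of the encoding above (assuming the usual maple_type
 * ordering where maple_range_64 == 2): maple nodes are 256B aligned, so the
 * low byte of the pointer is free for metadata.  mt_mk_node(node,
 * maple_range_64) therefore sets the low byte to
 * (2 << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL == 0x14; mte_node_type()
 * shifts the type back out and mte_to_node() clears MAPLE_NODE_MASK to
 * recover the bare node pointer.
 */
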
355  static inline void *mte_mk_root(const struct maple_enode *node)
356  {
357  	return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
358  }
359  
360  static inline void *mte_safe_root(const struct maple_enode *node)
361  {
362  	return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
363  }
364  
365  static inline void __maybe_unused *mte_set_full(const struct maple_enode *node)
366  {
367  	return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
368  }
369  
370  static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node)
371  {
372  	return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
373  }
374  
375  static inline bool __maybe_unused mte_has_null(const struct maple_enode *node)
376  {
377  	return (unsigned long)node & MAPLE_ENODE_NULL;
378  }
379  
380  static __always_inline bool ma_is_root(struct maple_node *node)
381  {
382  	return ((unsigned long)node->parent & MA_ROOT_PARENT);
383  }
384  
385  static __always_inline bool mte_is_root(const struct maple_enode *node)
386  {
387  	return ma_is_root(mte_to_node(node));
388  }
389  
390  static inline bool mas_is_root_limits(const struct ma_state *mas)
391  {
392  	return !mas->min && mas->max == ULONG_MAX;
393  }
394  
395  static __always_inline bool mt_is_alloc(struct maple_tree *mt)
396  {
397  	return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
398  }
399  
400  /*
401   * The Parent Pointer
402   * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
403   * When storing 32 or 64 bit values, the offset can fit into 5 bits.  The 16
404   * bit values need an extra bit to store the offset.  This extra bit comes from
405   * a reuse of the last bit in the node type.  This is possible by using bit 1 to
406   * indicate if bit 2 is part of the type or the slot.
407   *
408   * Note types:
409   *  0x??1 = Root
410   *  0x?00 = 16 bit nodes
411   *  0x010 = 32 bit nodes
412   *  0x110 = 64 bit nodes
413   *
414   * Slot size and alignment
415   *  0b??1 : Root
416   *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
417   *  0b010 : 32 bit values, type in 0-2, slot in 3-7
418   *  0b110 : 64 bit values, type in 0-2, slot in 3-7
419   */
420  
421  #define MAPLE_PARENT_ROOT		0x01
422  
423  #define MAPLE_PARENT_SLOT_SHIFT		0x03
424  #define MAPLE_PARENT_SLOT_MASK		0xF8
425  
426  #define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
427  #define MAPLE_PARENT_16B_SLOT_MASK	0xFC
428  
429  #define MAPLE_PARENT_RANGE64		0x06
430  #define MAPLE_PARENT_RANGE32		0x04
431  #define MAPLE_PARENT_NOT_RANGE16	0x02
432  
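/*
 * Worked example of the parent encoding (illustrative slot number): storing
 * a child at slot 3 of a range64 or arange64 parent sets the low byte of the
 * child's parent pointer to (3 << MAPLE_PARENT_SLOT_SHIFT) |
 * MAPLE_PARENT_RANGE64 == 0x1e.  When walking back up, mte_parent_shift()
 * sees MAPLE_PARENT_NOT_RANGE16 set and selects MAPLE_PARENT_SLOT_SHIFT,
 * mte_parent_slot() masks and shifts to recover slot 3, and mte_parent()
 * clears MAPLE_NODE_MASK to recover the parent node pointer.
 */
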
433  /*
434   * mte_parent_shift() - Get the parent shift for the slot storage.
435   * @parent: The parent pointer cast as an unsigned long
436   * Return: The shift into that pointer to the start of the slot
437   */
438  static inline unsigned long mte_parent_shift(unsigned long parent)
439  {
440  	/* Note bit 1 == 0 means 16B */
441  	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
442  		return MAPLE_PARENT_SLOT_SHIFT;
443  
444  	return MAPLE_PARENT_16B_SLOT_SHIFT;
445  }
446  
447  /*
448   * mte_parent_slot_mask() - Get the slot mask for the parent.
449   * @parent: The parent pointer cast as an unsigned long.
450   * Return: The slot mask for that parent.
451   */
452  static inline unsigned long mte_parent_slot_mask(unsigned long parent)
453  {
454  	/* Note bit 1 == 0 means 16B */
455  	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
456  		return MAPLE_PARENT_SLOT_MASK;
457  
458  	return MAPLE_PARENT_16B_SLOT_MASK;
459  }
460  
461  /*
462   * mas_parent_type() - Return the maple_type of the parent from the stored
463   * parent type.
464   * @mas: The maple state
465   * @enode: The maple_enode to extract the parent's enum
466   * Return: The node->parent maple_type
467   */
468  static inline
469  enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
470  {
471  	unsigned long p_type;
472  
473  	p_type = (unsigned long)mte_to_node(enode)->parent;
474  	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
475  		return 0;
476  
477  	p_type &= MAPLE_NODE_MASK;
478  	p_type &= ~mte_parent_slot_mask(p_type);
479  	switch (p_type) {
480  	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
481  		if (mt_is_alloc(mas->tree))
482  			return maple_arange_64;
483  		return maple_range_64;
484  	}
485  
486  	return 0;
487  }
488  
489  /*
490   * mas_set_parent() - Set the parent node and encode the slot
491   * @mas: The maple state
492   * @enode: The encoded maple node.
493   * @parent: The encoded maple node that is the parent of @enode.
494   * @slot: The slot that @enode resides in @parent.
495   *
496   * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
497   * parent type.
498   */
499  static inline
500  void mas_set_parent(struct ma_state *mas, struct maple_enode *enode,
501  		    const struct maple_enode *parent, unsigned char slot)
502  {
503  	unsigned long val = (unsigned long)parent;
504  	unsigned long shift;
505  	unsigned long type;
506  	enum maple_type p_type = mte_node_type(parent);
507  
508  	MAS_BUG_ON(mas, p_type == maple_dense);
509  	MAS_BUG_ON(mas, p_type == maple_leaf_64);
510  
511  	switch (p_type) {
512  	case maple_range_64:
513  	case maple_arange_64:
514  		shift = MAPLE_PARENT_SLOT_SHIFT;
515  		type = MAPLE_PARENT_RANGE64;
516  		break;
517  	default:
518  	case maple_dense:
519  	case maple_leaf_64:
520  		shift = type = 0;
521  		break;
522  	}
523  
524  	val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
525  	val |= (slot << shift) | type;
526  	mte_to_node(enode)->parent = ma_parent_ptr(val);
527  }
528  
529  /*
530   * mte_parent_slot() - get the parent slot of @enode.
531   * @enode: The encoded maple node.
532   *
533   * Return: The slot in the parent node where @enode resides.
534   */
535  static __always_inline
536  unsigned int mte_parent_slot(const struct maple_enode *enode)
537  {
538  	unsigned long val = (unsigned long)mte_to_node(enode)->parent;
539  
540  	if (unlikely(val & MA_ROOT_PARENT))
541  		return 0;
542  
543  	/*
544  	 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
545  	 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
546  	 */
547  	return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
548  }
549  
550  /*
551   * mte_parent() - Get the parent of @node.
552   * @enode: The encoded maple node.
553   *
554   * Return: The parent maple node.
555   */
556  static __always_inline
557  struct maple_node *mte_parent(const struct maple_enode *enode)
558  {
559  	return (void *)((unsigned long)
560  			(mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
561  }
562  
563  /*
564   * ma_dead_node() - check if the @node is dead.
565   * @node: The maple node
566   *
567   * Return: true if dead, false otherwise.
568   */
569  static __always_inline bool ma_dead_node(const struct maple_node *node)
570  {
571  	struct maple_node *parent;
572  
573  	/* Do not reorder reads from the node prior to the parent check */
574  	smp_rmb();
575  	parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
576  	return (parent == node);
577  }
578  
579  /*
580   * mte_dead_node() - check if the @enode is dead.
581   * @enode: The encoded maple node
582   *
583   * Return: true if dead, false otherwise.
584   */
585  static __always_inline bool mte_dead_node(const struct maple_enode *enode)
586  {
587  	struct maple_node *parent, *node;
588  
589  	node = mte_to_node(enode);
590  	/* Do not reorder reads from the node prior to the parent check */
591  	smp_rmb();
592  	parent = mte_parent(enode);
593  	return (parent == node);
594  }
595  
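/*
 * A dead node points its parent pointer at itself (mte_set_node_dead()), so
 * lockless readers use a "read, then check" pattern.  Hedged sketch of the
 * idiom used by the walkers below:
 *
 *	pivots = ma_pivots(node, type);
 *	(... use the pivots/slots just read ...)
 *	if (unlikely(ma_dead_node(node)))
 *		return 1;	(restart the walk)
 *
 * The smp_wmb() in mte_set_node_dead() pairs with the smp_rmb() in
 * ma_dead_node()/mte_dead_node(), so reads of the node's contents are not
 * reordered after the liveness check.
 */
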
596  /*
597   * mas_allocated() - Get the number of nodes allocated in a maple state.
598   * @mas: The maple state
599   *
600   * The ma_state alloc member is overloaded to hold a pointer to the first
601   * allocated node or to the number of requested nodes to allocate.  If bit 0 is
602   * set, then the alloc contains the number of requested nodes.  If there is an
603   * allocated node, then the total allocated nodes is in that node.
604   *
605   * Return: The total number of nodes allocated
606   */
607  static inline unsigned long mas_allocated(const struct ma_state *mas)
608  {
609  	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
610  		return 0;
611  
612  	return mas->alloc->total;
613  }
614  
615  /*
616   * mas_set_alloc_req() - Set the requested number of allocations.
617   * @mas: the maple state
618   * @count: the number of allocations.
619   *
620   * The requested number of allocations is either in the first allocated node,
621   * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
622   * no allocated node.  Set the request either in the node or do the necessary
623   * encoding to store in @mas->alloc directly.
624   */
625  static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
626  {
627  	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
628  		if (!count)
629  			mas->alloc = NULL;
630  		else
631  			mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
632  		return;
633  	}
634  
635  	mas->alloc->request_count = count;
636  }
637  
638  /*
639   * mas_alloc_req() - get the requested number of allocations.
640   * @mas: The maple state
641   *
642   * The alloc count is either stored directly in @mas, or in
643   * @mas->alloc->request_count if there is at least one node allocated.  Decode
644   * the request count if it's stored directly in @mas->alloc.
645   *
646   * Return: The allocation request count.
647   */
648  static inline unsigned int mas_alloc_req(const struct ma_state *mas)
649  {
650  	if ((unsigned long)mas->alloc & 0x1)
651  		return (unsigned long)(mas->alloc) >> 1;
652  	else if (mas->alloc)
653  		return mas->alloc->request_count;
654  	return 0;
655  }
656  
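/*
 * Hedged sketch of the mas->alloc encoding used by the three helpers above:
 * with no nodes allocated yet, a request for 3 nodes is stored directly in
 * the pointer as ((3 << 1) | 1) == 0x7, which mas_alloc_req() decodes by
 * shifting right by one and mas_allocated() treats as zero nodes.  Once a
 * maple_alloc node exists, bit 0 is clear and the counts live in
 * alloc->request_count and alloc->total instead.
 */
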
657  /*
658   * ma_pivots() - Get a pointer to the maple node pivots.
659   * @node: the maple node
660   * @type: the node type
661   *
662   * In the event of a dead node, this array may be %NULL
663   *
664   * Return: A pointer to the maple node pivots
665   */
666  static inline unsigned long *ma_pivots(struct maple_node *node,
667  					   enum maple_type type)
668  {
669  	switch (type) {
670  	case maple_arange_64:
671  		return node->ma64.pivot;
672  	case maple_range_64:
673  	case maple_leaf_64:
674  		return node->mr64.pivot;
675  	case maple_dense:
676  		return NULL;
677  	}
678  	return NULL;
679  }
680  
681  /*
682   * ma_gaps() - Get a pointer to the maple node gaps.
683   * @node: the maple node
684   * @type: the node type
685   *
686   * Return: A pointer to the maple node gaps
687   */
688  static inline unsigned long *ma_gaps(struct maple_node *node,
689  				     enum maple_type type)
690  {
691  	switch (type) {
692  	case maple_arange_64:
693  		return node->ma64.gap;
694  	case maple_range_64:
695  	case maple_leaf_64:
696  	case maple_dense:
697  		return NULL;
698  	}
699  	return NULL;
700  }
701  
702  /*
703   * mas_safe_pivot() - get the pivot at @piv or mas->max.
704   * @mas: The maple state
705   * @pivots: The pointer to the maple node pivots
706   * @piv: The pivot to fetch
707   * @type: The maple node type
708   *
709   * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
710   * otherwise.
711   */
712  static __always_inline unsigned long
713  mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
714  	       unsigned char piv, enum maple_type type)
715  {
716  	if (piv >= mt_pivots[type])
717  		return mas->max;
718  
719  	return pivots[piv];
720  }
721  
722  /*
723   * mas_safe_min() - Return the minimum for a given offset.
724   * @mas: The maple state
725   * @pivots: The pointer to the maple node pivots
726   * @offset: The offset into the pivot array
727   *
728   * Return: The minimum range value that is contained in @offset.
729   */
730  static inline unsigned long
731  mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
732  {
733  	if (likely(offset))
734  		return pivots[offset - 1] + 1;
735  
736  	return mas->min;
737  }
738  
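/*
 * mas_safe_min() and mas_safe_pivot() together give the range covered by an
 * offset.  Illustrative example: with a node minimum of 100, a maximum of 500
 * and pivots {199, 250, 399, 500}, offset 1 covers 200-250 (pivots[0] + 1
 * through pivots[1]); for the last offset of a node the pivot slot does not
 * exist, so mas_safe_pivot() returns mas->max instead.
 */
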
739  /*
740   * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
741   * @mn: The encoded maple node
742   * @piv: The pivot offset
743   * @val: The value of the pivot
744   */
745  static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
746  				unsigned long val)
747  {
748  	struct maple_node *node = mte_to_node(mn);
749  	enum maple_type type = mte_node_type(mn);
750  
751  	BUG_ON(piv >= mt_pivots[type]);
752  	switch (type) {
753  	case maple_range_64:
754  	case maple_leaf_64:
755  		node->mr64.pivot[piv] = val;
756  		break;
757  	case maple_arange_64:
758  		node->ma64.pivot[piv] = val;
759  		break;
760  	case maple_dense:
761  		break;
762  	}
763  
764  }
765  
766  /*
767   * ma_slots() - Get a pointer to the maple node slots.
768   * @mn: The maple node
769   * @mt: The maple node type
770   *
771   * Return: A pointer to the maple node slots
772   */
773  static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
774  {
775  	switch (mt) {
776  	case maple_arange_64:
777  		return mn->ma64.slot;
778  	case maple_range_64:
779  	case maple_leaf_64:
780  		return mn->mr64.slot;
781  	case maple_dense:
782  		return mn->slot;
783  	}
784  
785  	return NULL;
786  }
787  
788  static inline bool mt_write_locked(const struct maple_tree *mt)
789  {
790  	return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
791  		lockdep_is_held(&mt->ma_lock);
792  }
793  
794  static __always_inline bool mt_locked(const struct maple_tree *mt)
795  {
796  	return mt_external_lock(mt) ? mt_lock_is_held(mt) :
797  		lockdep_is_held(&mt->ma_lock);
798  }
799  
800  static __always_inline void *mt_slot(const struct maple_tree *mt,
801  		void __rcu **slots, unsigned char offset)
802  {
803  	return rcu_dereference_check(slots[offset], mt_locked(mt));
804  }
805  
806  static __always_inline void *mt_slot_locked(struct maple_tree *mt,
807  		void __rcu **slots, unsigned char offset)
808  {
809  	return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
810  }
811  /*
812   * mas_slot_locked() - Get the slot value when holding the maple tree lock.
813   * @mas: The maple state
814   * @slots: The pointer to the slots
815   * @offset: The offset into the slots array to fetch
816   *
817   * Return: The entry stored in @slots at the @offset.
818   */
819  static __always_inline void *mas_slot_locked(struct ma_state *mas,
820  		void __rcu **slots, unsigned char offset)
821  {
822  	return mt_slot_locked(mas->tree, slots, offset);
823  }
824  
825  /*
826   * mas_slot() - Get the slot value when not holding the maple tree lock.
827   * @mas: The maple state
828   * @slots: The pointer to the slots
829   * @offset: The offset into the slots array to fetch
830   *
831   * Return: The entry stored in @slots at the @offset
832   */
833  static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
834  		unsigned char offset)
835  {
836  	return mt_slot(mas->tree, slots, offset);
837  }
838  
839  /*
840   * mas_root() - Get the maple tree root.
841   * @mas: The maple state.
842   *
843   * Return: The pointer to the root of the tree
844   */
845  static __always_inline void *mas_root(struct ma_state *mas)
846  {
847  	return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
848  }
849  
850  static inline void *mt_root_locked(struct maple_tree *mt)
851  {
852  	return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
853  }
854  
855  /*
856   * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
857   * @mas: The maple state.
858   *
859   * Return: The pointer to the root of the tree
860   */
861  static inline void *mas_root_locked(struct ma_state *mas)
862  {
863  	return mt_root_locked(mas->tree);
864  }
865  
866  static inline struct maple_metadata *ma_meta(struct maple_node *mn,
867  					     enum maple_type mt)
868  {
869  	switch (mt) {
870  	case maple_arange_64:
871  		return &mn->ma64.meta;
872  	default:
873  		return &mn->mr64.meta;
874  	}
875  }
876  
877  /*
878   * ma_set_meta() - Set the metadata information of a node.
879   * @mn: The maple node
880   * @mt: The maple node type
881   * @offset: The offset of the highest sub-gap in this node.
882   * @end: The end of the data in this node.
883   */
884  static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
885  			       unsigned char offset, unsigned char end)
886  {
887  	struct maple_metadata *meta = ma_meta(mn, mt);
888  
889  	meta->gap = offset;
890  	meta->end = end;
891  }
892  
893  /*
894   * mt_clear_meta() - clear the metadata information of a node, if it exists
895   * @mt: The maple tree
896   * @mn: The maple node
897   * @type: The maple node type
898   */
899  static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
900  				  enum maple_type type)
901  {
902  	struct maple_metadata *meta;
903  	unsigned long *pivots;
904  	void __rcu **slots;
905  	void *next;
906  
907  	switch (type) {
908  	case maple_range_64:
909  		pivots = mn->mr64.pivot;
910  		if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
911  			slots = mn->mr64.slot;
912  			next = mt_slot_locked(mt, slots,
913  					      MAPLE_RANGE64_SLOTS - 1);
914  			if (unlikely((mte_to_node(next) &&
915  				      mte_node_type(next))))
916  				return; /* no metadata, could be node */
917  		}
918  		fallthrough;
919  	case maple_arange_64:
920  		meta = ma_meta(mn, type);
921  		break;
922  	default:
923  		return;
924  	}
925  
926  	meta->gap = 0;
927  	meta->end = 0;
928  }
929  
930  /*
931   * ma_meta_end() - Get the data end of a node from the metadata
932   * @mn: The maple node
933   * @mt: The maple node type
934   */
935  static inline unsigned char ma_meta_end(struct maple_node *mn,
936  					enum maple_type mt)
937  {
938  	struct maple_metadata *meta = ma_meta(mn, mt);
939  
940  	return meta->end;
941  }
942  
943  /*
944   * ma_meta_gap() - Get the largest gap location of a node from the metadata
945   * @mn: The maple node
946   */
947  static inline unsigned char ma_meta_gap(struct maple_node *mn)
948  {
949  	return mn->ma64.meta.gap;
950  }
951  
952  /*
953   * ma_set_meta_gap() - Set the largest gap location in a node's metadata
954   * @mn: The maple node
955   * @mt: The maple node type
956   * @offset: The location of the largest gap.
957   */
958  static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
959  				   unsigned char offset)
960  {
961  
962  	struct maple_metadata *meta = ma_meta(mn, mt);
963  
964  	meta->gap = offset;
965  }
966  
967  /*
968   * mat_add() - Add @dead_enode to the ma_topiary list of dead nodes.
969   * @mat: the ma_topiary, a linked list of dead nodes.
970   * @dead_enode: the node to be marked as dead and added to the tail of the list
971   *
972   * Add the @dead_enode to the linked list in @mat.
973   */
974  static inline void mat_add(struct ma_topiary *mat,
975  			   struct maple_enode *dead_enode)
976  {
977  	mte_set_node_dead(dead_enode);
978  	mte_to_mat(dead_enode)->next = NULL;
979  	if (!mat->tail) {
980  		mat->tail = mat->head = dead_enode;
981  		return;
982  	}
983  
984  	mte_to_mat(mat->tail)->next = dead_enode;
985  	mat->tail = dead_enode;
986  }
987  
988  static void mt_free_walk(struct rcu_head *head);
989  static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
990  			    bool free);
991  /*
992   * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
993   * @mas: the maple state
994   * @mat: the ma_topiary linked list of dead nodes to free.
995   *
996   * Destroy walk a dead list.
997   */
998  static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
999  {
1000  	struct maple_enode *next;
1001  	struct maple_node *node;
1002  	bool in_rcu = mt_in_rcu(mas->tree);
1003  
1004  	while (mat->head) {
1005  		next = mte_to_mat(mat->head)->next;
1006  		node = mte_to_node(mat->head);
1007  		mt_destroy_walk(mat->head, mas->tree, !in_rcu);
1008  		if (in_rcu)
1009  			call_rcu(&node->rcu, mt_free_walk);
1010  		mat->head = next;
1011  	}
1012  }
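
/*
 * Hedged usage sketch of the topiary list: a writer that replaces part of
 * the tree collects the displaced nodes instead of freeing them right away.
 *
 *	struct ma_topiary mat = { .head = NULL, .tail = NULL };
 *
 *	mat_add(&mat, old_enode);	(mark dead, append to the list)
 *	(... publish the replacement nodes ...)
 *	mas_mat_destroy(mas, &mat);	(walk and free, via RCU if enabled)
 *
 * Dead nodes stay reachable for concurrent readers until mas_mat_destroy()
 * hands them to call_rcu() (or frees them directly when not in RCU mode).
 */
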
1013  /*
1014   * mas_descend() - Descend into the slot stored in the ma_state.
1015   * @mas: the maple state.
1016   *
1017   * Note: Not RCU safe, only use in write side or debug code.
1018   */
1019  static inline void mas_descend(struct ma_state *mas)
1020  {
1021  	enum maple_type type;
1022  	unsigned long *pivots;
1023  	struct maple_node *node;
1024  	void __rcu **slots;
1025  
1026  	node = mas_mn(mas);
1027  	type = mte_node_type(mas->node);
1028  	pivots = ma_pivots(node, type);
1029  	slots = ma_slots(node, type);
1030  
1031  	if (mas->offset)
1032  		mas->min = pivots[mas->offset - 1] + 1;
1033  	mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
1034  	mas->node = mas_slot(mas, slots, mas->offset);
1035  }
1036  
1037  /*
1038   * mte_set_gap() - Set a maple node gap.
1039   * @mn: The encoded maple node
1040   * @gap: The offset of the gap to set
1041   * @val: The gap value
1042   */
1043  static inline void mte_set_gap(const struct maple_enode *mn,
1044  				 unsigned char gap, unsigned long val)
1045  {
1046  	switch (mte_node_type(mn)) {
1047  	default:
1048  		break;
1049  	case maple_arange_64:
1050  		mte_to_node(mn)->ma64.gap[gap] = val;
1051  		break;
1052  	}
1053  }
1054  
1055  /*
1056   * mas_ascend() - Walk up a level of the tree.
1057   * @mas: The maple state
1058   *
1059   * Sets the @mas->max and @mas->min to the correct values when walking up.  This
1060   * may cause several levels of walking up to find the correct min and max.
1061   * May find a dead node which will cause a premature return.
1062   * Return: 1 on dead node, 0 otherwise
1063   */
1064  static int mas_ascend(struct ma_state *mas)
1065  {
1066  	struct maple_enode *p_enode; /* parent enode. */
1067  	struct maple_enode *a_enode; /* ancestor enode. */
1068  	struct maple_node *a_node; /* ancestor node. */
1069  	struct maple_node *p_node; /* parent node. */
1070  	unsigned char a_slot;
1071  	enum maple_type a_type;
1072  	unsigned long min, max;
1073  	unsigned long *pivots;
1074  	bool set_max = false, set_min = false;
1075  
1076  	a_node = mas_mn(mas);
1077  	if (ma_is_root(a_node)) {
1078  		mas->offset = 0;
1079  		return 0;
1080  	}
1081  
1082  	p_node = mte_parent(mas->node);
1083  	if (unlikely(a_node == p_node))
1084  		return 1;
1085  
1086  	a_type = mas_parent_type(mas, mas->node);
1087  	mas->offset = mte_parent_slot(mas->node);
1088  	a_enode = mt_mk_node(p_node, a_type);
1089  
1090  	/* Check to make sure all parent information is still accurate */
1091  	if (p_node != mte_parent(mas->node))
1092  		return 1;
1093  
1094  	mas->node = a_enode;
1095  
1096  	if (mte_is_root(a_enode)) {
1097  		mas->max = ULONG_MAX;
1098  		mas->min = 0;
1099  		return 0;
1100  	}
1101  
1102  	min = 0;
1103  	max = ULONG_MAX;
1104  	if (!mas->offset) {
1105  		min = mas->min;
1106  		set_min = true;
1107  	}
1108  
1109  	if (mas->max == ULONG_MAX)
1110  		set_max = true;
1111  
1112  	do {
1113  		p_enode = a_enode;
1114  		a_type = mas_parent_type(mas, p_enode);
1115  		a_node = mte_parent(p_enode);
1116  		a_slot = mte_parent_slot(p_enode);
1117  		a_enode = mt_mk_node(a_node, a_type);
1118  		pivots = ma_pivots(a_node, a_type);
1119  
1120  		if (unlikely(ma_dead_node(a_node)))
1121  			return 1;
1122  
1123  		if (!set_min && a_slot) {
1124  			set_min = true;
1125  			min = pivots[a_slot - 1] + 1;
1126  		}
1127  
1128  		if (!set_max && a_slot < mt_pivots[a_type]) {
1129  			set_max = true;
1130  			max = pivots[a_slot];
1131  		}
1132  
1133  		if (unlikely(ma_dead_node(a_node)))
1134  			return 1;
1135  
1136  		if (unlikely(ma_is_root(a_node)))
1137  			break;
1138  
1139  	} while (!set_min || !set_max);
1140  
1141  	mas->max = max;
1142  	mas->min = min;
1143  	return 0;
1144  }
1145  
1146  /*
1147   * mas_pop_node() - Get a previously allocated maple node from the maple state.
1148   * @mas: The maple state
1149   *
1150   * Return: A pointer to a maple node.
1151   */
1152  static inline struct maple_node *mas_pop_node(struct ma_state *mas)
1153  {
1154  	struct maple_alloc *ret, *node = mas->alloc;
1155  	unsigned long total = mas_allocated(mas);
1156  	unsigned int req = mas_alloc_req(mas);
1157  
1158  	/* nothing or a request pending. */
1159  	if (WARN_ON(!total))
1160  		return NULL;
1161  
1162  	if (total == 1) {
1163  		/* single allocation in this ma_state */
1164  		mas->alloc = NULL;
1165  		ret = node;
1166  		goto single_node;
1167  	}
1168  
1169  	if (node->node_count == 1) {
1170  		/* Single allocation in this node. */
1171  		mas->alloc = node->slot[0];
1172  		mas->alloc->total = node->total - 1;
1173  		ret = node;
1174  		goto new_head;
1175  	}
1176  	node->total--;
1177  	ret = node->slot[--node->node_count];
1178  	node->slot[node->node_count] = NULL;
1179  
1180  single_node:
1181  new_head:
1182  	if (req) {
1183  		req++;
1184  		mas_set_alloc_req(mas, req);
1185  	}
1186  
1187  	memset(ret, 0, sizeof(*ret));
1188  	return (struct maple_node *)ret;
1189  }
1190  
1191  /*
1192   * mas_push_node() - Push a node back on the maple state allocation.
1193   * @mas: The maple state
1194   * @used: The used maple node
1195   *
1196   * Stores the maple node back into @mas->alloc for reuse.  Updates allocated and
1197   * requested node count as necessary.
1198   */
1199  static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
1200  {
1201  	struct maple_alloc *reuse = (struct maple_alloc *)used;
1202  	struct maple_alloc *head = mas->alloc;
1203  	unsigned long count;
1204  	unsigned int requested = mas_alloc_req(mas);
1205  
1206  	count = mas_allocated(mas);
1207  
1208  	reuse->request_count = 0;
1209  	reuse->node_count = 0;
1210  	if (count) {
1211  		if (head->node_count < MAPLE_ALLOC_SLOTS) {
1212  			head->slot[head->node_count++] = reuse;
1213  			head->total++;
1214  			goto done;
1215  		}
1216  		reuse->slot[0] = head;
1217  		reuse->node_count = 1;
1218  	}
1219  
1220  	reuse->total = count + 1;
1221  	mas->alloc = reuse;
1222  done:
1223  	if (requested > 1)
1224  		mas_set_alloc_req(mas, requested - 1);
1225  }
1226  
1227  /*
1228   * mas_alloc_nodes() - Allocate nodes into a maple state
1229   * @mas: The maple state
1230   * @gfp: The GFP Flags
1231   */
1232  static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
1233  {
1234  	struct maple_alloc *node;
1235  	unsigned long allocated = mas_allocated(mas);
1236  	unsigned int requested = mas_alloc_req(mas);
1237  	unsigned int count;
1238  	void **slots = NULL;
1239  	unsigned int max_req = 0;
1240  
1241  	if (!requested)
1242  		return;
1243  
1244  	mas_set_alloc_req(mas, 0);
1245  	if (mas->mas_flags & MA_STATE_PREALLOC) {
1246  		if (allocated)
1247  			return;
1248  		BUG_ON(!allocated);
1249  		WARN_ON(!allocated);
1250  	}
1251  
1252  	if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
1253  		node = (struct maple_alloc *)mt_alloc_one(gfp);
1254  		if (!node)
1255  			goto nomem_one;
1256  
1257  		if (allocated) {
1258  			node->slot[0] = mas->alloc;
1259  			node->node_count = 1;
1260  		} else {
1261  			node->node_count = 0;
1262  		}
1263  
1264  		mas->alloc = node;
1265  		node->total = ++allocated;
1266  		node->request_count = 0;
1267  		requested--;
1268  	}
1269  
1270  	node = mas->alloc;
1271  	while (requested) {
1272  		max_req = MAPLE_ALLOC_SLOTS - node->node_count;
1273  		slots = (void **)&node->slot[node->node_count];
1274  		max_req = min(requested, max_req);
1275  		count = mt_alloc_bulk(gfp, max_req, slots);
1276  		if (!count)
1277  			goto nomem_bulk;
1278  
1279  		if (node->node_count == 0) {
1280  			node->slot[0]->node_count = 0;
1281  			node->slot[0]->request_count = 0;
1282  		}
1283  
1284  		node->node_count += count;
1285  		allocated += count;
1286  		/* find a non-full node */
1287  		do {
1288  			node = node->slot[0];
1289  		} while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS));
1290  		requested -= count;
1291  	}
1292  	mas->alloc->total = allocated;
1293  	return;
1294  
1295  nomem_bulk:
1296  	/* Clean up potential freed allocations on bulk failure */
1297  	memset(slots, 0, max_req * sizeof(unsigned long));
1298  	mas->alloc->total = allocated;
1299  nomem_one:
1300  	mas_set_alloc_req(mas, requested);
1301  	mas_set_err(mas, -ENOMEM);
1302  }
1303  
1304  /*
1305   * mas_free() - Free an encoded maple node
1306   * @mas: The maple state
1307   * @used: The encoded maple node to free.
1308   *
1309   * Uses rcu free if necessary, pushes @used back on the maple state allocations
1310   * otherwise.
1311   */
1312  static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
1313  {
1314  	struct maple_node *tmp = mte_to_node(used);
1315  
1316  	if (mt_in_rcu(mas->tree))
1317  		ma_free_rcu(tmp);
1318  	else
1319  		mas_push_node(mas, tmp);
1320  }
1321  
1322  /*
1323   * mas_node_count_gfp() - Check if enough nodes are allocated and request more
1324   * if there are not enough nodes.
1325   * @mas: The maple state
1326   * @count: The number of nodes needed
1327   * @gfp: the gfp flags
1328   */
1329  static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
1330  {
1331  	unsigned long allocated = mas_allocated(mas);
1332  
1333  	if (allocated < count) {
1334  		mas_set_alloc_req(mas, count - allocated);
1335  		mas_alloc_nodes(mas, gfp);
1336  	}
1337  }
1338  
1339  /*
1340   * mas_node_count() - Check if enough nodes are allocated and request more if
1341   * there is not enough nodes.
1342   * @mas: The maple state
1343   * @count: The number of nodes needed
1344   *
1345   * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
1346   */
1347  static void mas_node_count(struct ma_state *mas, int count)
1348  {
1349  	return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
1350  }
1351  
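/*
 * Hedged sketch of how a write path uses the allocation helpers above:
 *
 *	mas_node_count(mas, needed);	(request more nodes if short)
 *	if (mas_is_err(mas))		(-ENOMEM was set by mas_alloc_nodes())
 *		return ...;
 *	new_node = mas_pop_node(mas);	(take one preallocated node)
 *	(...)
 *	mas_push_node(mas, unused);	(return a node that was not consumed)
 *
 * mas_is_err() comes from the maple tree header; "needed" and "unused" are
 * placeholders for this sketch.
 */
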
1352  /*
1353   * mas_start() - Sets up maple state for operations.
1354   * @mas: The maple state.
1355   *
1356   * If mas->status == ma_start, then set the min, max and depth to
1357   * defaults.
1358   *
1359   * Return:
1360   * - If mas->status is an error or not ma_start, return NULL.
1361   * - If it's an empty tree:     NULL & mas->status == ma_none
1362   * - If it's a single entry:    The entry & mas->status == ma_root
1363   * - If it's a tree:            NULL & mas->status == ma_active
1364   */
1365  static inline struct maple_enode *mas_start(struct ma_state *mas)
1366  {
1367  	if (likely(mas_is_start(mas))) {
1368  		struct maple_enode *root;
1369  
1370  		mas->min = 0;
1371  		mas->max = ULONG_MAX;
1372  
1373  retry:
1374  		mas->depth = 0;
1375  		root = mas_root(mas);
1376  		/* Tree with nodes */
1377  		if (likely(xa_is_node(root))) {
1378  			mas->depth = 1;
1379  			mas->status = ma_active;
1380  			mas->node = mte_safe_root(root);
1381  			mas->offset = 0;
1382  			if (mte_dead_node(mas->node))
1383  				goto retry;
1384  
1385  			return NULL;
1386  		}
1387  
1388  		mas->node = NULL;
1389  		/* empty tree */
1390  		if (unlikely(!root)) {
1391  			mas->status = ma_none;
1392  			mas->offset = MAPLE_NODE_SLOTS;
1393  			return NULL;
1394  		}
1395  
1396  		/* Single entry tree */
1397  		mas->status = ma_root;
1398  		mas->offset = MAPLE_NODE_SLOTS;
1399  
1400  		/* Single entry tree. */
1401  		if (mas->index > 0)
1402  			return NULL;
1403  
1404  		return root;
1405  	}
1406  
1407  	return NULL;
1408  }
1409  
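/*
 * Hedged sketch of the mas_start() contract from a caller's perspective:
 *
 *	entry = mas_start(mas);
 *	if (mas_is_none(mas))		(empty tree)
 *		return NULL;
 *	if (mas_is_ptr(mas))		(single entry stored at index 0)
 *		return entry;
 *	(... otherwise ma_active: descend from mas->node ...)
 */
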
1410  /*
1411   * ma_data_end() - Find the end of the data in a node.
1412   * @node: The maple node
1413   * @type: The maple node type
1414   * @pivots: The array of pivots in the node
1415   * @max: The maximum value in the node
1416   *
1417   * Uses metadata to find the end of the data when possible.
1418   * Return: The zero indexed last slot with data (may be null).
1419   */
1420  static __always_inline unsigned char ma_data_end(struct maple_node *node,
1421  		enum maple_type type, unsigned long *pivots, unsigned long max)
1422  {
1423  	unsigned char offset;
1424  
1425  	if (!pivots)
1426  		return 0;
1427  
1428  	if (type == maple_arange_64)
1429  		return ma_meta_end(node, type);
1430  
1431  	offset = mt_pivots[type] - 1;
1432  	if (likely(!pivots[offset]))
1433  		return ma_meta_end(node, type);
1434  
1435  	if (likely(pivots[offset] == max))
1436  		return offset;
1437  
1438  	return mt_pivots[type];
1439  }
1440  
1441  /*
1442   * mas_data_end() - Find the end of the data (slot).
1443   * @mas: the maple state
1444   *
1445   * This method is optimized to check the metadata of a node if the node type
1446   * supports data end metadata.
1447   *
1448   * Return: The zero indexed last slot with data (may be null).
1449   */
1450  static inline unsigned char mas_data_end(struct ma_state *mas)
1451  {
1452  	enum maple_type type;
1453  	struct maple_node *node;
1454  	unsigned char offset;
1455  	unsigned long *pivots;
1456  
1457  	type = mte_node_type(mas->node);
1458  	node = mas_mn(mas);
1459  	if (type == maple_arange_64)
1460  		return ma_meta_end(node, type);
1461  
1462  	pivots = ma_pivots(node, type);
1463  	if (unlikely(ma_dead_node(node)))
1464  		return 0;
1465  
1466  	offset = mt_pivots[type] - 1;
1467  	if (likely(!pivots[offset]))
1468  		return ma_meta_end(node, type);
1469  
1470  	if (likely(pivots[offset] == mas->max))
1471  		return offset;
1472  
1473  	return mt_pivots[type];
1474  }
1475  
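/*
 * Example of the data end logic above for a maple_range_64 node, which has
 * one fewer pivot than slots: if the last pivot is zero the node is not full
 * and the end comes from the metadata; if the last pivot equals the node
 * maximum the data ends at that pivot's slot; otherwise the final,
 * pivot-less slot is also in use and the end is mt_pivots[type].
 */
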
1476  /*
1477   * mas_leaf_max_gap() - Returns the largest gap in a leaf node
1478   * @mas: the maple state
1479   *
1480   * Return: The maximum gap in the leaf.
1481   */
1482  static unsigned long mas_leaf_max_gap(struct ma_state *mas)
1483  {
1484  	enum maple_type mt;
1485  	unsigned long pstart, gap, max_gap;
1486  	struct maple_node *mn;
1487  	unsigned long *pivots;
1488  	void __rcu **slots;
1489  	unsigned char i;
1490  	unsigned char max_piv;
1491  
1492  	mt = mte_node_type(mas->node);
1493  	mn = mas_mn(mas);
1494  	slots = ma_slots(mn, mt);
1495  	max_gap = 0;
1496  	if (unlikely(ma_is_dense(mt))) {
1497  		gap = 0;
1498  		for (i = 0; i < mt_slots[mt]; i++) {
1499  			if (slots[i]) {
1500  				if (gap > max_gap)
1501  					max_gap = gap;
1502  				gap = 0;
1503  			} else {
1504  				gap++;
1505  			}
1506  		}
1507  		if (gap > max_gap)
1508  			max_gap = gap;
1509  		return max_gap;
1510  	}
1511  
1512  	/*
1513  	 * Checking the first implied pivot optimizes the loop below, and slot 1 may
1514  	 * be skipped if there is a gap in slot 0.
1515  	 */
1516  	pivots = ma_pivots(mn, mt);
1517  	if (likely(!slots[0])) {
1518  		max_gap = pivots[0] - mas->min + 1;
1519  		i = 2;
1520  	} else {
1521  		i = 1;
1522  	}
1523  
1524  	/* reduce max_piv as the special case is checked before the loop */
1525  	max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
1526  	/*
1527  	 * Check end implied pivot which can only be a gap on the right most
1528  	 * node.
1529  	 */
1530  	if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
1531  		gap = ULONG_MAX - pivots[max_piv];
1532  		if (gap > max_gap)
1533  			max_gap = gap;
1534  
1535  		if (max_gap > pivots[max_piv] - mas->min)
1536  			return max_gap;
1537  	}
1538  
1539  	for (; i <= max_piv; i++) {
1540  		/* data == no gap. */
1541  		if (likely(slots[i]))
1542  			continue;
1543  
1544  		pstart = pivots[i - 1];
1545  		gap = pivots[i] - pstart;
1546  		if (gap > max_gap)
1547  			max_gap = gap;
1548  
1549  		/* There cannot be two gaps in a row. */
1550  		i++;
1551  	}
1552  	return max_gap;
1553  }
1554  
1555  /*
1556   * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
1557   * @node: The maple node
1558   * @gaps: The pointer to the gaps
1559   * @mt: The maple node type
1560   * @off: Pointer to store the offset location of the gap.
1561   *
1562   * Uses the metadata data end to scan backwards across set gaps.
1563   *
1564   * Return: The maximum gap value
1565   */
1566  static inline unsigned long
1567  ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
1568  	    unsigned char *off)
1569  {
1570  	unsigned char offset, i;
1571  	unsigned long max_gap = 0;
1572  
1573  	i = offset = ma_meta_end(node, mt);
1574  	do {
1575  		if (gaps[i] > max_gap) {
1576  			max_gap = gaps[i];
1577  			offset = i;
1578  		}
1579  	} while (i--);
1580  
1581  	*off = offset;
1582  	return max_gap;
1583  }
1584  
1585  /*
1586   * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
1587   * @mas: The maple state.
1588   *
1589   * Return: The gap value.
1590   */
1591  static inline unsigned long mas_max_gap(struct ma_state *mas)
1592  {
1593  	unsigned long *gaps;
1594  	unsigned char offset;
1595  	enum maple_type mt;
1596  	struct maple_node *node;
1597  
1598  	mt = mte_node_type(mas->node);
1599  	if (ma_is_leaf(mt))
1600  		return mas_leaf_max_gap(mas);
1601  
1602  	node = mas_mn(mas);
1603  	MAS_BUG_ON(mas, mt != maple_arange_64);
1604  	offset = ma_meta_gap(node);
1605  	gaps = ma_gaps(node, mt);
1606  	return gaps[offset];
1607  }
1608  
1609  /*
1610   * mas_parent_gap() - Set the parent gap and any gaps above, as needed
1611   * @mas: The maple state
1612   * @offset: The gap offset in the parent to set
1613   * @new: The new gap value.
1614   *
1615   * Set the parent gap then continue to set the gap upwards, using the metadata
1616   * of the parent to see if it is necessary to check the node above.
1617   */
1618  static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
1619  		unsigned long new)
1620  {
1621  	unsigned long meta_gap = 0;
1622  	struct maple_node *pnode;
1623  	struct maple_enode *penode;
1624  	unsigned long *pgaps;
1625  	unsigned char meta_offset;
1626  	enum maple_type pmt;
1627  
1628  	pnode = mte_parent(mas->node);
1629  	pmt = mas_parent_type(mas, mas->node);
1630  	penode = mt_mk_node(pnode, pmt);
1631  	pgaps = ma_gaps(pnode, pmt);
1632  
1633  ascend:
1634  	MAS_BUG_ON(mas, pmt != maple_arange_64);
1635  	meta_offset = ma_meta_gap(pnode);
1636  	meta_gap = pgaps[meta_offset];
1637  
1638  	pgaps[offset] = new;
1639  
1640  	if (meta_gap == new)
1641  		return;
1642  
1643  	if (offset != meta_offset) {
1644  		if (meta_gap > new)
1645  			return;
1646  
1647  		ma_set_meta_gap(pnode, pmt, offset);
1648  	} else if (new < meta_gap) {
1649  		new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
1650  		ma_set_meta_gap(pnode, pmt, meta_offset);
1651  	}
1652  
1653  	if (ma_is_root(pnode))
1654  		return;
1655  
1656  	/* Go to the parent node. */
1657  	pnode = mte_parent(penode);
1658  	pmt = mas_parent_type(mas, penode);
1659  	pgaps = ma_gaps(pnode, pmt);
1660  	offset = mte_parent_slot(penode);
1661  	penode = mt_mk_node(pnode, pmt);
1662  	goto ascend;
1663  }
1664  
1665  /*
1666   * mas_update_gap() - Update a nodes gaps and propagate up if necessary.
1667   * @mas: the maple state.
1668   */
1669  static inline void mas_update_gap(struct ma_state *mas)
1670  {
1671  	unsigned char pslot;
1672  	unsigned long p_gap;
1673  	unsigned long max_gap;
1674  
1675  	if (!mt_is_alloc(mas->tree))
1676  		return;
1677  
1678  	if (mte_is_root(mas->node))
1679  		return;
1680  
1681  	max_gap = mas_max_gap(mas);
1682  
1683  	pslot = mte_parent_slot(mas->node);
1684  	p_gap = ma_gaps(mte_parent(mas->node),
1685  			mas_parent_type(mas, mas->node))[pslot];
1686  
1687  	if (p_gap != max_gap)
1688  		mas_parent_gap(mas, pslot, max_gap);
1689  }
1690  
1691  /*
1692   * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
1693   * @parent with the slot encoded.
1694   * @mas: the maple state (for the tree)
1695   * @parent: the maple encoded node containing the children.
1696   */
1697  static inline void mas_adopt_children(struct ma_state *mas,
1698  		struct maple_enode *parent)
1699  {
1700  	enum maple_type type = mte_node_type(parent);
1701  	struct maple_node *node = mte_to_node(parent);
1702  	void __rcu **slots = ma_slots(node, type);
1703  	unsigned long *pivots = ma_pivots(node, type);
1704  	struct maple_enode *child;
1705  	unsigned char offset;
1706  
1707  	offset = ma_data_end(node, type, pivots, mas->max);
1708  	do {
1709  		child = mas_slot_locked(mas, slots, offset);
1710  		mas_set_parent(mas, child, parent, offset);
1711  	} while (offset--);
1712  }
1713  
1714  /*
1715   * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old
1716   * node as dead.
1717   * @mas: the maple state with the new node
1718   * @old_enode: The old maple encoded node to replace.
1719   */
1720  static inline void mas_put_in_tree(struct ma_state *mas,
1721  		struct maple_enode *old_enode)
1722  	__must_hold(mas->tree->ma_lock)
1723  {
1724  	unsigned char offset;
1725  	void __rcu **slots;
1726  
1727  	if (mte_is_root(mas->node)) {
1728  		mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
1729  		rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
1730  		mas_set_height(mas);
1731  	} else {
1732  
1733  		offset = mte_parent_slot(mas->node);
1734  		slots = ma_slots(mte_parent(mas->node),
1735  				 mas_parent_type(mas, mas->node));
1736  		rcu_assign_pointer(slots[offset], mas->node);
1737  	}
1738  
1739  	mte_set_node_dead(old_enode);
1740  }
1741  
1742  /*
1743   * mas_replace_node() - Replace a node by putting it in the tree, marking it
1744   * dead, and freeing it.
1745   * Uses the parent encoding to locate the maple node in the tree.
1746   * @mas: the ma_state with @mas->node pointing to the new node.
1747   * @old_enode: The old maple encoded node.
1748   */
1749  static inline void mas_replace_node(struct ma_state *mas,
1750  		struct maple_enode *old_enode)
1751  	__must_hold(mas->tree->ma_lock)
1752  {
1753  	mas_put_in_tree(mas, old_enode);
1754  	mas_free(mas, old_enode);
1755  }
1756  
1757  /*
1758   * mas_find_child() - Find a child whose parent is @mas->node.
1759   * @mas: the maple state with the parent.
1760   * @child: the maple state to store the child.
1761   */
1762  static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
1763  	__must_hold(mas->tree->ma_lock)
1764  {
1765  	enum maple_type mt;
1766  	unsigned char offset;
1767  	unsigned char end;
1768  	unsigned long *pivots;
1769  	struct maple_enode *entry;
1770  	struct maple_node *node;
1771  	void __rcu **slots;
1772  
1773  	mt = mte_node_type(mas->node);
1774  	node = mas_mn(mas);
1775  	slots = ma_slots(node, mt);
1776  	pivots = ma_pivots(node, mt);
1777  	end = ma_data_end(node, mt, pivots, mas->max);
1778  	for (offset = mas->offset; offset <= end; offset++) {
1779  		entry = mas_slot_locked(mas, slots, offset);
1780  		if (mte_parent(entry) == node) {
1781  			*child = *mas;
1782  			mas->offset = offset + 1;
1783  			child->offset = offset;
1784  			mas_descend(child);
1785  			child->offset = 0;
1786  			return true;
1787  		}
1788  	}
1789  	return false;
1790  }
1791  
1792  /*
1793   * mab_shift_right() - Shift the data in @b_node right.  Note: this does not
1794   * clean out the old data or set b_node->b_end.
1795   * @b_node: the maple_big_node
1796   * @shift: the shift count
1797   */
1798  static inline void mab_shift_right(struct maple_big_node *b_node,
1799  				 unsigned char shift)
1800  {
1801  	unsigned long size = b_node->b_end * sizeof(unsigned long);
1802  
1803  	memmove(b_node->pivot + shift, b_node->pivot, size);
1804  	memmove(b_node->slot + shift, b_node->slot, size);
1805  	if (b_node->type == maple_arange_64)
1806  		memmove(b_node->gap + shift, b_node->gap, size);
1807  }
1808  
1809  /*
1810   * mab_middle_node() - Check if a middle node is needed (unlikely)
1811   * @b_node: the maple_big_node that contains the data.
1812   * @split: the potential split location
1813   * @slot_count: the number of slots that fit in a single node of this type.
1814   *
1815   * Return: true if a middle node is required.
1816   */
1817  static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
1818  				   unsigned char slot_count)
1819  {
1820  	unsigned char size = b_node->b_end;
1821  
1822  	if (size >= 2 * slot_count)
1823  		return true;
1824  
1825  	if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
1826  		return true;
1827  
1828  	return false;
1829  }
1830  
1831  /*
1832   * mab_no_null_split() - ensure the split doesn't fall on a NULL
1833   * @b_node: the maple_big_node with the data
1834   * @split: the suggested split location
1835   * @slot_count: the number of slots in the node being considered.
1836   *
1837   * Return: the split location.
1838   */
1839  static inline int mab_no_null_split(struct maple_big_node *b_node,
1840  				    unsigned char split, unsigned char slot_count)
1841  {
1842  	if (!b_node->slot[split]) {
1843  		/*
1844  		 * If the split is less than the max slot and the right side will
1845  		 * still be sufficient, then increment the split past the NULL.
1846  		 */
1847  		if ((split < slot_count - 1) &&
1848  		    (b_node->b_end - split) > (mt_min_slots[b_node->type]))
1849  			split++;
1850  		else
1851  			split--;
1852  	}
1853  	return split;
1854  }
1855  
1856  /*
1857   * mab_calc_split() - Calculate the split location and if there needs to be two
1858   * splits.
1859   * @mas: The maple state
1860   * @bn: The maple_big_node with the data
1861   * @mid_split: The second split, if required.  0 otherwise.
1862   *
1863   * Return: The first split location.  The middle split is set in @mid_split.
1864   */
1865  static inline int mab_calc_split(struct ma_state *mas,
1866  	 struct maple_big_node *bn, unsigned char *mid_split)
1867  {
1868  	unsigned char b_end = bn->b_end;
1869  	int split = b_end / 2; /* Assume equal split. */
1870  	unsigned char slot_count = mt_slots[bn->type];
1871  
1872  	/*
1873  	 * To support gap tracking, all NULL entries are kept together and a node cannot
1874  	 * end on a NULL entry, with the exception of the left-most leaf.  The
1875  	 * limitation means that the split of a node must be checked for this condition
1876  	 * and be able to put more data in one direction or the other.
1877  	 */
1878  	if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
1879  		*mid_split = 0;
1880  		split = b_end - mt_min_slots[bn->type];
1881  
1882  		if (!ma_is_leaf(bn->type))
1883  			return split;
1884  
1885  		mas->mas_flags |= MA_STATE_REBALANCE;
1886  		if (!bn->slot[split])
1887  			split--;
1888  		return split;
1889  	}
1890  
1891  	/*
1892  	 * Although extremely rare, it is possible to enter what is known as the 3-way
1893  	 * split scenario.  The 3-way split comes about by means of a store of a range
1894  	 * that overwrites the end and beginning of two full nodes.  The result is a set
1895  	 * of entries that cannot be stored in 2 nodes.  Sometimes, these two nodes can
1896  	 * also be located in different parent nodes which are also full.  This can
1897  	 * carry upwards all the way to the root in the worst case.
1898  	 */
1899  	if (unlikely(mab_middle_node(bn, split, slot_count))) {
1900  		split = b_end / 3;
1901  		*mid_split = split * 2;
1902  	} else {
1903  		*mid_split = 0;
1904  	}
1905  
1906  	/* Avoid ending a node on a NULL entry */
1907  	split = mab_no_null_split(bn, split, slot_count);
1908  
1909  	if (unlikely(*mid_split))
1910  		*mid_split = mab_no_null_split(bn, *mid_split, slot_count);
1911  
1912  	return split;
1913  }
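
/*
 * Worked example (illustrative only, assuming mt_slots[] of 16 and
 * mt_min_slots[] of 6 for this node type): a big node with b_end = 20 does
 * not need a middle node (20 < 2 * 16), so the initial split is
 * b_end / 2 = 10.  If bn->slot[10] happens to be NULL, mab_no_null_split()
 * nudges the split to 11 (20 - 10 entries still leave the right side
 * sufficient), so neither resulting node ends on a NULL entry.  Only when
 * b_end approaches 2 * slot_count does *mid_split become non-zero and a
 * 3-way split occur.
 */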
1914  
1915  /*
1916   * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
1917   * and set @b_node->b_end to the next free slot.
1918   * @mas: The maple state
1919   * @mas_start: The starting slot to copy
1920   * @mas_end: The end slot to copy (inclusively)
1921   * @b_node: The maple_big_node to place the data
1922   * @mab_start: The starting location in maple_big_node to store the data.
1923   */
1924  static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
1925  			unsigned char mas_end, struct maple_big_node *b_node,
1926  			unsigned char mab_start)
1927  {
1928  	enum maple_type mt;
1929  	struct maple_node *node;
1930  	void __rcu **slots;
1931  	unsigned long *pivots, *gaps;
1932  	int i = mas_start, j = mab_start;
1933  	unsigned char piv_end;
1934  
1935  	node = mas_mn(mas);
1936  	mt = mte_node_type(mas->node);
1937  	pivots = ma_pivots(node, mt);
1938  	if (!i) {
1939  		b_node->pivot[j] = pivots[i++];
1940  		if (unlikely(i > mas_end))
1941  			goto complete;
1942  		j++;
1943  	}
1944  
1945  	piv_end = min(mas_end, mt_pivots[mt]);
1946  	for (; i < piv_end; i++, j++) {
1947  		b_node->pivot[j] = pivots[i];
1948  		if (unlikely(!b_node->pivot[j]))
1949  			goto complete;
1950  
1951  		if (unlikely(mas->max == b_node->pivot[j]))
1952  			goto complete;
1953  	}
1954  
1955  	b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
1956  
1957  complete:
1958  	b_node->b_end = ++j;
1959  	j -= mab_start;
1960  	slots = ma_slots(node, mt);
1961  	memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
1962  	if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
1963  		gaps = ma_gaps(node, mt);
1964  		memcpy(b_node->gap + mab_start, gaps + mas_start,
1965  		       sizeof(unsigned long) * j);
1966  	}
1967  }
1968  
1969  /*
1970   * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
1971   * @node: The maple node
1972   * @mt: The maple type
1973   * @end: The node end
1974   */
1975  static inline void mas_leaf_set_meta(struct maple_node *node,
1976  		enum maple_type mt, unsigned char end)
1977  {
1978  	if (end < mt_slots[mt] - 1)
1979  		ma_set_meta(node, mt, 0, end);
1980  }
1981  
1982  /*
1983   * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
1984   * @b_node: the maple_big_node that has the data
1985   * @mab_start: the start location in @b_node.
1986   * @mab_end: The end location in @b_node (inclusively)
1987   * @mas: The maple state with the maple encoded node.
1988   */
1989  static inline void mab_mas_cp(struct maple_big_node *b_node,
1990  			      unsigned char mab_start, unsigned char mab_end,
1991  			      struct ma_state *mas, bool new_max)
1992  {
1993  	int i, j = 0;
1994  	enum maple_type mt = mte_node_type(mas->node);
1995  	struct maple_node *node = mte_to_node(mas->node);
1996  	void __rcu **slots = ma_slots(node, mt);
1997  	unsigned long *pivots = ma_pivots(node, mt);
1998  	unsigned long *gaps = NULL;
1999  	unsigned char end;
2000  
2001  	if (mab_end - mab_start > mt_pivots[mt])
2002  		mab_end--;
2003  
2004  	if (!pivots[mt_pivots[mt] - 1])
2005  		slots[mt_pivots[mt]] = NULL;
2006  
2007  	i = mab_start;
2008  	do {
2009  		pivots[j++] = b_node->pivot[i++];
2010  	} while (i <= mab_end && likely(b_node->pivot[i]));
2011  
2012  	memcpy(slots, b_node->slot + mab_start,
2013  	       sizeof(void *) * (i - mab_start));
2014  
2015  	if (new_max)
2016  		mas->max = b_node->pivot[i - 1];
2017  
2018  	end = j - 1;
2019  	if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
2020  		unsigned long max_gap = 0;
2021  		unsigned char offset = 0;
2022  
2023  		gaps = ma_gaps(node, mt);
2024  		do {
2025  			gaps[--j] = b_node->gap[--i];
2026  			if (gaps[j] > max_gap) {
2027  				offset = j;
2028  				max_gap = gaps[j];
2029  			}
2030  		} while (j);
2031  
2032  		ma_set_meta(node, mt, offset, end);
2033  	} else {
2034  		mas_leaf_set_meta(node, mt, end);
2035  	}
2036  }
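
/*
 * Illustrative note: for a non-leaf node of an allocation tree
 * (maple_arange_64), the loop above also copies the gap array backwards and
 * records which offset holds the largest gap; e.g. copied gaps of {0, 7, 2}
 * end with ma_set_meta(node, mt, 1, end) so later gap searches can jump
 * straight to offset 1.  Leaves instead only record the end of data via
 * mas_leaf_set_meta() when the node is not full.
 */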
2037  
2038  /*
2039   * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
2040   * @mas: The maple state
2041   * @end: The maple node end
2042   * @mt: The maple node type
2043   */
2044  static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
2045  				      enum maple_type mt)
2046  {
2047  	if (!(mas->mas_flags & MA_STATE_BULK))
2048  		return;
2049  
2050  	if (mte_is_root(mas->node))
2051  		return;
2052  
2053  	if (end > mt_min_slots[mt]) {
2054  		mas->mas_flags &= ~MA_STATE_REBALANCE;
2055  		return;
2056  	}
2057  }
2058  
2059  /*
2060   * mas_store_b_node() - Store an @entry into the b_node while also copying the
2061   * data from a maple encoded node.
2062   * @wr_mas: the maple write state
2063   * @b_node: the maple_big_node to fill with data
2064   * @offset_end: the offset to end copying
2065   *
2066   * Return: The actual end of the data stored in @b_node
2067   */
2068  static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
2069  		struct maple_big_node *b_node, unsigned char offset_end)
2070  {
2071  	unsigned char slot;
2072  	unsigned char b_end;
2073  	/* Possible underflow of piv will wrap back to 0 before use. */
2074  	unsigned long piv;
2075  	struct ma_state *mas = wr_mas->mas;
2076  
2077  	b_node->type = wr_mas->type;
2078  	b_end = 0;
2079  	slot = mas->offset;
2080  	if (slot) {
2081  		/* Copy start data up to insert. */
2082  		mas_mab_cp(mas, 0, slot - 1, b_node, 0);
2083  		b_end = b_node->b_end;
2084  		piv = b_node->pivot[b_end - 1];
2085  	} else
2086  		piv = mas->min - 1;
2087  
2088  	if (piv + 1 < mas->index) {
2089  		/* Handle range starting after old range */
2090  		b_node->slot[b_end] = wr_mas->content;
2091  		if (!wr_mas->content)
2092  			b_node->gap[b_end] = mas->index - 1 - piv;
2093  		b_node->pivot[b_end++] = mas->index - 1;
2094  	}
2095  
2096  	/* Store the new entry. */
2097  	mas->offset = b_end;
2098  	b_node->slot[b_end] = wr_mas->entry;
2099  	b_node->pivot[b_end] = mas->last;
2100  
2101  	/* Appended. */
2102  	if (mas->last >= mas->max)
2103  		goto b_end;
2104  
2105  	/* Handle new range ending before old range ends */
2106  	piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
2107  	if (piv > mas->last) {
2108  		if (piv == ULONG_MAX)
2109  			mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
2110  
2111  		if (offset_end != slot)
2112  			wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
2113  							  offset_end);
2114  
2115  		b_node->slot[++b_end] = wr_mas->content;
2116  		if (!wr_mas->content)
2117  			b_node->gap[b_end] = piv - mas->last + 1;
2118  		b_node->pivot[b_end] = piv;
2119  	}
2120  
2121  	slot = offset_end + 1;
2122  	if (slot > mas->end)
2123  		goto b_end;
2124  
2125  	/* Copy end data to the end of the node. */
2126  	mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end);
2127  	b_node->b_end--;
2128  	return;
2129  
2130  b_end:
2131  	b_node->b_end = b_end;
2132  }
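
/*
 * Illustrative sketch of the copy above (hypothetical values): storing a new
 * entry over [15, 20] into a leaf holding pivots {9, 29, 40} and slots
 * {A, B, C} yields a big node of
 *
 *	slot:  | A |  B | new |  B |  C |
 *	pivot: | 9 | 14 |  20 | 29 | 40 |
 *
 * The old entry B is kept on both sides of the written range: the first copy
 * is trimmed to mas->index - 1 and the second runs to the old pivot.  The
 * caller then decides whether the result still fits in one node or must be
 * split.
 */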
2133  
2134  /*
2135   * mas_prev_sibling() - Find the previous node with the same parent.
2136   * @mas: the maple state
2137   *
2138   * Return: True if there is a previous sibling, false otherwise.
2139   */
2140  static inline bool mas_prev_sibling(struct ma_state *mas)
2141  {
2142  	unsigned int p_slot = mte_parent_slot(mas->node);
2143  
2144  	/* For root node, p_slot is set to 0 by mte_parent_slot(). */
2145  	if (!p_slot)
2146  		return false;
2147  
2148  	mas_ascend(mas);
2149  	mas->offset = p_slot - 1;
2150  	mas_descend(mas);
2151  	return true;
2152  }
2153  
2154  /*
2155   * mas_next_sibling() - Find the next node with the same parent.
2156   * @mas: the maple state
2157   *
2158   * Return: true if there is a next sibling, false otherwise.
2159   */
2160  static inline bool mas_next_sibling(struct ma_state *mas)
2161  {
2162  	MA_STATE(parent, mas->tree, mas->index, mas->last);
2163  
2164  	if (mte_is_root(mas->node))
2165  		return false;
2166  
2167  	parent = *mas;
2168  	mas_ascend(&parent);
2169  	parent.offset = mte_parent_slot(mas->node) + 1;
2170  	if (parent.offset > mas_data_end(&parent))
2171  		return false;
2172  
2173  	*mas = parent;
2174  	mas_descend(mas);
2175  	return true;
2176  }
2177  
2178  /*
2179   * mas_node_or_none() - Set the enode and state.
2180   * @mas: the maple state
2181   * @enode: The encoded maple node.
2182   *
2183   * Set the node to the enode and the status.
2184   */
2185  static inline void mas_node_or_none(struct ma_state *mas,
2186  		struct maple_enode *enode)
2187  {
2188  	if (enode) {
2189  		mas->node = enode;
2190  		mas->status = ma_active;
2191  	} else {
2192  		mas->node = NULL;
2193  		mas->status = ma_none;
2194  	}
2195  }
2196  
2197  /*
2198   * mas_wr_node_walk() - Find the correct offset for @mas->index within the node.
2199   *                      If @mas->index cannot be found within the containing
2200   *                      node, we traverse to the last entry in the node.
2201   * @wr_mas: The maple write state
2202   *
2203   * Uses mas_slot_locked() and does not need to worry about dead nodes.
2204   */
2205  static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
2206  {
2207  	struct ma_state *mas = wr_mas->mas;
2208  	unsigned char count, offset;
2209  
2210  	if (unlikely(ma_is_dense(wr_mas->type))) {
2211  		wr_mas->r_max = wr_mas->r_min = mas->index;
2212  		mas->offset = mas->index = mas->min;
2213  		return;
2214  	}
2215  
2216  	wr_mas->node = mas_mn(wr_mas->mas);
2217  	wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
2218  	count = mas->end = ma_data_end(wr_mas->node, wr_mas->type,
2219  				       wr_mas->pivots, mas->max);
2220  	offset = mas->offset;
2221  
2222  	while (offset < count && mas->index > wr_mas->pivots[offset])
2223  		offset++;
2224  
2225  	wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max;
2226  	wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset);
2227  	wr_mas->offset_end = mas->offset = offset;
2228  }
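
/*
 * Illustrative sketch (hypothetical values): walking a node with pivots
 * {9, 19, 29} for a write at mas->index = 25 stops at offset 2, since 25 is
 * greater than pivots 9 and 19 but not 29.  The write state then records
 * r_min = 20 (previous pivot + 1) and r_max = 29, i.e. the existing range the
 * write will interact with.
 */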
2229  
2230  /*
2231   * mast_rebalance_next() - Rebalance against the next node
2232   * @mast: The maple subtree state
2233   */
2234  static inline void mast_rebalance_next(struct maple_subtree_state *mast)
2235  {
2236  	unsigned char b_end = mast->bn->b_end;
2237  
2238  	mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
2239  		   mast->bn, b_end);
2240  	mast->orig_r->last = mast->orig_r->max;
2241  }
2242  
2243  /*
2244   * mast_rebalance_prev() - Rebalance against the previous node
2245   * @mast: The maple subtree state
2246   */
2247  static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
2248  {
2249  	unsigned char end = mas_data_end(mast->orig_l) + 1;
2250  	unsigned char b_end = mast->bn->b_end;
2251  
2252  	mab_shift_right(mast->bn, end);
2253  	mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
2254  	mast->l->min = mast->orig_l->min;
2255  	mast->orig_l->index = mast->orig_l->min;
2256  	mast->bn->b_end = end + b_end;
2257  	mast->l->offset += end;
2258  }
2259  
2260  /*
2261   * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring
2262   * the node to the right.  Check the nodes to the right, then the left, at each
2263   * level upwards until the root is reached.
2264   * Data is copied into the @mast->bn.
2265   * @mast: The maple_subtree_state.
2266   */
2267  static inline
2268  bool mast_spanning_rebalance(struct maple_subtree_state *mast)
2269  {
2270  	struct ma_state r_tmp = *mast->orig_r;
2271  	struct ma_state l_tmp = *mast->orig_l;
2272  	unsigned char depth = 0;
2273  
2274  	do {
2275  		mas_ascend(mast->orig_r);
2276  		mas_ascend(mast->orig_l);
2277  		depth++;
2278  		if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
2279  			mast->orig_r->offset++;
2280  			do {
2281  				mas_descend(mast->orig_r);
2282  				mast->orig_r->offset = 0;
2283  			} while (--depth);
2284  
2285  			mast_rebalance_next(mast);
2286  			*mast->orig_l = l_tmp;
2287  			return true;
2288  		} else if (mast->orig_l->offset != 0) {
2289  			mast->orig_l->offset--;
2290  			do {
2291  				mas_descend(mast->orig_l);
2292  				mast->orig_l->offset =
2293  					mas_data_end(mast->orig_l);
2294  			} while (--depth);
2295  
2296  			mast_rebalance_prev(mast);
2297  			*mast->orig_r = r_tmp;
2298  			return true;
2299  		}
2300  	} while (!mte_is_root(mast->orig_r->node));
2301  
2302  	*mast->orig_r = r_tmp;
2303  	*mast->orig_l = l_tmp;
2304  	return false;
2305  }
2306  
2307  /*
2308   * mast_ascend() - Ascend the original left and right maple states.
2309   * @mast: the maple subtree state.
2310   *
2311   * Ascend the original left and right sides.  Set the offsets to point to the
2312   * data already in the new tree (@mast->l and @mast->r).
2313   */
2314  static inline void mast_ascend(struct maple_subtree_state *mast)
2315  {
2316  	MA_WR_STATE(wr_mas, mast->orig_r,  NULL);
2317  	mas_ascend(mast->orig_l);
2318  	mas_ascend(mast->orig_r);
2319  
2320  	mast->orig_r->offset = 0;
2321  	mast->orig_r->index = mast->r->max;
2322  	/* last should be larger than or equal to index */
2323  	if (mast->orig_r->last < mast->orig_r->index)
2324  		mast->orig_r->last = mast->orig_r->index;
2325  
2326  	wr_mas.type = mte_node_type(mast->orig_r->node);
2327  	mas_wr_node_walk(&wr_mas);
2328  	/* Set up the left side of things */
2329  	mast->orig_l->offset = 0;
2330  	mast->orig_l->index = mast->l->min;
2331  	wr_mas.mas = mast->orig_l;
2332  	wr_mas.type = mte_node_type(mast->orig_l->node);
2333  	mas_wr_node_walk(&wr_mas);
2334  
2335  	mast->bn->type = wr_mas.type;
2336  }
2337  
2338  /*
2339   * mas_new_ma_node() - Create and return a new maple node.  Helper function.
2340   * @mas: the maple state with the allocations.
2341   * @b_node: the maple_big_node with the type encoding.
2342   *
2343   * Use the node type from the maple_big_node to allocate a new node from the
2344   * ma_state.  This function exists mainly for code readability.
2345   *
2346   * Return: A new maple encoded node
2347   */
2348  static inline struct maple_enode
2349  *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node)
2350  {
2351  	return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type);
2352  }
2353  
2354  /*
2355   * mas_mab_to_node() - Set up right and middle nodes
2356   *
2357   * @mas: the maple state that contains the allocations.
2358   * @b_node: the node which contains the data.
2359   * @left: The pointer which will have the left node
2360   * @right: The pointer which may have the right node
2361   * @middle: the pointer which may have the middle node (rare)
2362   * @mid_split: the split location for the middle node
2363   *
2364   * Return: the split of left.
2365   */
2366  static inline unsigned char mas_mab_to_node(struct ma_state *mas,
2367  	struct maple_big_node *b_node, struct maple_enode **left,
2368  	struct maple_enode **right, struct maple_enode **middle,
2369  	unsigned char *mid_split)
2370  {
2371  	unsigned char split = 0;
2372  	unsigned char slot_count = mt_slots[b_node->type];
2373  
2374  	*left = mas_new_ma_node(mas, b_node);
2375  	*right = NULL;
2376  	*middle = NULL;
2377  	*mid_split = 0;
2378  
2379  	if (b_node->b_end < slot_count) {
2380  		split = b_node->b_end;
2381  	} else {
2382  		split = mab_calc_split(mas, b_node, mid_split);
2383  		*right = mas_new_ma_node(mas, b_node);
2384  	}
2385  
2386  	if (*mid_split)
2387  		*middle = mas_new_ma_node(mas, b_node);
2388  
2389  	return split;
2390  
2391  }
2392  
2393  /*
2394   * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end
2395   * pointer.
2396   * @b_node: the big node to add the entry
2397   * @mas: the maple state to get the pivot (mas->max)
2398   * @entry: the entry to add, if NULL nothing happens.
2399   */
2400  static inline void mab_set_b_end(struct maple_big_node *b_node,
2401  				 struct ma_state *mas,
2402  				 void *entry)
2403  {
2404  	if (!entry)
2405  		return;
2406  
2407  	b_node->slot[b_node->b_end] = entry;
2408  	if (mt_is_alloc(mas->tree))
2409  		b_node->gap[b_node->b_end] = mas_max_gap(mas);
2410  	b_node->pivot[b_node->b_end++] = mas->max;
2411  }
2412  
2413  /*
2414   * mas_set_split_parent() - combine_then_separate helper function.  Sets the parent
2415   * of @mas->node to either @left or @right, depending on @slot and @split
2416   *
2417   * @mas: the maple state with the node that needs a parent
2418   * @left: possible parent 1
2419   * @right: possible parent 2
2420   * @slot: the slot the mas->node was placed
2421   * @split: the split location between @left and @right
2422   */
2423  static inline void mas_set_split_parent(struct ma_state *mas,
2424  					struct maple_enode *left,
2425  					struct maple_enode *right,
2426  					unsigned char *slot, unsigned char split)
2427  {
2428  	if (mas_is_none(mas))
2429  		return;
2430  
2431  	if ((*slot) <= split)
2432  		mas_set_parent(mas, mas->node, left, *slot);
2433  	else if (right)
2434  		mas_set_parent(mas, mas->node, right, (*slot) - split - 1);
2435  
2436  	(*slot)++;
2437  }
2438  
2439  /*
2440   * mte_mid_split_check() - Check if the next node passes the mid-split
2441   * @l: Pointer to left encoded maple node.
2442   * @r: Pointer to the right encoded maple node (updated once past the mid-split).
2443   * @right: The encoded maple node to switch to after the mid-split.
2444   * @slot: The offset
2445   * @split: The split location.
2446   * @mid_split: The middle split.
2447   */
2448  static inline void mte_mid_split_check(struct maple_enode **l,
2449  				       struct maple_enode **r,
2450  				       struct maple_enode *right,
2451  				       unsigned char slot,
2452  				       unsigned char *split,
2453  				       unsigned char mid_split)
2454  {
2455  	if (*r == right)
2456  		return;
2457  
2458  	if (slot < mid_split)
2459  		return;
2460  
2461  	*l = *r;
2462  	*r = right;
2463  	*split = mid_split;
2464  }
2465  
2466  /*
2467   * mast_set_split_parents() - Helper function to set three nodes' parents.  Slot
2468   * is taken from @mast->l.
2469   * @mast: the maple subtree state
2470   * @left: the left node
2471   * @right: the right node
2472   * @split: the split location.
2473   */
2474  static inline void mast_set_split_parents(struct maple_subtree_state *mast,
2475  					  struct maple_enode *left,
2476  					  struct maple_enode *middle,
2477  					  struct maple_enode *right,
2478  					  unsigned char split,
2479  					  unsigned char mid_split)
2480  {
2481  	unsigned char slot;
2482  	struct maple_enode *l = left;
2483  	struct maple_enode *r = right;
2484  
2485  	if (mas_is_none(mast->l))
2486  		return;
2487  
2488  	if (middle)
2489  		r = middle;
2490  
2491  	slot = mast->l->offset;
2492  
2493  	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2494  	mas_set_split_parent(mast->l, l, r, &slot, split);
2495  
2496  	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2497  	mas_set_split_parent(mast->m, l, r, &slot, split);
2498  
2499  	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2500  	mas_set_split_parent(mast->r, l, r, &slot, split);
2501  }
2502  
2503  /*
2504   * mas_topiary_node() - Dispose of a single node
2505   * @mas: The maple state for pushing nodes
2506   * @in_rcu: If the tree is in rcu mode
2507   *
2508   * The node will either be RCU freed or pushed back on the maple state.
2509   */
2510  static inline void mas_topiary_node(struct ma_state *mas,
2511  		struct ma_state *tmp_mas, bool in_rcu)
2512  {
2513  	struct maple_node *tmp;
2514  	struct maple_enode *enode;
2515  
2516  	if (mas_is_none(tmp_mas))
2517  		return;
2518  
2519  	enode = tmp_mas->node;
2520  	tmp = mte_to_node(enode);
2521  	mte_set_node_dead(enode);
2522  	if (in_rcu)
2523  		ma_free_rcu(tmp);
2524  	else
2525  		mas_push_node(mas, tmp);
2526  }
2527  
2528  /*
2529   * mas_topiary_replace() - Replace the data with new data, then repair the
2530   * parent links within the new tree.  Iterate over the old sub-tree, collect the
2531   * dead subtrees, and topiary (prune) the nodes that are no longer of use.
2532   *
2533   * The new tree will have up to three children with the correct parent.  Keep
2534   * track of the new entries as they need to be followed to find the next level
2535   * of new entries.
2536   *
2537   * The old tree will have up to three children with the old parent.  Keep track
2538   * of the old entries as they may have more nodes below replaced.  Nodes within
2539   * [index, last] are dead subtrees, others need to be freed and followed.
2540   *
2541   * @mas: The maple state pointing at the new data
2542   * @old_enode: The maple encoded node being replaced
2543   *
2544   */
2545  static inline void mas_topiary_replace(struct ma_state *mas,
2546  		struct maple_enode *old_enode)
2547  {
2548  	struct ma_state tmp[3], tmp_next[3];
2549  	MA_TOPIARY(subtrees, mas->tree);
2550  	bool in_rcu;
2551  	int i, n;
2552  
2553  	/* Place data in tree & then mark node as old */
2554  	mas_put_in_tree(mas, old_enode);
2555  
2556  	/* Update the parent pointers in the tree */
2557  	tmp[0] = *mas;
2558  	tmp[0].offset = 0;
2559  	tmp[1].status = ma_none;
2560  	tmp[2].status = ma_none;
2561  	while (!mte_is_leaf(tmp[0].node)) {
2562  		n = 0;
2563  		for (i = 0; i < 3; i++) {
2564  			if (mas_is_none(&tmp[i]))
2565  				continue;
2566  
2567  			while (n < 3) {
2568  				if (!mas_find_child(&tmp[i], &tmp_next[n]))
2569  					break;
2570  				n++;
2571  			}
2572  
2573  			mas_adopt_children(&tmp[i], tmp[i].node);
2574  		}
2575  
2576  		if (MAS_WARN_ON(mas, n == 0))
2577  			break;
2578  
2579  		while (n < 3)
2580  			tmp_next[n++].status = ma_none;
2581  
2582  		for (i = 0; i < 3; i++)
2583  			tmp[i] = tmp_next[i];
2584  	}
2585  
2586  	/* Collect the old nodes that need to be discarded */
2587  	if (mte_is_leaf(old_enode))
2588  		return mas_free(mas, old_enode);
2589  
2590  	tmp[0] = *mas;
2591  	tmp[0].offset = 0;
2592  	tmp[0].node = old_enode;
2593  	tmp[1].status = ma_none;
2594  	tmp[2].status = ma_none;
2595  	in_rcu = mt_in_rcu(mas->tree);
2596  	do {
2597  		n = 0;
2598  		for (i = 0; i < 3; i++) {
2599  			if (mas_is_none(&tmp[i]))
2600  				continue;
2601  
2602  			while (n < 3) {
2603  				if (!mas_find_child(&tmp[i], &tmp_next[n]))
2604  					break;
2605  
2606  				if ((tmp_next[n].min >= tmp_next->index) &&
2607  				    (tmp_next[n].max <= tmp_next->last)) {
2608  					mat_add(&subtrees, tmp_next[n].node);
2609  					tmp_next[n].status = ma_none;
2610  				} else {
2611  					n++;
2612  				}
2613  			}
2614  		}
2615  
2616  		if (MAS_WARN_ON(mas, n == 0))
2617  			break;
2618  
2619  		while (n < 3)
2620  			tmp_next[n++].status = ma_none;
2621  
2622  		for (i = 0; i < 3; i++) {
2623  			mas_topiary_node(mas, &tmp[i], in_rcu);
2624  			tmp[i] = tmp_next[i];
2625  		}
2626  	} while (!mte_is_leaf(tmp[0].node));
2627  
2628  	for (i = 0; i < 3; i++)
2629  		mas_topiary_node(mas, &tmp[i], in_rcu);
2630  
2631  	mas_mat_destroy(mas, &subtrees);
2632  }
2633  
2634  /*
2635   * mas_wmb_replace() - Write memory barrier and replace
2636   * @mas: The maple state
2637   * @old_enode: The old maple encoded node that is being replaced.
2638   *
2639   * Updates gap as necessary.
2640   */
2641  static inline void mas_wmb_replace(struct ma_state *mas,
2642  		struct maple_enode *old_enode)
2643  {
2644  	/* Insert the new data in the tree */
2645  	mas_topiary_replace(mas, old_enode);
2646  
2647  	if (mte_is_leaf(mas->node))
2648  		return;
2649  
2650  	mas_update_gap(mas);
2651  }
2652  
2653  /*
2654   * mast_cp_to_nodes() - Copy data out to nodes.
2655   * @mast: The maple subtree state
2656   * @left: The left encoded maple node
2657   * @middle: The middle encoded maple node
2658   * @right: The right encoded maple node
2659   * @split: The location to split between left and (middle ? middle : right)
2660   * @mid_split: The location to split between middle and right.
2661   */
2662  static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
2663  	struct maple_enode *left, struct maple_enode *middle,
2664  	struct maple_enode *right, unsigned char split, unsigned char mid_split)
2665  {
2666  	bool new_lmax = true;
2667  
2668  	mas_node_or_none(mast->l, left);
2669  	mas_node_or_none(mast->m, middle);
2670  	mas_node_or_none(mast->r, right);
2671  
2672  	mast->l->min = mast->orig_l->min;
2673  	if (split == mast->bn->b_end) {
2674  		mast->l->max = mast->orig_r->max;
2675  		new_lmax = false;
2676  	}
2677  
2678  	mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);
2679  
2680  	if (middle) {
2681  		mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
2682  		mast->m->min = mast->bn->pivot[split] + 1;
2683  		split = mid_split;
2684  	}
2685  
2686  	mast->r->max = mast->orig_r->max;
2687  	if (right) {
2688  		mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
2689  		mast->r->min = mast->bn->pivot[split] + 1;
2690  	}
2691  }
2692  
2693  /*
2694   * mast_combine_cp_left() - Copy in the original left side of the tree into the
2695   * combined data set in the maple subtree state big node.
2696   * @mast: The maple subtree state
2697   */
2698  static inline void mast_combine_cp_left(struct maple_subtree_state *mast)
2699  {
2700  	unsigned char l_slot = mast->orig_l->offset;
2701  
2702  	if (!l_slot)
2703  		return;
2704  
2705  	mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0);
2706  }
2707  
2708  /*
2709   * mast_combine_cp_right() - Copy in the original right side of the tree into the
2710   * combined data set in the maple subtree state big node.
2711   * @mast: The maple subtree state
2712   */
2713  static inline void mast_combine_cp_right(struct maple_subtree_state *mast)
2714  {
2715  	if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max)
2716  		return;
2717  
2718  	mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1,
2719  		   mt_slot_count(mast->orig_r->node), mast->bn,
2720  		   mast->bn->b_end);
2721  	mast->orig_r->last = mast->orig_r->max;
2722  }
2723  
2724  /*
2725   * mast_sufficient() - Check if the maple subtree state has enough data in the big
2726   * node to create at least one sufficient node
2727   * @mast: the maple subtree state
2728   */
2729  static inline bool mast_sufficient(struct maple_subtree_state *mast)
2730  {
2731  	if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node))
2732  		return true;
2733  
2734  	return false;
2735  }
2736  
2737  /*
2738   * mast_overflow() - Check if there is too much data in the subtree state for a
2739   * single node.
2740   * @mast: The maple subtree state
2741   */
2742  static inline bool mast_overflow(struct maple_subtree_state *mast)
2743  {
2744  	if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
2745  		return true;
2746  
2747  	return false;
2748  }
2749  
2750  static inline void *mtree_range_walk(struct ma_state *mas)
2751  {
2752  	unsigned long *pivots;
2753  	unsigned char offset;
2754  	struct maple_node *node;
2755  	struct maple_enode *next, *last;
2756  	enum maple_type type;
2757  	void __rcu **slots;
2758  	unsigned char end;
2759  	unsigned long max, min;
2760  	unsigned long prev_max, prev_min;
2761  
2762  	next = mas->node;
2763  	min = mas->min;
2764  	max = mas->max;
2765  	do {
2766  		last = next;
2767  		node = mte_to_node(next);
2768  		type = mte_node_type(next);
2769  		pivots = ma_pivots(node, type);
2770  		end = ma_data_end(node, type, pivots, max);
2771  		prev_min = min;
2772  		prev_max = max;
2773  		if (pivots[0] >= mas->index) {
2774  			offset = 0;
2775  			max = pivots[0];
2776  			goto next;
2777  		}
2778  
2779  		offset = 1;
2780  		while (offset < end) {
2781  			if (pivots[offset] >= mas->index) {
2782  				max = pivots[offset];
2783  				break;
2784  			}
2785  			offset++;
2786  		}
2787  
2788  		min = pivots[offset - 1] + 1;
2789  next:
2790  		slots = ma_slots(node, type);
2791  		next = mt_slot(mas->tree, slots, offset);
2792  		if (unlikely(ma_dead_node(node)))
2793  			goto dead_node;
2794  	} while (!ma_is_leaf(type));
2795  
2796  	mas->end = end;
2797  	mas->offset = offset;
2798  	mas->index = min;
2799  	mas->last = max;
2800  	mas->min = prev_min;
2801  	mas->max = prev_max;
2802  	mas->node = last;
2803  	return (void *)next;
2804  
2805  dead_node:
2806  	mas_reset(mas);
2807  	return NULL;
2808  }
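
/*
 * Illustrative sketch (hypothetical tree): walking for index 17 in a two
 * level tree whose root pivots are {9, 19, 29} descends through offset 1 (the
 * first pivot >= 17), narrowing [min, max] to [10, 19].  The same scan in the
 * leaf locates the slot covering 17; the maple state is left with
 * mas->index/mas->last set to that entry's range and mas->node pointing at
 * the leaf, ready for the next operation.
 */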
2809  
2810  /*
2811   * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers.
2812   * @mas: The starting maple state
2813   * @mast: The maple_subtree_state, keeps track of 4 maple states.
2814   * @count: The estimated count of iterations needed.
2815   *
2816   * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root
2817   * is hit.  First @b_node is split into two entries which are inserted into the
2818   * next iteration of the loop.  @b_node is returned populated with the final
2819   * iteration. @mas is used to obtain allocations.  orig_l_mas keeps track of the
2820   * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last
2821   * to account for what has been copied into the new sub-tree.  The update of
2822   * orig_l_mas->last is used in mas_consume to find the slots that will need to
2823   * be either freed or destroyed.  orig_l_mas->depth keeps track of the height of
2824   * the new sub-tree in case the sub-tree becomes the full tree.
2825   */
2826  static void mas_spanning_rebalance(struct ma_state *mas,
2827  		struct maple_subtree_state *mast, unsigned char count)
2828  {
2829  	unsigned char split, mid_split;
2830  	unsigned char slot = 0;
2831  	struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
2832  	struct maple_enode *old_enode;
2833  
2834  	MA_STATE(l_mas, mas->tree, mas->index, mas->index);
2835  	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
2836  	MA_STATE(m_mas, mas->tree, mas->index, mas->index);
2837  
2838  	/*
2839  	 * The tree needs to be rebalanced and leaves need to be kept at the same level.
2840  	 * Rebalancing is done by use of the ``struct maple_topiary``.
2841  	 */
2842  	mast->l = &l_mas;
2843  	mast->m = &m_mas;
2844  	mast->r = &r_mas;
2845  	l_mas.status = r_mas.status = m_mas.status = ma_none;
2846  
2847  	/* Check if this is not root and has sufficient data.  */
2848  	if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
2849  	    unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
2850  		mast_spanning_rebalance(mast);
2851  
2852  	l_mas.depth = 0;
2853  
2854  	/*
2855  	 * Each level of the tree is examined and balanced, pushing data to the left or
2856  	 * right, or rebalancing against left or right nodes is employed to avoid
2857  	 * rippling up the tree to limit the amount of churn.  Once a new sub-section of
2858  	 * the tree is created, there may be a mix of new and old nodes.  The old nodes
2859  	 * will have the incorrect parent pointers and currently be in two trees: the
2860  	 * original tree and the partially new tree.  To remedy the parent pointers in
2861  	 * the old tree, the new data is swapped into the active tree and a walk down
2862  	 * the tree is performed and the parent pointers are updated.
2863  	 * See mas_topiary_replace() for more information.
2864  	 */
2865  	while (count--) {
2866  		mast->bn->b_end--;
2867  		mast->bn->type = mte_node_type(mast->orig_l->node);
2868  		split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
2869  					&mid_split);
2870  		mast_set_split_parents(mast, left, middle, right, split,
2871  				       mid_split);
2872  		mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
2873  
2874  		/*
2875  		 * Copy data from next level in the tree to mast->bn from next
2876  		 * iteration
2877  		 */
2878  		memset(mast->bn, 0, sizeof(struct maple_big_node));
2879  		mast->bn->type = mte_node_type(left);
2880  		l_mas.depth++;
2881  
2882  		/* Root already stored in l->node. */
2883  		if (mas_is_root_limits(mast->l))
2884  			goto new_root;
2885  
2886  		mast_ascend(mast);
2887  		mast_combine_cp_left(mast);
2888  		l_mas.offset = mast->bn->b_end;
2889  		mab_set_b_end(mast->bn, &l_mas, left);
2890  		mab_set_b_end(mast->bn, &m_mas, middle);
2891  		mab_set_b_end(mast->bn, &r_mas, right);
2892  
2893  		/* Copy anything necessary out of the right node. */
2894  		mast_combine_cp_right(mast);
2895  		mast->orig_l->last = mast->orig_l->max;
2896  
2897  		if (mast_sufficient(mast))
2898  			continue;
2899  
2900  		if (mast_overflow(mast))
2901  			continue;
2902  
2903  		/* May be a new root stored in mast->bn */
2904  		if (mas_is_root_limits(mast->orig_l))
2905  			break;
2906  
2907  		mast_spanning_rebalance(mast);
2908  
2909  		/* rebalancing from other nodes may require another loop. */
2910  		if (!count)
2911  			count++;
2912  	}
2913  
2914  	l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
2915  				mte_node_type(mast->orig_l->node));
2916  	l_mas.depth++;
2917  	mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
2918  	mas_set_parent(mas, left, l_mas.node, slot);
2919  	if (middle)
2920  		mas_set_parent(mas, middle, l_mas.node, ++slot);
2921  
2922  	if (right)
2923  		mas_set_parent(mas, right, l_mas.node, ++slot);
2924  
2925  	if (mas_is_root_limits(mast->l)) {
2926  new_root:
2927  		mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas));
2928  		while (!mte_is_root(mast->orig_l->node))
2929  			mast_ascend(mast);
2930  	} else {
2931  		mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
2932  	}
2933  
2934  	old_enode = mast->orig_l->node;
2935  	mas->depth = l_mas.depth;
2936  	mas->node = l_mas.node;
2937  	mas->min = l_mas.min;
2938  	mas->max = l_mas.max;
2939  	mas->offset = l_mas.offset;
2940  	mas_wmb_replace(mas, old_enode);
2941  	mtree_range_walk(mas);
2942  	return;
2943  }
2944  
2945  /*
2946   * mas_rebalance() - Rebalance a given node.
2947   * @mas: The maple state
2948   * @b_node: The big maple node.
2949   *
2950   * Rebalance two nodes into a single node or two new nodes that are sufficient.
2951   * Continue upwards until tree is sufficient.
2952   */
2953  static inline void mas_rebalance(struct ma_state *mas,
2954  				struct maple_big_node *b_node)
2955  {
2956  	char empty_count = mas_mt_height(mas);
2957  	struct maple_subtree_state mast;
2958  	unsigned char shift, b_end = ++b_node->b_end;
2959  
2960  	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
2961  	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
2962  
2963  	trace_ma_op(__func__, mas);
2964  
2965  	/*
2966  	 * Rebalancing occurs if a node is insufficient.  Data is rebalanced
2967  	 * against the node to the right if it exists, otherwise the node to the
2968  	 * left of this node is rebalanced against this node.  If rebalancing
2969  	 * causes just one node to be produced instead of two, then the parent
2970  	 * is also examined and rebalanced if it is insufficient.  Every level
2971  	 * tries to combine the data in the same way.  If one node contains the
2972  	 * entire range of the tree, then that node is used as a new root node.
2973  	 */
2974  
2975  	mast.orig_l = &l_mas;
2976  	mast.orig_r = &r_mas;
2977  	mast.bn = b_node;
2978  	mast.bn->type = mte_node_type(mas->node);
2979  
2980  	l_mas = r_mas = *mas;
2981  
2982  	if (mas_next_sibling(&r_mas)) {
2983  		mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
2984  		r_mas.last = r_mas.index = r_mas.max;
2985  	} else {
2986  		mas_prev_sibling(&l_mas);
2987  		shift = mas_data_end(&l_mas) + 1;
2988  		mab_shift_right(b_node, shift);
2989  		mas->offset += shift;
2990  		mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
2991  		b_node->b_end = shift + b_end;
2992  		l_mas.index = l_mas.last = l_mas.min;
2993  	}
2994  
2995  	return mas_spanning_rebalance(mas, &mast, empty_count);
2996  }
2997  
2998  /*
2999   * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
3000   * state.
3001   * @mas: The maple state
3002   * @end: The end of the left-most node.
3003   *
3004   * During a mass-insert event (such as forking), it may be necessary to
3005   * rebalance the left-most node when it is not sufficient.
3006   */
3007  static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
3008  {
3009  	enum maple_type mt = mte_node_type(mas->node);
3010  	struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
3011  	struct maple_enode *eparent, *old_eparent;
3012  	unsigned char offset, tmp, split = mt_slots[mt] / 2;
3013  	void __rcu **l_slots, **slots;
3014  	unsigned long *l_pivs, *pivs, gap;
3015  	bool in_rcu = mt_in_rcu(mas->tree);
3016  
3017  	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3018  
3019  	l_mas = *mas;
3020  	mas_prev_sibling(&l_mas);
3021  
3022  	/* set up node. */
3023  	if (in_rcu) {
3024  		newnode = mas_pop_node(mas);
3025  	} else {
3026  		newnode = &reuse;
3027  	}
3028  
3029  	node = mas_mn(mas);
3030  	newnode->parent = node->parent;
3031  	slots = ma_slots(newnode, mt);
3032  	pivs = ma_pivots(newnode, mt);
3033  	left = mas_mn(&l_mas);
3034  	l_slots = ma_slots(left, mt);
3035  	l_pivs = ma_pivots(left, mt);
3036  	if (!l_slots[split])
3037  		split++;
3038  	tmp = mas_data_end(&l_mas) - split;
3039  
3040  	memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
3041  	memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
3042  	pivs[tmp] = l_mas.max;
3043  	memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
3044  	memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
3045  
3046  	l_mas.max = l_pivs[split];
3047  	mas->min = l_mas.max + 1;
3048  	old_eparent = mt_mk_node(mte_parent(l_mas.node),
3049  			     mas_parent_type(&l_mas, l_mas.node));
3050  	tmp += end;
3051  	if (!in_rcu) {
3052  		unsigned char max_p = mt_pivots[mt];
3053  		unsigned char max_s = mt_slots[mt];
3054  
3055  		if (tmp < max_p)
3056  			memset(pivs + tmp, 0,
3057  			       sizeof(unsigned long) * (max_p - tmp));
3058  
3059  		if (tmp < mt_slots[mt])
3060  			memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3061  
3062  		memcpy(node, newnode, sizeof(struct maple_node));
3063  		ma_set_meta(node, mt, 0, tmp - 1);
3064  		mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node),
3065  			      l_pivs[split]);
3066  
3067  		/* Remove data from l_pivs. */
3068  		tmp = split + 1;
3069  		memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
3070  		memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3071  		ma_set_meta(left, mt, 0, split);
3072  		eparent = old_eparent;
3073  
3074  		goto done;
3075  	}
3076  
3077  	/* RCU requires replacing both l_mas, mas, and parent. */
3078  	mas->node = mt_mk_node(newnode, mt);
3079  	ma_set_meta(newnode, mt, 0, tmp);
3080  
3081  	new_left = mas_pop_node(mas);
3082  	new_left->parent = left->parent;
3083  	mt = mte_node_type(l_mas.node);
3084  	slots = ma_slots(new_left, mt);
3085  	pivs = ma_pivots(new_left, mt);
3086  	memcpy(slots, l_slots, sizeof(void *) * split);
3087  	memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
3088  	ma_set_meta(new_left, mt, 0, split);
3089  	l_mas.node = mt_mk_node(new_left, mt);
3090  
3091  	/* replace parent. */
3092  	offset = mte_parent_slot(mas->node);
3093  	mt = mas_parent_type(&l_mas, l_mas.node);
3094  	parent = mas_pop_node(mas);
3095  	slots = ma_slots(parent, mt);
3096  	pivs = ma_pivots(parent, mt);
3097  	memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node));
3098  	rcu_assign_pointer(slots[offset], mas->node);
3099  	rcu_assign_pointer(slots[offset - 1], l_mas.node);
3100  	pivs[offset - 1] = l_mas.max;
3101  	eparent = mt_mk_node(parent, mt);
3102  done:
3103  	gap = mas_leaf_max_gap(mas);
3104  	mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
3105  	gap = mas_leaf_max_gap(&l_mas);
3106  	mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
3107  	mas_ascend(mas);
3108  
3109  	if (in_rcu) {
3110  		mas_replace_node(mas, old_eparent);
3111  		mas_adopt_children(mas, mas->node);
3112  	}
3113  
3114  	mas_update_gap(mas);
3115  }
3116  
3117  /*
3118   * mas_split_final_node() - Split the final node in a subtree operation.
3119   * @mast: the maple subtree state
3120   * @mas: The maple state
3121   * @height: The height of the tree in case it's a new root.
3122   */
3123  static inline void mas_split_final_node(struct maple_subtree_state *mast,
3124  					struct ma_state *mas, int height)
3125  {
3126  	struct maple_enode *ancestor;
3127  
3128  	if (mte_is_root(mas->node)) {
3129  		if (mt_is_alloc(mas->tree))
3130  			mast->bn->type = maple_arange_64;
3131  		else
3132  			mast->bn->type = maple_range_64;
3133  		mas->depth = height;
3134  	}
3135  	/*
3136  	 * Only a single node is used here, could be root.
3137  	 * The Big_node data should just fit in a single node.
3138  	 */
3139  	ancestor = mas_new_ma_node(mas, mast->bn);
3140  	mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset);
3141  	mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset);
3142  	mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
3143  
3144  	mast->l->node = ancestor;
3145  	mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
3146  	mas->offset = mast->bn->b_end - 1;
3147  }
3148  
3149  /*
3150   * mast_fill_bnode() - Copy data into the big node in the subtree state
3151   * @mast: The maple subtree state
3152   * @mas: the maple state
3153   * @skip: The number of entries to skip for new nodes insertion.
3154   */
3155  static inline void mast_fill_bnode(struct maple_subtree_state *mast,
3156  					 struct ma_state *mas,
3157  					 unsigned char skip)
3158  {
3159  	bool cp = true;
3160  	unsigned char split;
3161  
3162  	memset(mast->bn, 0, sizeof(struct maple_big_node));
3163  
3164  	if (mte_is_root(mas->node)) {
3165  		cp = false;
3166  	} else {
3167  		mas_ascend(mas);
3168  		mas->offset = mte_parent_slot(mas->node);
3169  	}
3170  
3171  	if (cp && mast->l->offset)
3172  		mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
3173  
3174  	split = mast->bn->b_end;
3175  	mab_set_b_end(mast->bn, mast->l, mast->l->node);
3176  	mast->r->offset = mast->bn->b_end;
3177  	mab_set_b_end(mast->bn, mast->r, mast->r->node);
3178  	if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
3179  		cp = false;
3180  
3181  	if (cp)
3182  		mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
3183  			   mast->bn, mast->bn->b_end);
3184  
3185  	mast->bn->b_end--;
3186  	mast->bn->type = mte_node_type(mas->node);
3187  }
3188  
3189  /*
3190   * mast_split_data() - Split the data in the subtree state big node into regular
3191   * nodes.
3192   * @mast: The maple subtree state
3193   * @mas: The maple state
3194   * @split: The location to split the big node
3195   */
3196  static inline void mast_split_data(struct maple_subtree_state *mast,
3197  	   struct ma_state *mas, unsigned char split)
3198  {
3199  	unsigned char p_slot;
3200  
3201  	mab_mas_cp(mast->bn, 0, split, mast->l, true);
3202  	mte_set_pivot(mast->r->node, 0, mast->r->max);
3203  	mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
3204  	mast->l->offset = mte_parent_slot(mas->node);
3205  	mast->l->max = mast->bn->pivot[split];
3206  	mast->r->min = mast->l->max + 1;
3207  	if (mte_is_leaf(mas->node))
3208  		return;
3209  
3210  	p_slot = mast->orig_l->offset;
3211  	mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
3212  			     &p_slot, split);
3213  	mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
3214  			     &p_slot, split);
3215  }
3216  
3217  /*
3218   * mas_push_data() - Instead of splitting a node, it is beneficial to push the
3219   * data to the right or left node if there is room.
3220   * @mas: The maple state
3221   * @height: The current height of the maple state
3222   * @mast: The maple subtree state
3223   * @left: Push left or not.
3224   *
3225   * Keeping the height of the tree low means faster lookups.
3226   *
3227   * Return: True if pushed, false otherwise.
3228   */
3229  static inline bool mas_push_data(struct ma_state *mas, int height,
3230  				 struct maple_subtree_state *mast, bool left)
3231  {
3232  	unsigned char slot_total = mast->bn->b_end;
3233  	unsigned char end, space, split;
3234  
3235  	MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
3236  	tmp_mas = *mas;
3237  	tmp_mas.depth = mast->l->depth;
3238  
3239  	if (left && !mas_prev_sibling(&tmp_mas))
3240  		return false;
3241  	else if (!left && !mas_next_sibling(&tmp_mas))
3242  		return false;
3243  
3244  	end = mas_data_end(&tmp_mas);
3245  	slot_total += end;
3246  	space = 2 * mt_slot_count(mas->node) - 2;
3247  	/* -2 instead of -1 to ensure there isn't a triple split */
3248  	if (ma_is_leaf(mast->bn->type))
3249  		space--;
3250  
3251  	if (mas->max == ULONG_MAX)
3252  		space--;
3253  
3254  	if (slot_total >= space)
3255  		return false;
3256  
3257  	/* Get the data; Fill mast->bn */
3258  	mast->bn->b_end++;
3259  	if (left) {
3260  		mab_shift_right(mast->bn, end + 1);
3261  		mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
3262  		mast->bn->b_end = slot_total + 1;
3263  	} else {
3264  		mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
3265  	}
3266  
3267  	/* Configure mast for splitting of mast->bn */
3268  	split = mt_slots[mast->bn->type] - 2;
3269  	if (left) {
3270  		/*  Switch mas to prev node  */
3271  		*mas = tmp_mas;
3272  		/* Start using mast->l for the left side. */
3273  		tmp_mas.node = mast->l->node;
3274  		*mast->l = tmp_mas;
3275  	} else {
3276  		tmp_mas.node = mast->r->node;
3277  		*mast->r = tmp_mas;
3278  		split = slot_total - split;
3279  	}
3280  	split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
3281  	/* Update parent slot for split calculation. */
3282  	if (left)
3283  		mast->orig_l->offset += end + 1;
3284  
3285  	mast_split_data(mast, mas, split);
3286  	mast_fill_bnode(mast, mas, 2);
3287  	mas_split_final_node(mast, mas, height + 1);
3288  	return true;
3289  }
3290  
3291  /*
3292   * mas_split() - Split data that is too big for one node into two.
3293   * @mas: The maple state
3294   * @b_node: The maple big node
3295   */
3296  static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
3297  {
3298  	struct maple_subtree_state mast;
3299  	int height = 0;
3300  	unsigned char mid_split, split = 0;
3301  	struct maple_enode *old;
3302  
3303  	/*
3304  	 * Splitting is handled differently from any other B-tree; the Maple
3305  	 * Tree splits upwards.  Splitting up means that the split operation
3306  	 * occurs when the walk of the tree hits the leaves and not on the way
3307  	 * down.  The reason for splitting up is that it is impossible to know
3308  	 * how much space will be needed until the leaf is (or leaves are)
3309  	 * reached.  Since overwriting data is allowed and a range could
3310  	 * overwrite more than one range or result in changing one entry into 3
3311  	 * entries, it is impossible to know if a split is required until the
3312  	 * data is examined.
3313  	 *
3314  	 * Splitting is a balancing act between keeping allocations to a minimum
3315  	 * and avoiding a 'jitter' event where a tree is expanded to make room
3316  	 * for an entry followed by a contraction when the entry is removed.  To
3317  	 * accomplish the balance, there are empty slots remaining in both left
3318  	 * and right nodes after a split.
3319  	 */
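	/*
	 * For example, if a leaf holds [0-10]=A, [11-20]=B, [21-30]=C, storing D
	 * over [12-18] turns the single entry B into three entries ([11-11]=B,
	 * [12-18]=D, [19-20]=B), while storing D over [5-30] collapses all three
	 * into two ([0-4]=A, [5-30]=D).  Whether this leaf overflows is therefore
	 * only known once its contents are examined.
	 */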
3320  	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3321  	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3322  	MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
3323  	MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
3324  
3325  	trace_ma_op(__func__, mas);
3326  	mas->depth = mas_mt_height(mas);
3327  
3328  	mast.l = &l_mas;
3329  	mast.r = &r_mas;
3330  	mast.orig_l = &prev_l_mas;
3331  	mast.orig_r = &prev_r_mas;
3332  	mast.bn = b_node;
3333  
3334  	while (height++ <= mas->depth) {
3335  		if (mt_slots[b_node->type] > b_node->b_end) {
3336  			mas_split_final_node(&mast, mas, height);
3337  			break;
3338  		}
3339  
3340  		l_mas = r_mas = *mas;
3341  		l_mas.node = mas_new_ma_node(mas, b_node);
3342  		r_mas.node = mas_new_ma_node(mas, b_node);
3343  		/*
3344  		 * Another way that 'jitter' is avoided is to terminate a split up early if the
3345  		 * left or right node has space to spare.  This is referred to as "pushing left"
3346  		 * or "pushing right" and is similar to the B* tree, except the left or right
3347  		 * nodes can rarely be reused due to RCU, but the upward ripple is halted,
3348  		 * which is a significant saving.
3349  		 */
3350  		/* Try to push left. */
3351  		if (mas_push_data(mas, height, &mast, true))
3352  			break;
3353  		/* Try to push right. */
3354  		if (mas_push_data(mas, height, &mast, false))
3355  			break;
3356  
3357  		split = mab_calc_split(mas, b_node, &mid_split);
3358  		mast_split_data(&mast, mas, split);
3359  		/*
3360  		 * mab_mas_cp() in the call above overwrites r->max, so set it
3361  		 * back to mas->max here; this is usually the correct value.
3362  		 */
3363  		mast.r->max = mas->max;
3364  		mast_fill_bnode(&mast, mas, 1);
3365  		prev_l_mas = *mast.l;
3366  		prev_r_mas = *mast.r;
3367  	}
3368  
3369  	/* Set the original node as dead */
3370  	old = mas->node;
3371  	mas->node = l_mas.node;
3372  	mas_wmb_replace(mas, old);
3373  	mtree_range_walk(mas);
3374  	return;
3375  }
3376  
3377  /*
3378   * mas_commit_b_node() - Commit the big node into the tree.
3379   * @wr_mas: The maple write state
3380   * @b_node: The maple big node
3381   */
3382  static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas,
3383  			    struct maple_big_node *b_node)
3384  {
3385  	enum store_type type = wr_mas->mas->store_type;
3386  
3387  	WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store);
3388  
3389  	if (type == wr_rebalance)
3390  		return mas_rebalance(wr_mas->mas, b_node);
3391  
3392  	return mas_split(wr_mas->mas, b_node);
3393  }
3394  
3395  /*
3396   * mas_root_expand() - Expand a root to a node
3397   * @mas: The maple state
3398   * @entry: The entry to store into the tree
3399   */
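/*
 * An illustrative layout (a sketch, assuming a freshly zeroed node): if the
 * root currently holds a single pointer P (covering index 0 only) and an entry
 * E is stored at [5, 10], the new leaf_64 is built as slot 0 = P with pivot 0,
 * slot 1 = NULL with pivot 4, slot 2 = E with pivot 10 and slot 3 = NULL with
 * pivot ULONG_MAX, and is then published as the new root.
 */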
3400  static inline void mas_root_expand(struct ma_state *mas, void *entry)
3401  {
3402  	void *contents = mas_root_locked(mas);
3403  	enum maple_type type = maple_leaf_64;
3404  	struct maple_node *node;
3405  	void __rcu **slots;
3406  	unsigned long *pivots;
3407  	int slot = 0;
3408  
3409  	node = mas_pop_node(mas);
3410  	pivots = ma_pivots(node, type);
3411  	slots = ma_slots(node, type);
3412  	node->parent = ma_parent_ptr(mas_tree_parent(mas));
3413  	mas->node = mt_mk_node(node, type);
3414  	mas->status = ma_active;
3415  
3416  	if (mas->index) {
3417  		if (contents) {
3418  			rcu_assign_pointer(slots[slot], contents);
3419  			if (likely(mas->index > 1))
3420  				slot++;
3421  		}
3422  		pivots[slot++] = mas->index - 1;
3423  	}
3424  
3425  	rcu_assign_pointer(slots[slot], entry);
3426  	mas->offset = slot;
3427  	pivots[slot] = mas->last;
3428  	if (mas->last != ULONG_MAX)
3429  		pivots[++slot] = ULONG_MAX;
3430  
3431  	mas->depth = 1;
3432  	mas_set_height(mas);
3433  	ma_set_meta(node, maple_leaf_64, 0, slot);
3434  	/* swap the new root into the tree */
3435  	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3436  	return;
3437  }
3438  
3439  /*
3440   * mas_store_root() - Storing value into root.
3441   * @mas: The maple state
3442   * @entry: The entry to store.
3443   *
3444   * There is no root node now and we are storing a value into the root - this
3445   * function either assigns the pointer or expands into a node.
3446   */
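/*
 * Concretely: storing NULL at index 0 simply clears ma_root; storing a normal
 * pointer over exactly [0, 0] is kept encoded directly in ma_root; any other
 * case (a non-zero index or last, or a value that cannot be encoded directly
 * in the root pointer) expands the root into a leaf node first.
 */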
3447  static inline void mas_store_root(struct ma_state *mas, void *entry)
3448  {
3449  	if (!entry) {
3450  		if (!mas->index)
3451  			rcu_assign_pointer(mas->tree->ma_root, NULL);
3452  	} else if (likely((mas->last != 0) || (mas->index != 0)))
3453  		mas_root_expand(mas, entry);
3454  	else if (((unsigned long) (entry) & 3) == 2)
3455  		mas_root_expand(mas, entry);
3456  	else {
3457  		rcu_assign_pointer(mas->tree->ma_root, entry);
3458  		mas->status = ma_start;
3459  	}
3460  }
3461  
3462  /*
3463   * mas_is_span_wr() - Check if the write needs to be treated as a write that
3464   * spans the node.
3465   * @wr_mas: The maple write state
3466   *
3467   * Spanning writes are writes that start in one node and end in another, or
3468   * writes of a %NULL that will cause the node to end with a %NULL.
3469   *
3470   * Return: True if this is a spanning write, false otherwise.
3471   */
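/*
 * For example, with one leaf covering [0, 100] and the next covering
 * [101, 200], storing over [90, 120] starts in one leaf and ends in the other,
 * so it spans.  Storing a %NULL over [90, 100] is also treated as spanning
 * even though it fits in the first leaf, because a leaf (other than the
 * rightmost one) must not end in %NULL and a rebalance is needed.
 */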
3472  static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
3473  {
3474  	unsigned long max = wr_mas->r_max;
3475  	unsigned long last = wr_mas->mas->last;
3476  	enum maple_type type = wr_mas->type;
3477  	void *entry = wr_mas->entry;
3478  
3479  	/* Contained in this pivot, fast path */
3480  	if (last < max)
3481  		return false;
3482  
3483  	if (ma_is_leaf(type)) {
3484  		max = wr_mas->mas->max;
3485  		if (last < max)
3486  			return false;
3487  	}
3488  
3489  	if (last == max) {
3490  		/*
3491  		 * The last entry of a leaf node cannot be NULL unless it is the
3492  		 * rightmost node (writing ULONG_MAX); otherwise it spans slots.
3493  		 */
3494  		if (entry || last == ULONG_MAX)
3495  			return false;
3496  	}
3497  
3498  	trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry);
3499  	return true;
3500  }
3501  
3502  static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
3503  {
3504  	wr_mas->type = mte_node_type(wr_mas->mas->node);
3505  	mas_wr_node_walk(wr_mas);
3506  	wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
3507  }
3508  
3509  static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
3510  {
3511  	wr_mas->mas->max = wr_mas->r_max;
3512  	wr_mas->mas->min = wr_mas->r_min;
3513  	wr_mas->mas->node = wr_mas->content;
3514  	wr_mas->mas->offset = 0;
3515  	wr_mas->mas->depth++;
3516  }
3517  /*
3518   * mas_wr_walk() - Walk the tree for a write.
3519   * @wr_mas: The maple write state
3520   *
3521   * Uses mas_slot_locked() and does not need to worry about dead nodes.
3522   *
3523   * Return: True if it's contained in a node, false on spanning write.
3524   */
3525  static bool mas_wr_walk(struct ma_wr_state *wr_mas)
3526  {
3527  	struct ma_state *mas = wr_mas->mas;
3528  
3529  	while (true) {
3530  		mas_wr_walk_descend(wr_mas);
3531  		if (unlikely(mas_is_span_wr(wr_mas)))
3532  			return false;
3533  
3534  		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3535  						  mas->offset);
3536  		if (ma_is_leaf(wr_mas->type))
3537  			return true;
3538  
3539  		mas_wr_walk_traverse(wr_mas);
3540  	}
3541  
3542  	return true;
3543  }
3544  
3545  static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
3546  {
3547  	struct ma_state *mas = wr_mas->mas;
3548  
3549  	while (true) {
3550  		mas_wr_walk_descend(wr_mas);
3551  		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3552  						  mas->offset);
3553  		if (ma_is_leaf(wr_mas->type))
3554  			return;
3555  		mas_wr_walk_traverse(wr_mas);
3556  	}
3557  }
3558  /*
3559   * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
3560   * @l_wr_mas: The left maple write state
3561   * @r_wr_mas: The right maple write state
3562   */
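/*
 * For example, if the tree holds [0-4]=A, [5-9]=NULL, [10-20]=B, [21-30]=NULL,
 * [31-40]=C and a %NULL is stored over [10, 20], the write is widened to
 * [5, 30] so the neighbouring %NULL slots are absorbed instead of being left
 * as separate adjacent %NULL ranges.
 */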
3563  static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
3564  					    struct ma_wr_state *r_wr_mas)
3565  {
3566  	struct ma_state *r_mas = r_wr_mas->mas;
3567  	struct ma_state *l_mas = l_wr_mas->mas;
3568  	unsigned char l_slot;
3569  
3570  	l_slot = l_mas->offset;
3571  	if (!l_wr_mas->content)
3572  		l_mas->index = l_wr_mas->r_min;
3573  
3574  	if ((l_mas->index == l_wr_mas->r_min) &&
3575  		 (l_slot &&
3576  		  !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
3577  		if (l_slot > 1)
3578  			l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
3579  		else
3580  			l_mas->index = l_mas->min;
3581  
3582  		l_mas->offset = l_slot - 1;
3583  	}
3584  
3585  	if (!r_wr_mas->content) {
3586  		if (r_mas->last < r_wr_mas->r_max)
3587  			r_mas->last = r_wr_mas->r_max;
3588  		r_mas->offset++;
3589  	} else if ((r_mas->last == r_wr_mas->r_max) &&
3590  	    (r_mas->last < r_mas->max) &&
3591  	    !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
3592  		r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
3593  					     r_wr_mas->type, r_mas->offset + 1);
3594  		r_mas->offset++;
3595  	}
3596  }
3597  
3598  static inline void *mas_state_walk(struct ma_state *mas)
3599  {
3600  	void *entry;
3601  
3602  	entry = mas_start(mas);
3603  	if (mas_is_none(mas))
3604  		return NULL;
3605  
3606  	if (mas_is_ptr(mas))
3607  		return entry;
3608  
3609  	return mtree_range_walk(mas);
3610  }
3611  
3612  /*
3613   * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
3614   * to date.
3615   *
3616   * @mas: The maple state.
3617   *
3618   * Note: Leaves mas in an undesirable state.
3619   * Return: The entry for @mas->index or %NULL on dead node.
3620   */
3621  static inline void *mtree_lookup_walk(struct ma_state *mas)
3622  {
3623  	unsigned long *pivots;
3624  	unsigned char offset;
3625  	struct maple_node *node;
3626  	struct maple_enode *next;
3627  	enum maple_type type;
3628  	void __rcu **slots;
3629  	unsigned char end;
3630  
3631  	next = mas->node;
3632  	do {
3633  		node = mte_to_node(next);
3634  		type = mte_node_type(next);
3635  		pivots = ma_pivots(node, type);
3636  		end = mt_pivots[type];
3637  		offset = 0;
3638  		do {
3639  			if (pivots[offset] >= mas->index)
3640  				break;
3641  		} while (++offset < end);
3642  
3643  		slots = ma_slots(node, type);
3644  		next = mt_slot(mas->tree, slots, offset);
3645  		if (unlikely(ma_dead_node(node)))
3646  			goto dead_node;
3647  	} while (!ma_is_leaf(type));
3648  
3649  	return (void *)next;
3650  
3651  dead_node:
3652  	mas_reset(mas);
3653  	return NULL;
3654  }
3655  
3656  static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
3657  /*
3658   * mas_new_root() - Create a new root node that only contains the entry passed
3659   * in.
3660   * @mas: The maple state
3661   * @entry: The entry to store.
3662   *
3663   * Only valid when the index == 0 and the last == ULONG_MAX
3664   */
3665  static inline void mas_new_root(struct ma_state *mas, void *entry)
3666  {
3667  	struct maple_enode *root = mas_root_locked(mas);
3668  	enum maple_type type = maple_leaf_64;
3669  	struct maple_node *node;
3670  	void __rcu **slots;
3671  	unsigned long *pivots;
3672  
3673  	WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX);
3674  
3675  	if (!entry) {
3676  		mas->depth = 0;
3677  		mas_set_height(mas);
3678  		rcu_assign_pointer(mas->tree->ma_root, entry);
3679  		mas->status = ma_start;
3680  		goto done;
3681  	}
3682  
3683  	node = mas_pop_node(mas);
3684  	pivots = ma_pivots(node, type);
3685  	slots = ma_slots(node, type);
3686  	node->parent = ma_parent_ptr(mas_tree_parent(mas));
3687  	mas->node = mt_mk_node(node, type);
3688  	mas->status = ma_active;
3689  	rcu_assign_pointer(slots[0], entry);
3690  	pivots[0] = mas->last;
3691  	mas->depth = 1;
3692  	mas_set_height(mas);
3693  	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3694  
3695  done:
3696  	if (xa_is_node(root))
3697  		mte_destroy_walk(root, mas->tree);
3698  
3699  	return;
3700  }
3701  /*
3702   * mas_wr_spanning_store() - Create a subtree with the store operation completed
3703   * and new nodes where necessary, then place the sub-tree in the actual tree.
3704   * Note that mas is expected to point to the node which caused the store to
3705   * span.
3706   * @wr_mas: The maple write state
3707   */
3708  static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
3709  {
3710  	struct maple_subtree_state mast;
3711  	struct maple_big_node b_node;
3712  	struct ma_state *mas;
3713  	unsigned char height;
3714  
3715  	/* Left and Right side of spanning store */
3716  	MA_STATE(l_mas, NULL, 0, 0);
3717  	MA_STATE(r_mas, NULL, 0, 0);
3718  	MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
3719  	MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
3720  
3721  	/*
3722  	 * A store operation that spans multiple nodes is called a spanning
3723  	 * store and is handled early in the store call stack by the function
3724  	 * mas_is_span_wr().  When a spanning store is identified, the maple
3725  	 * state is duplicated.  The first maple state walks the left tree path
3726  	 * to ``index``, the duplicate walks the right tree path to ``last``.
3727  	 * The data in the two nodes are combined into a single node, two nodes,
3728  	 * or possibly three nodes (see the 3-way split above).  A ``NULL``
3729  	 * written to the last entry of a node is considered a spanning store as
3730  	 * a rebalance is required for the operation to complete and an overflow
3731  	 * of data may happen.
3732  	 */
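	/*
	 * As a concrete sketch: storing over [150, 250] when the leaves split at
	 * 200 walks the left state down to the leaf containing 150 and the right
	 * state down past 250 (via 251, to pick up the slot following the store).
	 * The surviving entries of both leaves plus the new entry are then
	 * gathered into b_node below and rebalanced back into the tree.
	 */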
3733  	mas = wr_mas->mas;
3734  	trace_ma_op(__func__, mas);
3735  
3736  	if (unlikely(!mas->index && mas->last == ULONG_MAX))
3737  		return mas_new_root(mas, wr_mas->entry);
3738  	/*
3739  	 * Node rebalancing may occur due to this store, so there may be three new
3740  	 * nodes per level plus a new root.
3741  	 */
3742  	height = mas_mt_height(mas);
3743  
3744  	/*
3745  	 * Set up the right side.  We need to get to the next offset after the
3746  	 * spanning store to ensure it's not NULL, and to combine both the next
3747  	 * node and the node with the start together.
3748  	 */
3749  	r_mas = *mas;
3750  	/* Avoid overflow, walk to next slot in the tree. */
3751  	if (r_mas.last + 1)
3752  		r_mas.last++;
3753  
3754  	r_mas.index = r_mas.last;
3755  	mas_wr_walk_index(&r_wr_mas);
3756  	r_mas.last = r_mas.index = mas->last;
3757  
3758  	/* Set up left side. */
3759  	l_mas = *mas;
3760  	mas_wr_walk_index(&l_wr_mas);
3761  
3762  	if (!wr_mas->entry) {
3763  		mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
3764  		mas->offset = l_mas.offset;
3765  		mas->index = l_mas.index;
3766  		mas->last = l_mas.last = r_mas.last;
3767  	}
3768  
3769  	/* expanding NULLs may make this cover the entire range */
3770  	if (!l_mas.index && r_mas.last == ULONG_MAX) {
3771  		mas_set_range(mas, 0, ULONG_MAX);
3772  		return mas_new_root(mas, wr_mas->entry);
3773  	}
3774  
3775  	memset(&b_node, 0, sizeof(struct maple_big_node));
3776  	/* Copy l_mas and store the value in b_node. */
3777  	mas_store_b_node(&l_wr_mas, &b_node, l_mas.end);
3778  	/* Copy r_mas into b_node if there is anything to copy. */
3779  	if (r_mas.max > r_mas.last)
3780  		mas_mab_cp(&r_mas, r_mas.offset, r_mas.end,
3781  			   &b_node, b_node.b_end + 1);
3782  	else
3783  		b_node.b_end++;
3784  
3785  	/* Stop spanning searches by searching for just index. */
3786  	l_mas.index = l_mas.last = mas->index;
3787  
3788  	mast.bn = &b_node;
3789  	mast.orig_l = &l_mas;
3790  	mast.orig_r = &r_mas;
3791  	/* Combine l_mas and r_mas and split them up evenly again. */
3792  	return mas_spanning_rebalance(mas, &mast, height + 1);
3793  }
3794  
3795  /*
3796   * mas_wr_node_store() - Attempt to store the value in a node
3797   * @wr_mas: The maple write state
3798   *
3799   * Attempts to reuse the node, but may allocate.
3800   */
3801  static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
3802  				     unsigned char new_end)
3803  {
3804  	struct ma_state *mas = wr_mas->mas;
3805  	void __rcu **dst_slots;
3806  	unsigned long *dst_pivots;
3807  	unsigned char dst_offset, offset_end = wr_mas->offset_end;
3808  	struct maple_node reuse, *newnode;
3809  	unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type];
3810  	bool in_rcu = mt_in_rcu(mas->tree);
3811  
3812  	if (mas->last == wr_mas->end_piv)
3813  		offset_end++; /* don't copy this offset */
3814  	else if (unlikely(wr_mas->r_max == ULONG_MAX))
3815  		mas_bulk_rebalance(mas, mas->end, wr_mas->type);
3816  
3817  	/* set up node. */
3818  	if (in_rcu) {
3819  		newnode = mas_pop_node(mas);
3820  	} else {
3821  		memset(&reuse, 0, sizeof(struct maple_node));
3822  		newnode = &reuse;
3823  	}
3824  
3825  	newnode->parent = mas_mn(mas)->parent;
3826  	dst_pivots = ma_pivots(newnode, wr_mas->type);
3827  	dst_slots = ma_slots(newnode, wr_mas->type);
3828  	/* Copy from start to insert point */
3829  	memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
3830  	memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset);
3831  
3832  	/* Handle insert of new range starting after old range */
3833  	if (wr_mas->r_min < mas->index) {
3834  		rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content);
3835  		dst_pivots[mas->offset++] = mas->index - 1;
3836  	}
3837  
3838  	/* Store the new entry and range end. */
3839  	if (mas->offset < node_pivots)
3840  		dst_pivots[mas->offset] = mas->last;
3841  	rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry);
3842  
3843  	/*
3844  	 * This range wrote to the end of the node or overwrote the rest of
3845  	 * the data.
3846  	 */
3847  	if (offset_end > mas->end)
3848  		goto done;
3849  
3850  	dst_offset = mas->offset + 1;
3851  	/* Copy to the end of node if necessary. */
3852  	copy_size = mas->end - offset_end + 1;
3853  	memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end,
3854  	       sizeof(void *) * copy_size);
3855  	memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end,
3856  	       sizeof(unsigned long) * (copy_size - 1));
3857  
3858  	if (new_end < node_pivots)
3859  		dst_pivots[new_end] = mas->max;
3860  
3861  done:
3862  	mas_leaf_set_meta(newnode, maple_leaf_64, new_end);
3863  	if (in_rcu) {
3864  		struct maple_enode *old_enode = mas->node;
3865  
3866  		mas->node = mt_mk_node(newnode, wr_mas->type);
3867  		mas_replace_node(mas, old_enode);
3868  	} else {
3869  		memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
3870  	}
3871  	trace_ma_write(__func__, mas, 0, wr_mas->entry);
3872  	mas_update_gap(mas);
3873  	mas->end = new_end;
3874  	return;
3875  }
3876  
3877  /*
3878   * mas_wr_slot_store() - Attempt to store a value in a slot.
3879   * @wr_mas: the maple write state
3880   */
3881  static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
3882  {
3883  	struct ma_state *mas = wr_mas->mas;
3884  	unsigned char offset = mas->offset;
3885  	void __rcu **slots = wr_mas->slots;
3886  	bool gap = false;
3887  
3888  	gap |= !mt_slot_locked(mas->tree, slots, offset);
3889  	gap |= !mt_slot_locked(mas->tree, slots, offset + 1);
3890  
3891  	if (wr_mas->offset_end - offset == 1) {
3892  		if (mas->index == wr_mas->r_min) {
3893  			/* Overwriting the range and a part of the next one */
3894  			rcu_assign_pointer(slots[offset], wr_mas->entry);
3895  			wr_mas->pivots[offset] = mas->last;
3896  		} else {
3897  			/* Overwriting a part of the range and the next one */
3898  			rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
3899  			wr_mas->pivots[offset] = mas->index - 1;
3900  			mas->offset++; /* Keep mas accurate. */
3901  		}
3902  	} else {
3903  		WARN_ON_ONCE(mt_in_rcu(mas->tree));
3904  		/*
3905  		 * Expand the range, only partially overwriting the previous and
3906  		 * next ranges
3907  		 */
3908  		gap |= !mt_slot_locked(mas->tree, slots, offset + 2);
3909  		rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
3910  		wr_mas->pivots[offset] = mas->index - 1;
3911  		wr_mas->pivots[offset + 1] = mas->last;
3912  		mas->offset++; /* Keep mas accurate. */
3913  	}
3914  
3915  	trace_ma_write(__func__, mas, 0, wr_mas->entry);
3916  	/*
3917  	 * Only update gap when the new entry is empty or there is an empty
3918  	 * entry in the original two ranges.
3919  	 */
3920  	if (!wr_mas->entry || gap)
3921  		mas_update_gap(mas);
3922  
3923  	return;
3924  }
3925  
3926  static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
3927  {
3928  	struct ma_state *mas = wr_mas->mas;
3929  
3930  	if (!wr_mas->slots[wr_mas->offset_end]) {
3931  		/* If this one is null, the next and prev are not */
3932  		mas->last = wr_mas->end_piv;
3933  	} else {
3934  		/* Check next slot(s) if we are overwriting the end */
3935  		if ((mas->last == wr_mas->end_piv) &&
3936  		    (mas->end != wr_mas->offset_end) &&
3937  		    !wr_mas->slots[wr_mas->offset_end + 1]) {
3938  			wr_mas->offset_end++;
3939  			if (wr_mas->offset_end == mas->end)
3940  				mas->last = mas->max;
3941  			else
3942  				mas->last = wr_mas->pivots[wr_mas->offset_end];
3943  			wr_mas->end_piv = mas->last;
3944  		}
3945  	}
3946  
3947  	if (!wr_mas->content) {
3948  		/* If this one is null, the next and prev are not */
3949  		mas->index = wr_mas->r_min;
3950  	} else {
3951  		/* Check prev slot if we are overwriting the start */
3952  		if (mas->index == wr_mas->r_min && mas->offset &&
3953  		    !wr_mas->slots[mas->offset - 1]) {
3954  			mas->offset--;
3955  			wr_mas->r_min = mas->index =
3956  				mas_safe_min(mas, wr_mas->pivots, mas->offset);
3957  			wr_mas->r_max = wr_mas->pivots[mas->offset];
3958  		}
3959  	}
3960  }
3961  
3962  static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
3963  {
3964  	while ((wr_mas->offset_end < wr_mas->mas->end) &&
3965  	       (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end]))
3966  		wr_mas->offset_end++;
3967  
3968  	if (wr_mas->offset_end < wr_mas->mas->end)
3969  		wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end];
3970  	else
3971  		wr_mas->end_piv = wr_mas->mas->max;
3972  }
3973  
3974  static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
3975  {
3976  	struct ma_state *mas = wr_mas->mas;
3977  	unsigned char new_end = mas->end + 2;
3978  
3979  	new_end -= wr_mas->offset_end - mas->offset;
3980  	if (wr_mas->r_min == mas->index)
3981  		new_end--;
3982  
3983  	if (wr_mas->end_piv == mas->last)
3984  		new_end--;
3985  
3986  	return new_end;
3987  }
3988  
3989  /*
3990   * mas_wr_append() - Attempt to append
3991   * @wr_mas: the maple write state
3992   * @new_end: The end of the node after the modification
3993   *
3994   * This is currently unsafe in rcu mode since the end of the node may be cached
3995   * by readers while the node contents may be updated, which could result in
3996   * inaccurate information.
3997   */
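/*
 * The three append layouts handled below, sketched with an existing final
 * range [10-20]=A (so wr_mas->content == A):
 *  - storing B at [15-20]: A is trimmed to [10-14] and B is appended.
 *  - storing B at [10-14]: B takes A's slot for [10-14] and A is re-appended
 *    to cover [15-20].
 *  - storing B at [13-17]: A keeps [10-12], B is appended for [13-17] and A
 *    is appended again for [18-20], consuming two new slots.
 */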
3998  static inline void mas_wr_append(struct ma_wr_state *wr_mas,
3999  		unsigned char new_end)
4000  {
4001  	struct ma_state *mas = wr_mas->mas;
4002  	void __rcu **slots;
4003  	unsigned char end = mas->end;
4004  
4005  	if (new_end < mt_pivots[wr_mas->type]) {
4006  		wr_mas->pivots[new_end] = wr_mas->pivots[end];
4007  		ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end);
4008  	}
4009  
4010  	slots = wr_mas->slots;
4011  	if (new_end == end + 1) {
4012  		if (mas->last == wr_mas->r_max) {
4013  			/* Append to end of range */
4014  			rcu_assign_pointer(slots[new_end], wr_mas->entry);
4015  			wr_mas->pivots[end] = mas->index - 1;
4016  			mas->offset = new_end;
4017  		} else {
4018  			/* Append to start of range */
4019  			rcu_assign_pointer(slots[new_end], wr_mas->content);
4020  			wr_mas->pivots[end] = mas->last;
4021  			rcu_assign_pointer(slots[end], wr_mas->entry);
4022  		}
4023  	} else {
4024  		/* Append to the range without touching any boundaries. */
4025  		rcu_assign_pointer(slots[new_end], wr_mas->content);
4026  		wr_mas->pivots[end + 1] = mas->last;
4027  		rcu_assign_pointer(slots[end + 1], wr_mas->entry);
4028  		wr_mas->pivots[end] = mas->index - 1;
4029  		mas->offset = end + 1;
4030  	}
4031  
4032  	if (!wr_mas->content || !wr_mas->entry)
4033  		mas_update_gap(mas);
4034  
4035  	mas->end = new_end;
4036  	trace_ma_write(__func__, mas, new_end, wr_mas->entry);
4037  	return;
4038  }
4039  
4040  /*
4041   * mas_wr_bnode() - Slow path for a modification.
4042   * @wr_mas: The write maple state
4043   *
4044   * This is where split and rebalance operations end up.
4045   */
4046  static void mas_wr_bnode(struct ma_wr_state *wr_mas)
4047  {
4048  	struct maple_big_node b_node;
4049  
4050  	trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
4051  	memset(&b_node, 0, sizeof(struct maple_big_node));
4052  	mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
4053  	mas_commit_b_node(wr_mas, &b_node);
4054  }
4055  
4056  /*
4057   * mas_wr_store_entry() - Internal call to store a value
4058   * @wr_mas: The maple write state
4059   */
4060  static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
4061  {
4062  	struct ma_state *mas = wr_mas->mas;
4063  	unsigned char new_end = mas_wr_new_end(wr_mas);
4064  
4065  	switch (mas->store_type) {
4066  	case wr_invalid:
4067  		MT_BUG_ON(mas->tree, 1);
4068  		return;
4069  	case wr_new_root:
4070  		mas_new_root(mas, wr_mas->entry);
4071  		break;
4072  	case wr_store_root:
4073  		mas_store_root(mas, wr_mas->entry);
4074  		break;
4075  	case wr_exact_fit:
4076  		rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
4077  		if (!!wr_mas->entry ^ !!wr_mas->content)
4078  			mas_update_gap(mas);
4079  		break;
4080  	case wr_append:
4081  		mas_wr_append(wr_mas, new_end);
4082  		break;
4083  	case wr_slot_store:
4084  		mas_wr_slot_store(wr_mas);
4085  		break;
4086  	case wr_node_store:
4087  		mas_wr_node_store(wr_mas, new_end);
4088  		break;
4089  	case wr_spanning_store:
4090  		mas_wr_spanning_store(wr_mas);
4091  		break;
4092  	case wr_split_store:
4093  	case wr_rebalance:
4094  		mas_wr_bnode(wr_mas);
4095  		break;
4096  	}
4097  
4098  	return;
4099  }
4100  
4101  static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
4102  {
4103  	struct ma_state *mas = wr_mas->mas;
4104  
4105  	if (!mas_is_active(mas)) {
4106  		if (mas_is_start(mas))
4107  			goto set_content;
4108  
4109  		if (unlikely(mas_is_paused(mas)))
4110  			goto reset;
4111  
4112  		if (unlikely(mas_is_none(mas)))
4113  			goto reset;
4114  
4115  		if (unlikely(mas_is_overflow(mas)))
4116  			goto reset;
4117  
4118  		if (unlikely(mas_is_underflow(mas)))
4119  			goto reset;
4120  	}
4121  
4122  	/*
4123  	 * A less strict version of mas_is_span_wr() where we allow spanning
4124  	 * writes within this node.  This is to stop partial walks in
4125  	 * mas_prealloc() from being reset.
4126  	 */
4127  	if (mas->last > mas->max)
4128  		goto reset;
4129  
4130  	if (wr_mas->entry)
4131  		goto set_content;
4132  
4133  	if (mte_is_leaf(mas->node) && mas->last == mas->max)
4134  		goto reset;
4135  
4136  	goto set_content;
4137  
4138  reset:
4139  	mas_reset(mas);
4140  set_content:
4141  	wr_mas->content = mas_start(mas);
4142  }
4143  
4144  /**
4145   * mas_prealloc_calc() - Calculate the number of nodes needed for a
4146   * given store operation
4147   * @mas: The maple state
4148   * @entry: The entry to store into the tree
4149   *
4150   * Return: Number of nodes required for preallocation.
4151   */
4152  static inline int mas_prealloc_calc(struct ma_state *mas, void *entry)
4153  {
4154  	int ret = mas_mt_height(mas) * 3 + 1;
4155  
4156  	switch (mas->store_type) {
4157  	case wr_invalid:
4158  		WARN_ON_ONCE(1);
4159  		break;
4160  	case wr_new_root:
4161  		ret = 1;
4162  		break;
4163  	case wr_store_root:
4164  		if (likely((mas->last != 0) || (mas->index != 0)))
4165  			ret = 1;
4166  		else if (((unsigned long) (entry) & 3) == 2)
4167  			ret = 1;
4168  		else
4169  			ret = 0;
4170  		break;
4171  	case wr_spanning_store:
4172  		ret =  mas_mt_height(mas) * 3 + 1;
4173  		break;
4174  	case wr_split_store:
4175  		ret =  mas_mt_height(mas) * 2 + 1;
4176  		break;
4177  	case wr_rebalance:
4178  		ret =  mas_mt_height(mas) * 2 - 1;
4179  		break;
4180  	case wr_node_store:
4181  		ret = mt_in_rcu(mas->tree) ? 1 : 0;
4182  		break;
4183  	case wr_append:
4184  	case wr_exact_fit:
4185  	case wr_slot_store:
4186  		ret = 0;
4187  	}
4188  
4189  	return ret;
4190  }
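
/*
 * For example, with a tree of height 3 the worst cases above work out to
 * 3 * 3 + 1 = 10 nodes for a spanning store, 2 * 3 + 1 = 7 for a split and
 * 2 * 3 - 1 = 5 for a rebalance, while appends, exact fits and slot stores
 * need no allocations at all (and a node store only needs one node when the
 * tree is in RCU mode).
 */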
4191  
4192  /*
4193   * mas_wr_store_type() - Determine the store type for a given
4194   * store operation.
4195   * @wr_mas: The maple write state
4196   *
4197   * Return: the type of store needed for the operation
4198   */
4199  static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
4200  {
4201  	struct ma_state *mas = wr_mas->mas;
4202  	unsigned char new_end;
4203  
4204  	if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
4205  		return wr_store_root;
4206  
4207  	if (unlikely(!mas_wr_walk(wr_mas)))
4208  		return wr_spanning_store;
4209  
4210  	/* At this point, we are at the leaf node that needs to be altered. */
4211  	mas_wr_end_piv(wr_mas);
4212  	if (!wr_mas->entry)
4213  		mas_wr_extend_null(wr_mas);
4214  
4215  	if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last))
4216  		return wr_exact_fit;
4217  
4218  	if (unlikely(!mas->index && mas->last == ULONG_MAX))
4219  		return wr_new_root;
4220  
4221  	new_end = mas_wr_new_end(wr_mas);
4222  	/* Potential spanning rebalance collapsing a node */
4223  	if (new_end < mt_min_slots[wr_mas->type]) {
4224  		if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK))
4225  			return  wr_rebalance;
4226  		return wr_node_store;
4227  	}
4228  
4229  	if (new_end >= mt_slots[wr_mas->type])
4230  		return wr_split_store;
4231  
4232  	if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end))
4233  		return wr_append;
4234  
4235  	if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) ||
4236  		(wr_mas->offset_end - mas->offset == 1)))
4237  		return wr_slot_store;
4238  
4239  	return wr_node_store;
4240  }
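
/*
 * For example: overwriting an entry whose range exactly matches index/last is
 * a wr_exact_fit; writing past the current end of a leaf that still has room
 * is a wr_append; a write that would leave the leaf below the minimum
 * occupancy triggers a wr_rebalance; and one that would overflow the leaf
 * becomes a wr_split_store.
 */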
4241  
4242  /**
4243   * mas_wr_preallocate() - Preallocate enough nodes for a store operation
4244   * @wr_mas: The maple write state
4245   * @entry: The entry that will be stored
4246   *
4247   */
4248  static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry)
4249  {
4250  	struct ma_state *mas = wr_mas->mas;
4251  	int request;
4252  
4253  	mas_wr_prealloc_setup(wr_mas);
4254  	mas->store_type = mas_wr_store_type(wr_mas);
4255  	request = mas_prealloc_calc(mas, entry);
4256  	if (!request)
4257  		return;
4258  
4259  	mas_node_count(mas, request);
4260  }
4261  
4262  /**
4263   * mas_insert() - Internal call to insert a value
4264   * @mas: The maple state
4265   * @entry: The entry to store
4266   *
4267   * Return: %NULL on success, or the existing contents at the requested index
4268   * otherwise.  The maple state needs to be checked for error conditions.
4269   */
4270  static inline void *mas_insert(struct ma_state *mas, void *entry)
4271  {
4272  	MA_WR_STATE(wr_mas, mas, entry);
4273  
4274  	/*
4275  	 * Inserting a new range inserts either 0, 1, or 2 pivots within the
4276  	 * tree.  If the insert fits exactly into an existing gap with a value
4277  	 * of NULL, then the slot only needs to be written with the new value.
4278  	 * If the range being inserted is adjacent to another range, then only a
4279  	 * single pivot needs to be inserted (as well as writing the entry).  If
4280  	 * the new range is within a gap but does not touch any other ranges,
4281  	 * then two pivots need to be inserted: the start - 1, and the end.  As
4282  	 * usual, the entry must be written.  Most operations require a new node
4283  	 * to be allocated and replace an existing node to ensure RCU safety,
4284  	 * when in RCU mode.  The exception to requiring a newly allocated node
4285  	 * is when inserting at the end of a node (appending).  When done
4286  	 * carefully, appending can reuse the node in place.
4287  	 */
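	/*
	 * Illustration of the 0/1/2 pivot cases: with an existing layout of
	 * [0-9]=NULL, [10-19]=A, inserting B over exactly [0-9] only writes the
	 * slot (no new pivot); inserting B at [5-9] shares the existing pivot 9
	 * and only adds the pivot 4; inserting B at [3-6] adds both the pivot 2
	 * and the pivot 6.
	 */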
4288  	wr_mas.content = mas_start(mas);
4289  	if (wr_mas.content)
4290  		goto exists;
4291  
4292  	mas_wr_preallocate(&wr_mas, entry);
4293  	if (mas_is_err(mas))
4294  		return NULL;
4295  
4296  	/* spanning writes always overwrite something */
4297  	if (mas->store_type == wr_spanning_store)
4298  		goto exists;
4299  
4300  	/* At this point, we are at the leaf node that needs to be altered. */
4301  	if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) {
4302  		wr_mas.offset_end = mas->offset;
4303  		wr_mas.end_piv = wr_mas.r_max;
4304  
4305  		if (wr_mas.content || (mas->last > wr_mas.r_max))
4306  			goto exists;
4307  	}
4308  
4309  	mas_wr_store_entry(&wr_mas);
4310  	return wr_mas.content;
4311  
4312  exists:
4313  	mas_set_err(mas, -EEXIST);
4314  	return wr_mas.content;
4315  
4316  }
4317  
4318  /**
4319   * mas_alloc_cyclic() - Internal call to find somewhere to store an entry
4320   * @mas: The maple state.
4321   * @startp: Pointer to ID.
4322   * @range_lo: Lower bound of range to search.
4323   * @range_hi: Upper bound of range to search.
4324   * @entry: The entry to store.
4325   * @next: Pointer to next ID to allocate.
4326   * @gfp: The GFP_FLAGS to use for allocations.
4327   *
4328   * Return: 0 if the allocation succeeded without wrapping, 1 if the
4329   * allocation succeeded after wrapping, or -EBUSY if there are no
4330   * free entries.
4331   */
4332  int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
4333  		void *entry, unsigned long range_lo, unsigned long range_hi,
4334  		unsigned long *next, gfp_t gfp)
4335  {
4336  	unsigned long min = range_lo;
4337  	int ret = 0;
4338  
4339  	range_lo = max(min, *next);
4340  	ret = mas_empty_area(mas, range_lo, range_hi, 1);
4341  	if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
4342  		mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
4343  		ret = 1;
4344  	}
4345  	if (ret < 0 && range_lo > min) {
4346  		mas_reset(mas);
4347  		ret = mas_empty_area(mas, min, range_hi, 1);
4348  		if (ret == 0)
4349  			ret = 1;
4350  	}
4351  	if (ret < 0)
4352  		return ret;
4353  
4354  	do {
4355  		mas_insert(mas, entry);
4356  	} while (mas_nomem(mas, gfp));
4357  	if (mas_is_err(mas))
4358  		return xa_err(mas->node);
4359  
4360  	*startp = mas->index;
4361  	*next = *startp + 1;
4362  	if (*next == 0)
4363  		mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;
4364  
4365  	mas_destroy(mas);
4366  	return ret;
4367  }
4368  EXPORT_SYMBOL(mas_alloc_cyclic);
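
/*
 * Example usage, as a sketch only: 'my_tree', 'my_entry' and 'next_id' are
 * illustrative names, not part of this file.  Allocate an ID in [1, 255],
 * preferring the value after the last one handed out:
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *	static unsigned long next_id = 1;
 *	unsigned long id;
 *	int ret;
 *
 *	mas_lock(&mas);
 *	ret = mas_alloc_cyclic(&mas, &id, my_entry, 1, 255, &next_id,
 *			       GFP_ATOMIC);
 *	mas_unlock(&mas);
 *	if (ret < 0)
 *		return ret;
 *
 * A negative return (-EBUSY) means no free slot exists in [1, 255]; ret == 1
 * means the allocation wrapped back to the start of the range.
 */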
4369  
4370  static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
4371  {
4372  retry:
4373  	mas_set(mas, index);
4374  	mas_state_walk(mas);
4375  	if (mas_is_start(mas))
4376  		goto retry;
4377  }
4378  
4379  static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas,
4380  		struct maple_node *node, const unsigned long index)
4381  {
4382  	if (unlikely(ma_dead_node(node))) {
4383  		mas_rewalk(mas, index);
4384  		return true;
4385  	}
4386  	return false;
4387  }
4388  
4389  /*
4390   * mas_prev_node() - Find the prev non-null entry at the same level in the
4391   * tree.
4393   * @mas: The maple state
4394   * @min: The lower limit to search
4395   *
4396   * The prev node value will be mas->node[mas->offset] or the status will be
4397   * ma_none.
4398   * Return: 1 if the node is dead, 0 otherwise.
4399   */
4400  static int mas_prev_node(struct ma_state *mas, unsigned long min)
4401  {
4402  	enum maple_type mt;
4403  	int offset, level;
4404  	void __rcu **slots;
4405  	struct maple_node *node;
4406  	unsigned long *pivots;
4407  	unsigned long max;
4408  
4409  	node = mas_mn(mas);
4410  	if (!mas->min)
4411  		goto no_entry;
4412  
4413  	max = mas->min - 1;
4414  	if (max < min)
4415  		goto no_entry;
4416  
4417  	level = 0;
4418  	do {
4419  		if (ma_is_root(node))
4420  			goto no_entry;
4421  
4422  		/* Walk up. */
4423  		if (unlikely(mas_ascend(mas)))
4424  			return 1;
4425  		offset = mas->offset;
4426  		level++;
4427  		node = mas_mn(mas);
4428  	} while (!offset);
4429  
4430  	offset--;
4431  	mt = mte_node_type(mas->node);
4432  	while (level > 1) {
4433  		level--;
4434  		slots = ma_slots(node, mt);
4435  		mas->node = mas_slot(mas, slots, offset);
4436  		if (unlikely(ma_dead_node(node)))
4437  			return 1;
4438  
4439  		mt = mte_node_type(mas->node);
4440  		node = mas_mn(mas);
4441  		pivots = ma_pivots(node, mt);
4442  		offset = ma_data_end(node, mt, pivots, max);
4443  		if (unlikely(ma_dead_node(node)))
4444  			return 1;
4445  	}
4446  
4447  	slots = ma_slots(node, mt);
4448  	mas->node = mas_slot(mas, slots, offset);
4449  	pivots = ma_pivots(node, mt);
4450  	if (unlikely(ma_dead_node(node)))
4451  		return 1;
4452  
4453  	if (likely(offset))
4454  		mas->min = pivots[offset - 1] + 1;
4455  	mas->max = max;
4456  	mas->offset = mas_data_end(mas);
4457  	if (unlikely(mte_dead_node(mas->node)))
4458  		return 1;
4459  
4460  	mas->end = mas->offset;
4461  	return 0;
4462  
4463  no_entry:
4464  	if (unlikely(ma_dead_node(node)))
4465  		return 1;
4466  
4467  	mas->status = ma_underflow;
4468  	return 0;
4469  }
4470  
4471  /*
4472   * mas_prev_slot() - Get the entry in the previous slot
4473   *
4474   * @mas: The maple state
4475   * @min: The minimum starting range
4476   * @empty: Can be empty
4477   *
4478   * Return: The entry in the previous slot which is possibly NULL
4479   */
4480  static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty)
4481  {
4482  	void *entry;
4483  	void __rcu **slots;
4484  	unsigned long pivot;
4485  	enum maple_type type;
4486  	unsigned long *pivots;
4487  	struct maple_node *node;
4488  	unsigned long save_point = mas->index;
4489  
4490  retry:
4491  	node = mas_mn(mas);
4492  	type = mte_node_type(mas->node);
4493  	pivots = ma_pivots(node, type);
4494  	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4495  		goto retry;
4496  
4497  	if (mas->min <= min) {
4498  		pivot = mas_safe_min(mas, pivots, mas->offset);
4499  
4500  		if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4501  			goto retry;
4502  
4503  		if (pivot <= min)
4504  			goto underflow;
4505  	}
4506  
4507  again:
4508  	if (likely(mas->offset)) {
4509  		mas->offset--;
4510  		mas->last = mas->index - 1;
4511  		mas->index = mas_safe_min(mas, pivots, mas->offset);
4512  	} else  {
4513  		if (mas->index <= min)
4514  			goto underflow;
4515  
4516  		if (mas_prev_node(mas, min)) {
4517  			mas_rewalk(mas, save_point);
4518  			goto retry;
4519  		}
4520  
4521  		if (WARN_ON_ONCE(mas_is_underflow(mas)))
4522  			return NULL;
4523  
4524  		mas->last = mas->max;
4525  		node = mas_mn(mas);
4526  		type = mte_node_type(mas->node);
4527  		pivots = ma_pivots(node, type);
4528  		mas->index = pivots[mas->offset - 1] + 1;
4529  	}
4530  
4531  	slots = ma_slots(node, type);
4532  	entry = mas_slot(mas, slots, mas->offset);
4533  	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4534  		goto retry;
4535  
4536  
4537  	if (likely(entry))
4538  		return entry;
4539  
4540  	if (!empty) {
4541  		if (mas->index <= min) {
4542  			mas->status = ma_underflow;
4543  			return NULL;
4544  		}
4545  
4546  		goto again;
4547  	}
4548  
4549  	return entry;
4550  
4551  underflow:
4552  	mas->status = ma_underflow;
4553  	return NULL;
4554  }
4555  
4556  /*
4557   * mas_next_node() - Get the next node at the same level in the tree.
4558   * @mas: The maple state
4559   * @node: The maple node
4560   * @max: The maximum pivot value to check.
4561   *
4562   * The next value will be mas->node[mas->offset] or the status will have
4563   * overflowed.
4564   * Return: 1 on dead node, 0 otherwise.
4565   */
4566  static int mas_next_node(struct ma_state *mas, struct maple_node *node,
4567  		unsigned long max)
4568  {
4569  	unsigned long min;
4570  	unsigned long *pivots;
4571  	struct maple_enode *enode;
4572  	struct maple_node *tmp;
4573  	int level = 0;
4574  	unsigned char node_end;
4575  	enum maple_type mt;
4576  	void __rcu **slots;
4577  
4578  	if (mas->max >= max)
4579  		goto overflow;
4580  
4581  	min = mas->max + 1;
4582  	level = 0;
4583  	do {
4584  		if (ma_is_root(node))
4585  			goto overflow;
4586  
4587  		/* Walk up. */
4588  		if (unlikely(mas_ascend(mas)))
4589  			return 1;
4590  
4591  		level++;
4592  		node = mas_mn(mas);
4593  		mt = mte_node_type(mas->node);
4594  		pivots = ma_pivots(node, mt);
4595  		node_end = ma_data_end(node, mt, pivots, mas->max);
4596  		if (unlikely(ma_dead_node(node)))
4597  			return 1;
4598  
4599  	} while (unlikely(mas->offset == node_end));
4600  
4601  	slots = ma_slots(node, mt);
4602  	mas->offset++;
4603  	enode = mas_slot(mas, slots, mas->offset);
4604  	if (unlikely(ma_dead_node(node)))
4605  		return 1;
4606  
4607  	if (level > 1)
4608  		mas->offset = 0;
4609  
4610  	while (unlikely(level > 1)) {
4611  		level--;
4612  		mas->node = enode;
4613  		node = mas_mn(mas);
4614  		mt = mte_node_type(mas->node);
4615  		slots = ma_slots(node, mt);
4616  		enode = mas_slot(mas, slots, 0);
4617  		if (unlikely(ma_dead_node(node)))
4618  			return 1;
4619  	}
4620  
4621  	if (!mas->offset)
4622  		pivots = ma_pivots(node, mt);
4623  
4624  	mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt);
4625  	tmp = mte_to_node(enode);
4626  	mt = mte_node_type(enode);
4627  	pivots = ma_pivots(tmp, mt);
4628  	mas->end = ma_data_end(tmp, mt, pivots, mas->max);
4629  	if (unlikely(ma_dead_node(node)))
4630  		return 1;
4631  
4632  	mas->node = enode;
4633  	mas->min = min;
4634  	return 0;
4635  
4636  overflow:
4637  	if (unlikely(ma_dead_node(node)))
4638  		return 1;
4639  
4640  	mas->status = ma_overflow;
4641  	return 0;
4642  }
4643  
4644  /*
4645   * mas_next_slot() - Get the entry in the next slot
4646   *
4647   * @mas: The maple state
4648   * @max: The maximum starting range
4649   * @empty: Can be empty
4650   *
4651   * Return: The entry in the next slot which is possibly NULL
4652   */
4653  static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty)
4654  {
4655  	void __rcu **slots;
4656  	unsigned long *pivots;
4657  	unsigned long pivot;
4658  	enum maple_type type;
4659  	struct maple_node *node;
4660  	unsigned long save_point = mas->last;
4661  	void *entry;
4662  
4663  retry:
4664  	node = mas_mn(mas);
4665  	type = mte_node_type(mas->node);
4666  	pivots = ma_pivots(node, type);
4667  	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4668  		goto retry;
4669  
4670  	if (mas->max >= max) {
4671  		if (likely(mas->offset < mas->end))
4672  			pivot = pivots[mas->offset];
4673  		else
4674  			pivot = mas->max;
4675  
4676  		if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4677  			goto retry;
4678  
4679  		if (pivot >= max) { /* Was at the limit, next will extend beyond */
4680  			mas->status = ma_overflow;
4681  			return NULL;
4682  		}
4683  	}
4684  
4685  	if (likely(mas->offset < mas->end)) {
4686  		mas->index = pivots[mas->offset] + 1;
4687  again:
4688  		mas->offset++;
4689  		if (likely(mas->offset < mas->end))
4690  			mas->last = pivots[mas->offset];
4691  		else
4692  			mas->last = mas->max;
4693  	} else  {
4694  		if (mas->last >= max) {
4695  			mas->status = ma_overflow;
4696  			return NULL;
4697  		}
4698  
4699  		if (mas_next_node(mas, node, max)) {
4700  			mas_rewalk(mas, save_point);
4701  			goto retry;
4702  		}
4703  
4704  		if (WARN_ON_ONCE(mas_is_overflow(mas)))
4705  			return NULL;
4706  
4707  		mas->offset = 0;
4708  		mas->index = mas->min;
4709  		node = mas_mn(mas);
4710  		type = mte_node_type(mas->node);
4711  		pivots = ma_pivots(node, type);
4712  		mas->last = pivots[0];
4713  	}
4714  
4715  	slots = ma_slots(node, type);
4716  	entry = mt_slot(mas->tree, slots, mas->offset);
4717  	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4718  		goto retry;
4719  
4720  	if (entry)
4721  		return entry;
4722  
4723  
4724  	if (!empty) {
4725  		if (mas->last >= max) {
4726  			mas->status = ma_overflow;
4727  			return NULL;
4728  		}
4729  
4730  		mas->index = mas->last + 1;
4731  		goto again;
4732  	}
4733  
4734  	return entry;
4735  }
4736  
4737  /*
4738   * mas_rev_awalk() - Internal function.  Reverse allocation walk.  Find the
4739   * highest gap address of a given size in a given node and descend.
4740   * @mas: The maple state
4741   * @size: The needed size.
4742   *
4743   * Return: True if found in a leaf, false otherwise.
4744   *
4745   */
4746  static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
4747  		unsigned long *gap_min, unsigned long *gap_max)
4748  {
4749  	enum maple_type type = mte_node_type(mas->node);
4750  	struct maple_node *node = mas_mn(mas);
4751  	unsigned long *pivots, *gaps;
4752  	void __rcu **slots;
4753  	unsigned long gap = 0;
4754  	unsigned long max, min;
4755  	unsigned char offset;
4756  
4757  	if (unlikely(mas_is_err(mas)))
4758  		return true;
4759  
4760  	if (ma_is_dense(type)) {
4761  		/* dense nodes. */
4762  		mas->offset = (unsigned char)(mas->index - mas->min);
4763  		return true;
4764  	}
4765  
4766  	pivots = ma_pivots(node, type);
4767  	slots = ma_slots(node, type);
4768  	gaps = ma_gaps(node, type);
4769  	offset = mas->offset;
4770  	min = mas_safe_min(mas, pivots, offset);
4771  	/* Skip out of bounds. */
4772  	while (mas->last < min)
4773  		min = mas_safe_min(mas, pivots, --offset);
4774  
4775  	max = mas_safe_pivot(mas, pivots, offset, type);
4776  	while (mas->index <= max) {
4777  		gap = 0;
4778  		if (gaps)
4779  			gap = gaps[offset];
4780  		else if (!mas_slot(mas, slots, offset))
4781  			gap = max - min + 1;
4782  
4783  		if (gap) {
4784  			if ((size <= gap) && (size <= mas->last - min + 1))
4785  				break;
4786  
4787  			if (!gaps) {
4788  				/* Skip the next slot, it cannot be a gap. */
4789  				if (offset < 2)
4790  					goto ascend;
4791  
4792  				offset -= 2;
4793  				max = pivots[offset];
4794  				min = mas_safe_min(mas, pivots, offset);
4795  				continue;
4796  			}
4797  		}
4798  
4799  		if (!offset)
4800  			goto ascend;
4801  
4802  		offset--;
4803  		max = min - 1;
4804  		min = mas_safe_min(mas, pivots, offset);
4805  	}
4806  
4807  	if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
4808  		goto no_space;
4809  
4810  	if (unlikely(ma_is_leaf(type))) {
4811  		mas->offset = offset;
4812  		*gap_min = min;
4813  		*gap_max = min + gap - 1;
4814  		return true;
4815  	}
4816  
4817  	/* descend, only happens under lock. */
4818  	mas->node = mas_slot(mas, slots, offset);
4819  	mas->min = min;
4820  	mas->max = max;
4821  	mas->offset = mas_data_end(mas);
4822  	return false;
4823  
4824  ascend:
4825  	if (!mte_is_root(mas->node))
4826  		return false;
4827  
4828  no_space:
4829  	mas_set_err(mas, -EBUSY);
4830  	return false;
4831  }
4832  
4833  static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
4834  {
4835  	enum maple_type type = mte_node_type(mas->node);
4836  	unsigned long pivot, min, gap = 0;
4837  	unsigned char offset, data_end;
4838  	unsigned long *gaps, *pivots;
4839  	void __rcu **slots;
4840  	struct maple_node *node;
4841  	bool found = false;
4842  
4843  	if (ma_is_dense(type)) {
4844  		mas->offset = (unsigned char)(mas->index - mas->min);
4845  		return true;
4846  	}
4847  
4848  	node = mas_mn(mas);
4849  	pivots = ma_pivots(node, type);
4850  	slots = ma_slots(node, type);
4851  	gaps = ma_gaps(node, type);
4852  	offset = mas->offset;
4853  	min = mas_safe_min(mas, pivots, offset);
4854  	data_end = ma_data_end(node, type, pivots, mas->max);
4855  	for (; offset <= data_end; offset++) {
4856  		pivot = mas_safe_pivot(mas, pivots, offset, type);
4857  
4858  		/* Not within lower bounds */
4859  		if (mas->index > pivot)
4860  			goto next_slot;
4861  
4862  		if (gaps)
4863  			gap = gaps[offset];
4864  		else if (!mas_slot(mas, slots, offset))
4865  			gap = min(pivot, mas->last) - max(mas->index, min) + 1;
4866  		else
4867  			goto next_slot;
4868  
4869  		if (gap >= size) {
4870  			if (ma_is_leaf(type)) {
4871  				found = true;
4872  				break;
4873  			}
4874  
4875  			mas->node = mas_slot(mas, slots, offset);
4876  			mas->min = min;
4877  			mas->max = pivot;
4878  			offset = 0;
4879  			break;
4880  		}
4881  next_slot:
4882  		min = pivot + 1;
4883  		if (mas->last <= pivot) {
4884  			mas_set_err(mas, -EBUSY);
4885  			return true;
4886  		}
4887  	}
4888  
4889  	mas->offset = offset;
4890  	return found;
4891  }
4892  
4893  /**
4894   * mas_walk() - Search for @mas->index in the tree.
4895   * @mas: The maple state.
4896   *
4897   * mas->index and mas->last will be set to the range if there is a value.  If
4898   * mas->status is ma_none, reset to ma_start
4899   *
4900   * Return: the entry at the location or %NULL.
4901   */
4902  void *mas_walk(struct ma_state *mas)
4903  {
4904  	void *entry;
4905  
4906  	if (!mas_is_active(mas) || !mas_is_start(mas))
4907  		mas->status = ma_start;
4908  retry:
4909  	entry = mas_state_walk(mas);
4910  	if (mas_is_start(mas)) {
4911  		goto retry;
4912  	} else if (mas_is_none(mas)) {
4913  		mas->index = 0;
4914  		mas->last = ULONG_MAX;
4915  	} else if (mas_is_ptr(mas)) {
4916  		if (!mas->index) {
4917  			mas->last = 0;
4918  			return entry;
4919  		}
4920  
4921  		mas->index = 1;
4922  		mas->last = ULONG_MAX;
4923  		mas->status = ma_none;
4924  		return NULL;
4925  	}
4926  
4927  	return entry;
4928  }
4929  EXPORT_SYMBOL_GPL(mas_walk);
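
/*
 * Example usage, as a sketch only ('my_tree' is an illustrative name):
 *
 *	MA_STATE(mas, &my_tree, 12, 12);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = mas_walk(&mas);
 *	rcu_read_unlock();
 *
 * On return, if an entry was found, mas.index and mas.last hold the range
 * containing index 12; otherwise entry is NULL.  The walk may also be done
 * under the tree lock instead of rcu_read_lock().
 */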
4930  
4931  static inline bool mas_rewind_node(struct ma_state *mas)
4932  {
4933  	unsigned char slot;
4934  
4935  	do {
4936  		if (mte_is_root(mas->node)) {
4937  			slot = mas->offset;
4938  			if (!slot)
4939  				return false;
4940  		} else {
4941  			mas_ascend(mas);
4942  			slot = mas->offset;
4943  		}
4944  	} while (!slot);
4945  
4946  	mas->offset = --slot;
4947  	return true;
4948  }
4949  
4950  /*
4951   * mas_skip_node() - Internal function.  Skip over a node.
4952   * @mas: The maple state.
4953   *
4954   * Return: true if there is another node, false otherwise.
4955   */
4956  static inline bool mas_skip_node(struct ma_state *mas)
4957  {
4958  	if (mas_is_err(mas))
4959  		return false;
4960  
4961  	do {
4962  		if (mte_is_root(mas->node)) {
4963  			if (mas->offset >= mas_data_end(mas)) {
4964  				mas_set_err(mas, -EBUSY);
4965  				return false;
4966  			}
4967  		} else {
4968  			mas_ascend(mas);
4969  		}
4970  	} while (mas->offset >= mas_data_end(mas));
4971  
4972  	mas->offset++;
4973  	return true;
4974  }
4975  
4976  /*
4977   * mas_awalk() - Allocation walk.  Search from low address to high, for a gap of
4978   * @size
4979   * @mas: The maple state
4980   * @size: The size of the gap required
4981   *
4982   * Search between @mas->index and @mas->last for a gap of @size.
4983   */
4984  static inline void mas_awalk(struct ma_state *mas, unsigned long size)
4985  {
4986  	struct maple_enode *last = NULL;
4987  
4988  	/*
4989  	 * There are 4 options:
4990  	 * go to child (descend)
4991  	 * go back to parent (ascend)
4992  	 * no gap found. (return, error == -EBUSY)
4993  	 * found the gap. (return)
4994  	 */
4995  	while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
4996  		if (last == mas->node)
4997  			mas_skip_node(mas);
4998  		else
4999  			last = mas->node;
5000  	}
5001  }
5002  
5003  /*
5004   * mas_sparse_area() - Internal function.  Return upper or lower limit when
5005   * searching for a gap in an empty tree.
5006   * @mas: The maple state
5007   * @min: the minimum range
5008   * @max: The maximum range
5009   * @size: The size of the gap
5010   * @fwd: Searching forward or back
5011   */
5012  static inline int mas_sparse_area(struct ma_state *mas, unsigned long min,
5013  				unsigned long max, unsigned long size, bool fwd)
5014  {
5015  	if (!unlikely(mas_is_none(mas)) && min == 0) {
5016  		min++;
5017  		/*
5018  		 * Now that min has been increased, recheck whether the requested
5019  		 * size is still satisfied.
5020  		 */
5021  		if (min > max || max - min + 1 < size)
5022  			return -EBUSY;
5023  	}
5024  	/* mas_is_ptr */
5025  
5026  	if (fwd) {
5027  		mas->index = min;
5028  		mas->last = min + size - 1;
5029  	} else {
5030  		mas->last = max;
5031  		mas->index = max - size + 1;
5032  	}
5033  	return 0;
5034  }
5035  
5036  /*
5037   * mas_empty_area() - Get the lowest address within the range that is
5038   * sufficient for the size requested.
5039   * @mas: The maple state
5040   * @min: The lowest value of the range
5041   * @max: The highest value of the range
5042   * @size: The size needed
5043   */
5044  int mas_empty_area(struct ma_state *mas, unsigned long min,
5045  		unsigned long max, unsigned long size)
5046  {
5047  	unsigned char offset;
5048  	unsigned long *pivots;
5049  	enum maple_type mt;
5050  	struct maple_node *node;
5051  
5052  	if (min > max)
5053  		return -EINVAL;
5054  
5055  	if (size == 0 || max - min < size - 1)
5056  		return -EINVAL;
5057  
5058  	if (mas_is_start(mas))
5059  		mas_start(mas);
5060  	else if (mas->offset >= 2)
5061  		mas->offset -= 2;
5062  	else if (!mas_skip_node(mas))
5063  		return -EBUSY;
5064  
5065  	/* Empty set */
5066  	if (mas_is_none(mas) || mas_is_ptr(mas))
5067  		return mas_sparse_area(mas, min, max, size, true);
5068  
5069  	/* The start of the window can only be within these values */
5070  	mas->index = min;
5071  	mas->last = max;
5072  	mas_awalk(mas, size);
5073  
5074  	if (unlikely(mas_is_err(mas)))
5075  		return xa_err(mas->node);
5076  
5077  	offset = mas->offset;
5078  	node = mas_mn(mas);
5079  	mt = mte_node_type(mas->node);
5080  	pivots = ma_pivots(node, mt);
5081  	min = mas_safe_min(mas, pivots, offset);
5082  	if (mas->index < min)
5083  		mas->index = min;
5084  	mas->last = mas->index + size - 1;
5085  	mas->end = ma_data_end(node, mt, pivots, mas->max);
5086  	return 0;
5087  }
5088  EXPORT_SYMBOL_GPL(mas_empty_area);
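
/*
 * Usage sketch (illustrative only; the tree, bounds and size are hypothetical,
 * and the tree is assumed to be an allocation tree created with
 * MT_FLAGS_ALLOC_RANGE, as the allocation walk relies on gap tracking):
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	unsigned long first_free;
 *
 *	mtree_lock(&tree);
 *	if (!mas_empty_area(&mas, 1, 1000, 16))
 *		first_free = mas.index;	// start of a 16-slot gap within [1, 1000]
 *	mtree_unlock(&tree);
 */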
5089  
5090  /*
5091   * mas_empty_area_rev() - Get the highest address within the range that is
5092   * sufficient for the size requested.
5093   * @mas: The maple state
5094   * @min: The lowest value of the range
5095   * @max: The highest value of the range
5096   * @size: The size needed
5097   */
5098  int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
5099  		unsigned long max, unsigned long size)
5100  {
5101  	struct maple_enode *last = mas->node;
5102  
5103  	if (min > max)
5104  		return -EINVAL;
5105  
5106  	if (size == 0 || max - min < size - 1)
5107  		return -EINVAL;
5108  
5109  	if (mas_is_start(mas))
5110  		mas_start(mas);
5111  	else if ((mas->offset < 2) && (!mas_rewind_node(mas)))
5112  		return -EBUSY;
5113  
5114  	if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
5115  		return mas_sparse_area(mas, min, max, size, false);
5116  	else if (mas->offset >= 2)
5117  		mas->offset -= 2;
5118  	else
5119  		mas->offset = mas_data_end(mas);
5120  
5121  
5122  	/* The start of the window can only be within these values. */
5123  	mas->index = min;
5124  	mas->last = max;
5125  
5126  	while (!mas_rev_awalk(mas, size, &min, &max)) {
5127  		if (last == mas->node) {
5128  			if (!mas_rewind_node(mas))
5129  				return -EBUSY;
5130  		} else {
5131  			last = mas->node;
5132  		}
5133  	}
5134  
5135  	if (mas_is_err(mas))
5136  		return xa_err(mas->node);
5137  
5138  	if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
5139  		return -EBUSY;
5140  
5141  	/* Trim the upper limit to the max. */
5142  	if (max < mas->last)
5143  		mas->last = max;
5144  
5145  	mas->index = mas->last - size + 1;
5146  	mas->end = mas_data_end(mas);
5147  	return 0;
5148  }
5149  EXPORT_SYMBOL_GPL(mas_empty_area_rev);
5150  
5151  /*
5152   * mte_dead_leaves() - Mark all leaves of a node as dead.
5153   * @enode: the encoded node
5154   * @mt: the maple tree
5155   * @slots: Pointer to the slot array
5156   *
5157   * Must hold the write lock.
5158   *
5159   * Return: The number of leaves marked as dead.
5160   */
5161  static inline
5162  unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
5163  			      void __rcu **slots)
5164  {
5165  	struct maple_node *node;
5166  	enum maple_type type;
5167  	void *entry;
5168  	int offset;
5169  
5170  	for (offset = 0; offset < mt_slot_count(enode); offset++) {
5171  		entry = mt_slot(mt, slots, offset);
5172  		type = mte_node_type(entry);
5173  		node = mte_to_node(entry);
5174  		/* Use both node and type to catch LE & BE metadata */
5175  		if (!node || !type)
5176  			break;
5177  
5178  		mte_set_node_dead(entry);
5179  		node->type = type;
5180  		rcu_assign_pointer(slots[offset], node);
5181  	}
5182  
5183  	return offset;
5184  }
5185  
5186  /**
5187   * mte_dead_walk() - Walk down a dead tree to just before the leaves
5188   * @enode: The maple encoded node
5189   * @offset: The starting offset
5190   *
5191   * Note: This can only be used from the RCU callback context.
5192   */
5193  static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
5194  {
5195  	struct maple_node *node, *next;
5196  	void __rcu **slots = NULL;
5197  
5198  	next = mte_to_node(*enode);
5199  	do {
5200  		*enode = ma_enode_ptr(next);
5201  		node = mte_to_node(*enode);
5202  		slots = ma_slots(node, node->type);
5203  		next = rcu_dereference_protected(slots[offset],
5204  					lock_is_held(&rcu_callback_map));
5205  		offset = 0;
5206  	} while (!ma_is_leaf(next->type));
5207  
5208  	return slots;
5209  }
5210  
5211  /**
5212   * mt_free_walk() - Walk & free a tree in the RCU callback context
5213   * @head: The RCU head that's within the node.
5214   *
5215   * Note: This can only be used from the RCU callback context.
5216   */
5217  static void mt_free_walk(struct rcu_head *head)
5218  {
5219  	void __rcu **slots;
5220  	struct maple_node *node, *start;
5221  	struct maple_enode *enode;
5222  	unsigned char offset;
5223  	enum maple_type type;
5224  
5225  	node = container_of(head, struct maple_node, rcu);
5226  
5227  	if (ma_is_leaf(node->type))
5228  		goto free_leaf;
5229  
5230  	start = node;
5231  	enode = mt_mk_node(node, node->type);
5232  	slots = mte_dead_walk(&enode, 0);
5233  	node = mte_to_node(enode);
5234  	do {
5235  		mt_free_bulk(node->slot_len, slots);
5236  		offset = node->parent_slot + 1;
5237  		enode = node->piv_parent;
5238  		if (mte_to_node(enode) == node)
5239  			goto free_leaf;
5240  
5241  		type = mte_node_type(enode);
5242  		slots = ma_slots(mte_to_node(enode), type);
5243  		if ((offset < mt_slots[type]) &&
5244  		    rcu_dereference_protected(slots[offset],
5245  					      lock_is_held(&rcu_callback_map)))
5246  			slots = mte_dead_walk(&enode, offset);
5247  		node = mte_to_node(enode);
5248  	} while ((node != start) || (node->slot_len < offset));
5249  
5250  	slots = ma_slots(node, node->type);
5251  	mt_free_bulk(node->slot_len, slots);
5252  
5253  free_leaf:
5254  	mt_free_rcu(&node->rcu);
5255  }
5256  
5257  static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
5258  	struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
5259  {
5260  	struct maple_node *node;
5261  	struct maple_enode *next = *enode;
5262  	void __rcu **slots = NULL;
5263  	enum maple_type type;
5264  	unsigned char next_offset = 0;
5265  
5266  	do {
5267  		*enode = next;
5268  		node = mte_to_node(*enode);
5269  		type = mte_node_type(*enode);
5270  		slots = ma_slots(node, type);
5271  		next = mt_slot_locked(mt, slots, next_offset);
5272  		if ((mte_dead_node(next)))
5273  			next = mt_slot_locked(mt, slots, ++next_offset);
5274  
5275  		mte_set_node_dead(*enode);
5276  		node->type = type;
5277  		node->piv_parent = prev;
5278  		node->parent_slot = offset;
5279  		offset = next_offset;
5280  		next_offset = 0;
5281  		prev = *enode;
5282  	} while (!mte_is_leaf(next));
5283  
5284  	return slots;
5285  }
5286  
5287  static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
5288  			    bool free)
5289  {
5290  	void __rcu **slots;
5291  	struct maple_node *node = mte_to_node(enode);
5292  	struct maple_enode *start;
5293  
5294  	if (mte_is_leaf(enode)) {
5295  		node->type = mte_node_type(enode);
5296  		goto free_leaf;
5297  	}
5298  
5299  	start = enode;
5300  	slots = mte_destroy_descend(&enode, mt, start, 0);
5301  	node = mte_to_node(enode); // Updated in the above call.
5302  	do {
5303  		enum maple_type type;
5304  		unsigned char offset;
5305  		struct maple_enode *parent, *tmp;
5306  
5307  		node->slot_len = mte_dead_leaves(enode, mt, slots);
5308  		if (free)
5309  			mt_free_bulk(node->slot_len, slots);
5310  		offset = node->parent_slot + 1;
5311  		enode = node->piv_parent;
5312  		if (mte_to_node(enode) == node)
5313  			goto free_leaf;
5314  
5315  		type = mte_node_type(enode);
5316  		slots = ma_slots(mte_to_node(enode), type);
5317  		if (offset >= mt_slots[type])
5318  			goto next;
5319  
5320  		tmp = mt_slot_locked(mt, slots, offset);
5321  		if (mte_node_type(tmp) && mte_to_node(tmp)) {
5322  			parent = enode;
5323  			enode = tmp;
5324  			slots = mte_destroy_descend(&enode, mt, parent, offset);
5325  		}
5326  next:
5327  		node = mte_to_node(enode);
5328  	} while (start != enode);
5329  
5330  	node = mte_to_node(enode);
5331  	node->slot_len = mte_dead_leaves(enode, mt, slots);
5332  	if (free)
5333  		mt_free_bulk(node->slot_len, slots);
5334  
5335  free_leaf:
5336  	if (free)
5337  		mt_free_rcu(&node->rcu);
5338  	else
5339  		mt_clear_meta(mt, node, node->type);
5340  }
5341  
5342  /*
5343   * mte_destroy_walk() - Free a tree or sub-tree.
5344   * @enode: the encoded maple node (maple_enode) to start
5345   * @mt: the tree to free - needed for node types.
5346   *
5347   * Must hold the write lock.
5348   */
5349  static inline void mte_destroy_walk(struct maple_enode *enode,
5350  				    struct maple_tree *mt)
5351  {
5352  	struct maple_node *node = mte_to_node(enode);
5353  
5354  	if (mt_in_rcu(mt)) {
5355  		mt_destroy_walk(enode, mt, false);
5356  		call_rcu(&node->rcu, mt_free_walk);
5357  	} else {
5358  		mt_destroy_walk(enode, mt, true);
5359  	}
5360  }
5361  /* Interface */
5362  
5363  /**
5364   * mas_store() - Store an @entry.
5365   * @mas: The maple state.
5366   * @entry: The entry to store.
5367   *
5368   * The @mas->index and @mas->last are used to set the range for the @entry.
5369   *
5370   * Return: the first entry between mas->index and mas->last or %NULL.
5371   */
5372  void *mas_store(struct ma_state *mas, void *entry)
5373  {
5374  	int request;
5375  	MA_WR_STATE(wr_mas, mas, entry);
5376  
5377  	trace_ma_write(__func__, mas, 0, entry);
5378  #ifdef CONFIG_DEBUG_MAPLE_TREE
5379  	if (MAS_WARN_ON(mas, mas->index > mas->last))
5380  		pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
5381  		       entry);
5382  
5383  	if (mas->index > mas->last) {
5384  		mas_set_err(mas, -EINVAL);
5385  		return NULL;
5386  	}
5387  
5388  #endif
5389  
5390  	/*
5391  	 * Storing is the same operation as insert with the added caveat that it
5392  	 * can overwrite entries.  Although this seems simple enough, one may
5393  	 * want to examine what happens if a single store operation was to
5394  	 * overwrite multiple entries within a self-balancing B-Tree.
5395  	 */
5396  	mas_wr_prealloc_setup(&wr_mas);
5397  	mas->store_type = mas_wr_store_type(&wr_mas);
5398  	if (mas->mas_flags & MA_STATE_PREALLOC) {
5399  		mas_wr_store_entry(&wr_mas);
5400  		MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
5401  		return wr_mas.content;
5402  	}
5403  
5404  	request = mas_prealloc_calc(mas, entry);
5405  	if (!request)
5406  		goto store;
5407  
5408  	mas_node_count(mas, request);
5409  	if (mas_is_err(mas))
5410  		return NULL;
5411  
5412  store:
5413  	mas_wr_store_entry(&wr_mas);
5414  	mas_destroy(mas);
5415  	return wr_mas.content;
5416  }
5417  EXPORT_SYMBOL_GPL(mas_store);
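
/*
 * Usage sketch (illustrative only; the tree, range and entry are
 * hypothetical).  Without preallocated nodes the store may fail to allocate,
 * so the error is checked on the maple state:
 *
 *	MA_STATE(mas, &tree, 10, 19);
 *	int ret = 0;
 *
 *	mtree_lock(&tree);
 *	mas_store(&mas, entry);		// entry now spans indices 10-19
 *	if (mas_is_err(&mas))
 *		ret = xa_err(mas.node);	// e.g. -ENOMEM
 *	mtree_unlock(&tree);
 */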
5418  
5419  /**
5420   * mas_store_gfp() - Store a value into the tree.
5421   * @mas: The maple state
5422   * @entry: The entry to store
5423   * @gfp: The GFP_FLAGS to use for allocations if necessary.
5424   *
5425   * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5426   * be allocated.
5427   */
5428  int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
5429  {
5430  	unsigned long index = mas->index;
5431  	unsigned long last = mas->last;
5432  	MA_WR_STATE(wr_mas, mas, entry);
5433  	int ret = 0;
5434  
5435  retry:
5436  	mas_wr_preallocate(&wr_mas, entry);
5437  	if (unlikely(mas_nomem(mas, gfp))) {
5438  		if (!entry)
5439  			__mas_set_range(mas, index, last);
5440  		goto retry;
5441  	}
5442  
5443  	if (mas_is_err(mas)) {
5444  		ret = xa_err(mas->node);
5445  		goto out;
5446  	}
5447  
5448  	mas_wr_store_entry(&wr_mas);
5449  out:
5450  	mas_destroy(mas);
5451  	return ret;
5452  }
5453  EXPORT_SYMBOL_GPL(mas_store_gfp);
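
/*
 * Usage sketch (illustrative only; first, last and entry are hypothetical).
 * This variant returns an errno and retries the allocation internally:
 *
 *	MA_STATE(mas, &tree, first, last);
 *	int ret;
 *
 *	mtree_lock(&tree);
 *	ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
 *	mtree_unlock(&tree);
 *	if (ret)
 *		goto err;		// -EINVAL or -ENOMEM
 */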
5454  
5455  /**
5456   * mas_store_prealloc() - Store a value into the tree using memory
5457   * preallocated in the maple state.
5458   * @mas: The maple state
5459   * @entry: The entry to store.
5460   */
5461  void mas_store_prealloc(struct ma_state *mas, void *entry)
5462  {
5463  	MA_WR_STATE(wr_mas, mas, entry);
5464  
5465  	if (mas->store_type == wr_store_root) {
5466  		mas_wr_prealloc_setup(&wr_mas);
5467  		goto store;
5468  	}
5469  
5470  	mas_wr_walk_descend(&wr_mas);
5471  	if (mas->store_type != wr_spanning_store) {
5472  		/* set wr_mas->content to current slot */
5473  		wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset);
5474  		mas_wr_end_piv(&wr_mas);
5475  	}
5476  
5477  store:
5478  	trace_ma_write(__func__, mas, 0, entry);
5479  	mas_wr_store_entry(&wr_mas);
5480  	MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
5481  	mas_destroy(mas);
5482  }
5483  EXPORT_SYMBOL_GPL(mas_store_prealloc);
5484  
5485  /**
5486   * mas_preallocate() - Preallocate enough nodes for a store operation
5487   * @mas: The maple state
5488   * @entry: The entry that will be stored
5489   * @gfp: The GFP_FLAGS to use for allocations.
5490   *
5491   * Return: 0 on success, -ENOMEM if memory could not be allocated.
5492   */
5493  int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
5494  {
5495  	MA_WR_STATE(wr_mas, mas, entry);
5496  	int ret = 0;
5497  	int request;
5498  
5499  	mas_wr_prealloc_setup(&wr_mas);
5500  	mas->store_type = mas_wr_store_type(&wr_mas);
5501  	request = mas_prealloc_calc(mas, entry);
5502  	if (!request)
5503  		return ret;
5504  
5505  	mas_node_count_gfp(mas, request, gfp);
5506  	if (mas_is_err(mas)) {
5507  		mas_set_alloc_req(mas, 0);
5508  		ret = xa_err(mas->node);
5509  		mas_destroy(mas);
5510  		mas_reset(mas);
5511  		return ret;
5512  	}
5513  
5514  	mas->mas_flags |= MA_STATE_PREALLOC;
5515  	return ret;
5516  }
5517  EXPORT_SYMBOL_GPL(mas_preallocate);
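
/*
 * Usage sketch (illustrative only; names are hypothetical): preallocate the
 * worst-case number of nodes, then perform the store at a point where it is
 * no longer allowed to fail:
 *
 *	MA_STATE(mas, &tree, first, last);
 *
 *	mtree_lock(&tree);
 *	if (mas_preallocate(&mas, entry, GFP_KERNEL)) {
 *		mtree_unlock(&tree);
 *		return -ENOMEM;
 *	}
 *	// ... other preparation that must precede the store ...
 *	mas_store_prealloc(&mas, entry);	// consumes the preallocation
 *	mtree_unlock(&tree);
 */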
5518  
5519  /*
5520   * mas_destroy() - destroy a maple state.
5521   * @mas: The maple state
5522   *
5523   * Upon completion, check the left-most node and rebalance against the node to
5524   * the right if necessary.  Frees any allocated nodes associated with this maple
5525   * state.
5526   */
5527  void mas_destroy(struct ma_state *mas)
5528  {
5529  	struct maple_alloc *node;
5530  	unsigned long total;
5531  
5532  	/*
5533  	 * When using mas_for_each() to insert an expected number of elements,
5534  	 * it is possible that the number inserted is less than the expected
5535  	 * number.  To fix an invalid final node, a check is performed here to
5536  	 * rebalance the previous node with the final node.
5537  	 */
5538  	if (mas->mas_flags & MA_STATE_REBALANCE) {
5539  		unsigned char end;
5540  		if (mas_is_err(mas))
5541  			mas_reset(mas);
5542  		mas_start(mas);
5543  		mtree_range_walk(mas);
5544  		end = mas->end + 1;
5545  		if (end < mt_min_slot_count(mas->node) - 1)
5546  			mas_destroy_rebalance(mas, end);
5547  
5548  		mas->mas_flags &= ~MA_STATE_REBALANCE;
5549  	}
5550  	mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
5551  
5552  	total = mas_allocated(mas);
5553  	while (total) {
5554  		node = mas->alloc;
5555  		mas->alloc = node->slot[0];
5556  		if (node->node_count > 1) {
5557  			size_t count = node->node_count - 1;
5558  
5559  			mt_free_bulk(count, (void __rcu **)&node->slot[1]);
5560  			total -= count;
5561  		}
5562  		mt_free_one(ma_mnode_ptr(node));
5563  		total--;
5564  	}
5565  
5566  	mas->alloc = NULL;
5567  }
5568  EXPORT_SYMBOL_GPL(mas_destroy);
5569  
5570  /*
5571   * mas_expected_entries() - Set the expected number of entries that will be inserted.
5572   * @mas: The maple state
5573   * @nr_entries: The number of expected entries.
5574   *
5575   * This will attempt to pre-allocate enough nodes to store the expected number
5576   * of entries.  The allocations will occur using the bulk allocator interface
5577   * for speed.  Please call mas_destroy() on the @mas after inserting the entries
5578   * to ensure any unused nodes are freed.
5579   *
5580   * Return: 0 on success, -ENOMEM if memory could not be allocated.
5581   */
5582  int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
5583  {
5584  	int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
5585  	struct maple_enode *enode = mas->node;
5586  	int nr_nodes;
5587  	int ret;
5588  
5589  	/*
5590  	 * Sometimes it is necessary to duplicate a tree to a new tree, such as
5591  	 * forking a process and duplicating the VMAs from one tree to a new
5592  	 * tree.  When such a situation arises, it is known that the new tree is
5593  	 * not going to be used until the entire tree is populated.  For
5594  	 * performance reasons, it is best to use a bulk load with RCU disabled.
5595  	 * This allows for optimistic splitting that favours the left and reuse
5596  	 * of nodes during the operation.
5597  	 */
5598  
5599  	/* Optimize splitting for bulk insert in-order */
5600  	mas->mas_flags |= MA_STATE_BULK;
5601  
5602  	/*
5603  	 * Avoid overflow, assume a gap between each entry and a trailing null.
5604  	 * If this is wrong, it just means allocation can happen during
5605  	 * insertion of entries.
5606  	 */
5607  	nr_nodes = max(nr_entries, nr_entries * 2 + 1);
5608  	if (!mt_is_alloc(mas->tree))
5609  		nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
5610  
5611  	/* Leaves; reduce slots to keep space for expansion */
5612  	nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
5613  	/* Internal nodes */
5614  	nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
5615  	/* Add working room for split (2 nodes) + new parents */
5616  	mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL);
5617  
5618  	/* Detect if allocations run out */
5619  	mas->mas_flags |= MA_STATE_PREALLOC;
5620  
5621  	if (!mas_is_err(mas))
5622  		return 0;
5623  
5624  	ret = xa_err(mas->node);
5625  	mas->node = enode;
5626  	mas_destroy(mas);
5627  	return ret;
5628  
5629  }
5630  EXPORT_SYMBOL_GPL(mas_expected_entries);
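
/*
 * Usage sketch (illustrative only; the arrays and count are hypothetical):
 * bulk-loading a fresh tree, then releasing unused preallocations and fixing
 * up the final node with mas_destroy():
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	unsigned long i;
 *
 *	mtree_lock(&tree);
 *	if (mas_expected_entries(&mas, nr)) {
 *		mtree_unlock(&tree);
 *		return -ENOMEM;
 *	}
 *	for (i = 0; i < nr; i++) {
 *		mas_set_range(&mas, start[i], end[i]);
 *		mas_store(&mas, entries[i]);
 *	}
 *	mas_destroy(&mas);
 *	mtree_unlock(&tree);
 */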
5631  
5632  static bool mas_next_setup(struct ma_state *mas, unsigned long max,
5633  		void **entry)
5634  {
5635  	bool was_none = mas_is_none(mas);
5636  
5637  	if (unlikely(mas->last >= max)) {
5638  		mas->status = ma_overflow;
5639  		return true;
5640  	}
5641  
5642  	switch (mas->status) {
5643  	case ma_active:
5644  		return false;
5645  	case ma_none:
5646  		fallthrough;
5647  	case ma_pause:
5648  		mas->status = ma_start;
5649  		fallthrough;
5650  	case ma_start:
5651  		mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
5652  		break;
5653  	case ma_overflow:
5654  		/* Overflowed before, but the max changed */
5655  		mas->status = ma_active;
5656  		break;
5657  	case ma_underflow:
5658  		/* The user expects the mas to be one before where it is */
5659  		mas->status = ma_active;
5660  		*entry = mas_walk(mas);
5661  		if (*entry)
5662  			return true;
5663  		break;
5664  	case ma_root:
5665  		break;
5666  	case ma_error:
5667  		return true;
5668  	}
5669  
5670  	if (likely(mas_is_active(mas))) /* Fast path */
5671  		return false;
5672  
5673  	if (mas_is_ptr(mas)) {
5674  		*entry = NULL;
5675  		if (was_none && mas->index == 0) {
5676  			mas->index = mas->last = 0;
5677  			return true;
5678  		}
5679  		mas->index = 1;
5680  		mas->last = ULONG_MAX;
5681  		mas->status = ma_none;
5682  		return true;
5683  	}
5684  
5685  	if (mas_is_none(mas))
5686  		return true;
5687  
5688  	return false;
5689  }
5690  
5691  /**
5692   * mas_next() - Get the next entry.
5693   * @mas: The maple state
5694   * @max: The maximum index to check.
5695   *
5696   * Returns the next entry after @mas->index.
5697   * Must hold rcu_read_lock or the write lock.
5698   * Can return the zero entry.
5699   *
5700   * Return: The next entry or %NULL
5701   */
5702  void *mas_next(struct ma_state *mas, unsigned long max)
5703  {
5704  	void *entry = NULL;
5705  
5706  	if (mas_next_setup(mas, max, &entry))
5707  		return entry;
5708  
5709  	/* Retries on dead nodes handled by mas_next_slot */
5710  	return mas_next_slot(mas, max, false);
5711  }
5712  EXPORT_SYMBOL_GPL(mas_next);
5713  
5714  /**
5715   * mas_next_range() - Advance the maple state to the next range
5716   * @mas: The maple state
5717   * @max: The maximum index to check.
5718   *
5719   * Sets @mas->index and @mas->last to the range.
5720   * Must hold rcu_read_lock or the write lock.
5721   * Can return the zero entry.
5722   *
5723   * Return: The next entry or %NULL
5724   */
5725  void *mas_next_range(struct ma_state *mas, unsigned long max)
5726  {
5727  	void *entry = NULL;
5728  
5729  	if (mas_next_setup(mas, max, &entry))
5730  		return entry;
5731  
5732  	/* Retries on dead nodes handled by mas_next_slot */
5733  	return mas_next_slot(mas, max, true);
5734  }
5735  EXPORT_SYMBOL_GPL(mas_next_range);
5736  
5737  /**
5738   * mt_next() - get the next value in the maple tree
5739   * @mt: The maple tree
5740   * @index: The start index
5741   * @max: The maximum index to check
5742   *
5743   * Takes RCU read lock internally to protect the search, which does not
5744   * protect the returned pointer after dropping RCU read lock.
5745   * See also: Documentation/core-api/maple_tree.rst
5746   *
5747   * Return: The entry higher than @index or %NULL if nothing is found.
5748   */
5749  void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
5750  {
5751  	void *entry = NULL;
5752  	MA_STATE(mas, mt, index, index);
5753  
5754  	rcu_read_lock();
5755  	entry = mas_next(&mas, max);
5756  	rcu_read_unlock();
5757  	return entry;
5758  }
5759  EXPORT_SYMBOL_GPL(mt_next);
5760  
5761  static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry)
5762  {
5763  	if (unlikely(mas->index <= min)) {
5764  		mas->status = ma_underflow;
5765  		return true;
5766  	}
5767  
5768  	switch (mas->status) {
5769  	case ma_active:
5770  		return false;
5771  	case ma_start:
5772  		break;
5773  	case ma_none:
5774  		fallthrough;
5775  	case ma_pause:
5776  		mas->status = ma_start;
5777  		break;
5778  	case ma_underflow:
5779  		/* underflowed before but the min changed */
5780  		mas->status = ma_active;
5781  		break;
5782  	case ma_overflow:
5783  		/* User expects mas to be one after where it is */
5784  		mas->status = ma_active;
5785  		*entry = mas_walk(mas);
5786  		if (*entry)
5787  			return true;
5788  		break;
5789  	case ma_root:
5790  		break;
5791  	case ma_error:
5792  		return true;
5793  	}
5794  
5795  	if (mas_is_start(mas))
5796  		mas_walk(mas);
5797  
5798  	if (unlikely(mas_is_ptr(mas))) {
5799  		if (!mas->index) {
5800  			mas->status = ma_none;
5801  			return true;
5802  		}
5803  		mas->index = mas->last = 0;
5804  		*entry = mas_root(mas);
5805  		return true;
5806  	}
5807  
5808  	if (mas_is_none(mas)) {
5809  		if (mas->index) {
5810  			/* Walked to out-of-range pointer? */
5811  			mas->index = mas->last = 0;
5812  			mas->status = ma_root;
5813  			*entry = mas_root(mas);
5814  			return true;
5815  		}
5816  		return true;
5817  	}
5818  
5819  	return false;
5820  }
5821  
5822  /**
5823   * mas_prev() - Get the previous entry
5824   * @mas: The maple state
5825   * @min: The minimum value to check.
5826   *
5827   * Must hold rcu_read_lock or the write lock.
5828   * Will reset mas to ma_start if the status is ma_none.  Will stop on not
5829   * searchable nodes.
5830   *
5831   * Return: the previous value or %NULL.
5832   */
5833  void *mas_prev(struct ma_state *mas, unsigned long min)
5834  {
5835  	void *entry = NULL;
5836  
5837  	if (mas_prev_setup(mas, min, &entry))
5838  		return entry;
5839  
5840  	return mas_prev_slot(mas, min, false);
5841  }
5842  EXPORT_SYMBOL_GPL(mas_prev);
5843  
5844  /**
5845   * mas_prev_range() - Advance to the previous range
5846   * @mas: The maple state
5847   * @min: The minimum value to check.
5848   *
5849   * Sets @mas->index and @mas->last to the range.
5850   * Must hold rcu_read_lock or the write lock.
5851   * Will reset mas to ma_start if the node is ma_none.  Will stop on not
5852   * searchable nodes.
5853   *
5854   * Return: the previous value or %NULL.
5855   */
5856  void *mas_prev_range(struct ma_state *mas, unsigned long min)
5857  {
5858  	void *entry = NULL;
5859  
5860  	if (mas_prev_setup(mas, min, &entry))
5861  		return entry;
5862  
5863  	return mas_prev_slot(mas, min, true);
5864  }
5865  EXPORT_SYMBOL_GPL(mas_prev_range);
5866  
5867  /**
5868   * mt_prev() - get the previous value in the maple tree
5869   * @mt: The maple tree
5870   * @index: The start index
5871   * @min: The minimum index to check
5872   *
5873   * Takes RCU read lock internally to protect the search, which does not
5874   * protect the returned pointer after dropping RCU read lock.
5875   * See also: Documentation/core-api/maple_tree.rst
5876   *
5877   * Return: The entry before @index or %NULL if nothing is found.
5878   */
5879  void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
5880  {
5881  	void *entry = NULL;
5882  	MA_STATE(mas, mt, index, index);
5883  
5884  	rcu_read_lock();
5885  	entry = mas_prev(&mas, min);
5886  	rcu_read_unlock();
5887  	return entry;
5888  }
5889  EXPORT_SYMBOL_GPL(mt_prev);
5890  
5891  /**
5892   * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
5893   * @mas: The maple state to pause
5894   *
5895   * Some users need to pause a walk and drop the lock they're holding in
5896   * order to yield to a higher priority thread or carry out an operation
5897   * on an entry.  Those users should call this function before they drop
5898   * the lock.  It resets the @mas to be suitable for the next iteration
5899   * of the loop after the user has reacquired the lock.  If most entries
5900   * found during a walk require you to call mas_pause(), the mt_for_each()
5901   * iterator may be more appropriate.
5902   *
5903   */
5904  void mas_pause(struct ma_state *mas)
5905  {
5906  	mas->status = ma_pause;
5907  	mas->node = NULL;
5908  }
5909  EXPORT_SYMBOL_GPL(mas_pause);
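
/*
 * Usage sketch (illustrative only; the tree and the work done per entry are
 * hypothetical): yielding the RCU read lock in the middle of an iteration,
 * then resuming from where the walk left off:
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	mas_for_each(&mas, entry, ULONG_MAX) {
 *		if (need_resched()) {
 *			mas_pause(&mas);
 *			rcu_read_unlock();
 *			cond_resched();
 *			rcu_read_lock();
 *		}
 *		// use entry; it is only stable while the lock is held
 *	}
 *	rcu_read_unlock();
 */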
5910  
5911  /**
5912   * mas_find_setup() - Internal function to set up mas_find*().
5913   * @mas: The maple state
5914   * @max: The maximum index
5915   * @entry: Pointer to the entry
5916   *
5917   * Returns: True if entry is the answer, false otherwise.
5918   */
5919  static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry)
5920  {
5921  	switch (mas->status) {
5922  	case ma_active:
5923  		if (mas->last < max)
5924  			return false;
5925  		return true;
5926  	case ma_start:
5927  		break;
5928  	case ma_pause:
5929  		if (unlikely(mas->last >= max))
5930  			return true;
5931  
5932  		mas->index = ++mas->last;
5933  		mas->status = ma_start;
5934  		break;
5935  	case ma_none:
5936  		if (unlikely(mas->last >= max))
5937  			return true;
5938  
5939  		mas->index = mas->last;
5940  		mas->status = ma_start;
5941  		break;
5942  	case ma_underflow:
5943  		/* mas is pointing at entry before unable to go lower */
5944  		if (unlikely(mas->index >= max)) {
5945  			mas->status = ma_overflow;
5946  			return true;
5947  		}
5948  
5949  		mas->status = ma_active;
5950  		*entry = mas_walk(mas);
5951  		if (*entry)
5952  			return true;
5953  		break;
5954  	case ma_overflow:
5955  		if (unlikely(mas->last >= max))
5956  			return true;
5957  
5958  		mas->status = ma_active;
5959  		*entry = mas_walk(mas);
5960  		if (*entry)
5961  			return true;
5962  		break;
5963  	case ma_root:
5964  		break;
5965  	case ma_error:
5966  		return true;
5967  	}
5968  
5969  	if (mas_is_start(mas)) {
5970  		/* First run or continue */
5971  		if (mas->index > max)
5972  			return true;
5973  
5974  		*entry = mas_walk(mas);
5975  		if (*entry)
5976  			return true;
5977  
5978  	}
5979  
5980  	if (unlikely(mas_is_ptr(mas)))
5981  		goto ptr_out_of_range;
5982  
5983  	if (unlikely(mas_is_none(mas)))
5984  		return true;
5985  
5986  	if (mas->index == max)
5987  		return true;
5988  
5989  	return false;
5990  
5991  ptr_out_of_range:
5992  	mas->status = ma_none;
5993  	mas->index = 1;
5994  	mas->last = ULONG_MAX;
5995  	return true;
5996  }
5997  
5998  /**
5999   * mas_find() - On the first call, find the entry at or after mas->index up to
6000   * %max.  Otherwise, find the entry after mas->index.
6001   * @mas: The maple state
6002   * @max: The maximum value to check.
6003   *
6004   * Must hold rcu_read_lock or the write lock.
6005   * If an entry exists, last and index are updated accordingly.
6006   * May set @mas->status to ma_overflow.
6007   *
6008   * Return: The entry or %NULL.
6009   */
6010  void *mas_find(struct ma_state *mas, unsigned long max)
6011  {
6012  	void *entry = NULL;
6013  
6014  	if (mas_find_setup(mas, max, &entry))
6015  		return entry;
6016  
6017  	/* Retries on dead nodes handled by mas_next_slot */
6018  	entry = mas_next_slot(mas, max, false);
6019  	/* Ignore overflow */
6020  	mas->status = ma_active;
6021  	return entry;
6022  }
6023  EXPORT_SYMBOL_GPL(mas_find);
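
/*
 * Usage sketch (illustrative only; the tree is hypothetical): a read-side
 * iteration built directly on mas_find(), equivalent to what mas_for_each()
 * expands to:
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_find(&mas, ULONG_MAX)) != NULL) {
 *		// mas.index and mas.last now describe the entry's range
 *	}
 *	rcu_read_unlock();
 */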
6024  
6025  /**
6026   * mas_find_range() - On the first call, find the entry at or after
6027   * mas->index up to %max.  Otherwise, advance to the next slot after mas->index.
6028   * @mas: The maple state
6029   * @max: The maximum value to check.
6030   *
6031   * Must hold rcu_read_lock or the write lock.
6032   * If an entry exists, last and index are updated accordingly.
6033   * May set @mas->status to ma_overflow.
6034   *
6035   * Return: The entry or %NULL.
6036   */
6037  void *mas_find_range(struct ma_state *mas, unsigned long max)
6038  {
6039  	void *entry = NULL;
6040  
6041  	if (mas_find_setup(mas, max, &entry))
6042  		return entry;
6043  
6044  	/* Retries on dead nodes handled by mas_next_slot */
6045  	return mas_next_slot(mas, max, true);
6046  }
6047  EXPORT_SYMBOL_GPL(mas_find_range);
6048  
6049  /**
6050   * mas_find_rev_setup() - Internal function to set up mas_find_*_rev()
6051   * @mas: The maple state
6052   * @min: The minimum index
6053   * @entry: Pointer to the entry
6054   *
6055   * Returns: True if entry is the answer, false otherwise.
6056   */
6057  static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min,
6058  		void **entry)
6059  {
6060  
6061  	switch (mas->status) {
6062  	case ma_active:
6063  		goto active;
6064  	case ma_start:
6065  		break;
6066  	case ma_pause:
6067  		if (unlikely(mas->index <= min)) {
6068  			mas->status = ma_underflow;
6069  			return true;
6070  		}
6071  		mas->last = --mas->index;
6072  		mas->status = ma_start;
6073  		break;
6074  	case ma_none:
6075  		if (mas->index <= min)
6076  			goto none;
6077  
6078  		mas->last = mas->index;
6079  		mas->status = ma_start;
6080  		break;
6081  	case ma_overflow: /* user expects the mas to be one after where it is */
6082  		if (unlikely(mas->index <= min)) {
6083  			mas->status = ma_underflow;
6084  			return true;
6085  		}
6086  
6087  		mas->status = ma_active;
6088  		break;
6089  	case ma_underflow: /* user expects the mas to be one before where it is */
6090  		if (unlikely(mas->index <= min))
6091  			return true;
6092  
6093  		mas->status = ma_active;
6094  		break;
6095  	case ma_root:
6096  		break;
6097  	case ma_error:
6098  		return true;
6099  	}
6100  
6101  	if (mas_is_start(mas)) {
6102  		/* First run or continue */
6103  		if (mas->index < min)
6104  			return true;
6105  
6106  		*entry = mas_walk(mas);
6107  		if (*entry)
6108  			return true;
6109  	}
6110  
6111  	if (unlikely(mas_is_ptr(mas)))
6112  		goto none;
6113  
6114  	if (unlikely(mas_is_none(mas))) {
6115  		/*
6116  		 * Walked to the location, and there was nothing so the previous
6117  		 * location is 0.
6118  		 */
6119  		mas->last = mas->index = 0;
6120  		mas->status = ma_root;
6121  		*entry = mas_root(mas);
6122  		return true;
6123  	}
6124  
6125  active:
6126  	if (mas->index < min)
6127  		return true;
6128  
6129  	return false;
6130  
6131  none:
6132  	mas->status = ma_none;
6133  	return true;
6134  }
6135  
6136  /**
6137   * mas_find_rev: On the first call, find the first non-null entry at or below
6138   * mas->index down to %min.  Otherwise find the first non-null entry below
6139   * mas->index down to %min.
6140   * @mas: The maple state
6141   * @min: The minimum value to check.
6142   *
6143   * Must hold rcu_read_lock or the write lock.
6144   * If an entry exists, last and index are updated accordingly.
6145   * May set @mas->status to ma_underflow.
6146   *
6147   * Return: The entry or %NULL.
6148   */
6149  void *mas_find_rev(struct ma_state *mas, unsigned long min)
6150  {
6151  	void *entry = NULL;
6152  
6153  	if (mas_find_rev_setup(mas, min, &entry))
6154  		return entry;
6155  
6156  	/* Retries on dead nodes handled by mas_prev_slot */
6157  	return mas_prev_slot(mas, min, false);
6158  
6159  }
6160  EXPORT_SYMBOL_GPL(mas_find_rev);
6161  
6162  /**
6163   * mas_find_range_rev: On the first call, find the first non-null entry at or
6164   * below mas->index down to %min.  Otherwise advance to the previous slot after
6165   * mas->index down to %min.
6166   * @mas: The maple state
6167   * @min: The minimum value to check.
6168   *
6169   * Must hold rcu_read_lock or the write lock.
6170   * If an entry exists, last and index are updated accordingly.
6171   * May set @mas->status to ma_underflow.
6172   *
6173   * Return: The entry or %NULL.
6174   */
6175  void *mas_find_range_rev(struct ma_state *mas, unsigned long min)
6176  {
6177  	void *entry = NULL;
6178  
6179  	if (mas_find_rev_setup(mas, min, &entry))
6180  		return entry;
6181  
6182  	/* Retries on dead nodes handled by mas_prev_slot */
6183  	return mas_prev_slot(mas, min, true);
6184  }
6185  EXPORT_SYMBOL_GPL(mas_find_range_rev);
6186  
6187  /**
6188   * mas_erase() - Find the range in which index resides and erase the entire
6189   * range.
6190   * @mas: The maple state
6191   *
6192   * Must hold the write lock.
6193   * Searches for @mas->index, sets @mas->index and @mas->last to the range and
6194   * erases that range.
6195   *
6196   * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated.
6197   */
6198  void *mas_erase(struct ma_state *mas)
6199  {
6200  	void *entry;
6201  	unsigned long index = mas->index;
6202  	MA_WR_STATE(wr_mas, mas, NULL);
6203  
6204  	if (!mas_is_active(mas) || !mas_is_start(mas))
6205  		mas->status = ma_start;
6206  
6207  write_retry:
6208  	entry = mas_state_walk(mas);
6209  	if (!entry)
6210  		return NULL;
6211  
6212  	/* Must reset to ensure spanning writes of last slot are detected */
6213  	mas_reset(mas);
6214  	mas_wr_preallocate(&wr_mas, NULL);
6215  	if (mas_nomem(mas, GFP_KERNEL)) {
6216  		/* in case the range of entry changed when unlocked */
6217  		mas->index = mas->last = index;
6218  		goto write_retry;
6219  	}
6220  
6221  	if (mas_is_err(mas))
6222  		goto out;
6223  
6224  	mas_wr_store_entry(&wr_mas);
6225  out:
6226  	mas_destroy(mas);
6227  	return entry;
6228  }
6229  EXPORT_SYMBOL_GPL(mas_erase);
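
/*
 * Usage sketch (illustrative only; the index is hypothetical): erasing
 * whatever range covers an index with the advanced API.  mtree_erase() below
 * wraps the same sequence with locking:
 *
 *	MA_STATE(mas, &tree, index, index);
 *	void *old;
 *
 *	mtree_lock(&tree);
 *	old = mas_erase(&mas);		// NULL if nothing was stored there
 *	mtree_unlock(&tree);
 */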
6230  
6231  /**
6232   * mas_nomem() - Check if there was an error allocating and do the allocation
6233   * if necessary If there are allocations, then free them.
6234   * @mas: The maple state
6235   * @gfp: The GFP_FLAGS to use for allocations
6236   * Return: true on allocation, false otherwise.
6237   */
6238  bool mas_nomem(struct ma_state *mas, gfp_t gfp)
6239  	__must_hold(mas->tree->ma_lock)
6240  {
6241  	if (likely(mas->node != MA_ERROR(-ENOMEM)))
6242  		return false;
6243  
6244  	if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
6245  		mtree_unlock(mas->tree);
6246  		mas_alloc_nodes(mas, gfp);
6247  		mtree_lock(mas->tree);
6248  	} else {
6249  		mas_alloc_nodes(mas, gfp);
6250  	}
6251  
6252  	if (!mas_allocated(mas))
6253  		return false;
6254  
6255  	mas->status = ma_start;
6256  	return true;
6257  }
6258  
6259  void __init maple_tree_init(void)
6260  {
6261  	maple_node_cache = kmem_cache_create("maple_node",
6262  			sizeof(struct maple_node), sizeof(struct maple_node),
6263  			SLAB_PANIC, NULL);
6264  }
6265  
6266  /**
6267   * mtree_load() - Load a value stored in a maple tree
6268   * @mt: The maple tree
6269   * @index: The index to load
6270   *
6271   * Return: the entry or %NULL
6272   */
6273  void *mtree_load(struct maple_tree *mt, unsigned long index)
6274  {
6275  	MA_STATE(mas, mt, index, index);
6276  	void *entry;
6277  
6278  	trace_ma_read(__func__, &mas);
6279  	rcu_read_lock();
6280  retry:
6281  	entry = mas_start(&mas);
6282  	if (unlikely(mas_is_none(&mas)))
6283  		goto unlock;
6284  
6285  	if (unlikely(mas_is_ptr(&mas))) {
6286  		if (index)
6287  			entry = NULL;
6288  
6289  		goto unlock;
6290  	}
6291  
6292  	entry = mtree_lookup_walk(&mas);
6293  	if (!entry && unlikely(mas_is_start(&mas)))
6294  		goto retry;
6295  unlock:
6296  	rcu_read_unlock();
6297  	if (xa_is_zero(entry))
6298  		return NULL;
6299  
6300  	return entry;
6301  }
6302  EXPORT_SYMBOL(mtree_load);
6303  
6304  /**
6305   * mtree_store_range() - Store an entry at a given range.
6306   * @mt: The maple tree
6307   * @index: The start of the range
6308   * @last: The end of the range
6309   * @entry: The entry to store
6310   * @gfp: The GFP_FLAGS to use for allocations
6311   *
6312   * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
6313   * be allocated.
6314   */
6315  int mtree_store_range(struct maple_tree *mt, unsigned long index,
6316  		unsigned long last, void *entry, gfp_t gfp)
6317  {
6318  	MA_STATE(mas, mt, index, last);
6319  	int ret = 0;
6320  
6321  	trace_ma_write(__func__, &mas, 0, entry);
6322  	if (WARN_ON_ONCE(xa_is_advanced(entry)))
6323  		return -EINVAL;
6324  
6325  	if (index > last)
6326  		return -EINVAL;
6327  
6328  	mtree_lock(mt);
6329  	ret = mas_store_gfp(&mas, entry, gfp);
6330  	mtree_unlock(mt);
6331  
6332  	return ret;
6333  }
6334  EXPORT_SYMBOL(mtree_store_range);
6335  
6336  /**
6337   * mtree_store() - Store an entry at a given index.
6338   * @mt: The maple tree
6339   * @index: The index to store the value
6340   * @entry: The entry to store
6341   * @gfp: The GFP_FLAGS to use for allocations
6342   *
6343   * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
6344   * be allocated.
6345   */
6346  int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
6347  		 gfp_t gfp)
6348  {
6349  	return mtree_store_range(mt, index, index, entry, gfp);
6350  }
6351  EXPORT_SYMBOL(mtree_store);
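
/*
 * Usage sketch (illustrative only; the tree and pointer are hypothetical):
 * the basic store/load/erase interface that handles locking internally:
 *
 *	int ret = mtree_store(&tree, 5, ptr, GFP_KERNEL);
 *
 *	if (ret)
 *		return ret;		// -EINVAL or -ENOMEM
 *	WARN_ON(mtree_load(&tree, 5) != ptr);
 *	mtree_erase(&tree, 5);		// the range containing index 5 is cleared
 */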
6352  
6353  /**
6354   * mtree_insert_range() - Insert an entry at a given range if there is no value.
6355   * @mt: The maple tree
6356   * @first: The start of the range
6357   * @last: The end of the range
6358   * @entry: The entry to store
6359   * @gfp: The GFP_FLAGS to use for allocations.
6360   *
6361   * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6362   * request, -ENOMEM if memory could not be allocated.
6363   */
6364  int mtree_insert_range(struct maple_tree *mt, unsigned long first,
6365  		unsigned long last, void *entry, gfp_t gfp)
6366  {
6367  	MA_STATE(ms, mt, first, last);
6368  	int ret = 0;
6369  
6370  	if (WARN_ON_ONCE(xa_is_advanced(entry)))
6371  		return -EINVAL;
6372  
6373  	if (first > last)
6374  		return -EINVAL;
6375  
6376  	mtree_lock(mt);
6377  retry:
6378  	mas_insert(&ms, entry);
6379  	if (mas_nomem(&ms, gfp))
6380  		goto retry;
6381  
6382  	mtree_unlock(mt);
6383  	if (mas_is_err(&ms))
6384  		ret = xa_err(ms.node);
6385  
6386  	mas_destroy(&ms);
6387  	return ret;
6388  }
6389  EXPORT_SYMBOL(mtree_insert_range);
6390  
6391  /**
6392   * mtree_insert() - Insert an entry at a given index if there is no value.
6393   * @mt: The maple tree
6394   * @index : The index to store the value
6395   * @entry: The entry to store
6396   * @gfp: The GFP_FLAGS to use for allocations.
6397   *
6398   * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6399   * request, -ENOMEM if memory could not be allocated.
6400   */
6401  int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
6402  		 gfp_t gfp)
6403  {
6404  	return mtree_insert_range(mt, index, index, entry, gfp);
6405  }
6406  EXPORT_SYMBOL(mtree_insert);
6407  
6408  int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
6409  		void *entry, unsigned long size, unsigned long min,
6410  		unsigned long max, gfp_t gfp)
6411  {
6412  	int ret = 0;
6413  
6414  	MA_STATE(mas, mt, 0, 0);
6415  	if (!mt_is_alloc(mt))
6416  		return -EINVAL;
6417  
6418  	if (WARN_ON_ONCE(mt_is_reserved(entry)))
6419  		return -EINVAL;
6420  
6421  	mtree_lock(mt);
6422  retry:
6423  	ret = mas_empty_area(&mas, min, max, size);
6424  	if (ret)
6425  		goto unlock;
6426  
6427  	mas_insert(&mas, entry);
6428  	/*
6429  	 * mas_nomem() may release the lock, causing the allocated area
6430  	 * to be unavailable, so try to allocate a free area again.
6431  	 */
6432  	if (mas_nomem(&mas, gfp))
6433  		goto retry;
6434  
6435  	if (mas_is_err(&mas))
6436  		ret = xa_err(mas.node);
6437  	else
6438  		*startp = mas.index;
6439  
6440  unlock:
6441  	mtree_unlock(mt);
6442  	mas_destroy(&mas);
6443  	return ret;
6444  }
6445  EXPORT_SYMBOL(mtree_alloc_range);
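
/*
 * Usage sketch (illustrative only; bounds, size and entry are hypothetical):
 * reserving a free range of 16 indices within [1, 1000] in an allocation
 * tree (MT_FLAGS_ALLOC_RANGE):
 *
 *	unsigned long start;
 *	int ret;
 *
 *	ret = mtree_alloc_range(&tree, &start, ptr, 16, 1, 1000, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	// ptr now occupies indices start .. start + 15
 */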
6446  
6447  /**
6448   * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
6449   * @mt: The maple tree.
6450   * @startp: Pointer to ID.
6451   * @range_lo: Lower bound of range to search.
6452   * @range_hi: Upper bound of range to search.
6453   * @entry: The entry to store.
6454   * @next: Pointer to next ID to allocate.
6455   * @gfp: The GFP_FLAGS to use for allocations.
6456   *
6457   * Finds an empty entry in @mt after @next, stores the new index into
6458   * the @id pointer, stores the entry at that index, then updates @next.
6459   *
6460   * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
6461   *
6462   * Context: Any context.  Takes and releases the mt.lock.  May sleep if
6463   * the @gfp flags permit.
6464   *
6465   * Return: 0 if the allocation succeeded without wrapping, 1 if the
6466   * allocation succeeded after wrapping, -ENOMEM if memory could not be
6467   * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
6468   * free entries.
6469   */
6470  int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
6471  		void *entry, unsigned long range_lo, unsigned long range_hi,
6472  		unsigned long *next, gfp_t gfp)
6473  {
6474  	int ret;
6475  
6476  	MA_STATE(mas, mt, 0, 0);
6477  
6478  	if (!mt_is_alloc(mt))
6479  		return -EINVAL;
6480  	if (WARN_ON_ONCE(mt_is_reserved(entry)))
6481  		return -EINVAL;
6482  	mtree_lock(mt);
6483  	ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi,
6484  			       next, gfp);
6485  	mtree_unlock(mt);
6486  	return ret;
6487  }
6488  EXPORT_SYMBOL(mtree_alloc_cyclic);
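
/*
 * Usage sketch (illustrative only; MAX_ID and the state variable are
 * hypothetical): cyclic ID allocation, where @next persists between calls so
 * the search resumes after the most recently handed out ID:
 *
 *	static unsigned long next_id;
 *	unsigned long id;
 *	int ret;
 *
 *	ret = mtree_alloc_cyclic(&tree, &id, ptr, 1, MAX_ID, &next_id, GFP_KERNEL);
 *	if (ret < 0)
 *		return ret;
 *	// ret == 1 means the allocation succeeded after wrapping around
 */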
6489  
6490  int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
6491  		void *entry, unsigned long size, unsigned long min,
6492  		unsigned long max, gfp_t gfp)
6493  {
6494  	int ret = 0;
6495  
6496  	MA_STATE(mas, mt, 0, 0);
6497  	if (!mt_is_alloc(mt))
6498  		return -EINVAL;
6499  
6500  	if (WARN_ON_ONCE(mt_is_reserved(entry)))
6501  		return -EINVAL;
6502  
6503  	mtree_lock(mt);
6504  retry:
6505  	ret = mas_empty_area_rev(&mas, min, max, size);
6506  	if (ret)
6507  		goto unlock;
6508  
6509  	mas_insert(&mas, entry);
6510  	/*
6511  	 * mas_nomem() may release the lock, causing the allocated area
6512  	 * to be unavailable, so try to allocate a free area again.
6513  	 */
6514  	if (mas_nomem(&mas, gfp))
6515  		goto retry;
6516  
6517  	if (mas_is_err(&mas))
6518  		ret = xa_err(mas.node);
6519  	else
6520  		*startp = mas.index;
6521  
6522  unlock:
6523  	mtree_unlock(mt);
6524  	mas_destroy(&mas);
6525  	return ret;
6526  }
6527  EXPORT_SYMBOL(mtree_alloc_rrange);
6528  
6529  /**
6530   * mtree_erase() - Find an index and erase the entire range.
6531   * @mt: The maple tree
6532   * @index: The index to erase
6533   *
6534   * Erasing is the same as a walk to an entry then a store of a NULL to that
6535   * ENTIRE range.  In fact, it is implemented as such using the advanced API.
6536   *
6537   * Return: The entry stored at the @index or %NULL
6538   */
6539  void *mtree_erase(struct maple_tree *mt, unsigned long index)
6540  {
6541  	void *entry = NULL;
6542  
6543  	MA_STATE(mas, mt, index, index);
6544  	trace_ma_op(__func__, &mas);
6545  
6546  	mtree_lock(mt);
6547  	entry = mas_erase(&mas);
6548  	mtree_unlock(mt);
6549  
6550  	return entry;
6551  }
6552  EXPORT_SYMBOL(mtree_erase);
6553  
6554  /*
6555   * mas_dup_free() - Free an incomplete duplication of a tree.
6556   * @mas: The maple state of an incomplete tree.
6557   *
6558   * The parameter @mas->node passed in indicates that the allocation failed on
6559   * this node. This function frees all nodes starting from @mas->node in the
6560   * reverse order of mas_dup_build(). There is no need to hold the source tree
6561   * lock at this time.
6562   */
6563  static void mas_dup_free(struct ma_state *mas)
6564  {
6565  	struct maple_node *node;
6566  	enum maple_type type;
6567  	void __rcu **slots;
6568  	unsigned char count, i;
6569  
6570  	/* Maybe the first node allocation failed. */
6571  	if (mas_is_none(mas))
6572  		return;
6573  
6574  	while (!mte_is_root(mas->node)) {
6575  		mas_ascend(mas);
6576  		if (mas->offset) {
6577  			mas->offset--;
6578  			do {
6579  				mas_descend(mas);
6580  				mas->offset = mas_data_end(mas);
6581  			} while (!mte_is_leaf(mas->node));
6582  
6583  			mas_ascend(mas);
6584  		}
6585  
6586  		node = mte_to_node(mas->node);
6587  		type = mte_node_type(mas->node);
6588  		slots = ma_slots(node, type);
6589  		count = mas_data_end(mas) + 1;
6590  		for (i = 0; i < count; i++)
6591  			((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
6592  		mt_free_bulk(count, slots);
6593  	}
6594  
6595  	node = mte_to_node(mas->node);
6596  	mt_free_one(node);
6597  }
6598  
6599  /*
6600   * mas_copy_node() - Copy a maple node and replace the parent.
6601   * @mas: The maple state of source tree.
6602   * @new_mas: The maple state of new tree.
6603   * @parent: The parent of the new node.
6604   *
6605   * Copy @mas->node to @new_mas->node, set @parent to be the parent of
6606   * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM.
6607   */
6608  static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas,
6609  		struct maple_pnode *parent)
6610  {
6611  	struct maple_node *node = mte_to_node(mas->node);
6612  	struct maple_node *new_node = mte_to_node(new_mas->node);
6613  	unsigned long val;
6614  
6615  	/* Copy the node completely. */
6616  	memcpy(new_node, node, sizeof(struct maple_node));
6617  	/* Update the parent node pointer. */
6618  	val = (unsigned long)node->parent & MAPLE_NODE_MASK;
6619  	new_node->parent = ma_parent_ptr(val | (unsigned long)parent);
6620  }
6621  
6622  /*
6623   * mas_dup_alloc() - Allocate child nodes for a maple node.
6624   * @mas: The maple state of source tree.
6625   * @new_mas: The maple state of new tree.
6626   * @gfp: The GFP_FLAGS to use for allocations.
6627   *
6628   * This function allocates child nodes for @new_mas->node during the duplication
6629   * process. If memory allocation fails, @mas is set to -ENOMEM.
6630   */
6631  static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
6632  		gfp_t gfp)
6633  {
6634  	struct maple_node *node = mte_to_node(mas->node);
6635  	struct maple_node *new_node = mte_to_node(new_mas->node);
6636  	enum maple_type type;
6637  	unsigned char request, count, i;
6638  	void __rcu **slots;
6639  	void __rcu **new_slots;
6640  	unsigned long val;
6641  
6642  	/* Allocate memory for child nodes. */
6643  	type = mte_node_type(mas->node);
6644  	new_slots = ma_slots(new_node, type);
6645  	request = mas_data_end(mas) + 1;
6646  	count = mt_alloc_bulk(gfp, request, (void **)new_slots);
6647  	if (unlikely(count < request)) {
6648  		memset(new_slots, 0, request * sizeof(void *));
6649  		mas_set_err(mas, -ENOMEM);
6650  		return;
6651  	}
6652  
6653  	/* Restore node type information in slots. */
6654  	slots = ma_slots(node, type);
6655  	for (i = 0; i < count; i++) {
6656  		val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
6657  		val &= MAPLE_NODE_MASK;
6658  		((unsigned long *)new_slots)[i] |= val;
6659  	}
6660  }
6661  
6662  /*
6663   * mas_dup_build() - Build a new maple tree from a source tree
6664   * @mas: The maple state of source tree, need to be in MAS_START state.
6665   * @new_mas: The maple state of new tree, need to be in MAS_START state.
6666   * @gfp: The GFP_FLAGS to use for allocations.
6667   *
6668   * This function builds a new tree in DFS preorder. If the memory allocation
6669   * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the
6670   * last node. mas_dup_free() will free the incomplete duplication of a tree.
6671   *
6672   * Note that the attributes of the two trees need to be exactly the same, and the
6673   * new tree needs to be empty, otherwise -EINVAL will be set in @mas.
6674   */
6675  static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
6676  		gfp_t gfp)
6677  {
6678  	struct maple_node *node;
6679  	struct maple_pnode *parent = NULL;
6680  	struct maple_enode *root;
6681  	enum maple_type type;
6682  
6683  	if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) ||
6684  	    unlikely(!mtree_empty(new_mas->tree))) {
6685  		mas_set_err(mas, -EINVAL);
6686  		return;
6687  	}
6688  
6689  	root = mas_start(mas);
6690  	if (mas_is_ptr(mas) || mas_is_none(mas))
6691  		goto set_new_tree;
6692  
6693  	node = mt_alloc_one(gfp);
6694  	if (!node) {
6695  		new_mas->status = ma_none;
6696  		mas_set_err(mas, -ENOMEM);
6697  		return;
6698  	}
6699  
6700  	type = mte_node_type(mas->node);
6701  	root = mt_mk_node(node, type);
6702  	new_mas->node = root;
6703  	new_mas->min = 0;
6704  	new_mas->max = ULONG_MAX;
6705  	root = mte_mk_root(root);
6706  	while (1) {
6707  		mas_copy_node(mas, new_mas, parent);
6708  		if (!mte_is_leaf(mas->node)) {
6709  			/* Only allocate child nodes for non-leaf nodes. */
6710  			mas_dup_alloc(mas, new_mas, gfp);
6711  			if (unlikely(mas_is_err(mas)))
6712  				return;
6713  		} else {
6714  			/*
6715  			 * This is the last leaf node and duplication is
6716  			 * completed.
6717  			 */
6718  			if (mas->max == ULONG_MAX)
6719  				goto done;
6720  
6721  			/* This is not the last leaf node and needs to go up. */
6722  			do {
6723  				mas_ascend(mas);
6724  				mas_ascend(new_mas);
6725  			} while (mas->offset == mas_data_end(mas));
6726  
6727  			/* Move to the next subtree. */
6728  			mas->offset++;
6729  			new_mas->offset++;
6730  		}
6731  
6732  		mas_descend(mas);
6733  		parent = ma_parent_ptr(mte_to_node(new_mas->node));
6734  		mas_descend(new_mas);
6735  		mas->offset = 0;
6736  		new_mas->offset = 0;
6737  	}
6738  done:
6739  	/* Specially handle the parent of the root node. */
6740  	mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas));
6741  set_new_tree:
6742  	/* Make them the same height */
6743  	new_mas->tree->ma_flags = mas->tree->ma_flags;
6744  	rcu_assign_pointer(new_mas->tree->ma_root, root);
6745  }
6746  
6747  /**
6748   * __mt_dup(): Duplicate an entire maple tree
6749   * @mt: The source maple tree
6750   * @new: The new maple tree
6751   * @gfp: The GFP_FLAGS to use for allocations
6752   *
6753   * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
6754   * traversal. It uses memcpy() to copy nodes in the source tree and allocate
6755   * new child nodes in non-leaf nodes. The new node is exactly the same as the
6756   * source node except for all the addresses stored in it. It will be faster than
6757   * traversing all elements in the source tree and inserting them one by one into
6758   * the new tree.
6759   * The user needs to ensure that the attributes of the source tree and the new
6760   * tree are the same, and the new tree needs to be an empty tree, otherwise
6761   * -EINVAL will be returned.
6762   * Note that the user needs to manually lock the source tree and the new tree.
6763   *
6764   * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If
6765   * the attributes of the two trees are different or the new tree is not an empty
6766   * tree.
6767   */
6768  int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
6769  {
6770  	int ret = 0;
6771  	MA_STATE(mas, mt, 0, 0);
6772  	MA_STATE(new_mas, new, 0, 0);
6773  
6774  	mas_dup_build(&mas, &new_mas, gfp);
6775  	if (unlikely(mas_is_err(&mas))) {
6776  		ret = xa_err(mas.node);
6777  		if (ret == -ENOMEM)
6778  			mas_dup_free(&new_mas);
6779  	}
6780  
6781  	return ret;
6782  }
6783  EXPORT_SYMBOL(__mt_dup);
6784  
6785  /**
6786   * mtree_dup(): Duplicate an entire maple tree
6787   * @mt: The source maple tree
6788   * @new: The new maple tree
6789   * @gfp: The GFP_FLAGS to use for allocations
6790   *
6791   * This function duplicates a maple tree using a Depth-First Search (DFS)
6792   * pre-order traversal. It uses memcpy() to copy nodes in the source tree and
6793   * allocates new child nodes for non-leaf nodes. Each new node is identical to
6794   * its source node except for the addresses stored in it. This is faster than
6795   * traversing all elements in the source tree and inserting them one by one
6796   * into the new tree.
6797   * The user must ensure that the attributes of the source tree and the new
6798   * tree are the same and that the new tree is empty, otherwise -EINVAL will be
6799   * returned. Unlike __mt_dup(), the locking of both trees is handled internally.
6800   *
6801   * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
6802   * the attributes of the two trees are different or the new tree is not an
6803   * empty tree.
6804   */
6805  int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
6806  {
6807  	int ret = 0;
6808  	MA_STATE(mas, mt, 0, 0);
6809  	MA_STATE(new_mas, new, 0, 0);
6810  
6811  	mas_lock(&new_mas);
6812  	mas_lock_nested(&mas, SINGLE_DEPTH_NESTING);
6813  	mas_dup_build(&mas, &new_mas, gfp);
6814  	mas_unlock(&mas);
6815  	if (unlikely(mas_is_err(&mas))) {
6816  		ret = xa_err(mas.node);
6817  		if (ret == -ENOMEM)
6818  			mas_dup_free(&new_mas);
6819  	}
6820  
6821  	mas_unlock(&new_mas);
6822  	return ret;
6823  }
6824  EXPORT_SYMBOL(mtree_dup);
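
/*
 * Illustrative usage sketch (not part of the upstream file): duplicating a
 * tree whose spinlock is managed by the maple tree itself.  The destination
 * is initialized here with the source's attributes; the helper name is
 * hypothetical.
 */
static int __maybe_unused mtree_dup_example(struct maple_tree *mt,
					    struct maple_tree *new)
{
	/* The new tree must be empty and share the source's flags. */
	mt_init_flags(new, mt_attr(mt));

	/* Locks both trees internally; returns 0, -ENOMEM or -EINVAL. */
	return mtree_dup(mt, new, GFP_KERNEL);
}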
6825  
6826  /**
6827   * __mt_destroy() - Walk and free all nodes of a locked maple tree.
6828   * @mt: The maple tree
6829   *
6830   * Note: Does not handle locking.
6831   */
6832  void __mt_destroy(struct maple_tree *mt)
6833  {
6834  	void *root = mt_root_locked(mt);
6835  
6836  	rcu_assign_pointer(mt->ma_root, NULL);
6837  	if (xa_is_node(root))
6838  		mte_destroy_walk(root, mt);
6839  
6840  	mt->ma_flags = mt_attr(mt);
6841  }
6842  EXPORT_SYMBOL_GPL(__mt_destroy);
6843  
6844  /**
6845   * mtree_destroy() - Destroy a maple tree
6846   * @mt: The maple tree
6847   *
6848   * Frees all resources used by the tree.  Handles locking.
6849   */
6850  void mtree_destroy(struct maple_tree *mt)
6851  {
6852  	mtree_lock(mt);
6853  	__mt_destroy(mt);
6854  	mtree_unlock(mt);
6855  }
6856  EXPORT_SYMBOL(mtree_destroy);
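
/*
 * Illustrative usage sketch (not part of the upstream file): a minimal tree
 * lifecycle ending in mtree_destroy(), which takes the lock and frees every
 * node.  The stored value and range are arbitrary.
 */
static void __maybe_unused mtree_destroy_example(void)
{
	DEFINE_MTREE(tree);

	/* Store one entry covering indices 10-20, then tear everything down. */
	if (mtree_store_range(&tree, 10, 20, xa_mk_value(42), GFP_KERNEL))
		return;

	mtree_destroy(&tree);	/* The tree is empty and reusable afterwards. */
}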
6857  
6858  /**
6859   * mt_find() - Search from the start up until an entry is found.
6860   * @mt: The maple tree
6861   * @index: Pointer which contains the start location of the search
6862   * @max: The maximum value of the search range
6863   *
6864   * Takes the RCU read lock internally to protect the search; this does not
6865   * protect the returned pointer once the RCU read lock is dropped.
6866   * See also: Documentation/core-api/maple_tree.rst
6867   *
6868   * If an entry is found, @index is updated to point to the next possible
6869   * entry, independent of whether the found entry occupies a single index or
6870   * a range of indices.
6871   *
6872   * Return: The entry at or after @index or %NULL
6873   */
6874  void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
6875  {
6876  	MA_STATE(mas, mt, *index, *index);
6877  	void *entry;
6878  #ifdef CONFIG_DEBUG_MAPLE_TREE
6879  	unsigned long copy = *index;
6880  #endif
6881  
6882  	trace_ma_read(__func__, &mas);
6883  
6884  	if ((*index) > max)
6885  		return NULL;
6886  
6887  	rcu_read_lock();
6888  retry:
6889  	entry = mas_state_walk(&mas);
6890  	if (mas_is_start(&mas))
6891  		goto retry;
6892  
6893  	if (unlikely(xa_is_zero(entry)))
6894  		entry = NULL;
6895  
6896  	if (entry)
6897  		goto unlock;
6898  
6899  	while (mas_is_active(&mas) && (mas.last < max)) {
6900  		entry = mas_next_slot(&mas, max, false);
6901  		if (likely(entry && !xa_is_zero(entry)))
6902  			break;
6903  	}
6904  
6905  	if (unlikely(xa_is_zero(entry)))
6906  		entry = NULL;
6907  unlock:
6908  	rcu_read_unlock();
6909  	if (likely(entry)) {
6910  		*index = mas.last + 1;
6911  #ifdef CONFIG_DEBUG_MAPLE_TREE
6912  		if (MT_WARN_ON(mt, (*index) && ((*index) <= copy)))
6913  			pr_err("index not increased! %lx <= %lx\n",
6914  			       *index, copy);
6915  #endif
6916  	}
6917  
6918  	return entry;
6919  }
6920  EXPORT_SYMBOL(mt_find);
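
/*
 * Illustrative usage sketch (not part of the upstream file): a single lookup
 * with mt_find().  On success @index is advanced past the returned range so
 * that a follow-up call resumes the search; the tree and the printout are
 * hypothetical.
 */
static void __maybe_unused mt_find_example(struct maple_tree *mt)
{
	unsigned long index = 5;
	void *entry;

	/* Find the first entry with an index in [5, 100]. */
	entry = mt_find(mt, &index, 100);
	if (entry)
		pr_debug("found " PTR_FMT ", search resumes at %lu\n",
			 entry, index);
}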
6921  
6922  /**
6923   * mt_find_after() - Search from the start up until an entry is found.
6924   * @mt: The maple tree
6925   * @index: Pointer which contains the start location of the search
6926   * @max: The maximum value to check
6927   *
6928   * Same as mt_find() except that it checks @index for 0 before
6929   * searching. If @index == 0, the search is aborted. This covers a wrap
6930   * around of @index to 0 in an iterator loop.
6931   *
6932   * Return: The entry at or after the @index or %NULL
6933   */
6934  void *mt_find_after(struct maple_tree *mt, unsigned long *index,
6935  		    unsigned long max)
6936  {
6937  	if (!(*index))
6938  		return NULL;
6939  
6940  	return mt_find(mt, index, max);
6941  }
6942  EXPORT_SYMBOL(mt_find_after);
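
/*
 * Illustrative usage sketch (not part of the upstream file): iterating over
 * every entry with mt_find()/mt_find_after(), which is the pattern the
 * mt_for_each() helper in maple_tree.h expands to.  The tree and the
 * printout are hypothetical.
 */
static void __maybe_unused mt_find_after_example(struct maple_tree *mt)
{
	unsigned long index = 0;
	void *entry;

	for (entry = mt_find(mt, &index, ULONG_MAX); entry;
	     entry = mt_find_after(mt, &index, ULONG_MAX)) {
		/* @index has already been advanced past the returned range. */
		pr_debug("entry " PTR_FMT " ends before index %lu\n",
			 entry, index);
	}
}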
6943  
6944  #ifdef CONFIG_DEBUG_MAPLE_TREE
6945  atomic_t maple_tree_tests_run;
6946  EXPORT_SYMBOL_GPL(maple_tree_tests_run);
6947  atomic_t maple_tree_tests_passed;
6948  EXPORT_SYMBOL_GPL(maple_tree_tests_passed);
6949  
6950  #ifndef __KERNEL__
6951  extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
6952  void mt_set_non_kernel(unsigned int val)
6953  {
6954  	kmem_cache_set_non_kernel(maple_node_cache, val);
6955  }
6956  
6957  extern void kmem_cache_set_callback(struct kmem_cache *cachep,
6958  		void (*callback)(void *));
6959  void mt_set_callback(void (*callback)(void *))
6960  {
6961  	kmem_cache_set_callback(maple_node_cache, callback);
6962  }
6963  
6964  extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private);
6965  void mt_set_private(void *private)
6966  {
6967  	kmem_cache_set_private(maple_node_cache, private);
6968  }
6969  
6970  extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
6971  unsigned long mt_get_alloc_size(void)
6972  {
6973  	return kmem_cache_get_alloc(maple_node_cache);
6974  }
6975  
6976  extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
6977  void mt_zero_nr_tallocated(void)
6978  {
6979  	kmem_cache_zero_nr_tallocated(maple_node_cache);
6980  }
6981  
6982  extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
6983  unsigned int mt_nr_tallocated(void)
6984  {
6985  	return kmem_cache_nr_tallocated(maple_node_cache);
6986  }
6987  
6988  extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
6989  unsigned int mt_nr_allocated(void)
6990  {
6991  	return kmem_cache_nr_allocated(maple_node_cache);
6992  }
6993  
6994  void mt_cache_shrink(void)
6995  {
6996  }
6997  #else
6998  /*
6999   * mt_cache_shrink() - For testing, don't use this.
7000   *
7001   * Certain testcases can trigger an OOM when combined with other memory
7002   * debugging configuration options.  This function is used to reduce the
7003   * possibility of an out of memory event due to kmem_cache objects remaining
7004   * around for longer than usual.
7005   */
7006  void mt_cache_shrink(void)
7007  {
7008  	kmem_cache_shrink(maple_node_cache);
7009  
7010  }
7011  EXPORT_SYMBOL_GPL(mt_cache_shrink);
7012  
7013  #endif /* not defined __KERNEL__ */
7014  /*
7015   * mas_get_slot() - Get the entry in the maple state node stored at @offset.
7016   * @mas: The maple state
7017   * @offset: The offset into the slot array to fetch.
7018   *
7019   * Return: The entry stored at @offset.
7020   */
7021  static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
7022  		unsigned char offset)
7023  {
7024  	return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
7025  			offset);
7026  }
7027  
7028  /* Depth first search, post-order */
7029  static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
7030  {
7031  
7032  	struct maple_enode *p, *mn = mas->node;
7033  	unsigned long p_min, p_max;
7034  
7035  	mas_next_node(mas, mas_mn(mas), max);
7036  	if (!mas_is_overflow(mas))
7037  		return;
7038  
7039  	if (mte_is_root(mn))
7040  		return;
7041  
7042  	mas->node = mn;
7043  	mas_ascend(mas);
7044  	do {
7045  		p = mas->node;
7046  		p_min = mas->min;
7047  		p_max = mas->max;
7048  		mas_prev_node(mas, 0);
7049  	} while (!mas_is_underflow(mas));
7050  
7051  	mas->node = p;
7052  	mas->max = p_max;
7053  	mas->min = p_min;
7054  }
7055  
7056  /* Tree validations */
7057  static void mt_dump_node(const struct maple_tree *mt, void *entry,
7058  		unsigned long min, unsigned long max, unsigned int depth,
7059  		enum mt_dump_format format);
7060  static void mt_dump_range(unsigned long min, unsigned long max,
7061  			  unsigned int depth, enum mt_dump_format format)
7062  {
7063  	static const char spaces[] = "                                ";
7064  
7065  	switch(format) {
7066  	case mt_dump_hex:
7067  		if (min == max)
7068  			pr_info("%.*s%lx: ", depth * 2, spaces, min);
7069  		else
7070  			pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max);
7071  		break;
7072  	case mt_dump_dec:
7073  		if (min == max)
7074  			pr_info("%.*s%lu: ", depth * 2, spaces, min);
7075  		else
7076  			pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
7077  	}
7078  }
7079  
7080  static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
7081  			  unsigned int depth, enum mt_dump_format format)
7082  {
7083  	mt_dump_range(min, max, depth, format);
7084  
7085  	if (xa_is_value(entry))
7086  		pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry),
7087  			xa_to_value(entry), entry);
7088  	else if (xa_is_zero(entry))
7089  		pr_cont("zero (%ld)\n", xa_to_internal(entry));
7090  	else if (mt_is_reserved(entry))
7091  		pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry);
7092  	else
7093  		pr_cont(PTR_FMT "\n", entry);
7094  }
7095  
7096  static void mt_dump_range64(const struct maple_tree *mt, void *entry,
7097  		unsigned long min, unsigned long max, unsigned int depth,
7098  		enum mt_dump_format format)
7099  {
7100  	struct maple_range_64 *node = &mte_to_node(entry)->mr64;
7101  	bool leaf = mte_is_leaf(entry);
7102  	unsigned long first = min;
7103  	int i;
7104  
7105  	pr_cont(" contents: ");
7106  	for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) {
7107  		switch(format) {
7108  		case mt_dump_hex:
7109  			pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
7110  			break;
7111  		case mt_dump_dec:
7112  			pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
7113  		}
7114  	}
7115  	pr_cont(PTR_FMT "\n", node->slot[i]);
7116  	for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
7117  		unsigned long last = max;
7118  
7119  		if (i < (MAPLE_RANGE64_SLOTS - 1))
7120  			last = node->pivot[i];
7121  		else if (!node->slot[i] && max != mt_node_max(entry))
7122  			break;
7123  		if (last == 0 && i > 0)
7124  			break;
7125  		if (leaf)
7126  			mt_dump_entry(mt_slot(mt, node->slot, i),
7127  					first, last, depth + 1, format);
7128  		else if (node->slot[i])
7129  			mt_dump_node(mt, mt_slot(mt, node->slot, i),
7130  					first, last, depth + 1, format);
7131  
7132  		if (last == max)
7133  			break;
7134  		if (last > max) {
7135  			switch(format) {
7136  			case mt_dump_hex:
7137  				pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
7138  					node, last, max, i);
7139  				break;
7140  			case mt_dump_dec:
7141  				pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
7142  					node, last, max, i);
7143  			}
7144  		}
7145  		first = last + 1;
7146  	}
7147  }
7148  
7149  static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
7150  	unsigned long min, unsigned long max, unsigned int depth,
7151  	enum mt_dump_format format)
7152  {
7153  	struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
7154  	unsigned long first = min;
7155  	int i;
7156  
7157  	pr_cont(" contents: ");
7158  	for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
7159  		switch (format) {
7160  		case mt_dump_hex:
7161  			pr_cont("%lx ", node->gap[i]);
7162  			break;
7163  		case mt_dump_dec:
7164  			pr_cont("%lu ", node->gap[i]);
7165  		}
7166  	}
7167  	pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
7168  	for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) {
7169  		switch (format) {
7170  		case mt_dump_hex:
7171  			pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
7172  			break;
7173  		case mt_dump_dec:
7174  			pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
7175  		}
7176  	}
7177  	pr_cont(PTR_FMT "\n", node->slot[i]);
7178  	for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
7179  		unsigned long last = max;
7180  
7181  		if (i < (MAPLE_ARANGE64_SLOTS - 1))
7182  			last = node->pivot[i];
7183  		else if (!node->slot[i])
7184  			break;
7185  		if (last == 0 && i > 0)
7186  			break;
7187  		if (node->slot[i])
7188  			mt_dump_node(mt, mt_slot(mt, node->slot, i),
7189  					first, last, depth + 1, format);
7190  
7191  		if (last == max)
7192  			break;
7193  		if (last > max) {
7194  			switch(format) {
7195  			case mt_dump_hex:
7196  				pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
7197  					node, last, max, i);
7198  				break;
7199  			case mt_dump_dec:
7200  				pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
7201  					node, last, max, i);
7202  			}
7203  		}
7204  		first = last + 1;
7205  	}
7206  }
7207  
7208  static void mt_dump_node(const struct maple_tree *mt, void *entry,
7209  		unsigned long min, unsigned long max, unsigned int depth,
7210  		enum mt_dump_format format)
7211  {
7212  	struct maple_node *node = mte_to_node(entry);
7213  	unsigned int type = mte_node_type(entry);
7214  	unsigned int i;
7215  
7216  	mt_dump_range(min, max, depth, format);
7217  
7218  	pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node,
7219  		depth, type, node ? node->parent : NULL);
7220  	switch (type) {
7221  	case maple_dense:
7222  		pr_cont("\n");
7223  		for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
7224  			if (min + i > max)
7225  				pr_cont("OUT OF RANGE: ");
7226  			mt_dump_entry(mt_slot(mt, node->slot, i),
7227  					min + i, min + i, depth, format);
7228  		}
7229  		break;
7230  	case maple_leaf_64:
7231  	case maple_range_64:
7232  		mt_dump_range64(mt, entry, min, max, depth, format);
7233  		break;
7234  	case maple_arange_64:
7235  		mt_dump_arange64(mt, entry, min, max, depth, format);
7236  		break;
7237  
7238  	default:
7239  		pr_cont(" UNKNOWN TYPE\n");
7240  	}
7241  }
7242  
7243  void mt_dump(const struct maple_tree *mt, enum mt_dump_format format)
7244  {
7245  	void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
7246  
7247  	pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n",
7248  		 mt, mt->ma_flags, mt_height(mt), entry);
7249  	if (xa_is_node(entry))
7250  		mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format);
7251  	else if (entry)
7252  		mt_dump_entry(entry, 0, 0, 0, format);
7253  	else
7254  		pr_info("(empty)\n");
7255  }
7256  EXPORT_SYMBOL_GPL(mt_dump);
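
/*
 * Illustrative usage sketch (not part of the upstream file): dumping a tree
 * while debugging.  mt_dump() reads the root under rcu_dereference_check(),
 * so hold either the tree lock or the RCU read lock around the call; the
 * helper name is hypothetical.
 */
static void __maybe_unused mt_dump_example(struct maple_tree *mt)
{
	rcu_read_lock();
	mt_dump(mt, mt_dump_hex);	/* mt_dump_dec prints decimal pivots */
	rcu_read_unlock();
}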
7257  
7258  /*
7259   * Calculate the maximum gap in a node and check if that's what is reported in
7260   * the parent (unless root).
7261   */
7262  static void mas_validate_gaps(struct ma_state *mas)
7263  {
7264  	struct maple_enode *mte = mas->node;
7265  	struct maple_node *p_mn, *node = mte_to_node(mte);
7266  	enum maple_type mt = mte_node_type(mas->node);
7267  	unsigned long gap = 0, max_gap = 0;
7268  	unsigned long p_end, p_start = mas->min;
7269  	unsigned char p_slot, offset;
7270  	unsigned long *gaps = NULL;
7271  	unsigned long *pivots = ma_pivots(node, mt);
7272  	unsigned int i;
7273  
7274  	if (ma_is_dense(mt)) {
7275  		for (i = 0; i < mt_slot_count(mte); i++) {
7276  			if (mas_get_slot(mas, i)) {
7277  				if (gap > max_gap)
7278  					max_gap = gap;
7279  				gap = 0;
7280  				continue;
7281  			}
7282  			gap++;
7283  		}
7284  		goto counted;
7285  	}
7286  
7287  	gaps = ma_gaps(node, mt);
7288  	for (i = 0; i < mt_slot_count(mte); i++) {
7289  		p_end = mas_safe_pivot(mas, pivots, i, mt);
7290  
7291  		if (!gaps) {
7292  			if (!mas_get_slot(mas, i))
7293  				gap = p_end - p_start + 1;
7294  		} else {
7295  			void *entry = mas_get_slot(mas, i);
7296  
7297  			gap = gaps[i];
7298  			MT_BUG_ON(mas->tree, !entry);
7299  
7300  			if (gap > p_end - p_start + 1) {
7301  				pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n",
7302  				       mas_mn(mas), i, gap, p_end, p_start,
7303  				       p_end - p_start + 1);
7304  				MT_BUG_ON(mas->tree, gap > p_end - p_start + 1);
7305  			}
7306  		}
7307  
7308  		if (gap > max_gap)
7309  			max_gap = gap;
7310  
7311  		p_start = p_end + 1;
7312  		if (p_end >= mas->max)
7313  			break;
7314  	}
7315  
7316  counted:
7317  	if (mt == maple_arange_64) {
7318  		MT_BUG_ON(mas->tree, !gaps);
7319  		offset = ma_meta_gap(node);
7320  		if (offset > i) {
7321  			pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset);
7322  			MT_BUG_ON(mas->tree, 1);
7323  		}
7324  
7325  		if (gaps[offset] != max_gap) {
7326  			pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n",
7327  			       node, offset, max_gap);
7328  			MT_BUG_ON(mas->tree, 1);
7329  		}
7330  
7331  		for (i++ ; i < mt_slot_count(mte); i++) {
7332  			if (gaps[i] != 0) {
7333  				pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n",
7334  				       node, i);
7335  				MT_BUG_ON(mas->tree, 1);
7336  			}
7337  		}
7338  	}
7339  
7340  	if (mte_is_root(mte))
7341  		return;
7342  
7343  	p_slot = mte_parent_slot(mas->node);
7344  	p_mn = mte_parent(mte);
7345  	MT_BUG_ON(mas->tree, max_gap > mas->max);
7346  	if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) {
7347  		pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap);
7348  		mt_dump(mas->tree, mt_dump_hex);
7349  		MT_BUG_ON(mas->tree, 1);
7350  	}
7351  }
7352  
7353  static void mas_validate_parent_slot(struct ma_state *mas)
7354  {
7355  	struct maple_node *parent;
7356  	struct maple_enode *node;
7357  	enum maple_type p_type;
7358  	unsigned char p_slot;
7359  	void __rcu **slots;
7360  	int i;
7361  
7362  	if (mte_is_root(mas->node))
7363  		return;
7364  
7365  	p_slot = mte_parent_slot(mas->node);
7366  	p_type = mas_parent_type(mas, mas->node);
7367  	parent = mte_parent(mas->node);
7368  	slots = ma_slots(parent, p_type);
7369  	MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
7370  
7371  	/* Check prev/next parent slot for duplicate node entry */
7372  
7373  	for (i = 0; i < mt_slots[p_type]; i++) {
7374  		node = mas_slot(mas, slots, i);
7375  		if (i == p_slot) {
7376  			if (node != mas->node)
7377  				pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n",
7378  					parent, i, mas_mn(mas));
7379  			MT_BUG_ON(mas->tree, node != mas->node);
7380  		} else if (node == mas->node) {
7381  			pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n",
7382  			       mas_mn(mas), parent, i, p_slot);
7383  			MT_BUG_ON(mas->tree, node == mas->node);
7384  		}
7385  	}
7386  }
7387  
7388  static void mas_validate_child_slot(struct ma_state *mas)
7389  {
7390  	enum maple_type type = mte_node_type(mas->node);
7391  	void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7392  	unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
7393  	struct maple_enode *child;
7394  	unsigned char i;
7395  
7396  	if (mte_is_leaf(mas->node))
7397  		return;
7398  
7399  	for (i = 0; i < mt_slots[type]; i++) {
7400  		child = mas_slot(mas, slots, i);
7401  
7402  		if (!child) {
7403  			pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n",
7404  			       mas_mn(mas), i);
7405  			MT_BUG_ON(mas->tree, 1);
7406  		}
7407  
7408  		if (mte_parent_slot(child) != i) {
7409  			pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n",
7410  			       mas_mn(mas), i, mte_to_node(child),
7411  			       mte_parent_slot(child));
7412  			MT_BUG_ON(mas->tree, 1);
7413  		}
7414  
7415  		if (mte_parent(child) != mte_to_node(mas->node)) {
7416  			pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n",
7417  			       mte_to_node(child), mte_parent(child),
7418  			       mte_to_node(mas->node));
7419  			MT_BUG_ON(mas->tree, 1);
7420  		}
7421  
7422  		if (i < mt_pivots[type] && pivots[i] == mas->max)
7423  			break;
7424  	}
7425  }
7426  
7427  /*
7428   * Validate that all pivots are within mas->min and mas->max, check that the
7429   * metadata ends where the maximum ends, and ensure there are no slots or
7430   * pivots set beyond the end of the data.
7431   */
7432  static void mas_validate_limits(struct ma_state *mas)
7433  {
7434  	int i;
7435  	unsigned long prev_piv = 0;
7436  	enum maple_type type = mte_node_type(mas->node);
7437  	void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7438  	unsigned long *pivots = ma_pivots(mas_mn(mas), type);
7439  
7440  	for (i = 0; i < mt_slots[type]; i++) {
7441  		unsigned long piv;
7442  
7443  		piv = mas_safe_pivot(mas, pivots, i, type);
7444  
7445  		if (!piv && (i != 0)) {
7446  			pr_err("Missing node limit pivot at " PTR_FMT "[%u]",
7447  			       mas_mn(mas), i);
7448  			MAS_WARN_ON(mas, 1);
7449  		}
7450  
7451  		if (prev_piv > piv) {
7452  			pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n",
7453  				mas_mn(mas), i, piv, prev_piv);
7454  			MAS_WARN_ON(mas, piv < prev_piv);
7455  		}
7456  
7457  		if (piv < mas->min) {
7458  			pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i,
7459  				piv, mas->min);
7460  			MAS_WARN_ON(mas, piv < mas->min);
7461  		}
7462  		if (piv > mas->max) {
7463  			pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i,
7464  				piv, mas->max);
7465  			MAS_WARN_ON(mas, piv > mas->max);
7466  		}
7467  		prev_piv = piv;
7468  		if (piv == mas->max)
7469  			break;
7470  	}
7471  
7472  	if (mas_data_end(mas) != i) {
7473  		pr_err("node " PTR_FMT ": data_end %u != the last slot offset %u\n",
7474  		       mas_mn(mas), mas_data_end(mas), i);
7475  		MT_BUG_ON(mas->tree, 1);
7476  	}
7477  
7478  	for (i += 1; i < mt_slots[type]; i++) {
7479  		void *entry = mas_slot(mas, slots, i);
7480  
7481  		if (entry && (i != mt_slots[type] - 1)) {
7482  			pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n",
7483  			       mas_mn(mas), i, entry);
7484  			MT_BUG_ON(mas->tree, entry != NULL);
7485  		}
7486  
7487  		if (i < mt_pivots[type]) {
7488  			unsigned long piv = pivots[i];
7489  
7490  			if (!piv)
7491  				continue;
7492  
7493  			pr_err(PTR_FMT "[%u] should not have piv %lu\n",
7494  			       mas_mn(mas), i, piv);
7495  			MAS_WARN_ON(mas, i < mt_pivots[type] - 1);
7496  		}
7497  	}
7498  }
7499  
7500  static void mt_validate_nulls(struct maple_tree *mt)
7501  {
7502  	void *entry, *last = (void *)1;
7503  	unsigned char offset = 0;
7504  	void __rcu **slots;
7505  	MA_STATE(mas, mt, 0, 0);
7506  
7507  	mas_start(&mas);
7508  	if (mas_is_none(&mas) || (mas_is_ptr(&mas)))
7509  		return;
7510  
7511  	while (!mte_is_leaf(mas.node))
7512  		mas_descend(&mas);
7513  
7514  	slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
7515  	do {
7516  		entry = mas_slot(&mas, slots, offset);
7517  		if (!last && !entry) {
7518  			pr_err("Sequential nulls end at " PTR_FMT "[%u]\n",
7519  				mas_mn(&mas), offset);
7520  		}
7521  		MT_BUG_ON(mt, !last && !entry);
7522  		last = entry;
7523  		if (offset == mas_data_end(&mas)) {
7524  			mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
7525  			if (mas_is_overflow(&mas))
7526  				return;
7527  			offset = 0;
7528  			slots = ma_slots(mte_to_node(mas.node),
7529  					 mte_node_type(mas.node));
7530  		} else {
7531  			offset++;
7532  		}
7533  
7534  	} while (!mas_is_overflow(&mas));
7535  }
7536  
7537  /*
7538   * Validate a maple tree by checking:
7539   * 1. The limits (pivots are within mas->min to mas->max)
7540   * 2. The gaps are correctly set in the parents
7541   */
7542  void mt_validate(struct maple_tree *mt)
7543  	__must_hold(mas->tree->ma_lock)
7544  {
7545  	unsigned char end;
7546  
7547  	MA_STATE(mas, mt, 0, 0);
7548  	mas_start(&mas);
7549  	if (!mas_is_active(&mas))
7550  		return;
7551  
7552  	while (!mte_is_leaf(mas.node))
7553  		mas_descend(&mas);
7554  
7555  	while (!mas_is_overflow(&mas)) {
7556  		MAS_WARN_ON(&mas, mte_dead_node(mas.node));
7557  		end = mas_data_end(&mas);
7558  		if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) &&
7559  				(!mte_is_root(mas.node)))) {
7560  			pr_err("Invalid size %u of " PTR_FMT "\n",
7561  			       end, mas_mn(&mas));
7562  		}
7563  
7564  		mas_validate_parent_slot(&mas);
7565  		mas_validate_limits(&mas);
7566  		mas_validate_child_slot(&mas);
7567  		if (mt_is_alloc(mt))
7568  			mas_validate_gaps(&mas);
7569  		mas_dfs_postorder(&mas, ULONG_MAX);
7570  	}
7571  	mt_validate_nulls(mt);
7572  }
7573  EXPORT_SYMBOL_GPL(mt_validate);
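
/*
 * Illustrative usage sketch (not part of the upstream file): running the
 * validator from a test.  mt_validate() expects the tree lock to be held,
 * as the __must_hold() annotation above indicates; the helper name is
 * hypothetical.
 */
static void __maybe_unused mt_validate_example(struct maple_tree *mt)
{
	mtree_lock(mt);
	mt_validate(mt);
	mtree_unlock(mt);
}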
7574  
7575  void mas_dump(const struct ma_state *mas)
7576  {
7577  	pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ",
7578  	       mas->tree, mas->node);
7579  	switch (mas->status) {
7580  	case ma_active:
7581  		pr_err("(ma_active)");
7582  		break;
7583  	case ma_none:
7584  		pr_err("(ma_none)");
7585  		break;
7586  	case ma_root:
7587  		pr_err("(ma_root)");
7588  		break;
7589  	case ma_start:
7590  		pr_err("(ma_start) ");
7591  		break;
7592  	case ma_pause:
7593  		pr_err("(ma_pause) ");
7594  		break;
7595  	case ma_overflow:
7596  		pr_err("(ma_overflow) ");
7597  		break;
7598  	case ma_underflow:
7599  		pr_err("(ma_underflow) ");
7600  		break;
7601  	case ma_error:
7602  		pr_err("(ma_error) ");
7603  		break;
7604  	}
7605  
7606  	pr_err("Store Type: ");
7607  	switch (mas->store_type) {
7608  	case wr_invalid:
7609  		pr_err("invalid store type\n");
7610  		break;
7611  	case wr_new_root:
7612  		pr_err("new_root\n");
7613  		break;
7614  	case wr_store_root:
7615  		pr_err("store_root\n");
7616  		break;
7617  	case wr_exact_fit:
7618  		pr_err("exact_fit\n");
7619  		break;
7620  	case wr_split_store:
7621  		pr_err("split_store\n");
7622  		break;
7623  	case wr_slot_store:
7624  		pr_err("slot_store\n");
7625  		break;
7626  	case wr_append:
7627  		pr_err("append\n");
7628  		break;
7629  	case wr_node_store:
7630  		pr_err("node_store\n");
7631  		break;
7632  	case wr_spanning_store:
7633  		pr_err("spanning_store\n");
7634  		break;
7635  	case wr_rebalance:
7636  		pr_err("rebalance\n");
7637  		break;
7638  	}
7639  
7640  	pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end,
7641  	       mas->index, mas->last);
7642  	pr_err("     min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n",
7643  	       mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags);
7644  	if (mas->index > mas->last)
7645  		pr_err("Check index & last\n");
7646  }
7647  EXPORT_SYMBOL_GPL(mas_dump);
7648  
7649  void mas_wr_dump(const struct ma_wr_state *wr_mas)
7650  {
7651  	pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n",
7652  	       wr_mas->node, wr_mas->r_min, wr_mas->r_max);
7653  	pr_err("        type=%u off_end=%u, node_end=%u, end_piv=%lx\n",
7654  	       wr_mas->type, wr_mas->offset_end, wr_mas->mas->end,
7655  	       wr_mas->end_piv);
7656  }
7657  EXPORT_SYMBOL_GPL(mas_wr_dump);
7658  
7659  #endif /* CONFIG_DEBUG_MAPLE_TREE */
7660