1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * Maple Tree implementation
4 * Copyright (c) 2018-2022 Oracle Corporation
5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
6 * Matthew Wilcox <willy@infradead.org>
7 * Copyright (c) 2023 ByteDance
8 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
9 */
10
11 /*
12 * DOC: Interesting implementation details of the Maple Tree
13 *
14 * Each node type has a number of slots for entries and a number of slots for
15 * pivots. In the case of dense nodes, the pivots are implied by the position
16 * and are simply the slot index + the minimum of the node.
17 *
18 * In regular B-Tree terms, pivots are called keys. The term pivot is used to
19 * indicate that the tree is specifying ranges. Pivots may appear in the
20 * subtree with an entry attached to the value whereas keys are unique to a
21 * specific position of a B-tree. Pivot values are inclusive of the slot with
22 * the same index.
23 *
24 *
25 * The following illustrates the layout of a range64 node's slots and pivots.
26 *
27 *
28 * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
29 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
30 *           │   │   │   │     │    │    │    │    └─ Implied maximum
31 *           │   │   │   │     │    │    │    └─ Pivot 14
32 *           │   │   │   │     │    │    └─ Pivot 13
33 *           │   │   │   │     │    └─ Pivot 12
34 *           │   │   │   │     └─ Pivot 11
35 *           │   │   │   └─ Pivot 2
36 *           │   │   └─ Pivot 1
37 *           │   └─ Pivot 0
38 *           └─ Implied minimum
39 *
40 * Slot contents:
41 * Internal (non-leaf) nodes contain pointers to other nodes.
42 * Leaf nodes contain entries.
43 *
44 * The location of interest is often referred to as an offset. All offsets have
45 * a slot, but the last offset has an implied pivot from the node above (or
46 * ULONG_MAX for the root node).
47 *
48 * Ranges complicate certain write activities. When modifying any of
49 * the B-tree variants, it is known that one entry will either be added or
50 * deleted. When modifying the Maple Tree, one store operation may overwrite
51 * the entire data set, or one half of the tree, or the middle half of the tree.
52 *
53 */
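
/*
 * A worked example of the layout above (illustrative values only): a leaf
 * covering [100, 255] with pivots {130, 200, 0, ...} and three slots in use
 * stores
 *
 *   slot 0 -> entries for [100, 130]   (implied minimum .. pivot 0)
 *   slot 1 -> entries for [131, 200]   (pivot 0 + 1     .. pivot 1)
 *   slot 2 -> entries for [201, 255]   (pivot 1 + 1     .. implied maximum)
 *
 * The zero pivot following slot 2 marks the end of the data; the implied
 * minimum and maximum come from the parent (or the full range for the root).
 */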
54
55
56 #include <linux/maple_tree.h>
57 #include <linux/xarray.h>
58 #include <linux/types.h>
59 #include <linux/export.h>
60 #include <linux/slab.h>
61 #include <linux/limits.h>
62 #include <asm/barrier.h>
63
64 #define CREATE_TRACE_POINTS
65 #include <trace/events/maple_tree.h>
66
67 /*
68 * Kernel pointer hashing renders much of the maple tree dump useless as tagged
69 * pointers get hashed to arbitrary values.
70 *
71 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
72 * permissible to bypass this. Otherwise remain cautious and retain the hashing.
73 *
74 * Userland doesn't know about %px so also use %p there.
75 */
76 #if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE)
77 #define PTR_FMT "%px"
78 #else
79 #define PTR_FMT "%p"
80 #endif
81
82 #define MA_ROOT_PARENT 1
83
84 /*
85 * Maple state flags
86 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
87 */
88 #define MA_STATE_PREALLOC 1
89
90 #define ma_parent_ptr(x) ((struct maple_pnode *)(x))
91 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
92 #define ma_mnode_ptr(x) ((struct maple_node *)(x))
93 #define ma_enode_ptr(x) ((struct maple_enode *)(x))
94 static struct kmem_cache *maple_node_cache;
95
96 #ifdef CONFIG_DEBUG_MAPLE_TREE
97 static const unsigned long mt_max[] = {
98 [maple_dense] = MAPLE_NODE_SLOTS,
99 [maple_leaf_64] = ULONG_MAX,
100 [maple_range_64] = ULONG_MAX,
101 [maple_arange_64] = ULONG_MAX,
102 };
103 #define mt_node_max(x) mt_max[mte_node_type(x)]
104 #endif
105
106 static const unsigned char mt_slots[] = {
107 [maple_dense] = MAPLE_NODE_SLOTS,
108 [maple_leaf_64] = MAPLE_RANGE64_SLOTS,
109 [maple_range_64] = MAPLE_RANGE64_SLOTS,
110 [maple_arange_64] = MAPLE_ARANGE64_SLOTS,
111 };
112 #define mt_slot_count(x) mt_slots[mte_node_type(x)]
113
114 static const unsigned char mt_pivots[] = {
115 [maple_dense] = 0,
116 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1,
117 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1,
118 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1,
119 };
120 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
121
122 static const unsigned char mt_min_slots[] = {
123 [maple_dense] = MAPLE_NODE_SLOTS / 2,
124 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
125 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
126 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1,
127 };
128 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
129
130 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2)
131 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1)
132
133 struct maple_big_node {
134 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
135 union {
136 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
137 struct {
138 unsigned long padding[MAPLE_BIG_NODE_GAPS];
139 unsigned long gap[MAPLE_BIG_NODE_GAPS];
140 };
141 };
142 unsigned char b_end;
143 enum maple_type type;
144 };
145
146 /*
147 * The maple_subtree_state is used to build a tree to replace a segment of an
148 * existing tree in a more atomic way. Any walkers of the older tree will hit a
149 * dead node and restart on updates.
150 */
151 struct maple_subtree_state {
152 struct ma_state *orig_l; /* Original left side of subtree */
153 struct ma_state *orig_r; /* Original right side of subtree */
154 struct ma_state *l; /* New left side of subtree */
155 struct ma_state *m; /* New middle of subtree (rare) */
156 struct ma_state *r; /* New right side of subtree */
157 struct ma_topiary *free; /* nodes to be freed */
158 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */
159 struct maple_big_node *bn;
160 };
161
162 #ifdef CONFIG_KASAN_STACK
163 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */
164 #define noinline_for_kasan noinline_for_stack
165 #else
166 #define noinline_for_kasan inline
167 #endif
168
169 /* Functions */
170 static inline struct maple_node *mt_alloc_one(gfp_t gfp)
171 {
172 return kmem_cache_alloc(maple_node_cache, gfp);
173 }
174
175 static inline void mt_free_bulk(size_t size, void __rcu **nodes)
176 {
177 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
178 }
179
180 static void mt_return_sheaf(struct slab_sheaf *sheaf)
181 {
182 kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf);
183 }
184
185 static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count)
186 {
187 return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count);
188 }
189
190 static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf,
191 unsigned int size)
192 {
193 return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size);
194 }
195
196 /*
197 * ma_free_rcu() - Use rcu callback to free a maple node
198 * @node: The node to free
199 *
200 * The maple tree uses the parent pointer to indicate this node is no longer in
201 * use and will be freed.
202 */
203 static void ma_free_rcu(struct maple_node *node)
204 {
205 WARN_ON(node->parent != ma_parent_ptr(node));
206 kfree_rcu(node, rcu);
207 }
208
209 static void mt_set_height(struct maple_tree *mt, unsigned char height)
210 {
211 unsigned int new_flags = mt->ma_flags;
212
213 new_flags &= ~MT_FLAGS_HEIGHT_MASK;
214 MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX);
215 new_flags |= height << MT_FLAGS_HEIGHT_OFFSET;
216 mt->ma_flags = new_flags;
217 }
218
219 static unsigned int mas_mt_height(struct ma_state *mas)
220 {
221 return mt_height(mas->tree);
222 }
223
224 static inline unsigned int mt_attr(struct maple_tree *mt)
225 {
226 return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
227 }
228
229 static __always_inline enum maple_type mte_node_type(
230 const struct maple_enode *entry)
231 {
232 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
233 MAPLE_NODE_TYPE_MASK;
234 }
235
236 static __always_inline bool ma_is_dense(const enum maple_type type)
237 {
238 return type < maple_leaf_64;
239 }
240
241 static __always_inline bool ma_is_leaf(const enum maple_type type)
242 {
243 return type < maple_range_64;
244 }
245
246 static __always_inline bool mte_is_leaf(const struct maple_enode *entry)
247 {
248 return ma_is_leaf(mte_node_type(entry));
249 }
250
251 /*
252 * We also reserve values with the bottom two bits set to '10' which are
253 * below 4096
254 */
255 static __always_inline bool mt_is_reserved(const void *entry)
256 {
257 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
258 xa_is_internal(entry);
259 }
260
261 static __always_inline void mas_set_err(struct ma_state *mas, long err)
262 {
263 mas->node = MA_ERROR(err);
264 mas->status = ma_error;
265 }
266
267 static __always_inline bool mas_is_ptr(const struct ma_state *mas)
268 {
269 return mas->status == ma_root;
270 }
271
272 static __always_inline bool mas_is_start(const struct ma_state *mas)
273 {
274 return mas->status == ma_start;
275 }
276
277 static __always_inline bool mas_is_none(const struct ma_state *mas)
278 {
279 return mas->status == ma_none;
280 }
281
282 static __always_inline bool mas_is_paused(const struct ma_state *mas)
283 {
284 return mas->status == ma_pause;
285 }
286
287 static __always_inline bool mas_is_overflow(struct ma_state *mas)
288 {
289 return mas->status == ma_overflow;
290 }
291
292 static inline bool mas_is_underflow(struct ma_state *mas)
293 {
294 return mas->status == ma_underflow;
295 }
296
297 static __always_inline struct maple_node *mte_to_node(
298 const struct maple_enode *entry)
299 {
300 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
301 }
302
303 /*
304 * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
305 * @entry: The maple encoded node
306 *
307 * Return: a maple topiary pointer
308 */
309 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
310 {
311 return (struct maple_topiary *)
312 ((unsigned long)entry & ~MAPLE_NODE_MASK);
313 }
314
315 /*
316 * mas_mn() - Get the maple state node.
317 * @mas: The maple state
318 *
319 * Return: the maple node (not encoded - bare pointer).
320 */
321 static inline struct maple_node *mas_mn(const struct ma_state *mas)
322 {
323 return mte_to_node(mas->node);
324 }
325
326 /*
327 * mte_set_node_dead() - Set a maple encoded node as dead.
328 * @mn: The maple encoded node.
329 */
330 static inline void mte_set_node_dead(struct maple_enode *mn)
331 {
332 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
333 smp_wmb(); /* Needed for RCU */
334 }
335
336 /* Bit 1 indicates the root is a node */
337 #define MAPLE_ROOT_NODE 0x02
338 /* maple_type is stored in bits 3-6 */
339 #define MAPLE_ENODE_TYPE_SHIFT 0x03
340 /* Bit 2 means a NULL somewhere below */
341 #define MAPLE_ENODE_NULL 0x04
342
343 static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
344 enum maple_type type)
345 {
346 return (void *)((unsigned long)node |
347 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
348 }
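
/*
 * For example, assuming the enum maple_type ordering from maple_tree.h (where
 * maple_leaf_64 == 1), encoding a 256B-aligned node at the example address
 * 0xffff888005a61500 as a leaf gives
 *
 *	0xffff888005a61500 | (1 << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL
 *	== 0xffff888005a6150c
 *
 * mte_to_node() recovers the bare pointer by masking off the low bits and
 * mte_node_type() reads the type back out of bits 3-6.
 */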
349
350 static inline void *mte_mk_root(const struct maple_enode *node)
351 {
352 return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
353 }
354
355 static inline void *mte_safe_root(const struct maple_enode *node)
356 {
357 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
358 }
359
360 static inline void __maybe_unused *mte_set_full(const struct maple_enode *node)
361 {
362 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
363 }
364
365 static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node)
366 {
367 return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
368 }
369
370 static inline bool __maybe_unused mte_has_null(const struct maple_enode *node)
371 {
372 return (unsigned long)node & MAPLE_ENODE_NULL;
373 }
374
375 static __always_inline bool ma_is_root(struct maple_node *node)
376 {
377 return ((unsigned long)node->parent & MA_ROOT_PARENT);
378 }
379
380 static __always_inline bool mte_is_root(const struct maple_enode *node)
381 {
382 return ma_is_root(mte_to_node(node));
383 }
384
385 static inline bool mas_is_root_limits(const struct ma_state *mas)
386 {
387 return !mas->min && mas->max == ULONG_MAX;
388 }
389
390 static __always_inline bool mt_is_alloc(struct maple_tree *mt)
391 {
392 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
393 }
394
395 /*
396 * The Parent Pointer
397 * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
398 * When storing 32 or 64 bit values, the offset can fit into 5 bits. The 16
399 * bit values need an extra bit to store the offset. This extra bit comes from
400 * a reuse of the last bit in the node type. This is possible by using bit 1 to
401 * indicate if bit 2 is part of the type or the slot.
402 *
403 * Node types:
404 * 0b??1 = Root
405 * 0b?00 = 16 bit nodes
406 * 0b010 = 32 bit nodes
407 * 0b110 = 64 bit nodes
408 *
409 * Slot size and alignment
410 * 0b??1 : Root
411 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7
412 * 0b010 : 32 bit values, type in 0-2, slot in 3-7
413 * 0b110 : 64 bit values, type in 0-2, slot in 3-7
414 */
415
416 #define MAPLE_PARENT_ROOT 0x01
417
418 #define MAPLE_PARENT_SLOT_SHIFT 0x03
419 #define MAPLE_PARENT_SLOT_MASK 0xF8
420
421 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02
422 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC
423
424 #define MAPLE_PARENT_RANGE64 0x06
425 #define MAPLE_PARENT_RANGE32 0x02
426 #define MAPLE_PARENT_NOT_RANGE16 0x02
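
/*
 * As an illustration (example address only): a child in slot 3 of a
 * maple_range_64 parent located at 0xffff888005a61400 stores the parent
 * pointer
 *
 *	0xffff888005a61400 | (3 << MAPLE_PARENT_SLOT_SHIFT) | MAPLE_PARENT_RANGE64
 *	== 0xffff888005a6141e
 *
 * mte_parent() masks off the low byte to recover the parent node,
 * mte_parent_slot() shifts out the slot (3), and mas_parent_type() uses the
 * remaining low bits (0x06) to identify a range64 or arange64 parent.
 */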
427
428 /*
429 * mte_parent_shift() - Get the parent shift for the slot storage.
430 * @parent: The parent pointer cast as an unsigned long
431 * Return: The shift into that pointer to the start of the slot
432 */
433 static inline unsigned long mte_parent_shift(unsigned long parent)
434 {
435 /* Note bit 1 == 0 means 16B */
436 if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
437 return MAPLE_PARENT_SLOT_SHIFT;
438
439 return MAPLE_PARENT_16B_SLOT_SHIFT;
440 }
441
442 /*
443 * mte_parent_slot_mask() - Get the slot mask for the parent.
444 * @parent: The parent pointer cast as an unsigned long.
445 * Return: The slot mask for that parent.
446 */
447 static inline unsigned long mte_parent_slot_mask(unsigned long parent)
448 {
449 /* Note bit 1 == 0 means 16B */
450 if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
451 return MAPLE_PARENT_SLOT_MASK;
452
453 return MAPLE_PARENT_16B_SLOT_MASK;
454 }
455
456 /*
457 * mas_parent_type() - Return the maple_type of the parent from the stored
458 * parent type.
459 * @mas: The maple state
460 * @enode: The maple_enode to extract the parent's enum
461 * Return: The node->parent maple_type
462 */
463 static inline
464 enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
465 {
466 unsigned long p_type;
467
468 p_type = (unsigned long)mte_to_node(enode)->parent;
469 if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
470 return 0;
471
472 p_type &= MAPLE_NODE_MASK;
473 p_type &= ~mte_parent_slot_mask(p_type);
474 switch (p_type) {
475 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
476 if (mt_is_alloc(mas->tree))
477 return maple_arange_64;
478 return maple_range_64;
479 }
480
481 return 0;
482 }
483
484 /*
485 * mas_set_parent() - Set the parent node and encode the slot
486 * @mas: The maple state
487 * @enode: The encoded maple node.
488 * @parent: The encoded maple node that is the parent of @enode.
489 * @slot: The slot that @enode resides in @parent.
490 *
491 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
492 * parent type.
493 */
494 static inline
495 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode,
496 const struct maple_enode *parent, unsigned char slot)
497 {
498 unsigned long val = (unsigned long)parent;
499 unsigned long shift;
500 unsigned long type;
501 enum maple_type p_type = mte_node_type(parent);
502
503 MAS_BUG_ON(mas, p_type == maple_dense);
504 MAS_BUG_ON(mas, p_type == maple_leaf_64);
505
506 switch (p_type) {
507 case maple_range_64:
508 case maple_arange_64:
509 shift = MAPLE_PARENT_SLOT_SHIFT;
510 type = MAPLE_PARENT_RANGE64;
511 break;
512 default:
513 case maple_dense:
514 case maple_leaf_64:
515 shift = type = 0;
516 break;
517 }
518
519 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
520 val |= (slot << shift) | type;
521 mte_to_node(enode)->parent = ma_parent_ptr(val);
522 }
523
524 /*
525 * mte_parent_slot() - get the parent slot of @enode.
526 * @enode: The encoded maple node.
527 *
528 * Return: The slot in the parent node where @enode resides.
529 */
530 static __always_inline
531 unsigned int mte_parent_slot(const struct maple_enode *enode)
532 {
533 unsigned long val = (unsigned long)mte_to_node(enode)->parent;
534
535 if (unlikely(val & MA_ROOT_PARENT))
536 return 0;
537
538 /*
539 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
540 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
541 */
542 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
543 }
544
545 /*
546 * mte_parent() - Get the parent of @node.
547 * @enode: The encoded maple node.
548 *
549 * Return: The parent maple node.
550 */
551 static __always_inline
552 struct maple_node *mte_parent(const struct maple_enode *enode)
553 {
554 return (void *)((unsigned long)
555 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
556 }
557
558 /*
559 * ma_dead_node() - check if the @enode is dead.
560 * @enode: The encoded maple node
561 *
562 * Return: true if dead, false otherwise.
563 */
564 static __always_inline bool ma_dead_node(const struct maple_node *node)
565 {
566 struct maple_node *parent;
567
568 /* Do not reorder reads from the node prior to the parent check */
569 smp_rmb();
570 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
571 return (parent == node);
572 }
573
574 /*
575 * mte_dead_node() - check if the @enode is dead.
576 * @enode: The encoded maple node
577 *
578 * Return: true if dead, false otherwise.
579 */
580 static __always_inline bool mte_dead_node(const struct maple_enode *enode)
581 {
582 struct maple_node *node;
583
584 node = mte_to_node(enode);
585 return ma_dead_node(node);
586 }
587
588 /*
589 * ma_pivots() - Get a pointer to the maple node pivots.
590 * @node: the maple node
591 * @type: the node type
592 *
593 * In the event of a dead node, this array may be %NULL
594 *
595 * Return: A pointer to the maple node pivots
596 */
597 static inline unsigned long *ma_pivots(struct maple_node *node,
598 enum maple_type type)
599 {
600 switch (type) {
601 case maple_arange_64:
602 return node->ma64.pivot;
603 case maple_range_64:
604 case maple_leaf_64:
605 return node->mr64.pivot;
606 case maple_dense:
607 return NULL;
608 }
609 return NULL;
610 }
611
612 /*
613 * ma_gaps() - Get a pointer to the maple node gaps.
614 * @node: the maple node
615 * @type: the node type
616 *
617 * Return: A pointer to the maple node gaps
618 */
619 static inline unsigned long *ma_gaps(struct maple_node *node,
620 enum maple_type type)
621 {
622 switch (type) {
623 case maple_arange_64:
624 return node->ma64.gap;
625 case maple_range_64:
626 case maple_leaf_64:
627 case maple_dense:
628 return NULL;
629 }
630 return NULL;
631 }
632
633 /*
634 * mas_safe_pivot() - get the pivot at @piv or mas->max.
635 * @mas: The maple state
636 * @pivots: The pointer to the maple node pivots
637 * @piv: The pivot to fetch
638 * @type: The maple node type
639 *
640 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
641 * otherwise.
642 */
643 static __always_inline unsigned long
644 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
645 unsigned char piv, enum maple_type type)
646 {
647 if (piv >= mt_pivots[type])
648 return mas->max;
649
650 return pivots[piv];
651 }
652
653 /*
654 * mas_safe_min() - Return the minimum for a given offset.
655 * @mas: The maple state
656 * @pivots: The pointer to the maple node pivots
657 * @offset: The offset into the pivot array
658 *
659 * Return: The minimum range value that is contained in @offset.
660 */
661 static inline unsigned long
662 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
663 {
664 if (likely(offset))
665 return pivots[offset - 1] + 1;
666
667 return mas->min;
668 }
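
/*
 * Together, mas_safe_min() and mas_safe_pivot() bound the range covered by an
 * offset. For instance, in a node spanning [1000, 1999] with pivots
 * {1200, 1500}: offset 0 covers [1000, 1200], offset 1 covers [1201, 1500],
 * and the last offset has no pivot of its own so it falls back to mas->max.
 */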
669
670 /*
671 * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
672 * @mn: The encoded maple node
673 * @piv: The pivot offset
674 * @val: The value of the pivot
675 */
676 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
677 unsigned long val)
678 {
679 struct maple_node *node = mte_to_node(mn);
680 enum maple_type type = mte_node_type(mn);
681
682 BUG_ON(piv >= mt_pivots[type]);
683 switch (type) {
684 case maple_range_64:
685 case maple_leaf_64:
686 node->mr64.pivot[piv] = val;
687 break;
688 case maple_arange_64:
689 node->ma64.pivot[piv] = val;
690 break;
691 case maple_dense:
692 break;
693 }
694
695 }
696
697 /*
698 * ma_slots() - Get a pointer to the maple node slots.
699 * @mn: The maple node
700 * @mt: The maple node type
701 *
702 * Return: A pointer to the maple node slots
703 */
704 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
705 {
706 switch (mt) {
707 case maple_arange_64:
708 return mn->ma64.slot;
709 case maple_range_64:
710 case maple_leaf_64:
711 return mn->mr64.slot;
712 case maple_dense:
713 return mn->slot;
714 }
715
716 return NULL;
717 }
718
719 static inline bool mt_write_locked(const struct maple_tree *mt)
720 {
721 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
722 lockdep_is_held(&mt->ma_lock);
723 }
724
725 static __always_inline bool mt_locked(const struct maple_tree *mt)
726 {
727 return mt_external_lock(mt) ? mt_lock_is_held(mt) :
728 lockdep_is_held(&mt->ma_lock);
729 }
730
731 static __always_inline void *mt_slot(const struct maple_tree *mt,
732 void __rcu **slots, unsigned char offset)
733 {
734 return rcu_dereference_check(slots[offset], mt_locked(mt));
735 }
736
737 static __always_inline void *mt_slot_locked(struct maple_tree *mt,
738 void __rcu **slots, unsigned char offset)
739 {
740 return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
741 }
742 /*
743 * mas_slot_locked() - Get the slot value when holding the maple tree lock.
744 * @mas: The maple state
745 * @slots: The pointer to the slots
746 * @offset: The offset into the slots array to fetch
747 *
748 * Return: The entry stored in @slots at the @offset.
749 */
750 static __always_inline void *mas_slot_locked(struct ma_state *mas,
751 void __rcu **slots, unsigned char offset)
752 {
753 return mt_slot_locked(mas->tree, slots, offset);
754 }
755
756 /*
757 * mas_slot() - Get the slot value when not holding the maple tree lock.
758 * @mas: The maple state
759 * @slots: The pointer to the slots
760 * @offset: The offset into the slots array to fetch
761 *
762 * Return: The entry stored in @slots at the @offset
763 */
764 static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
765 unsigned char offset)
766 {
767 return mt_slot(mas->tree, slots, offset);
768 }
769
770 /*
771 * mas_root() - Get the maple tree root.
772 * @mas: The maple state.
773 *
774 * Return: The pointer to the root of the tree
775 */
776 static __always_inline void *mas_root(struct ma_state *mas)
777 {
778 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
779 }
780
781 static inline void *mt_root_locked(struct maple_tree *mt)
782 {
783 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
784 }
785
786 /*
787 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
788 * @mas: The maple state.
789 *
790 * Return: The pointer to the root of the tree
791 */
792 static inline void *mas_root_locked(struct ma_state *mas)
793 {
794 return mt_root_locked(mas->tree);
795 }
796
797 static inline struct maple_metadata *ma_meta(struct maple_node *mn,
798 enum maple_type mt)
799 {
800 switch (mt) {
801 case maple_arange_64:
802 return &mn->ma64.meta;
803 default:
804 return &mn->mr64.meta;
805 }
806 }
807
808 /*
809 * ma_set_meta() - Set the metadata information of a node.
810 * @mn: The maple node
811 * @mt: The maple node type
812 * @offset: The offset of the highest sub-gap in this node.
813 * @end: The end of the data in this node.
814 */
815 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
816 unsigned char offset, unsigned char end)
817 {
818 struct maple_metadata *meta = ma_meta(mn, mt);
819
820 meta->gap = offset;
821 meta->end = end;
822 }
823
824 /*
825 * mt_clear_meta() - clear the metadata information of a node, if it exists
826 * @mt: The maple tree
827 * @mn: The maple node
828 * @type: The maple node type
829 */
830 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
831 enum maple_type type)
832 {
833 struct maple_metadata *meta;
834 unsigned long *pivots;
835 void __rcu **slots;
836 void *next;
837
838 switch (type) {
839 case maple_range_64:
840 pivots = mn->mr64.pivot;
841 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
842 slots = mn->mr64.slot;
843 next = mt_slot_locked(mt, slots,
844 MAPLE_RANGE64_SLOTS - 1);
845 if (unlikely((mte_to_node(next) &&
846 mte_node_type(next))))
847 return; /* no metadata, could be node */
848 }
849 fallthrough;
850 case maple_arange_64:
851 meta = ma_meta(mn, type);
852 break;
853 default:
854 return;
855 }
856
857 meta->gap = 0;
858 meta->end = 0;
859 }
860
861 /*
862 * ma_meta_end() - Get the data end of a node from the metadata
863 * @mn: The maple node
864 * @mt: The maple node type
865 */
866 static inline unsigned char ma_meta_end(struct maple_node *mn,
867 enum maple_type mt)
868 {
869 struct maple_metadata *meta = ma_meta(mn, mt);
870
871 return meta->end;
872 }
873
874 /*
875 * ma_meta_gap() - Get the largest gap location of a node from the metadata
876 * @mn: The maple node
877 */
878 static inline unsigned char ma_meta_gap(struct maple_node *mn)
879 {
880 return mn->ma64.meta.gap;
881 }
882
883 /*
884 * ma_set_meta_gap() - Set the largest gap location in a node's metadata
885 * @mn: The maple node
886 * @mt: The maple node type
887 * @offset: The location of the largest gap.
888 */
889 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
890 unsigned char offset)
891 {
892
893 struct maple_metadata *meta = ma_meta(mn, mt);
894
895 meta->gap = offset;
896 }
897
898 /*
899 * mat_add() - Add a @dead_enode to the ma_topiary list of dead nodes.
900 * @mat: the ma_topiary, a linked list of dead nodes.
901 * @dead_enode: the node to be marked as dead and added to the tail of the list
902 *
903 * Add the @dead_enode to the linked list in @mat.
904 */
905 static inline void mat_add(struct ma_topiary *mat,
906 struct maple_enode *dead_enode)
907 {
908 mte_set_node_dead(dead_enode);
909 mte_to_mat(dead_enode)->next = NULL;
910 if (!mat->tail) {
911 mat->tail = mat->head = dead_enode;
912 return;
913 }
914
915 mte_to_mat(mat->tail)->next = dead_enode;
916 mat->tail = dead_enode;
917 }
918
919 static void mt_free_walk(struct rcu_head *head);
920 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
921 bool free);
922 /*
923 * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
924 * @mas: the maple state
925 * @mat: the ma_topiary linked list of dead nodes to free.
926 *
927 * Destroy walk a dead list.
928 */
929 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
930 {
931 struct maple_enode *next;
932 struct maple_node *node;
933 bool in_rcu = mt_in_rcu(mas->tree);
934
935 while (mat->head) {
936 next = mte_to_mat(mat->head)->next;
937 node = mte_to_node(mat->head);
938 mt_destroy_walk(mat->head, mas->tree, !in_rcu);
939 if (in_rcu)
940 call_rcu(&node->rcu, mt_free_walk);
941 mat->head = next;
942 }
943 }
944 /*
945 * mas_descend() - Descend into the slot stored in the ma_state.
946 * @mas: the maple state.
947 *
948 * Note: Not RCU safe, only use in write side or debug code.
949 */
950 static inline void mas_descend(struct ma_state *mas)
951 {
952 enum maple_type type;
953 unsigned long *pivots;
954 struct maple_node *node;
955 void __rcu **slots;
956
957 node = mas_mn(mas);
958 type = mte_node_type(mas->node);
959 pivots = ma_pivots(node, type);
960 slots = ma_slots(node, type);
961
962 if (mas->offset)
963 mas->min = pivots[mas->offset - 1] + 1;
964 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
965 mas->node = mas_slot(mas, slots, mas->offset);
966 }
967
968 /*
969 * mas_ascend() - Walk up a level of the tree.
970 * @mas: The maple state
971 *
972 * Sets the @mas->max and @mas->min for the parent node of mas->node. This
973 * may cause several levels of walking up to find the correct min and max.
974 * May find a dead node which will cause a premature return.
975 * Return: 1 on dead node, 0 otherwise
976 */
977 static int mas_ascend(struct ma_state *mas)
978 {
979 struct maple_enode *p_enode; /* parent enode. */
980 struct maple_enode *a_enode; /* ancestor enode. */
981 struct maple_node *a_node; /* ancestor node. */
982 struct maple_node *p_node; /* parent node. */
983 unsigned char a_slot;
984 enum maple_type a_type;
985 unsigned long min, max;
986 unsigned long *pivots;
987 bool set_max = false, set_min = false;
988
989 a_node = mas_mn(mas);
990 if (ma_is_root(a_node)) {
991 mas->offset = 0;
992 return 0;
993 }
994
995 p_node = mte_parent(mas->node);
996 if (unlikely(a_node == p_node))
997 return 1;
998
999 a_type = mas_parent_type(mas, mas->node);
1000 mas->offset = mte_parent_slot(mas->node);
1001 a_enode = mt_mk_node(p_node, a_type);
1002
1003 /* Check to make sure all parent information is still accurate */
1004 if (p_node != mte_parent(mas->node))
1005 return 1;
1006
1007 mas->node = a_enode;
1008
1009 if (mte_is_root(a_enode)) {
1010 mas->max = ULONG_MAX;
1011 mas->min = 0;
1012 return 0;
1013 }
1014
1015 min = 0;
1016 max = ULONG_MAX;
1017
1018 /*
1019 * !mas->offset implies that parent node min == mas->min.
1020 * mas->offset > 0 implies that we need to walk up to find the
1021 * implied pivot min.
1022 */
1023 if (!mas->offset) {
1024 min = mas->min;
1025 set_min = true;
1026 }
1027
1028 if (mas->max == ULONG_MAX)
1029 set_max = true;
1030
1031 do {
1032 p_enode = a_enode;
1033 a_type = mas_parent_type(mas, p_enode);
1034 a_node = mte_parent(p_enode);
1035 a_slot = mte_parent_slot(p_enode);
1036 a_enode = mt_mk_node(a_node, a_type);
1037 pivots = ma_pivots(a_node, a_type);
1038
1039 if (unlikely(ma_dead_node(a_node)))
1040 return 1;
1041
1042 if (!set_min && a_slot) {
1043 set_min = true;
1044 min = pivots[a_slot - 1] + 1;
1045 }
1046
1047 if (!set_max && a_slot < mt_pivots[a_type]) {
1048 set_max = true;
1049 max = pivots[a_slot];
1050 }
1051
1052 if (unlikely(ma_dead_node(a_node)))
1053 return 1;
1054
1055 if (unlikely(ma_is_root(a_node)))
1056 break;
1057
1058 } while (!set_min || !set_max);
1059
1060 mas->max = max;
1061 mas->min = min;
1062 return 0;
1063 }
1064
1065 /*
1066 * mas_pop_node() - Get a previously allocated maple node from the maple state.
1067 * @mas: The maple state
1068 *
1069 * Return: A pointer to a maple node.
1070 */
1071 static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas)
1072 {
1073 struct maple_node *ret;
1074
1075 if (mas->alloc) {
1076 ret = mas->alloc;
1077 mas->alloc = NULL;
1078 goto out;
1079 }
1080
1081 if (WARN_ON_ONCE(!mas->sheaf))
1082 return NULL;
1083
1084 ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf);
1085
1086 out:
1087 memset(ret, 0, sizeof(*ret));
1088 return ret;
1089 }
1090
1091 /*
1092 * mas_alloc_nodes() - Allocate nodes into a maple state
1093 * @mas: The maple state
1094 * @gfp: The GFP Flags
1095 */
1096 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
1097 {
1098 if (!mas->node_request)
1099 return;
1100
1101 if (mas->node_request == 1) {
1102 if (mas->sheaf)
1103 goto use_sheaf;
1104
1105 if (mas->alloc)
1106 return;
1107
1108 mas->alloc = mt_alloc_one(gfp);
1109 if (!mas->alloc)
1110 goto error;
1111
1112 mas->node_request = 0;
1113 return;
1114 }
1115
1116 use_sheaf:
1117 if (unlikely(mas->alloc)) {
1118 kfree(mas->alloc);
1119 mas->alloc = NULL;
1120 }
1121
1122 if (mas->sheaf) {
1123 unsigned long refill;
1124
1125 refill = mas->node_request;
1126 if (kmem_cache_sheaf_size(mas->sheaf) >= refill) {
1127 mas->node_request = 0;
1128 return;
1129 }
1130
1131 if (mt_refill_sheaf(gfp, &mas->sheaf, refill))
1132 goto error;
1133
1134 mas->node_request = 0;
1135 return;
1136 }
1137
1138 mas->sheaf = mt_get_sheaf(gfp, mas->node_request);
1139 if (likely(mas->sheaf)) {
1140 mas->node_request = 0;
1141 return;
1142 }
1143
1144 error:
1145 mas_set_err(mas, -ENOMEM);
1146 }
1147
1148 static inline void mas_empty_nodes(struct ma_state *mas)
1149 {
1150 mas->node_request = 0;
1151 if (mas->sheaf) {
1152 mt_return_sheaf(mas->sheaf);
1153 mas->sheaf = NULL;
1154 }
1155
1156 if (mas->alloc) {
1157 kfree(mas->alloc);
1158 mas->alloc = NULL;
1159 }
1160 }
1161
1162 /*
1163 * mas_free() - Free an encoded maple node
1164 * @mas: The maple state
1165 * @used: The encoded maple node to free.
1166 *
1167 * The node must already have been marked dead; it is freed via an RCU
1168 * callback.
1169 */
1170 static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
1171 {
1172 ma_free_rcu(mte_to_node(used));
1173 }
1174
1175 /*
1176 * mas_start() - Sets up maple state for operations.
1177 * @mas: The maple state.
1178 *
1179 * If mas->status == ma_start, then set the min, max and depth to
1180 * defaults.
1181 *
1182 * Return:
1183 * - If mas->node is an error or not mas_start, return NULL.
1184 * - If it's an empty tree: NULL & mas->status == ma_none
1185 * - If it's a single entry: The entry & mas->status == ma_root
1186 * - If it's a tree: NULL & mas->status == ma_active
1187 */
1188 static inline struct maple_enode *mas_start(struct ma_state *mas)
1189 {
1190 if (likely(mas_is_start(mas))) {
1191 struct maple_enode *root;
1192
1193 mas->min = 0;
1194 mas->max = ULONG_MAX;
1195
1196 retry:
1197 mas->depth = 0;
1198 root = mas_root(mas);
1199 /* Tree with nodes */
1200 if (likely(xa_is_node(root))) {
1201 mas->depth = 0;
1202 mas->status = ma_active;
1203 mas->node = mte_safe_root(root);
1204 mas->offset = 0;
1205 if (mte_dead_node(mas->node))
1206 goto retry;
1207
1208 return NULL;
1209 }
1210
1211 mas->node = NULL;
1212 /* empty tree */
1213 if (unlikely(!root)) {
1214 mas->status = ma_none;
1215 mas->offset = MAPLE_NODE_SLOTS;
1216 return NULL;
1217 }
1218
1219 /* Single entry tree */
1220 mas->status = ma_root;
1221 mas->offset = MAPLE_NODE_SLOTS;
1222
1223 /* Single entry tree. */
1224 if (mas->index > 0)
1225 return NULL;
1226
1227 return root;
1228 }
1229
1230 return NULL;
1231 }
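
/*
 * A sketch of how internal callers typically handle the three outcomes of
 * mas_start() (illustrative only):
 *
 *	entry = mas_start(&mas);
 *	if (mas_is_none(&mas))
 *		return NULL;		// empty tree
 *	if (mas_is_ptr(&mas))
 *		return entry;		// single entry stored at index 0
 *	// ma_active: mas.node is the root, walk down from here
 */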
1232
1233 /*
1234 * ma_data_end() - Find the end of the data in a node.
1235 * @node: The maple node
1236 * @type: The maple node type
1237 * @pivots: The array of pivots in the node
1238 * @max: The maximum value in the node
1239 *
1240 * Uses metadata to find the end of the data when possible.
1241 * Return: The zero indexed last slot with data (may be null).
1242 */
1243 static __always_inline unsigned char ma_data_end(struct maple_node *node,
1244 enum maple_type type, unsigned long *pivots, unsigned long max)
1245 {
1246 unsigned char offset;
1247
1248 if (!pivots)
1249 return 0;
1250
1251 if (type == maple_arange_64)
1252 return ma_meta_end(node, type);
1253
1254 offset = mt_pivots[type] - 1;
1255 if (likely(!pivots[offset]))
1256 return ma_meta_end(node, type);
1257
1258 if (likely(pivots[offset] == max))
1259 return offset;
1260
1261 return mt_pivots[type];
1262 }
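
/*
 * For a maple_range_64 node, for example, the last pivot (offset 14) encodes
 * how full the node is: zero means the node is not full and the end is read
 * from the metadata, a value equal to @max means offset 14 is the last slot
 * in use, and anything else means all 16 slots hold data.
 */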
1263
1264 /*
1265 * mas_data_end() - Find the end of the data (slot).
1266 * @mas: the maple state
1267 *
1268 * This method is optimized to check the metadata of a node if the node type
1269 * supports data end metadata.
1270 *
1271 * Return: The zero indexed last slot with data (may be null).
1272 */
1273 static inline unsigned char mas_data_end(struct ma_state *mas)
1274 {
1275 enum maple_type type;
1276 struct maple_node *node;
1277 unsigned char offset;
1278 unsigned long *pivots;
1279
1280 type = mte_node_type(mas->node);
1281 node = mas_mn(mas);
1282 if (type == maple_arange_64)
1283 return ma_meta_end(node, type);
1284
1285 pivots = ma_pivots(node, type);
1286 if (unlikely(ma_dead_node(node)))
1287 return 0;
1288
1289 offset = mt_pivots[type] - 1;
1290 if (likely(!pivots[offset]))
1291 return ma_meta_end(node, type);
1292
1293 if (likely(pivots[offset] == mas->max))
1294 return offset;
1295
1296 return mt_pivots[type];
1297 }
1298
1299 /*
1300 * mas_leaf_max_gap() - Returns the largest gap in a leaf node
1301 * @mas: the maple state
1302 *
1303 * Return: The maximum gap in the leaf.
1304 */
1305 static unsigned long mas_leaf_max_gap(struct ma_state *mas)
1306 {
1307 enum maple_type mt;
1308 unsigned long pstart, gap, max_gap;
1309 struct maple_node *mn;
1310 unsigned long *pivots;
1311 void __rcu **slots;
1312 unsigned char i;
1313 unsigned char max_piv;
1314
1315 mt = mte_node_type(mas->node);
1316 mn = mas_mn(mas);
1317 slots = ma_slots(mn, mt);
1318 max_gap = 0;
1319 if (unlikely(ma_is_dense(mt))) {
1320 gap = 0;
1321 for (i = 0; i < mt_slots[mt]; i++) {
1322 if (slots[i]) {
1323 if (gap > max_gap)
1324 max_gap = gap;
1325 gap = 0;
1326 } else {
1327 gap++;
1328 }
1329 }
1330 if (gap > max_gap)
1331 max_gap = gap;
1332 return max_gap;
1333 }
1334
1335 /*
1336 * Checking the first implied pivot optimizes the loop below, and slot 1 may
1337 * be skipped if there is a gap in slot 0.
1338 */
1339 pivots = ma_pivots(mn, mt);
1340 if (likely(!slots[0])) {
1341 max_gap = pivots[0] - mas->min + 1;
1342 i = 2;
1343 } else {
1344 i = 1;
1345 }
1346
1347 /* reduce max_piv as the special case is checked before the loop */
1348 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
1349 /*
1350 * Check end implied pivot which can only be a gap on the right most
1351 * node.
1352 */
1353 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
1354 gap = ULONG_MAX - pivots[max_piv];
1355 if (gap > max_gap)
1356 max_gap = gap;
1357
1358 if (max_gap > pivots[max_piv] - mas->min)
1359 return max_gap;
1360 }
1361
1362 for (; i <= max_piv; i++) {
1363 /* data == no gap. */
1364 if (likely(slots[i]))
1365 continue;
1366
1367 pstart = pivots[i - 1];
1368 gap = pivots[i] - pstart;
1369 if (gap > max_gap)
1370 max_gap = gap;
1371
1372 /* There cannot be two gaps in a row. */
1373 i++;
1374 }
1375 return max_gap;
1376 }
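
/*
 * For example, a leaf spanning [10, 100] with pivots {20, 50} and slots
 * {A, NULL, B} has one gap covering [21, 50], so the largest gap reported is
 * 30 (pivots[1] - pivots[0]).
 */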
1377
1378 /*
1379 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
1380 * @node: The maple node
1381 * @gaps: The pointer to the gaps
1382 * @mt: The maple node type
1383 * @off: Pointer to store the offset location of the gap.
1384 *
1385 * Uses the metadata data end to scan backwards across set gaps.
1386 *
1387 * Return: The maximum gap value
1388 */
1389 static inline unsigned long
1390 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
1391 unsigned char *off)
1392 {
1393 unsigned char offset, i;
1394 unsigned long max_gap = 0;
1395
1396 i = offset = ma_meta_end(node, mt);
1397 do {
1398 if (gaps[i] > max_gap) {
1399 max_gap = gaps[i];
1400 offset = i;
1401 }
1402 } while (i--);
1403
1404 *off = offset;
1405 return max_gap;
1406 }
1407
1408 /*
1409 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
1410 * @mas: The maple state.
1411 *
1412 * Return: The gap value.
1413 */
1414 static inline unsigned long mas_max_gap(struct ma_state *mas)
1415 {
1416 unsigned long *gaps;
1417 unsigned char offset;
1418 enum maple_type mt;
1419 struct maple_node *node;
1420
1421 mt = mte_node_type(mas->node);
1422 if (ma_is_leaf(mt))
1423 return mas_leaf_max_gap(mas);
1424
1425 node = mas_mn(mas);
1426 MAS_BUG_ON(mas, mt != maple_arange_64);
1427 offset = ma_meta_gap(node);
1428 gaps = ma_gaps(node, mt);
1429 return gaps[offset];
1430 }
1431
1432 /*
1433 * mas_parent_gap() - Set the parent gap and any gaps above, as needed
1434 * @mas: The maple state
1435 * @offset: The gap offset in the parent to set
1436 * @new: The new gap value.
1437 *
1438 * Set the parent gap then continue to set the gap upwards, using the metadata
1439 * of the parent to see if it is necessary to check the node above.
1440 */
1441 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
1442 unsigned long new)
1443 {
1444 unsigned long meta_gap = 0;
1445 struct maple_node *pnode;
1446 struct maple_enode *penode;
1447 unsigned long *pgaps;
1448 unsigned char meta_offset;
1449 enum maple_type pmt;
1450
1451 pnode = mte_parent(mas->node);
1452 pmt = mas_parent_type(mas, mas->node);
1453 penode = mt_mk_node(pnode, pmt);
1454 pgaps = ma_gaps(pnode, pmt);
1455
1456 ascend:
1457 MAS_BUG_ON(mas, pmt != maple_arange_64);
1458 meta_offset = ma_meta_gap(pnode);
1459 meta_gap = pgaps[meta_offset];
1460
1461 pgaps[offset] = new;
1462
1463 if (meta_gap == new)
1464 return;
1465
1466 if (offset != meta_offset) {
1467 if (meta_gap > new)
1468 return;
1469
1470 ma_set_meta_gap(pnode, pmt, offset);
1471 } else if (new < meta_gap) {
1472 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
1473 ma_set_meta_gap(pnode, pmt, meta_offset);
1474 }
1475
1476 if (ma_is_root(pnode))
1477 return;
1478
1479 /* Go to the parent node. */
1480 pnode = mte_parent(penode);
1481 pmt = mas_parent_type(mas, penode);
1482 pgaps = ma_gaps(pnode, pmt);
1483 offset = mte_parent_slot(penode);
1484 penode = mt_mk_node(pnode, pmt);
1485 goto ascend;
1486 }
1487
1488 /*
1489 * mas_update_gap() - Update a nodes gaps and propagate up if necessary.
1490 * @mas: the maple state.
1491 */
1492 static inline void mas_update_gap(struct ma_state *mas)
1493 {
1494 unsigned char pslot;
1495 unsigned long p_gap;
1496 unsigned long max_gap;
1497
1498 if (!mt_is_alloc(mas->tree))
1499 return;
1500
1501 if (mte_is_root(mas->node))
1502 return;
1503
1504 max_gap = mas_max_gap(mas);
1505
1506 pslot = mte_parent_slot(mas->node);
1507 p_gap = ma_gaps(mte_parent(mas->node),
1508 mas_parent_type(mas, mas->node))[pslot];
1509
1510 if (p_gap != max_gap)
1511 mas_parent_gap(mas, pslot, max_gap);
1512 }
1513
1514 /*
1515 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
1516 * @parent with the slot encoded.
1517 * @mas: the maple state (for the tree)
1518 * @parent: the maple encoded node containing the children.
1519 */
1520 static inline void mas_adopt_children(struct ma_state *mas,
1521 struct maple_enode *parent)
1522 {
1523 enum maple_type type = mte_node_type(parent);
1524 struct maple_node *node = mte_to_node(parent);
1525 void __rcu **slots = ma_slots(node, type);
1526 unsigned long *pivots = ma_pivots(node, type);
1527 struct maple_enode *child;
1528 unsigned char offset;
1529
1530 offset = ma_data_end(node, type, pivots, mas->max);
1531 do {
1532 child = mas_slot_locked(mas, slots, offset);
1533 mas_set_parent(mas, child, parent, offset);
1534 } while (offset--);
1535 }
1536
1537 /*
1538 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old
1539 * node as dead.
1540 * @mas: the maple state with the new node
1541 * @old_enode: The old maple encoded node to replace.
1542 * @new_height: if we are inserting a root node, update the height of the tree
1543 */
1544 static inline void mas_put_in_tree(struct ma_state *mas,
1545 struct maple_enode *old_enode, char new_height)
1546 __must_hold(mas->tree->ma_lock)
1547 {
1548 unsigned char offset;
1549 void __rcu **slots;
1550
1551 if (mte_is_root(mas->node)) {
1552 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
1553 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
1554 mt_set_height(mas->tree, new_height);
1555 } else {
1556
1557 offset = mte_parent_slot(mas->node);
1558 slots = ma_slots(mte_parent(mas->node),
1559 mas_parent_type(mas, mas->node));
1560 rcu_assign_pointer(slots[offset], mas->node);
1561 }
1562
1563 mte_set_node_dead(old_enode);
1564 }
1565
1566 /*
1567 * mas_replace_node() - Replace a node by putting it in the tree, marking it
1568 * dead, and freeing it.
1569 * Uses the parent encoding to locate the maple node in the tree.
1570 * @mas: the ma_state with @mas->node pointing to the new node.
1571 * @old_enode: The old maple encoded node.
1572 * @new_height: The new height of the tree as a result of the operation
1573 */
1574 static inline void mas_replace_node(struct ma_state *mas,
1575 struct maple_enode *old_enode, unsigned char new_height)
1576 __must_hold(mas->tree->ma_lock)
1577 {
1578 mas_put_in_tree(mas, old_enode, new_height);
1579 mas_free(mas, old_enode);
1580 }
1581
1582 /*
1583 * mas_find_child() - Find a child who has the parent @mas->node.
1584 * @mas: the maple state with the parent.
1585 * @child: the maple state to store the child.
1586 */
1587 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
1588 __must_hold(mas->tree->ma_lock)
1589 {
1590 enum maple_type mt;
1591 unsigned char offset;
1592 unsigned char end;
1593 unsigned long *pivots;
1594 struct maple_enode *entry;
1595 struct maple_node *node;
1596 void __rcu **slots;
1597
1598 mt = mte_node_type(mas->node);
1599 node = mas_mn(mas);
1600 slots = ma_slots(node, mt);
1601 pivots = ma_pivots(node, mt);
1602 end = ma_data_end(node, mt, pivots, mas->max);
1603 for (offset = mas->offset; offset <= end; offset++) {
1604 entry = mas_slot_locked(mas, slots, offset);
1605 if (mte_parent(entry) == node) {
1606 *child = *mas;
1607 mas->offset = offset + 1;
1608 child->offset = offset;
1609 mas_descend(child);
1610 child->offset = 0;
1611 return true;
1612 }
1613 }
1614 return false;
1615 }
1616
1617 /*
1618 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the
1619 * old data or set b_node->b_end.
1620 * @b_node: the maple_big_node
1621 * @shift: the shift count
1622 */
1623 static inline void mab_shift_right(struct maple_big_node *b_node,
1624 unsigned char shift)
1625 {
1626 unsigned long size = b_node->b_end * sizeof(unsigned long);
1627
1628 memmove(b_node->pivot + shift, b_node->pivot, size);
1629 memmove(b_node->slot + shift, b_node->slot, size);
1630 if (b_node->type == maple_arange_64)
1631 memmove(b_node->gap + shift, b_node->gap, size);
1632 }
1633
1634 /*
1635 * mab_middle_node() - Check if a middle node is needed (unlikely)
1636 * @b_node: the maple_big_node that contains the data.
1637 * @split: the potential split location
1638 * @slot_count: the size that can be stored in a single node being considered.
1639 *
1640 * Return: true if a middle node is required.
1641 */
1642 static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
1643 unsigned char slot_count)
1644 {
1645 unsigned char size = b_node->b_end;
1646
1647 if (size >= 2 * slot_count)
1648 return true;
1649
1650 if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
1651 return true;
1652
1653 return false;
1654 }
1655
1656 /*
1657 * mab_no_null_split() - ensure the split doesn't fall on a NULL
1658 * @b_node: the maple_big_node with the data
1659 * @split: the suggested split location
1660 * @slot_count: the number of slots in the node being considered.
1661 *
1662 * Return: the split location.
1663 */
1664 static inline int mab_no_null_split(struct maple_big_node *b_node,
1665 unsigned char split, unsigned char slot_count)
1666 {
1667 if (!b_node->slot[split]) {
1668 /*
1669 * If the split is less than the max slot && the right side will
1670 * still be sufficient, then increment the split on NULL.
1671 */
1672 if ((split < slot_count - 1) &&
1673 (b_node->b_end - split) > (mt_min_slots[b_node->type]))
1674 split++;
1675 else
1676 split--;
1677 }
1678 return split;
1679 }
1680
1681 /*
1682 * mab_calc_split() - Calculate the split location and if there needs to be two
1683 * splits.
1684 * @mas: The maple state
1685 * @bn: The maple_big_node with the data
1686 * @mid_split: The second split, if required. 0 otherwise.
1687 *
1688 * Return: The first split location. The middle split is set in @mid_split.
1689 */
1690 static inline int mab_calc_split(struct ma_state *mas,
1691 struct maple_big_node *bn, unsigned char *mid_split)
1692 {
1693 unsigned char b_end = bn->b_end;
1694 int split = b_end / 2; /* Assume equal split. */
1695 unsigned char slot_count = mt_slots[bn->type];
1696
1697 /*
1698 * To support gap tracking, all NULL entries are kept together and a node cannot
1699 * end on a NULL entry, with the exception of the left-most leaf. The
1700 * limitation means that the split of a node must be checked for this condition
1701 * and be able to put more data in one direction or the other.
1702 *
1703 * Although extremely rare, it is possible to enter what is known as the 3-way
1704 * split scenario. The 3-way split comes about by means of a store of a range
1705 * that overwrites the end and beginning of two full nodes. The result is a set
1706 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can
1707 * also be located in different parent nodes which are also full. This can
1708 * carry upwards all the way to the root in the worst case.
1709 */
1710 if (unlikely(mab_middle_node(bn, split, slot_count))) {
1711 split = b_end / 3;
1712 *mid_split = split * 2;
1713 } else {
1714 *mid_split = 0;
1715 }
1716
1717 /* Avoid ending a node on a NULL entry */
1718 split = mab_no_null_split(bn, split, slot_count);
1719
1720 if (unlikely(*mid_split))
1721 *mid_split = mab_no_null_split(bn, *mid_split, slot_count);
1722
1723 return split;
1724 }
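
/*
 * A rough example with 16-slot nodes: a big node holding 32 entries cannot fit
 * in two nodes, so mab_middle_node() requests a 3-way split and the data is
 * divided at split = 10 and mid_split = 20 (roughly 11/10/11 entries), with
 * mab_no_null_split() nudging either boundary so that no node ends on a NULL.
 */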
1725
1726 /*
1727 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
1728 * and set @b_node->b_end to the next free slot.
1729 * @mas: The maple state
1730 * @mas_start: The starting slot to copy
1731 * @mas_end: The end slot to copy (inclusively)
1732 * @b_node: The maple_big_node to place the data
1733 * @mab_start: The starting location in maple_big_node to store the data.
1734 */
1735 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
1736 unsigned char mas_end, struct maple_big_node *b_node,
1737 unsigned char mab_start)
1738 {
1739 enum maple_type mt;
1740 struct maple_node *node;
1741 void __rcu **slots;
1742 unsigned long *pivots, *gaps;
1743 int i = mas_start, j = mab_start;
1744 unsigned char piv_end;
1745
1746 node = mas_mn(mas);
1747 mt = mte_node_type(mas->node);
1748 pivots = ma_pivots(node, mt);
1749 if (!i) {
1750 b_node->pivot[j] = pivots[i++];
1751 if (unlikely(i > mas_end))
1752 goto complete;
1753 j++;
1754 }
1755
1756 piv_end = min(mas_end, mt_pivots[mt]);
1757 for (; i < piv_end; i++, j++) {
1758 b_node->pivot[j] = pivots[i];
1759 if (unlikely(!b_node->pivot[j]))
1760 goto complete;
1761
1762 if (unlikely(mas->max == b_node->pivot[j]))
1763 goto complete;
1764 }
1765
1766 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
1767
1768 complete:
1769 b_node->b_end = ++j;
1770 j -= mab_start;
1771 slots = ma_slots(node, mt);
1772 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
1773 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
1774 gaps = ma_gaps(node, mt);
1775 memcpy(b_node->gap + mab_start, gaps + mas_start,
1776 sizeof(unsigned long) * j);
1777 }
1778 }
1779
1780 /*
1781 * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
1782 * @node: The maple node
1783 * @mt: The maple type
1784 * @end: The node end
1785 */
1786 static inline void mas_leaf_set_meta(struct maple_node *node,
1787 enum maple_type mt, unsigned char end)
1788 {
1789 if (end < mt_slots[mt] - 1)
1790 ma_set_meta(node, mt, 0, end);
1791 }
1792
1793 /*
1794 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
1795 * @b_node: the maple_big_node that has the data
1796 * @mab_start: the start location in @b_node.
1797 * @mab_end: The end location in @b_node (inclusively)
1798 * @mas: The maple state with the maple encoded node.
1799 */
1800 static inline void mab_mas_cp(struct maple_big_node *b_node,
1801 unsigned char mab_start, unsigned char mab_end,
1802 struct ma_state *mas, bool new_max)
1803 {
1804 int i, j = 0;
1805 enum maple_type mt = mte_node_type(mas->node);
1806 struct maple_node *node = mte_to_node(mas->node);
1807 void __rcu **slots = ma_slots(node, mt);
1808 unsigned long *pivots = ma_pivots(node, mt);
1809 unsigned long *gaps = NULL;
1810 unsigned char end;
1811
1812 if (mab_end - mab_start > mt_pivots[mt])
1813 mab_end--;
1814
1815 if (!pivots[mt_pivots[mt] - 1])
1816 slots[mt_pivots[mt]] = NULL;
1817
1818 i = mab_start;
1819 do {
1820 pivots[j++] = b_node->pivot[i++];
1821 } while (i <= mab_end && likely(b_node->pivot[i]));
1822
1823 memcpy(slots, b_node->slot + mab_start,
1824 sizeof(void *) * (i - mab_start));
1825
1826 if (new_max)
1827 mas->max = b_node->pivot[i - 1];
1828
1829 end = j - 1;
1830 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
1831 unsigned long max_gap = 0;
1832 unsigned char offset = 0;
1833
1834 gaps = ma_gaps(node, mt);
1835 do {
1836 gaps[--j] = b_node->gap[--i];
1837 if (gaps[j] > max_gap) {
1838 offset = j;
1839 max_gap = gaps[j];
1840 }
1841 } while (j);
1842
1843 ma_set_meta(node, mt, offset, end);
1844 } else {
1845 mas_leaf_set_meta(node, mt, end);
1846 }
1847 }
1848
1849 /*
1850 * mas_store_b_node() - Store an @entry into the b_node while also copying the
1851 * data from a maple encoded node.
1852 * @wr_mas: the maple write state
1853 * @b_node: the maple_big_node to fill with data
1854 * @offset_end: the offset to end copying
1855 *
1856 * Return: The actual end of the data stored in @b_node
1857 */
1858 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
1859 struct maple_big_node *b_node, unsigned char offset_end)
1860 {
1861 unsigned char slot;
1862 unsigned char b_end;
1863 /* Possible underflow of piv will wrap back to 0 before use. */
1864 unsigned long piv;
1865 struct ma_state *mas = wr_mas->mas;
1866
1867 b_node->type = wr_mas->type;
1868 b_end = 0;
1869 slot = mas->offset;
1870 if (slot) {
1871 /* Copy start data up to insert. */
1872 mas_mab_cp(mas, 0, slot - 1, b_node, 0);
1873 b_end = b_node->b_end;
1874 piv = b_node->pivot[b_end - 1];
1875 } else
1876 piv = mas->min - 1;
1877
1878 if (piv + 1 < mas->index) {
1879 /* Handle range starting after old range */
1880 b_node->slot[b_end] = wr_mas->content;
1881 if (!wr_mas->content)
1882 b_node->gap[b_end] = mas->index - 1 - piv;
1883 b_node->pivot[b_end++] = mas->index - 1;
1884 }
1885
1886 /* Store the new entry. */
1887 mas->offset = b_end;
1888 b_node->slot[b_end] = wr_mas->entry;
1889 b_node->pivot[b_end] = mas->last;
1890
1891 /* Appended. */
1892 if (mas->last >= mas->max)
1893 goto b_end;
1894
1895 /* Handle new range ending before old range ends */
1896 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
1897 if (piv > mas->last) {
1898 if (offset_end != slot)
1899 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
1900 offset_end);
1901
1902 b_node->slot[++b_end] = wr_mas->content;
1903 if (!wr_mas->content)
1904 b_node->gap[b_end] = piv - mas->last + 1;
1905 b_node->pivot[b_end] = piv;
1906 }
1907
1908 slot = offset_end + 1;
1909 if (slot > mas->end)
1910 goto b_end;
1911
1912 /* Copy end data to the end of the node. */
1913 mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end);
1914 b_node->b_end--;
1915 return;
1916
1917 b_end:
1918 b_node->b_end = b_end;
1919 }
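
/*
 * For illustration (hypothetical ranges): overwriting [8, 12] with a new
 * entry in a leaf where the target offset holds [5, 20] -> A produces three
 * entries in @b_node: [5, 7] -> A, [8, 12] -> new entry and [13, 20] -> A,
 * in addition to whatever was copied from before and after the target slot.
 */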
1920
1921 /*
1922 * mas_prev_sibling() - Find the previous node with the same parent.
1923 * @mas: the maple state
1924 *
1925 * Return: True if there is a previous sibling, false otherwise.
1926 */
1927 static inline bool mas_prev_sibling(struct ma_state *mas)
1928 {
1929 unsigned int p_slot = mte_parent_slot(mas->node);
1930
1931 /* For root node, p_slot is set to 0 by mte_parent_slot(). */
1932 if (!p_slot)
1933 return false;
1934
1935 mas_ascend(mas);
1936 mas->offset = p_slot - 1;
1937 mas_descend(mas);
1938 return true;
1939 }
1940
1941 /*
1942 * mas_next_sibling() - Find the next node with the same parent.
1943 * @mas: the maple state
1944 *
1945 * Return: true if there is a next sibling, false otherwise.
1946 */
1947 static inline bool mas_next_sibling(struct ma_state *mas)
1948 {
1949 MA_STATE(parent, mas->tree, mas->index, mas->last);
1950
1951 if (mte_is_root(mas->node))
1952 return false;
1953
1954 parent = *mas;
1955 mas_ascend(&parent);
1956 parent.offset = mte_parent_slot(mas->node) + 1;
1957 if (parent.offset > mas_data_end(&parent))
1958 return false;
1959
1960 *mas = parent;
1961 mas_descend(mas);
1962 return true;
1963 }
1964
1965 /*
1966 * mas_node_or_none() - Set the enode and state.
1967 * @mas: the maple state
1968 * @enode: The encoded maple node.
1969 *
1970 * Set the node to the enode and the status.
1971 */
1972 static inline void mas_node_or_none(struct ma_state *mas,
1973 struct maple_enode *enode)
1974 {
1975 if (enode) {
1976 mas->node = enode;
1977 mas->status = ma_active;
1978 } else {
1979 mas->node = NULL;
1980 mas->status = ma_none;
1981 }
1982 }
1983
1984 /*
1985 * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
1986 * If @mas->index cannot be found within the containing
1987 * node, we traverse to the last entry in the node.
1988 * @wr_mas: The maple write state
1989 *
1990 * Uses mas_slot_locked() and does not need to worry about dead nodes.
1991 */
1992 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
1993 {
1994 struct ma_state *mas = wr_mas->mas;
1995 unsigned char count, offset;
1996
1997 if (unlikely(ma_is_dense(wr_mas->type))) {
1998 wr_mas->r_max = wr_mas->r_min = mas->index;
1999 mas->offset = mas->index = mas->min;
2000 return;
2001 }
2002
2003 wr_mas->node = mas_mn(wr_mas->mas);
2004 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
2005 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type,
2006 wr_mas->pivots, mas->max);
2007 offset = mas->offset;
2008
2009 while (offset < count && mas->index > wr_mas->pivots[offset])
2010 offset++;
2011
2012 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max;
2013 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset);
2014 wr_mas->offset_end = mas->offset = offset;
2015 }
2016
2017 /*
2018 * mast_rebalance_next() - Rebalance against the next node
2019 * @mast: The maple subtree state
2020 */
2021 static inline void mast_rebalance_next(struct maple_subtree_state *mast)
2022 {
2023 unsigned char b_end = mast->bn->b_end;
2024
2025 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
2026 mast->bn, b_end);
2027 mast->orig_r->last = mast->orig_r->max;
2028 }
2029
2030 /*
2031 * mast_rebalance_prev() - Rebalance against the previous node
2032 * @mast: The maple subtree state
2033 */
2034 static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
2035 {
2036 unsigned char end = mas_data_end(mast->orig_l) + 1;
2037 unsigned char b_end = mast->bn->b_end;
2038
2039 mab_shift_right(mast->bn, end);
2040 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
2041 mast->l->min = mast->orig_l->min;
2042 mast->orig_l->index = mast->orig_l->min;
2043 mast->bn->b_end = end + b_end;
2044 mast->l->offset += end;
2045 }
2046
2047 /*
2048 * mast_spanning_rebalance() - Rebalance nodes against the nearest neighbour,
2049 * favouring the node to the right. Check the node to the right, then the
2050 * left, at each level upwards until the root is reached.
2051 * Data is copied into the @mast->bn.
2052 * @mast: The maple_subtree_state.
2053 */
2054 static inline
2055 bool mast_spanning_rebalance(struct maple_subtree_state *mast)
2056 {
2057 struct ma_state r_tmp = *mast->orig_r;
2058 struct ma_state l_tmp = *mast->orig_l;
2059 unsigned char depth = 0;
2060
2061 do {
2062 mas_ascend(mast->orig_r);
2063 mas_ascend(mast->orig_l);
2064 depth++;
2065 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
2066 mast->orig_r->offset++;
2067 do {
2068 mas_descend(mast->orig_r);
2069 mast->orig_r->offset = 0;
2070 } while (--depth);
2071
2072 mast_rebalance_next(mast);
2073 *mast->orig_l = l_tmp;
2074 return true;
2075 } else if (mast->orig_l->offset != 0) {
2076 mast->orig_l->offset--;
2077 do {
2078 mas_descend(mast->orig_l);
2079 mast->orig_l->offset =
2080 mas_data_end(mast->orig_l);
2081 } while (--depth);
2082
2083 mast_rebalance_prev(mast);
2084 *mast->orig_r = r_tmp;
2085 return true;
2086 }
2087 } while (!mte_is_root(mast->orig_r->node));
2088
2089 *mast->orig_r = r_tmp;
2090 *mast->orig_l = l_tmp;
2091 return false;
2092 }
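
/*
 * In other words (schematic): ascend from the insufficient node until a
 * parent with a sibling to the right (preferred) or to the left exists, then
 * descend back down the same number of levels along that sibling's leftmost
 * (or rightmost) edge and copy its data into @mast->bn.  If the root is
 * reached without finding a sibling, the original states are restored and
 * false is returned.
 */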
2093
2094 /*
2095 * mast_ascend() - Ascend the original left and right maple states.
2096 * @mast: the maple subtree state.
2097 *
2098 * Ascend the original left and right sides. Set the offsets to point to the
2099 * data already in the new tree (@mast->l and @mast->r).
2100 */
2101 static inline void mast_ascend(struct maple_subtree_state *mast)
2102 {
2103 MA_WR_STATE(wr_mas, mast->orig_r, NULL);
2104 mas_ascend(mast->orig_l);
2105 mas_ascend(mast->orig_r);
2106
2107 mast->orig_r->offset = 0;
2108 mast->orig_r->index = mast->r->max;
2109 /* last should be larger than or equal to index */
2110 if (mast->orig_r->last < mast->orig_r->index)
2111 mast->orig_r->last = mast->orig_r->index;
2112
2113 wr_mas.type = mte_node_type(mast->orig_r->node);
2114 mas_wr_node_walk(&wr_mas);
2115 /* Set up the left side of things */
2116 mast->orig_l->offset = 0;
2117 mast->orig_l->index = mast->l->min;
2118 wr_mas.mas = mast->orig_l;
2119 wr_mas.type = mte_node_type(mast->orig_l->node);
2120 mas_wr_node_walk(&wr_mas);
2121
2122 mast->bn->type = wr_mas.type;
2123 }
2124
2125 /*
2126 * mas_new_ma_node() - Create and return a new maple node. Helper function.
2127 * @mas: the maple state with the allocations.
2128 * @b_node: the maple_big_node with the type encoding.
2129 *
2130 * Use the node type from the maple_big_node to allocate a new node from the
2131 * ma_state. This function exists mainly for code readability.
2132 *
2133 * Return: A new maple encoded node
2134 */
2135 static inline struct maple_enode
2136 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node)
2137 {
2138 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type);
2139 }
2140
2141 /*
2142 * mas_mab_to_node() - Set up the new left, right and (if needed) middle nodes
2143 *
2144 * @mas: the maple state that contains the allocations.
2145 * @b_node: the node which contains the data.
2146 * @left: The pointer which will have the left node
2147 * @right: The pointer which may have the right node
2148 * @middle: the pointer which may have the middle node (rare)
2149 * @mid_split: the split location for the middle node
2150 *
2151 * Return: the split of left.
2152 */
2153 static inline unsigned char mas_mab_to_node(struct ma_state *mas,
2154 struct maple_big_node *b_node, struct maple_enode **left,
2155 struct maple_enode **right, struct maple_enode **middle,
2156 unsigned char *mid_split)
2157 {
2158 unsigned char split = 0;
2159 unsigned char slot_count = mt_slots[b_node->type];
2160
2161 *left = mas_new_ma_node(mas, b_node);
2162 *right = NULL;
2163 *middle = NULL;
2164 *mid_split = 0;
2165
2166 if (b_node->b_end < slot_count) {
2167 split = b_node->b_end;
2168 } else {
2169 split = mab_calc_split(mas, b_node, mid_split);
2170 *right = mas_new_ma_node(mas, b_node);
2171 }
2172
2173 if (*mid_split)
2174 *middle = mas_new_ma_node(mas, b_node);
2175
2176 return split;
2177
2178 }
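
/*
 * For example (hypothetical sizes, 16-slot node type): with b_end = 10 only
 * *left is allocated and the returned split is 10; with b_end = 20 a *right
 * node is also allocated and mab_calc_split() picks the split; *middle is
 * only allocated in the rare case where *mid_split is set.
 */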
2179
2180 /*
2181 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end
2182 * pointer.
2183 * @b_node: the big node to add the entry
2184 * @mas: the maple state to get the pivot (mas->max)
2185 * @entry: the entry to add, if NULL nothing happens.
2186 */
2187 static inline void mab_set_b_end(struct maple_big_node *b_node,
2188 struct ma_state *mas,
2189 void *entry)
2190 {
2191 if (!entry)
2192 return;
2193
2194 b_node->slot[b_node->b_end] = entry;
2195 if (mt_is_alloc(mas->tree))
2196 b_node->gap[b_node->b_end] = mas_max_gap(mas);
2197 b_node->pivot[b_node->b_end++] = mas->max;
2198 }
2199
2200 /*
2201 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent
2202 * of @mas->node to either @left or @right, depending on @slot and @split
2203 *
2204 * @mas: the maple state with the node that needs a parent
2205 * @left: possible parent 1
2206 * @right: possible parent 2
2207 * @slot: the slot the mas->node was placed
2208 * @split: the split location between @left and @right
2209 */
2210 static inline void mas_set_split_parent(struct ma_state *mas,
2211 struct maple_enode *left,
2212 struct maple_enode *right,
2213 unsigned char *slot, unsigned char split)
2214 {
2215 if (mas_is_none(mas))
2216 return;
2217
2218 if ((*slot) <= split)
2219 mas_set_parent(mas, mas->node, left, *slot);
2220 else if (right)
2221 mas_set_parent(mas, mas->node, right, (*slot) - split - 1);
2222
2223 (*slot)++;
2224 }
2225
2226 /*
2227 * mte_mid_split_check() - Check if the next node passes the mid-split
2228 * @l: Pointer to left encoded maple node.
2229 * @m: Pointer to middle encoded maple node.
2230 * @r: Pointer to right encoded maple node.
2231 * @slot: The offset
2232 * @split: The split location.
2233 * @mid_split: The middle split.
2234 */
2235 static inline void mte_mid_split_check(struct maple_enode **l,
2236 struct maple_enode **r,
2237 struct maple_enode *right,
2238 unsigned char slot,
2239 unsigned char *split,
2240 unsigned char mid_split)
2241 {
2242 if (*r == right)
2243 return;
2244
2245 if (slot < mid_split)
2246 return;
2247
2248 *l = *r;
2249 *r = right;
2250 *split = mid_split;
2251 }
2252
2253 /*
2254 * mast_set_split_parents() - Helper function to set three nodes' parents. Slot
2255 * is taken from @mast->l.
2256 * @mast: the maple subtree state
2257 * @left: the left node
2258 * @right: the right node
2259 * @split: the split location.
2260 */
2261 static inline void mast_set_split_parents(struct maple_subtree_state *mast,
2262 struct maple_enode *left,
2263 struct maple_enode *middle,
2264 struct maple_enode *right,
2265 unsigned char split,
2266 unsigned char mid_split)
2267 {
2268 unsigned char slot;
2269 struct maple_enode *l = left;
2270 struct maple_enode *r = right;
2271
2272 if (mas_is_none(mast->l))
2273 return;
2274
2275 if (middle)
2276 r = middle;
2277
2278 slot = mast->l->offset;
2279
2280 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2281 mas_set_split_parent(mast->l, l, r, &slot, split);
2282
2283 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2284 mas_set_split_parent(mast->m, l, r, &slot, split);
2285
2286 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2287 mas_set_split_parent(mast->r, l, r, &slot, split);
2288 }
2289
2290 /*
2291 * mas_topiary_node() - Dispose of a single node
2292 * @mas: The maple state for pushing nodes
2293 * @in_rcu: If the tree is in rcu mode
2294 *
2295 * The node will either be RCU freed or pushed back on the maple state.
2296 */
2297 static inline void mas_topiary_node(struct ma_state *mas,
2298 struct ma_state *tmp_mas, bool in_rcu)
2299 {
2300 struct maple_node *tmp;
2301 struct maple_enode *enode;
2302
2303 if (mas_is_none(tmp_mas))
2304 return;
2305
2306 enode = tmp_mas->node;
2307 tmp = mte_to_node(enode);
2308 mte_set_node_dead(enode);
2309 ma_free_rcu(tmp);
2310 }
2311
2312 /*
2313 * mas_topiary_replace() - Replace the data with new data, then repair the
2314 * parent links within the new tree. Iterate over the dead sub-tree, collect
2315 * the dead subtrees, and prune (topiary) the nodes that are no longer of use.
2316 *
2317 * The new tree will have up to three children with the correct parent. Keep
2318 * track of the new entries as they need to be followed to find the next level
2319 * of new entries.
2320 *
2321 * The old tree will have up to three children with the old parent. Keep track
2322 * of the old entries as they may have more nodes below replaced. Nodes within
2323 * [index, last] are dead subtrees, others need to be freed and followed.
2324 *
2325 * @mas: The maple state pointing at the new data
2326 * @old_enode: The maple encoded node being replaced
2327 * @new_height: The new height of the tree as a result of the operation
2328 *
2329 */
2330 static inline void mas_topiary_replace(struct ma_state *mas,
2331 struct maple_enode *old_enode, unsigned char new_height)
2332 {
2333 struct ma_state tmp[3], tmp_next[3];
2334 MA_TOPIARY(subtrees, mas->tree);
2335 bool in_rcu;
2336 int i, n;
2337
2338 /* Place data in tree & then mark node as old */
2339 mas_put_in_tree(mas, old_enode, new_height);
2340
2341 /* Update the parent pointers in the tree */
2342 tmp[0] = *mas;
2343 tmp[0].offset = 0;
2344 tmp[1].status = ma_none;
2345 tmp[2].status = ma_none;
2346 while (!mte_is_leaf(tmp[0].node)) {
2347 n = 0;
2348 for (i = 0; i < 3; i++) {
2349 if (mas_is_none(&tmp[i]))
2350 continue;
2351
2352 while (n < 3) {
2353 if (!mas_find_child(&tmp[i], &tmp_next[n]))
2354 break;
2355 n++;
2356 }
2357
2358 mas_adopt_children(&tmp[i], tmp[i].node);
2359 }
2360
2361 if (MAS_WARN_ON(mas, n == 0))
2362 break;
2363
2364 while (n < 3)
2365 tmp_next[n++].status = ma_none;
2366
2367 for (i = 0; i < 3; i++)
2368 tmp[i] = tmp_next[i];
2369 }
2370
2371 /* Collect the old nodes that need to be discarded */
2372 if (mte_is_leaf(old_enode))
2373 return mas_free(mas, old_enode);
2374
2375 tmp[0] = *mas;
2376 tmp[0].offset = 0;
2377 tmp[0].node = old_enode;
2378 tmp[1].status = ma_none;
2379 tmp[2].status = ma_none;
2380 in_rcu = mt_in_rcu(mas->tree);
2381 do {
2382 n = 0;
2383 for (i = 0; i < 3; i++) {
2384 if (mas_is_none(&tmp[i]))
2385 continue;
2386
2387 while (n < 3) {
2388 if (!mas_find_child(&tmp[i], &tmp_next[n]))
2389 break;
2390
2391 if ((tmp_next[n].min >= tmp_next->index) &&
2392 (tmp_next[n].max <= tmp_next->last)) {
2393 mat_add(&subtrees, tmp_next[n].node);
2394 tmp_next[n].status = ma_none;
2395 } else {
2396 n++;
2397 }
2398 }
2399 }
2400
2401 if (MAS_WARN_ON(mas, n == 0))
2402 break;
2403
2404 while (n < 3)
2405 tmp_next[n++].status = ma_none;
2406
2407 for (i = 0; i < 3; i++) {
2408 mas_topiary_node(mas, &tmp[i], in_rcu);
2409 tmp[i] = tmp_next[i];
2410 }
2411 } while (!mte_is_leaf(tmp[0].node));
2412
2413 for (i = 0; i < 3; i++)
2414 mas_topiary_node(mas, &tmp[i], in_rcu);
2415
2416 mas_mat_destroy(mas, &subtrees);
2417 }
2418
2419 /*
2420 * mas_wmb_replace() - Write memory barrier and replace
2421 * @mas: The maple state
2422 * @old_enode: The old maple encoded node that is being replaced.
2423 * @new_height: The new height of the tree as a result of the operation
2424 *
2425 * Updates gap as necessary.
2426 */
2427 static inline void mas_wmb_replace(struct ma_state *mas,
2428 struct maple_enode *old_enode, unsigned char new_height)
2429 {
2430 /* Insert the new data in the tree */
2431 mas_topiary_replace(mas, old_enode, new_height);
2432
2433 if (mte_is_leaf(mas->node))
2434 return;
2435
2436 mas_update_gap(mas);
2437 }
2438
2439 /*
2440 * mast_cp_to_nodes() - Copy data out to nodes.
2441 * @mast: The maple subtree state
2442 * @left: The left encoded maple node
2443 * @middle: The middle encoded maple node
2444 * @right: The right encoded maple node
2445 * @split: The location to split between left and (middle ? middle : right)
2446 * @mid_split: The location to split between middle and right.
2447 */
2448 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
2449 struct maple_enode *left, struct maple_enode *middle,
2450 struct maple_enode *right, unsigned char split, unsigned char mid_split)
2451 {
2452 bool new_lmax = true;
2453
2454 mas_node_or_none(mast->l, left);
2455 mas_node_or_none(mast->m, middle);
2456 mas_node_or_none(mast->r, right);
2457
2458 mast->l->min = mast->orig_l->min;
2459 if (split == mast->bn->b_end) {
2460 mast->l->max = mast->orig_r->max;
2461 new_lmax = false;
2462 }
2463
2464 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);
2465
2466 if (middle) {
2467 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
2468 mast->m->min = mast->bn->pivot[split] + 1;
2469 split = mid_split;
2470 }
2471
2472 mast->r->max = mast->orig_r->max;
2473 if (right) {
2474 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
2475 mast->r->min = mast->bn->pivot[split] + 1;
2476 }
2477 }
2478
2479 /*
2480 * mast_combine_cp_left - Copy in the original left side of the tree into the
2481 * combined data set in the maple subtree state big node.
2482 * @mast: The maple subtree state
2483 */
2484 static inline void mast_combine_cp_left(struct maple_subtree_state *mast)
2485 {
2486 unsigned char l_slot = mast->orig_l->offset;
2487
2488 if (!l_slot)
2489 return;
2490
2491 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0);
2492 }
2493
2494 /*
2495 * mast_combine_cp_right: Copy in the original right side of the tree into the
2496 * combined data set in the maple subtree state big node.
2497 * @mast: The maple subtree state
2498 */
2499 static inline void mast_combine_cp_right(struct maple_subtree_state *mast)
2500 {
2501 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max)
2502 return;
2503
2504 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1,
2505 mt_slot_count(mast->orig_r->node), mast->bn,
2506 mast->bn->b_end);
2507 mast->orig_r->last = mast->orig_r->max;
2508 }
2509
2510 /*
2511 * mast_sufficient: Check if the maple subtree state has enough data in the big
2512 * node to create at least one sufficient node
2513 * @mast: the maple subtree state
2514 */
2515 static inline bool mast_sufficient(struct maple_subtree_state *mast)
2516 {
2517 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node))
2518 return true;
2519
2520 return false;
2521 }
2522
2523 /*
2524 * mast_overflow: Check if there is too much data in the subtree state for a
2525 * single node.
2526 * @mast: The maple subtree state
2527 */
2528 static inline bool mast_overflow(struct maple_subtree_state *mast)
2529 {
2530 if (mast->bn->b_end > mt_slot_count(mast->orig_l->node))
2531 return true;
2532
2533 return false;
2534 }
2535
2536 static inline void *mtree_range_walk(struct ma_state *mas)
2537 {
2538 unsigned long *pivots;
2539 unsigned char offset;
2540 struct maple_node *node;
2541 struct maple_enode *next, *last;
2542 enum maple_type type;
2543 void __rcu **slots;
2544 unsigned char end;
2545 unsigned long max, min;
2546 unsigned long prev_max, prev_min;
2547
2548 next = mas->node;
2549 min = mas->min;
2550 max = mas->max;
2551 do {
2552 last = next;
2553 node = mte_to_node(next);
2554 type = mte_node_type(next);
2555 pivots = ma_pivots(node, type);
2556 end = ma_data_end(node, type, pivots, max);
2557 prev_min = min;
2558 prev_max = max;
2559 if (pivots[0] >= mas->index) {
2560 offset = 0;
2561 max = pivots[0];
2562 goto next;
2563 }
2564
2565 offset = 1;
2566 while (offset < end) {
2567 if (pivots[offset] >= mas->index) {
2568 max = pivots[offset];
2569 break;
2570 }
2571 offset++;
2572 }
2573
2574 min = pivots[offset - 1] + 1;
2575 next:
2576 slots = ma_slots(node, type);
2577 next = mt_slot(mas->tree, slots, offset);
2578 if (unlikely(ma_dead_node(node)))
2579 goto dead_node;
2580 } while (!ma_is_leaf(type));
2581
2582 mas->end = end;
2583 mas->offset = offset;
2584 mas->index = min;
2585 mas->last = max;
2586 mas->min = prev_min;
2587 mas->max = prev_max;
2588 mas->node = last;
2589 return (void *)next;
2590
2591 dead_node:
2592 mas_reset(mas);
2593 return NULL;
2594 }
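
/*
 * For example (hypothetical node): with pivots {10, 20, 30} and mas->index
 * of 15, offset 1 is selected, the walk descends with min = 11 and max = 20,
 * and on reaching a leaf the range of the returned entry is reported in
 * mas->index and mas->last.
 */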
2595
2596 /*
2597 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers.
2598 * @mas: The starting maple state
2599 * @mast: The maple_subtree_state, keeps track of 4 maple states.
2600 * @count: The estimated count of iterations needed.
2601 *
2602 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root
2603 * is hit. First @b_node is split into two entries which are inserted into the
2604 * next iteration of the loop. @b_node is returned populated with the final
2605 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the
2606 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last
2607 * to account for what has been copied into the new sub-tree. The update of
2608 * orig_l_mas->last is used in mas_consume to find the slots that will need to
2609 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of
2610 * the new sub-tree in case the sub-tree becomes the full tree.
2611 */
2612 static void mas_spanning_rebalance(struct ma_state *mas,
2613 struct maple_subtree_state *mast, unsigned char count)
2614 {
2615 unsigned char split, mid_split;
2616 unsigned char slot = 0;
2617 unsigned char new_height = 0; /* used if node is a new root */
2618 struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
2619 struct maple_enode *old_enode;
2620
2621 MA_STATE(l_mas, mas->tree, mas->index, mas->index);
2622 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
2623 MA_STATE(m_mas, mas->tree, mas->index, mas->index);
2624
2625 /*
2626 * The tree needs to be rebalanced and leaves need to be kept at the same level.
2627 * Rebalancing is done by use of the ``struct maple_topiary``.
2628 */
2629 mast->l = &l_mas;
2630 mast->m = &m_mas;
2631 mast->r = &r_mas;
2632 l_mas.status = r_mas.status = m_mas.status = ma_none;
2633
2634 /* Not the root and insufficient data: rebalance against a neighbour. */
2635 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
2636 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
2637 mast_spanning_rebalance(mast);
2638
2639 /*
2640 * Each level of the tree is examined and balanced, pushing data to the left or
2641 * right, or rebalancing against left or right nodes is employed to avoid
2642 * rippling up the tree to limit the amount of churn. Once a new sub-section of
2643 * the tree is created, there may be a mix of new and old nodes. The old nodes
2644 * will have the incorrect parent pointers and currently be in two trees: the
2645 * original tree and the partially new tree. To remedy the parent pointers in
2646 * the old tree, the new data is swapped into the active tree and a walk down
2647 * the tree is performed and the parent pointers are updated.
2648 * See mas_topiary_replace() for more information.
2649 */
2650 while (count--) {
2651 mast->bn->b_end--;
2652 mast->bn->type = mte_node_type(mast->orig_l->node);
2653 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
2654 &mid_split);
2655 mast_set_split_parents(mast, left, middle, right, split,
2656 mid_split);
2657 mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
2658 new_height++;
2659
2660 /*
2661 * Copy data from next level in the tree to mast->bn from next
2662 * iteration
2663 */
2664 memset(mast->bn, 0, sizeof(struct maple_big_node));
2665 mast->bn->type = mte_node_type(left);
2666
2667 /* Root already stored in l->node. */
2668 if (mas_is_root_limits(mast->l))
2669 goto new_root;
2670
2671 mast_ascend(mast);
2672 mast_combine_cp_left(mast);
2673 l_mas.offset = mast->bn->b_end;
2674 mab_set_b_end(mast->bn, &l_mas, left);
2675 mab_set_b_end(mast->bn, &m_mas, middle);
2676 mab_set_b_end(mast->bn, &r_mas, right);
2677
2678 /* Copy anything necessary out of the right node. */
2679 mast_combine_cp_right(mast);
2680 mast->orig_l->last = mast->orig_l->max;
2681
2682 if (mast_sufficient(mast)) {
2683 if (mast_overflow(mast))
2684 continue;
2685
2686 if (mast->orig_l->node == mast->orig_r->node) {
2687 /*
2688 * The data in b_node should be stored in one
2689 * node and in the tree
2690 */
2691 slot = mast->l->offset;
2692 break;
2693 }
2694
2695 continue;
2696 }
2697
2698 /* May be a new root stored in mast->bn */
2699 if (mas_is_root_limits(mast->orig_l))
2700 break;
2701
2702 mast_spanning_rebalance(mast);
2703
2704 /* rebalancing from other nodes may require another loop. */
2705 if (!count)
2706 count++;
2707 }
2708
2709 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
2710 mte_node_type(mast->orig_l->node));
2711
2712 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
2713 new_height++;
2714 mas_set_parent(mas, left, l_mas.node, slot);
2715 if (middle)
2716 mas_set_parent(mas, middle, l_mas.node, ++slot);
2717
2718 if (right)
2719 mas_set_parent(mas, right, l_mas.node, ++slot);
2720
2721 if (mas_is_root_limits(mast->l)) {
2722 new_root:
2723 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas));
2724 while (!mte_is_root(mast->orig_l->node))
2725 mast_ascend(mast);
2726 } else {
2727 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
2728 }
2729
2730 old_enode = mast->orig_l->node;
2731 mas->depth = l_mas.depth;
2732 mas->node = l_mas.node;
2733 mas->min = l_mas.min;
2734 mas->max = l_mas.max;
2735 mas->offset = l_mas.offset;
2736 mas_wmb_replace(mas, old_enode, new_height);
2737 mtree_range_walk(mas);
2738 return;
2739 }
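
/*
 * Schematically: each iteration of the loop above converts the data in
 * mast->bn into one, two or three new nodes, ascends one level, and refills
 * mast->bn with the parent's remaining data around the new children.  The
 * loop stops once a level holds sufficient data in a single pre-existing
 * node or the root limits are reached, after which the new sub-tree is
 * swapped into place by mas_wmb_replace().
 */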
2740
2741 /*
2742 * mas_rebalance() - Rebalance a given node.
2743 * @mas: The maple state
2744 * @b_node: The big maple node.
2745 *
2746 * Rebalance two nodes into a single node or two new nodes that are sufficient.
2747 * Continue upwards until tree is sufficient.
2748 */
2749 static inline void mas_rebalance(struct ma_state *mas,
2750 struct maple_big_node *b_node)
2751 {
2752 char empty_count = mas_mt_height(mas);
2753 struct maple_subtree_state mast;
2754 unsigned char shift, b_end = ++b_node->b_end;
2755
2756 MA_STATE(l_mas, mas->tree, mas->index, mas->last);
2757 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
2758
2759 trace_ma_op(__func__, mas);
2760
2761 /*
2762 * Rebalancing occurs if a node is insufficient. Data is rebalanced
2763 * against the node to the right if it exists, otherwise the node to the
2764 * left of this node is rebalanced against this node. If rebalancing
2765 * causes just one node to be produced instead of two, then the parent
2766 * is also examined and rebalanced if it is insufficient. Every level
2767 * tries to combine the data in the same way. If one node contains the
2768 * entire range of the tree, then that node is used as a new root node.
2769 */
2770
2771 mast.orig_l = &l_mas;
2772 mast.orig_r = &r_mas;
2773 mast.bn = b_node;
2774 mast.bn->type = mte_node_type(mas->node);
2775
2776 l_mas = r_mas = *mas;
2777
2778 if (mas_next_sibling(&r_mas)) {
2779 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
2780 r_mas.last = r_mas.index = r_mas.max;
2781 } else {
2782 mas_prev_sibling(&l_mas);
2783 shift = mas_data_end(&l_mas) + 1;
2784 mab_shift_right(b_node, shift);
2785 mas->offset += shift;
2786 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
2787 b_node->b_end = shift + b_end;
2788 l_mas.index = l_mas.last = l_mas.min;
2789 }
2790
2791 return mas_spanning_rebalance(mas, &mast, empty_count);
2792 }
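
/*
 * For example (hypothetical state): if an erase leaves a leaf with fewer
 * than mt_min_slots entries, its data plus the next sibling's data (or the
 * previous sibling's when no next sibling exists) is gathered into @b_node
 * above, and mas_spanning_rebalance() then rewrites the affected levels;
 * the tree height is passed as the iteration budget since, in the worst
 * case, every ancestor may also become insufficient.
 */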
2793
2794 /*
2795 * mas_split_final_node() - Split the final node in a subtree operation.
2796 * @mast: the maple subtree state
2797 * @mas: The maple state
2798 */
2799 static inline void mas_split_final_node(struct maple_subtree_state *mast,
2800 struct ma_state *mas)
2801 {
2802 struct maple_enode *ancestor;
2803
2804 if (mte_is_root(mas->node)) {
2805 if (mt_is_alloc(mas->tree))
2806 mast->bn->type = maple_arange_64;
2807 else
2808 mast->bn->type = maple_range_64;
2809 }
2810 /*
2811 * Only a single node is used here; it could be the root.
2812 * The big node data should just fit in a single node.
2813 */
2814 ancestor = mas_new_ma_node(mas, mast->bn);
2815 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset);
2816 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset);
2817 mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
2818
2819 mast->l->node = ancestor;
2820 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
2821 mas->offset = mast->bn->b_end - 1;
2822 }
2823
2824 /*
2825 * mast_fill_bnode() - Copy data into the big node in the subtree state
2826 * @mast: The maple subtree state
2827 * @mas: the maple state
2828 * @skip: The number of entries to skip when inserting the new nodes.
2829 */
2830 static inline void mast_fill_bnode(struct maple_subtree_state *mast,
2831 struct ma_state *mas,
2832 unsigned char skip)
2833 {
2834 bool cp = true;
2835 unsigned char split;
2836
2837 memset(mast->bn, 0, sizeof(struct maple_big_node));
2838
2839 if (mte_is_root(mas->node)) {
2840 cp = false;
2841 } else {
2842 mas_ascend(mas);
2843 mas->offset = mte_parent_slot(mas->node);
2844 }
2845
2846 if (cp && mast->l->offset)
2847 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
2848
2849 split = mast->bn->b_end;
2850 mab_set_b_end(mast->bn, mast->l, mast->l->node);
2851 mast->r->offset = mast->bn->b_end;
2852 mab_set_b_end(mast->bn, mast->r, mast->r->node);
2853 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
2854 cp = false;
2855
2856 if (cp)
2857 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
2858 mast->bn, mast->bn->b_end);
2859
2860 mast->bn->b_end--;
2861 mast->bn->type = mte_node_type(mas->node);
2862 }
2863
2864 /*
2865 * mast_split_data() - Split the data in the subtree state big node into regular
2866 * nodes.
2867 * @mast: The maple subtree state
2868 * @mas: The maple state
2869 * @split: The location to split the big node
2870 */
2871 static inline void mast_split_data(struct maple_subtree_state *mast,
2872 struct ma_state *mas, unsigned char split)
2873 {
2874 unsigned char p_slot;
2875
2876 mab_mas_cp(mast->bn, 0, split, mast->l, true);
2877 mte_set_pivot(mast->r->node, 0, mast->r->max);
2878 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
2879 mast->l->offset = mte_parent_slot(mas->node);
2880 mast->l->max = mast->bn->pivot[split];
2881 mast->r->min = mast->l->max + 1;
2882 if (mte_is_leaf(mas->node))
2883 return;
2884
2885 p_slot = mast->orig_l->offset;
2886 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
2887 &p_slot, split);
2888 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
2889 &p_slot, split);
2890 }
2891
2892 /*
2893 * mas_push_data() - Instead of splitting a node, push the data to a left or
2894 * right sibling node if there is room.
2895 * @mas: The maple state
2896 * @mast: The maple subtree state
2897 * @left: Push left or not.
2898 *
2899 * Keeping the height of the tree low means faster lookups.
2900 *
2901 * Return: True if pushed, false otherwise.
2902 */
2903 static inline bool mas_push_data(struct ma_state *mas,
2904 struct maple_subtree_state *mast, bool left)
2905 {
2906 unsigned char slot_total = mast->bn->b_end;
2907 unsigned char end, space, split;
2908
2909 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
2910 tmp_mas = *mas;
2911 tmp_mas.depth = mast->l->depth;
2912
2913 if (left && !mas_prev_sibling(&tmp_mas))
2914 return false;
2915 else if (!left && !mas_next_sibling(&tmp_mas))
2916 return false;
2917
2918 end = mas_data_end(&tmp_mas);
2919 slot_total += end;
2920 space = 2 * mt_slot_count(mas->node) - 2;
2921 /* -2 instead of -1 to ensure there isn't a triple split */
2922 if (ma_is_leaf(mast->bn->type))
2923 space--;
2924
2925 if (mas->max == ULONG_MAX)
2926 space--;
2927
2928 if (slot_total >= space)
2929 return false;
2930
2931 /* Get the data; Fill mast->bn */
2932 mast->bn->b_end++;
2933 if (left) {
2934 mab_shift_right(mast->bn, end + 1);
2935 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
2936 mast->bn->b_end = slot_total + 1;
2937 } else {
2938 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
2939 }
2940
2941 /* Configure mast for splitting of mast->bn */
2942 split = mt_slots[mast->bn->type] - 2;
2943 if (left) {
2944 /* Switch mas to prev node */
2945 *mas = tmp_mas;
2946 /* Start using mast->l for the left side. */
2947 tmp_mas.node = mast->l->node;
2948 *mast->l = tmp_mas;
2949 } else {
2950 tmp_mas.node = mast->r->node;
2951 *mast->r = tmp_mas;
2952 split = slot_total - split;
2953 }
2954 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
2955 /* Update parent slot for split calculation. */
2956 if (left)
2957 mast->orig_l->offset += end + 1;
2958
2959 mast_split_data(mast, mas, split);
2960 mast_fill_bnode(mast, mas, 2);
2961 mas_split_final_node(mast, mas);
2962 return true;
2963 }
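
/*
 * For example (hypothetical sizes, 16-slot leaves not ending at ULONG_MAX):
 * space = 2 * 16 - 2 - 1 = 29, so the push only proceeds when the combined
 * data of this node and its sibling is at most 28 entries; otherwise a
 * regular split is performed so that neither resulting node is full.
 */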
2964
2965 /*
2966 * mas_split() - Split data that is too big for one node into two.
2967 * @mas: The maple state
2968 * @b_node: The maple big node
2969 */
2970 static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
2971 {
2972 struct maple_subtree_state mast;
2973 int height = 0;
2974 unsigned int orig_height = mas_mt_height(mas);
2975 unsigned char mid_split, split = 0;
2976 struct maple_enode *old;
2977
2978 /*
2979 * Splitting is handled differently from any other B-tree; the Maple
2980 * Tree splits upwards. Splitting up means that the split operation
2981 * occurs when the walk of the tree hits the leaves and not on the way
2982 * down. The reason for splitting up is that it is impossible to know
2983 * how much space will be needed until the leaf is (or leaves are)
2984 * reached. Since overwriting data is allowed and a range could
2985 * overwrite more than one range or result in changing one entry into 3
2986 * entries, it is impossible to know if a split is required until the
2987 * data is examined.
2988 *
2989 * Splitting is a balancing act between keeping allocations to a minimum
2990 * and avoiding a 'jitter' event where a tree is expanded to make room
2991 * for an entry followed by a contraction when the entry is removed. To
2992 * accomplish the balance, there are empty slots remaining in both left
2993 * and right nodes after a split.
2994 */
2995 MA_STATE(l_mas, mas->tree, mas->index, mas->last);
2996 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
2997 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
2998 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
2999
3000 trace_ma_op(__func__, mas);
3001
3002 mast.l = &l_mas;
3003 mast.r = &r_mas;
3004 mast.orig_l = &prev_l_mas;
3005 mast.orig_r = &prev_r_mas;
3006 mast.bn = b_node;
3007
3008 while (height++ <= orig_height) {
3009 if (mt_slots[b_node->type] > b_node->b_end) {
3010 mas_split_final_node(&mast, mas);
3011 break;
3012 }
3013
3014 l_mas = r_mas = *mas;
3015 l_mas.node = mas_new_ma_node(mas, b_node);
3016 r_mas.node = mas_new_ma_node(mas, b_node);
3017 /*
3018 * Another way that 'jitter' is avoided is to terminate a split up early if the
3019 * left or right node has space to spare. This is referred to as "pushing left"
3020 * or "pushing right" and is similar to the B* tree, except the left or right
3021 * node can rarely be reused due to RCU, but the ripple upwards is still halted,
3022 * which is a significant saving.
3023 */
3024 /* Try to push left. */
3025 if (mas_push_data(mas, &mast, true)) {
3026 height++;
3027 break;
3028 }
3029 /* Try to push right. */
3030 if (mas_push_data(mas, &mast, false)) {
3031 height++;
3032 break;
3033 }
3034
3035 split = mab_calc_split(mas, b_node, &mid_split);
3036 mast_split_data(&mast, mas, split);
3037 /*
3038 * Usually correct, mab_mas_cp in the above call overwrites
3039 * r->max.
3040 */
3041 mast.r->max = mas->max;
3042 mast_fill_bnode(&mast, mas, 1);
3043 prev_l_mas = *mast.l;
3044 prev_r_mas = *mast.r;
3045 }
3046
3047 /* Set the original node as dead */
3048 old = mas->node;
3049 mas->node = l_mas.node;
3050 mas_wmb_replace(mas, old, height);
3051 mtree_range_walk(mas);
3052 return;
3053 }
3054
3055 /*
3056 * mas_commit_b_node() - Commit the big node into the tree.
3057 * @wr_mas: The maple write state
3058 * @b_node: The maple big node
3059 */
3060 static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas,
3061 struct maple_big_node *b_node)
3062 {
3063 enum store_type type = wr_mas->mas->store_type;
3064
3065 WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store);
3066
3067 if (type == wr_rebalance)
3068 return mas_rebalance(wr_mas->mas, b_node);
3069
3070 return mas_split(wr_mas->mas, b_node);
3071 }
3072
3073 /*
3074 * mas_root_expand() - Expand a root to a node
3075 * @mas: The maple state
3076 * @entry: The entry to store into the tree
3077 */
3078 static inline void mas_root_expand(struct ma_state *mas, void *entry)
3079 {
3080 void *contents = mas_root_locked(mas);
3081 enum maple_type type = maple_leaf_64;
3082 struct maple_node *node;
3083 void __rcu **slots;
3084 unsigned long *pivots;
3085 int slot = 0;
3086
3087 node = mas_pop_node(mas);
3088 pivots = ma_pivots(node, type);
3089 slots = ma_slots(node, type);
3090 node->parent = ma_parent_ptr(mas_tree_parent(mas));
3091 mas->node = mt_mk_node(node, type);
3092 mas->status = ma_active;
3093
3094 if (mas->index) {
3095 if (contents) {
3096 rcu_assign_pointer(slots[slot], contents);
3097 if (likely(mas->index > 1))
3098 slot++;
3099 }
3100 pivots[slot++] = mas->index - 1;
3101 }
3102
3103 rcu_assign_pointer(slots[slot], entry);
3104 mas->offset = slot;
3105 pivots[slot] = mas->last;
3106 if (mas->last != ULONG_MAX)
3107 pivots[++slot] = ULONG_MAX;
3108
3109 mt_set_height(mas->tree, 1);
3110 ma_set_meta(node, maple_leaf_64, 0, slot);
3111 /* swap the new root into the tree */
3112 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3113 return;
3114 }
3115
3116 /*
3117 * mas_store_root() - Storing value into root.
3118 * @mas: The maple state
3119 * @entry: The entry to store.
3120 *
3121 * There is no root node now and we are storing a value into the root - this
3122 * function either assigns the pointer or expands into a node.
3123 */
3124 static inline void mas_store_root(struct ma_state *mas, void *entry)
3125 {
3126 if (!entry) {
3127 if (!mas->index)
3128 rcu_assign_pointer(mas->tree->ma_root, NULL);
3129 } else if (likely((mas->last != 0) || (mas->index != 0)))
3130 mas_root_expand(mas, entry);
3131 else if (((unsigned long) (entry) & 3) == 2)
3132 mas_root_expand(mas, entry);
3133 else {
3134 rcu_assign_pointer(mas->tree->ma_root, entry);
3135 mas->status = ma_start;
3136 }
3137 }
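
/*
 * For example (illustrative usage): the first store into an empty tree with
 * index 0 and last 0, e.g. mtree_store(mt, 0, ptr, GFP_KERNEL) for an
 * ordinary pointer, assigns the pointer directly to ma_root without
 * allocating a node, while storing at a non-zero index or over a wider range
 * expands the root into a maple_leaf_64 node via mas_root_expand().
 */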
3138
3139 /*
3140 * mas_is_span_wr() - Check if the write needs to be treated as a write that
3141 * spans the node.
3142 * @wr_mas: The maple write state
3143 *
3144 * Spanning writes are writes that start in one node and end in another, or
3145 * writes of a %NULL that will cause the node to end with a %NULL.
3146 *
3147 * Return: True if this is a spanning write, false otherwise.
3148 */
3149 static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
3150 {
3151 unsigned long max = wr_mas->r_max;
3152 unsigned long last = wr_mas->mas->last;
3153 enum maple_type type = wr_mas->type;
3154 void *entry = wr_mas->entry;
3155
3156 /* Contained in this pivot, fast path */
3157 if (last < max)
3158 return false;
3159
3160 if (ma_is_leaf(type)) {
3161 max = wr_mas->mas->max;
3162 if (last < max)
3163 return false;
3164 }
3165
3166 if (last == max) {
3167 /*
3168 * The last entry of leaf node cannot be NULL unless it is the
3169 * rightmost node (writing ULONG_MAX), otherwise it spans slots.
3170 */
3171 if (entry || last == ULONG_MAX)
3172 return false;
3173 }
3174
3175 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry);
3176 return true;
3177 }
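
/*
 * For example (hypothetical ranges): writing [10, 25] while in a leaf whose
 * maximum is 20 spans into the next node; writing a NULL whose last is
 * exactly the node maximum (and not ULONG_MAX) also counts as spanning,
 * since the resulting trailing NULL would require a rebalance to resolve.
 */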
3178
3179 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
3180 {
3181 wr_mas->type = mte_node_type(wr_mas->mas->node);
3182 mas_wr_node_walk(wr_mas);
3183 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
3184 }
3185
3186 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
3187 {
3188 wr_mas->mas->max = wr_mas->r_max;
3189 wr_mas->mas->min = wr_mas->r_min;
3190 wr_mas->mas->node = wr_mas->content;
3191 wr_mas->mas->offset = 0;
3192 wr_mas->mas->depth++;
3193 }
3194 /*
3195 * mas_wr_walk() - Walk the tree for a write.
3196 * @wr_mas: The maple write state
3197 *
3198 * Uses mas_slot_locked() and does not need to worry about dead nodes.
3199 *
3200 * Return: True if it's contained in a node, false on spanning write.
3201 */
3202 static bool mas_wr_walk(struct ma_wr_state *wr_mas)
3203 {
3204 struct ma_state *mas = wr_mas->mas;
3205
3206 while (true) {
3207 mas_wr_walk_descend(wr_mas);
3208 if (unlikely(mas_is_span_wr(wr_mas)))
3209 return false;
3210
3211 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3212 mas->offset);
3213 if (ma_is_leaf(wr_mas->type))
3214 return true;
3215
3216 if (mas->end < mt_slots[wr_mas->type] - 1)
3217 wr_mas->vacant_height = mas->depth + 1;
3218
3219 if (ma_is_root(mas_mn(mas))) {
3220 /* root needs more than 2 entries to be sufficient + 1 */
3221 if (mas->end > 2)
3222 wr_mas->sufficient_height = 1;
3223 } else if (mas->end > mt_min_slots[wr_mas->type] + 1)
3224 wr_mas->sufficient_height = mas->depth + 1;
3225
3226 mas_wr_walk_traverse(wr_mas);
3227 }
3228
3229 return true;
3230 }
3231
3232 static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
3233 {
3234 struct ma_state *mas = wr_mas->mas;
3235
3236 while (true) {
3237 mas_wr_walk_descend(wr_mas);
3238 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3239 mas->offset);
3240 if (ma_is_leaf(wr_mas->type))
3241 return;
3242 mas_wr_walk_traverse(wr_mas);
3243 }
3244 }
3245 /*
3246 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
3247 * @l_wr_mas: The left maple write state
3248 * @r_wr_mas: The right maple write state
3249 */
3250 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
3251 struct ma_wr_state *r_wr_mas)
3252 {
3253 struct ma_state *r_mas = r_wr_mas->mas;
3254 struct ma_state *l_mas = l_wr_mas->mas;
3255 unsigned char l_slot;
3256
3257 l_slot = l_mas->offset;
3258 if (!l_wr_mas->content)
3259 l_mas->index = l_wr_mas->r_min;
3260
3261 if ((l_mas->index == l_wr_mas->r_min) &&
3262 (l_slot &&
3263 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
3264 if (l_slot > 1)
3265 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
3266 else
3267 l_mas->index = l_mas->min;
3268
3269 l_mas->offset = l_slot - 1;
3270 }
3271
3272 if (!r_wr_mas->content) {
3273 if (r_mas->last < r_wr_mas->r_max)
3274 r_mas->last = r_wr_mas->r_max;
3275 r_mas->offset++;
3276 } else if ((r_mas->last == r_wr_mas->r_max) &&
3277 (r_mas->last < r_mas->max) &&
3278 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
3279 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
3280 r_wr_mas->type, r_mas->offset + 1);
3281 r_mas->offset++;
3282 }
3283 }
3284
3285 static inline void *mas_state_walk(struct ma_state *mas)
3286 {
3287 void *entry;
3288
3289 entry = mas_start(mas);
3290 if (mas_is_none(mas))
3291 return NULL;
3292
3293 if (mas_is_ptr(mas))
3294 return entry;
3295
3296 return mtree_range_walk(mas);
3297 }
3298
3299 /*
3300 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
3301 * to date.
3302 *
3303 * @mas: The maple state.
3304 *
3305 * Note: Leaves @mas in an undesirable state.
3306 * Return: The entry for @mas->index or %NULL on dead node.
3307 */
3308 static inline void *mtree_lookup_walk(struct ma_state *mas)
3309 {
3310 unsigned long *pivots;
3311 unsigned char offset;
3312 struct maple_node *node;
3313 struct maple_enode *next;
3314 enum maple_type type;
3315 void __rcu **slots;
3316 unsigned char end;
3317
3318 next = mas->node;
3319 do {
3320 node = mte_to_node(next);
3321 type = mte_node_type(next);
3322 pivots = ma_pivots(node, type);
3323 end = mt_pivots[type];
3324 offset = 0;
3325 do {
3326 if (pivots[offset] >= mas->index)
3327 break;
3328 } while (++offset < end);
3329
3330 slots = ma_slots(node, type);
3331 next = mt_slot(mas->tree, slots, offset);
3332 if (unlikely(ma_dead_node(node)))
3333 goto dead_node;
3334 } while (!ma_is_leaf(type));
3335
3336 return (void *)next;
3337
3338 dead_node:
3339 mas_reset(mas);
3340 return NULL;
3341 }
3342
3343 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
3344 /*
3345 * mas_new_root() - Create a new root node that only contains the entry passed
3346 * in.
3347 * @mas: The maple state
3348 * @entry: The entry to store.
3349 *
3350 * Only valid when the index == 0 and the last == ULONG_MAX
3351 */
3352 static inline void mas_new_root(struct ma_state *mas, void *entry)
3353 {
3354 struct maple_enode *root = mas_root_locked(mas);
3355 enum maple_type type = maple_leaf_64;
3356 struct maple_node *node;
3357 void __rcu **slots;
3358 unsigned long *pivots;
3359
3360 WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX);
3361
3362 if (!entry) {
3363 mt_set_height(mas->tree, 0);
3364 rcu_assign_pointer(mas->tree->ma_root, entry);
3365 mas->status = ma_start;
3366 goto done;
3367 }
3368
3369 node = mas_pop_node(mas);
3370 pivots = ma_pivots(node, type);
3371 slots = ma_slots(node, type);
3372 node->parent = ma_parent_ptr(mas_tree_parent(mas));
3373 mas->node = mt_mk_node(node, type);
3374 mas->status = ma_active;
3375 rcu_assign_pointer(slots[0], entry);
3376 pivots[0] = mas->last;
3377 mt_set_height(mas->tree, 1);
3378 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3379
3380 done:
3381 if (xa_is_node(root))
3382 mte_destroy_walk(root, mas->tree);
3383
3384 return;
3385 }
3386 /*
3387 * mas_wr_spanning_store() - Create a subtree with the store operation completed
3388 * and new nodes where necessary, then place the sub-tree in the actual tree.
3389 * Note that mas is expected to point to the node which caused the store to
3390 * span.
3391 * @wr_mas: The maple write state
3392 */
3393 static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
3394 {
3395 struct maple_subtree_state mast;
3396 struct maple_big_node b_node;
3397 struct ma_state *mas;
3398 unsigned char height;
3399
3400 /* Left and Right side of spanning store */
3401 MA_STATE(l_mas, NULL, 0, 0);
3402 MA_STATE(r_mas, NULL, 0, 0);
3403 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
3404 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
3405
3406 /*
3407 * A store operation that spans multiple nodes is called a spanning
3408 * store and is handled early in the store call stack by the function
3409 * mas_is_span_wr(). When a spanning store is identified, the maple
3410 * state is duplicated. The first maple state walks the left tree path
3411 * to ``index``, the duplicate walks the right tree path to ``last``.
3412 * The data in the two nodes are combined into a single node, two nodes,
3413 * or possibly three nodes (see the 3-way split above). A ``NULL``
3414 * written to the last entry of a node is considered a spanning store as
3415 * a rebalance is required for the operation to complete and an overflow
3416 * of data may happen.
3417 */
3418 mas = wr_mas->mas;
3419 trace_ma_op(__func__, mas);
3420
3421 if (unlikely(!mas->index && mas->last == ULONG_MAX))
3422 return mas_new_root(mas, wr_mas->entry);
3423 /*
3424 * Node rebalancing may occur due to this store, so there may be three new
3425 * entries per level plus a new root.
3426 */
3427 height = mas_mt_height(mas);
3428
3429 /*
3430 * Set up the right side. We need to get to the next offset after the
3431 * spanning store to ensure it is not NULL, and to combine the next node
3432 * with the node containing the start of the store.
3433 */
3434 r_mas = *mas;
3435 /* Avoid overflow, walk to next slot in the tree. */
3436 if (r_mas.last + 1)
3437 r_mas.last++;
3438
3439 r_mas.index = r_mas.last;
3440 mas_wr_walk_index(&r_wr_mas);
3441 r_mas.last = r_mas.index = mas->last;
3442
3443 /* Set up left side. */
3444 l_mas = *mas;
3445 mas_wr_walk_index(&l_wr_mas);
3446
3447 if (!wr_mas->entry) {
3448 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
3449 mas->offset = l_mas.offset;
3450 mas->index = l_mas.index;
3451 mas->last = l_mas.last = r_mas.last;
3452 }
3453
3454 /* expanding NULLs may make this cover the entire range */
3455 if (!l_mas.index && r_mas.last == ULONG_MAX) {
3456 mas_set_range(mas, 0, ULONG_MAX);
3457 return mas_new_root(mas, wr_mas->entry);
3458 }
3459
3460 memset(&b_node, 0, sizeof(struct maple_big_node));
3461 /* Copy l_mas and store the value in b_node. */
3462 mas_store_b_node(&l_wr_mas, &b_node, l_mas.end);
3463 /* Copy r_mas into b_node if there is anything to copy. */
3464 if (r_mas.max > r_mas.last)
3465 mas_mab_cp(&r_mas, r_mas.offset, r_mas.end,
3466 &b_node, b_node.b_end + 1);
3467 else
3468 b_node.b_end++;
3469
3470 /* Stop spanning searches by searching for just index. */
3471 l_mas.index = l_mas.last = mas->index;
3472
3473 mast.bn = &b_node;
3474 mast.orig_l = &l_mas;
3475 mast.orig_r = &r_mas;
3476 /* Combine l_mas and r_mas and split them up evenly again. */
3477 return mas_spanning_rebalance(mas, &mast, height + 1);
3478 }
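
/*
 * Illustrative usage (hypothetical tree layout): if one leaf holds
 * [0, 9] -> A, [10, 19] -> B and the next leaf holds [20, 29] -> C, then
 *
 *	mtree_store_range(mt, 5, 24, D, GFP_KERNEL);
 *
 * starts in the first leaf and ends in the second, so it reaches this
 * function: the left path stops at [0, 9], the right path at [20, 29], and
 * the surviving data [0, 4] -> A, [5, 24] -> D, [25, 29] -> C is rebuilt by
 * mas_spanning_rebalance().
 */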
3479
3480 /*
3481 * mas_wr_node_store() - Attempt to store the value in a node
3482 * @wr_mas: The maple write state
3483 *
3484 * Attempts to reuse the node, but may allocate.
3485 */
3486 static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
3487 unsigned char new_end)
3488 {
3489 struct ma_state *mas = wr_mas->mas;
3490 void __rcu **dst_slots;
3491 unsigned long *dst_pivots;
3492 unsigned char dst_offset, offset_end = wr_mas->offset_end;
3493 struct maple_node reuse, *newnode;
3494 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type];
3495 bool in_rcu = mt_in_rcu(mas->tree);
3496 unsigned char height = mas_mt_height(mas);
3497
3498 if (mas->last == wr_mas->end_piv)
3499 offset_end++; /* don't copy this offset */
3500
3501 /* set up node. */
3502 if (in_rcu) {
3503 newnode = mas_pop_node(mas);
3504 } else {
3505 memset(&reuse, 0, sizeof(struct maple_node));
3506 newnode = &reuse;
3507 }
3508
3509 newnode->parent = mas_mn(mas)->parent;
3510 dst_pivots = ma_pivots(newnode, wr_mas->type);
3511 dst_slots = ma_slots(newnode, wr_mas->type);
3512 /* Copy from start to insert point */
3513 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
3514 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset);
3515
3516 /* Handle insert of new range starting after old range */
3517 if (wr_mas->r_min < mas->index) {
3518 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content);
3519 dst_pivots[mas->offset++] = mas->index - 1;
3520 }
3521
3522 /* Store the new entry and range end. */
3523 if (mas->offset < node_pivots)
3524 dst_pivots[mas->offset] = mas->last;
3525 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry);
3526
3527 /*
3528 * this range wrote to the end of the node or it overwrote the rest of
3529 * the data
3530 */
3531 if (offset_end > mas->end)
3532 goto done;
3533
3534 dst_offset = mas->offset + 1;
3535 /* Copy to the end of node if necessary. */
3536 copy_size = mas->end - offset_end + 1;
3537 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end,
3538 sizeof(void *) * copy_size);
3539 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end,
3540 sizeof(unsigned long) * (copy_size - 1));
3541
3542 if (new_end < node_pivots)
3543 dst_pivots[new_end] = mas->max;
3544
3545 done:
3546 mas_leaf_set_meta(newnode, maple_leaf_64, new_end);
3547 if (in_rcu) {
3548 struct maple_enode *old_enode = mas->node;
3549
3550 mas->node = mt_mk_node(newnode, wr_mas->type);
3551 mas_replace_node(mas, old_enode, height);
3552 } else {
3553 memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
3554 }
3555 trace_ma_write(__func__, mas, 0, wr_mas->entry);
3556 mas_update_gap(mas);
3557 mas->end = new_end;
3558 return;
3559 }
3560
3561 /*
3562  * mas_wr_slot_store() - Attempt to store a value in a slot.
3563 * @wr_mas: the maple write state
3564 */
3565 static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
3566 {
3567 struct ma_state *mas = wr_mas->mas;
3568 unsigned char offset = mas->offset;
3569 void __rcu **slots = wr_mas->slots;
3570 bool gap = false;
3571
3572 gap |= !mt_slot_locked(mas->tree, slots, offset);
3573 gap |= !mt_slot_locked(mas->tree, slots, offset + 1);
3574
3575 if (wr_mas->offset_end - offset == 1) {
3576 if (mas->index == wr_mas->r_min) {
3577 /* Overwriting the range and a part of the next one */
3578 rcu_assign_pointer(slots[offset], wr_mas->entry);
3579 wr_mas->pivots[offset] = mas->last;
3580 } else {
3581 /* Overwriting a part of the range and the next one */
3582 rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
3583 wr_mas->pivots[offset] = mas->index - 1;
3584 mas->offset++; /* Keep mas accurate. */
3585 }
3586 } else {
3587 WARN_ON_ONCE(mt_in_rcu(mas->tree));
3588 /*
3589 * Expand the range, only partially overwriting the previous and
3590 * next ranges
3591 */
3592 gap |= !mt_slot_locked(mas->tree, slots, offset + 2);
3593 rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
3594 wr_mas->pivots[offset] = mas->index - 1;
3595 wr_mas->pivots[offset + 1] = mas->last;
3596 mas->offset++; /* Keep mas accurate. */
3597 }
3598
3599 trace_ma_write(__func__, mas, 0, wr_mas->entry);
3600 /*
3601 * Only update gap when the new entry is empty or there is an empty
3602 * entry in the original two ranges.
3603 */
3604 if (!wr_mas->entry || gap)
3605 mas_update_gap(mas);
3606
3607 return;
3608 }
3609
3610 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
3611 {
3612 struct ma_state *mas = wr_mas->mas;
3613
3614 if (!wr_mas->slots[wr_mas->offset_end]) {
3615 /* If this one is null, the next and prev are not */
3616 mas->last = wr_mas->end_piv;
3617 } else {
3618 /* Check next slot(s) if we are overwriting the end */
3619 if ((mas->last == wr_mas->end_piv) &&
3620 (mas->end != wr_mas->offset_end) &&
3621 !wr_mas->slots[wr_mas->offset_end + 1]) {
3622 wr_mas->offset_end++;
3623 if (wr_mas->offset_end == mas->end)
3624 mas->last = mas->max;
3625 else
3626 mas->last = wr_mas->pivots[wr_mas->offset_end];
3627 wr_mas->end_piv = mas->last;
3628 }
3629 }
3630
3631 if (!wr_mas->content) {
3632 /* If this one is null, the next and prev are not */
3633 mas->index = wr_mas->r_min;
3634 } else {
3635 /* Check prev slot if we are overwriting the start */
3636 if (mas->index == wr_mas->r_min && mas->offset &&
3637 !wr_mas->slots[mas->offset - 1]) {
3638 mas->offset--;
3639 wr_mas->r_min = mas->index =
3640 mas_safe_min(mas, wr_mas->pivots, mas->offset);
3641 wr_mas->r_max = wr_mas->pivots[mas->offset];
3642 }
3643 }
3644 }
3645
3646 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
3647 {
3648 while ((wr_mas->offset_end < wr_mas->mas->end) &&
3649 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end]))
3650 wr_mas->offset_end++;
3651
3652 if (wr_mas->offset_end < wr_mas->mas->end)
3653 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end];
3654 else
3655 wr_mas->end_piv = wr_mas->mas->max;
3656 }
3657
3658 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
3659 {
3660 struct ma_state *mas = wr_mas->mas;
3661 unsigned char new_end = mas->end + 2;
3662
3663 new_end -= wr_mas->offset_end - mas->offset;
3664 if (wr_mas->r_min == mas->index)
3665 new_end--;
3666
3667 if (wr_mas->end_piv == mas->last)
3668 new_end--;
3669
3670 return new_end;
3671 }
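
/*
 * Worked example (illustrative values only): writing [12, 17] into a leaf
 * whose offsets 3, 4 and 5 hold [10, 14], [15, 19] and [20, 24] with
 * mas->end == 5 gives mas->offset == 3, offset_end == 4, r_min == 10 and
 * end_piv == 19.  new_end = 5 + 2 - (4 - 3) = 6: one extra slot is needed
 * because the old [10, 14] range is trimmed to [10, 11] and the old
 * [15, 19] range is trimmed to [18, 19] around the new entry.
 */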
3672
3673 /*
3674  * mas_wr_append() - Attempt to append
3675 * @wr_mas: the maple write state
3676 * @new_end: The end of the node after the modification
3677 *
3678  * This is currently unsafe in rcu mode since the end of the node may be cached
3679  * by readers while the node contents may be updated, which could result in
3680  * inaccurate information.
3681 */
3682 static inline void mas_wr_append(struct ma_wr_state *wr_mas,
3683 unsigned char new_end)
3684 {
3685 struct ma_state *mas = wr_mas->mas;
3686 void __rcu **slots;
3687 unsigned char end = mas->end;
3688
3689 if (new_end < mt_pivots[wr_mas->type]) {
3690 wr_mas->pivots[new_end] = wr_mas->pivots[end];
3691 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end);
3692 }
3693
3694 slots = wr_mas->slots;
3695 if (new_end == end + 1) {
3696 if (mas->last == wr_mas->r_max) {
3697 /* Append to end of range */
3698 rcu_assign_pointer(slots[new_end], wr_mas->entry);
3699 wr_mas->pivots[end] = mas->index - 1;
3700 mas->offset = new_end;
3701 } else {
3702 /* Append to start of range */
3703 rcu_assign_pointer(slots[new_end], wr_mas->content);
3704 wr_mas->pivots[end] = mas->last;
3705 rcu_assign_pointer(slots[end], wr_mas->entry);
3706 }
3707 } else {
3708 /* Append to the range without touching any boundaries. */
3709 rcu_assign_pointer(slots[new_end], wr_mas->content);
3710 wr_mas->pivots[end + 1] = mas->last;
3711 rcu_assign_pointer(slots[end + 1], wr_mas->entry);
3712 wr_mas->pivots[end] = mas->index - 1;
3713 mas->offset = end + 1;
3714 }
3715
3716 if (!wr_mas->content || !wr_mas->entry)
3717 mas_update_gap(mas);
3718
3719 mas->end = new_end;
3720 trace_ma_write(__func__, mas, new_end, wr_mas->entry);
3721 return;
3722 }
3723
3724 /*
3725 * mas_wr_bnode() - Slow path for a modification.
3726 * @wr_mas: The write maple state
3727 *
3728 * This is where split, rebalance end up.
3729 */
3730 static void mas_wr_bnode(struct ma_wr_state *wr_mas)
3731 {
3732 struct maple_big_node b_node;
3733
3734 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
3735 memset(&b_node, 0, sizeof(struct maple_big_node));
3736 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
3737 mas_commit_b_node(wr_mas, &b_node);
3738 }
3739
3740 /*
3741 * mas_wr_store_entry() - Internal call to store a value
3742 * @wr_mas: The maple write state
3743 */
3744 static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
3745 {
3746 struct ma_state *mas = wr_mas->mas;
3747 unsigned char new_end = mas_wr_new_end(wr_mas);
3748
3749 switch (mas->store_type) {
3750 case wr_exact_fit:
3751 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
3752 if (!!wr_mas->entry ^ !!wr_mas->content)
3753 mas_update_gap(mas);
3754 break;
3755 case wr_append:
3756 mas_wr_append(wr_mas, new_end);
3757 break;
3758 case wr_slot_store:
3759 mas_wr_slot_store(wr_mas);
3760 break;
3761 case wr_node_store:
3762 mas_wr_node_store(wr_mas, new_end);
3763 break;
3764 case wr_spanning_store:
3765 mas_wr_spanning_store(wr_mas);
3766 break;
3767 case wr_split_store:
3768 case wr_rebalance:
3769 mas_wr_bnode(wr_mas);
3770 break;
3771 case wr_new_root:
3772 mas_new_root(mas, wr_mas->entry);
3773 break;
3774 case wr_store_root:
3775 mas_store_root(mas, wr_mas->entry);
3776 break;
3777 case wr_invalid:
3778 MT_BUG_ON(mas->tree, 1);
3779 }
3780
3781 return;
3782 }
3783
3784 static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
3785 {
3786 struct ma_state *mas = wr_mas->mas;
3787
3788 if (!mas_is_active(mas)) {
3789 if (mas_is_start(mas))
3790 goto set_content;
3791
3792 if (unlikely(mas_is_paused(mas)))
3793 goto reset;
3794
3795 if (unlikely(mas_is_none(mas)))
3796 goto reset;
3797
3798 if (unlikely(mas_is_overflow(mas)))
3799 goto reset;
3800
3801 if (unlikely(mas_is_underflow(mas)))
3802 goto reset;
3803 }
3804
3805 /*
3806 * A less strict version of mas_is_span_wr() where we allow spanning
3807 * writes within this node. This is to stop partial walks in
3808 * mas_prealloc() from being reset.
3809 */
3810 if (mas->last > mas->max)
3811 goto reset;
3812
3813 if (wr_mas->entry)
3814 goto set_content;
3815
3816 if (mte_is_leaf(mas->node) && mas->last == mas->max)
3817 goto reset;
3818
3819 goto set_content;
3820
3821 reset:
3822 mas_reset(mas);
3823 set_content:
3824 wr_mas->content = mas_start(mas);
3825 }
3826
3827 /**
3828  * mas_prealloc_calc() - Calculate the number of nodes needed for a
3829  * given store operation
3830  * @wr_mas: The maple write state
3831  * @entry: The entry to store into the tree
3832  *
3833  * The number of nodes required is stored in @wr_mas->mas->node_request.
3834 */
3835 static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
3836 {
3837 struct ma_state *mas = wr_mas->mas;
3838 unsigned char height = mas_mt_height(mas);
3839 int ret = height * 3 + 1;
3840 unsigned char delta = height - wr_mas->vacant_height;
3841
3842 switch (mas->store_type) {
3843 case wr_exact_fit:
3844 case wr_append:
3845 case wr_slot_store:
3846 ret = 0;
3847 break;
3848 case wr_spanning_store:
3849 if (wr_mas->sufficient_height < wr_mas->vacant_height)
3850 ret = (height - wr_mas->sufficient_height) * 3 + 1;
3851 else
3852 ret = delta * 3 + 1;
3853 break;
3854 case wr_split_store:
3855 ret = delta * 2 + 1;
3856 break;
3857 case wr_rebalance:
3858 if (wr_mas->sufficient_height < wr_mas->vacant_height)
3859 ret = (height - wr_mas->sufficient_height) * 2 + 1;
3860 else
3861 ret = delta * 2 + 1;
3862 break;
3863 case wr_node_store:
3864 ret = mt_in_rcu(mas->tree) ? 1 : 0;
3865 break;
3866 case wr_new_root:
3867 ret = 1;
3868 break;
3869 case wr_store_root:
3870 if (likely((mas->last != 0) || (mas->index != 0)))
3871 ret = 1;
3872 else if (((unsigned long) (entry) & 3) == 2)
3873 ret = 1;
3874 else
3875 ret = 0;
3876 break;
3877 case wr_invalid:
3878 WARN_ON_ONCE(1);
3879 }
3880
3881 mas->node_request = ret;
3882 }
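
/*
 * Illustrative arithmetic (assumed values): with a tree height of 4 and a
 * wr_mas->vacant_height of 2, delta is 2, so a wr_split_store requests
 * 2 * 2 + 1 = 5 nodes and a wr_spanning_store (assuming sufficient_height
 * is not lower than vacant_height) requests 2 * 3 + 1 = 7.  A wr_node_store
 * only requests a single node when the tree is in RCU mode, since the node
 * can otherwise be reused in place.
 */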
3883
3884 /*
3885 * mas_wr_store_type() - Determine the store type for a given
3886 * store operation.
3887 * @wr_mas: The maple write state
3888 *
3889 * Return: the type of store needed for the operation
3890 */
3891 static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
3892 {
3893 struct ma_state *mas = wr_mas->mas;
3894 unsigned char new_end;
3895
3896 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
3897 return wr_store_root;
3898
3899 if (unlikely(!mas_wr_walk(wr_mas)))
3900 return wr_spanning_store;
3901
3902 /* At this point, we are at the leaf node that needs to be altered. */
3903 mas_wr_end_piv(wr_mas);
3904 if (!wr_mas->entry)
3905 mas_wr_extend_null(wr_mas);
3906
3907 if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last))
3908 return wr_exact_fit;
3909
3910 if (unlikely(!mas->index && mas->last == ULONG_MAX))
3911 return wr_new_root;
3912
3913 new_end = mas_wr_new_end(wr_mas);
3914 /* Potential spanning rebalance collapsing a node */
3915 if (new_end < mt_min_slots[wr_mas->type]) {
3916 if (!mte_is_root(mas->node))
3917 return wr_rebalance;
3918 return wr_node_store;
3919 }
3920
3921 if (new_end >= mt_slots[wr_mas->type])
3922 return wr_split_store;
3923
3924 if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end))
3925 return wr_append;
3926
3927 if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) ||
3928 (wr_mas->offset_end - mas->offset == 1)))
3929 return wr_slot_store;
3930
3931 return wr_node_store;
3932 }
3933
3934 /**
3935 * mas_wr_preallocate() - Preallocate enough nodes for a store operation
3936 * @wr_mas: The maple write state
3937 * @entry: The entry that will be stored
3938 *
3939 */
3940 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry)
3941 {
3942 struct ma_state *mas = wr_mas->mas;
3943
3944 mas_wr_prealloc_setup(wr_mas);
3945 mas->store_type = mas_wr_store_type(wr_mas);
3946 mas_prealloc_calc(wr_mas, entry);
3947 if (!mas->node_request)
3948 return;
3949
3950 mas_alloc_nodes(mas, GFP_NOWAIT);
3951 }
3952
3953 /**
3954 * mas_insert() - Internal call to insert a value
3955 * @mas: The maple state
3956 * @entry: The entry to store
3957 *
3958 * Return: %NULL or the contents that already exists at the requested index
3959 * otherwise. The maple state needs to be checked for error conditions.
3960 */
3961 static inline void *mas_insert(struct ma_state *mas, void *entry)
3962 {
3963 MA_WR_STATE(wr_mas, mas, entry);
3964
3965 /*
3966 * Inserting a new range inserts either 0, 1, or 2 pivots within the
3967 * tree. If the insert fits exactly into an existing gap with a value
3968 * of NULL, then the slot only needs to be written with the new value.
3969 * If the range being inserted is adjacent to another range, then only a
3970 * single pivot needs to be inserted (as well as writing the entry). If
3971 * the new range is within a gap but does not touch any other ranges,
3972 * then two pivots need to be inserted: the start - 1, and the end. As
3973 * usual, the entry must be written. Most operations require a new node
3974 * to be allocated and replace an existing node to ensure RCU safety,
3975 * when in RCU mode. The exception to requiring a newly allocated node
3976 * is when inserting at the end of a node (appending). When done
3977 * carefully, appending can reuse the node in place.
3978 */
3979 wr_mas.content = mas_start(mas);
3980 if (wr_mas.content)
3981 goto exists;
3982
3983 mas_wr_preallocate(&wr_mas, entry);
3984 if (mas_is_err(mas))
3985 return NULL;
3986
3987 /* spanning writes always overwrite something */
3988 if (mas->store_type == wr_spanning_store)
3989 goto exists;
3990
3991 /* At this point, we are at the leaf node that needs to be altered. */
3992 if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) {
3993 wr_mas.offset_end = mas->offset;
3994 wr_mas.end_piv = wr_mas.r_max;
3995
3996 if (wr_mas.content || (mas->last > wr_mas.r_max))
3997 goto exists;
3998 }
3999
4000 mas_wr_store_entry(&wr_mas);
4001 return wr_mas.content;
4002
4003 exists:
4004 mas_set_err(mas, -EEXIST);
4005 return wr_mas.content;
4006
4007 }
4008
4009 /**
4010 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry
4011 * @mas: The maple state.
4012  * @startp: Pointer to ID.
4013  * @entry: The entry to store.
4014  * @range_lo: Lower bound of range to search.
4015  * @range_hi: Upper bound of range to search.
4016  * @next: Pointer to next ID to allocate.
4017  * @gfp: The GFP_FLAGS to use for allocations.
4018 *
4019 * Return: 0 if the allocation succeeded without wrapping, 1 if the
4020 * allocation succeeded after wrapping, or -EBUSY if there are no
4021 * free entries.
4022 */
4023 int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
4024 void *entry, unsigned long range_lo, unsigned long range_hi,
4025 unsigned long *next, gfp_t gfp)
4026 {
4027 unsigned long min = range_lo;
4028 int ret = 0;
4029
4030 range_lo = max(min, *next);
4031 ret = mas_empty_area(mas, range_lo, range_hi, 1);
4032 if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
4033 mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
4034 ret = 1;
4035 }
4036 if (ret < 0 && range_lo > min) {
4037 mas_reset(mas);
4038 ret = mas_empty_area(mas, min, range_hi, 1);
4039 if (ret == 0)
4040 ret = 1;
4041 }
4042 if (ret < 0)
4043 return ret;
4044
4045 do {
4046 mas_insert(mas, entry);
4047 } while (mas_nomem(mas, gfp));
4048 if (mas_is_err(mas))
4049 return xa_err(mas->node);
4050
4051 *startp = mas->index;
4052 *next = *startp + 1;
4053 if (*next == 0)
4054 mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;
4055
4056 mas_destroy(mas);
4057 return ret;
4058 }
4059 EXPORT_SYMBOL(mas_alloc_cyclic);
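
/*
 * Illustrative usage sketch (the tree "my_tree", the object "obj" and the
 * persistent "next_id" are hypothetical caller state):
 *
 *	unsigned long id;
 *	int ret;
 *	MA_STATE(mas, &my_tree, 0, 0);
 *
 *	mtree_lock(&my_tree);
 *	ret = mas_alloc_cyclic(&mas, &id, obj, 1, 1023, &next_id, GFP_KERNEL);
 *	mtree_unlock(&my_tree);
 *
 * A negative return means the allocation failed (for example, no free ID in
 * [1, 1023]); a return of 1 means the search wrapped back to range_lo before
 * succeeding, and the new ID is in "id" either way.
 */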
4060
4061 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
4062 {
4063 retry:
4064 mas_set(mas, index);
4065 mas_state_walk(mas);
4066 if (mas_is_start(mas))
4067 goto retry;
4068 }
4069
4070 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas,
4071 struct maple_node *node, const unsigned long index)
4072 {
4073 if (unlikely(ma_dead_node(node))) {
4074 mas_rewalk(mas, index);
4075 return true;
4076 }
4077 return false;
4078 }
4079
4080 /*
4081  * mas_prev_node() - Find the previous non-null entry at the same level in
4082  * the tree.
4083  *
4084  * @mas: The maple state
4085  * @min: The lower limit to search
4086  *
4087  * The prev node value will be mas->node[mas->offset] or the status will be
4088  * ma_none.
4089 * Return: 1 if the node is dead, 0 otherwise.
4090 */
4091 static int mas_prev_node(struct ma_state *mas, unsigned long min)
4092 {
4093 enum maple_type mt;
4094 int offset, level;
4095 void __rcu **slots;
4096 struct maple_node *node;
4097 unsigned long *pivots;
4098 unsigned long max;
4099
4100 node = mas_mn(mas);
4101 if (!mas->min)
4102 goto no_entry;
4103
4104 max = mas->min - 1;
4105 if (max < min)
4106 goto no_entry;
4107
4108 level = 0;
4109 do {
4110 if (ma_is_root(node))
4111 goto no_entry;
4112
4113 /* Walk up. */
4114 if (unlikely(mas_ascend(mas)))
4115 return 1;
4116 offset = mas->offset;
4117 level++;
4118 node = mas_mn(mas);
4119 } while (!offset);
4120
4121 offset--;
4122 mt = mte_node_type(mas->node);
4123 while (level > 1) {
4124 level--;
4125 slots = ma_slots(node, mt);
4126 mas->node = mas_slot(mas, slots, offset);
4127 if (unlikely(ma_dead_node(node)))
4128 return 1;
4129
4130 mt = mte_node_type(mas->node);
4131 node = mas_mn(mas);
4132 pivots = ma_pivots(node, mt);
4133 offset = ma_data_end(node, mt, pivots, max);
4134 if (unlikely(ma_dead_node(node)))
4135 return 1;
4136 }
4137
4138 slots = ma_slots(node, mt);
4139 mas->node = mas_slot(mas, slots, offset);
4140 pivots = ma_pivots(node, mt);
4141 if (unlikely(ma_dead_node(node)))
4142 return 1;
4143
4144 if (likely(offset))
4145 mas->min = pivots[offset - 1] + 1;
4146 mas->max = max;
4147 mas->offset = mas_data_end(mas);
4148 if (unlikely(mte_dead_node(mas->node)))
4149 return 1;
4150
4151 mas->end = mas->offset;
4152 return 0;
4153
4154 no_entry:
4155 if (unlikely(ma_dead_node(node)))
4156 return 1;
4157
4158 mas->status = ma_underflow;
4159 return 0;
4160 }
4161
4162 /*
4163 * mas_prev_slot() - Get the entry in the previous slot
4164 *
4165 * @mas: The maple state
4166 * @min: The minimum starting range
4167 * @empty: Can be empty
4168 *
4169 * Return: The entry in the previous slot which is possibly NULL
4170 */
4171 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty)
4172 {
4173 void *entry;
4174 void __rcu **slots;
4175 unsigned long pivot;
4176 enum maple_type type;
4177 unsigned long *pivots;
4178 struct maple_node *node;
4179 unsigned long save_point = mas->index;
4180
4181 retry:
4182 node = mas_mn(mas);
4183 type = mte_node_type(mas->node);
4184 pivots = ma_pivots(node, type);
4185 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4186 goto retry;
4187
4188 if (mas->min <= min) {
4189 pivot = mas_safe_min(mas, pivots, mas->offset);
4190
4191 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4192 goto retry;
4193
4194 if (pivot <= min)
4195 goto underflow;
4196 }
4197
4198 again:
4199 if (likely(mas->offset)) {
4200 mas->offset--;
4201 mas->last = mas->index - 1;
4202 mas->index = mas_safe_min(mas, pivots, mas->offset);
4203 } else {
4204 if (mas->index <= min)
4205 goto underflow;
4206
4207 if (mas_prev_node(mas, min)) {
4208 mas_rewalk(mas, save_point);
4209 goto retry;
4210 }
4211
4212 if (WARN_ON_ONCE(mas_is_underflow(mas)))
4213 return NULL;
4214
4215 mas->last = mas->max;
4216 node = mas_mn(mas);
4217 type = mte_node_type(mas->node);
4218 pivots = ma_pivots(node, type);
4219 mas->index = pivots[mas->offset - 1] + 1;
4220 }
4221
4222 slots = ma_slots(node, type);
4223 entry = mas_slot(mas, slots, mas->offset);
4224 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4225 goto retry;
4226
4227 if (likely(entry))
4228 return entry;
4229
4230 if (!empty) {
4231 if (mas->index <= min)
4232 goto underflow;
4233
4234 goto again;
4235 }
4236
4237 return entry;
4238
4239 underflow:
4240 mas->status = ma_underflow;
4241 return NULL;
4242 }
4243
4244 /*
4245 * mas_next_node() - Get the next node at the same level in the tree.
4246 * @mas: The maple state
4247 * @node: The maple node
4248 * @max: The maximum pivot value to check.
4249 *
4250 * The next value will be mas->node[mas->offset] or the status will have
4251 * overflowed.
4252 * Return: 1 on dead node, 0 otherwise.
4253 */
4254 static int mas_next_node(struct ma_state *mas, struct maple_node *node,
4255 unsigned long max)
4256 {
4257 unsigned long min;
4258 unsigned long *pivots;
4259 struct maple_enode *enode;
4260 struct maple_node *tmp;
4261 int level = 0;
4262 unsigned char node_end;
4263 enum maple_type mt;
4264 void __rcu **slots;
4265
4266 if (mas->max >= max)
4267 goto overflow;
4268
4269 min = mas->max + 1;
4270 level = 0;
4271 do {
4272 if (ma_is_root(node))
4273 goto overflow;
4274
4275 /* Walk up. */
4276 if (unlikely(mas_ascend(mas)))
4277 return 1;
4278
4279 level++;
4280 node = mas_mn(mas);
4281 mt = mte_node_type(mas->node);
4282 pivots = ma_pivots(node, mt);
4283 node_end = ma_data_end(node, mt, pivots, mas->max);
4284 if (unlikely(ma_dead_node(node)))
4285 return 1;
4286
4287 } while (unlikely(mas->offset == node_end));
4288
4289 slots = ma_slots(node, mt);
4290 mas->offset++;
4291 enode = mas_slot(mas, slots, mas->offset);
4292 if (unlikely(ma_dead_node(node)))
4293 return 1;
4294
4295 if (level > 1)
4296 mas->offset = 0;
4297
4298 while (unlikely(level > 1)) {
4299 level--;
4300 mas->node = enode;
4301 node = mas_mn(mas);
4302 mt = mte_node_type(mas->node);
4303 slots = ma_slots(node, mt);
4304 enode = mas_slot(mas, slots, 0);
4305 if (unlikely(ma_dead_node(node)))
4306 return 1;
4307 }
4308
4309 if (!mas->offset)
4310 pivots = ma_pivots(node, mt);
4311
4312 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt);
4313 tmp = mte_to_node(enode);
4314 mt = mte_node_type(enode);
4315 pivots = ma_pivots(tmp, mt);
4316 mas->end = ma_data_end(tmp, mt, pivots, mas->max);
4317 if (unlikely(ma_dead_node(node)))
4318 return 1;
4319
4320 mas->node = enode;
4321 mas->min = min;
4322 return 0;
4323
4324 overflow:
4325 if (unlikely(ma_dead_node(node)))
4326 return 1;
4327
4328 mas->status = ma_overflow;
4329 return 0;
4330 }
4331
4332 /*
4333 * mas_next_slot() - Get the entry in the next slot
4334 *
4335 * @mas: The maple state
4336 * @max: The maximum starting range
4337 * @empty: Can be empty
4338 *
4339 * Return: The entry in the next slot which is possibly NULL
4340 */
4341 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty)
4342 {
4343 void __rcu **slots;
4344 unsigned long *pivots;
4345 unsigned long pivot;
4346 enum maple_type type;
4347 struct maple_node *node;
4348 unsigned long save_point = mas->last;
4349 void *entry;
4350
4351 retry:
4352 node = mas_mn(mas);
4353 type = mte_node_type(mas->node);
4354 pivots = ma_pivots(node, type);
4355 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4356 goto retry;
4357
4358 if (mas->max >= max) {
4359 if (likely(mas->offset < mas->end))
4360 pivot = pivots[mas->offset];
4361 else
4362 pivot = mas->max;
4363
4364 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4365 goto retry;
4366
4367 if (pivot >= max) { /* Was at the limit, next will extend beyond */
4368 mas->status = ma_overflow;
4369 return NULL;
4370 }
4371 }
4372
4373 if (likely(mas->offset < mas->end)) {
4374 mas->index = pivots[mas->offset] + 1;
4375 again:
4376 mas->offset++;
4377 if (likely(mas->offset < mas->end))
4378 mas->last = pivots[mas->offset];
4379 else
4380 mas->last = mas->max;
4381 } else {
4382 if (mas->last >= max) {
4383 mas->status = ma_overflow;
4384 return NULL;
4385 }
4386
4387 if (mas_next_node(mas, node, max)) {
4388 mas_rewalk(mas, save_point);
4389 goto retry;
4390 }
4391
4392 if (WARN_ON_ONCE(mas_is_overflow(mas)))
4393 return NULL;
4394
4395 mas->offset = 0;
4396 mas->index = mas->min;
4397 node = mas_mn(mas);
4398 type = mte_node_type(mas->node);
4399 pivots = ma_pivots(node, type);
4400 mas->last = pivots[0];
4401 }
4402
4403 slots = ma_slots(node, type);
4404 entry = mt_slot(mas->tree, slots, mas->offset);
4405 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4406 goto retry;
4407
4408 if (entry)
4409 return entry;
4410
4411
4412 if (!empty) {
4413 if (mas->last >= max) {
4414 mas->status = ma_overflow;
4415 return NULL;
4416 }
4417
4418 mas->index = mas->last + 1;
4419 goto again;
4420 }
4421
4422 return entry;
4423 }
4424
4425 /*
4426 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the
4427 * highest gap address of a given size in a given node and descend.
4428 * @mas: The maple state
4429 * @size: The needed size.
4430 *
4431 * Return: True if found in a leaf, false otherwise.
4432 *
4433 */
4434 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
4435 unsigned long *gap_min, unsigned long *gap_max)
4436 {
4437 enum maple_type type = mte_node_type(mas->node);
4438 struct maple_node *node = mas_mn(mas);
4439 unsigned long *pivots, *gaps;
4440 void __rcu **slots;
4441 unsigned long gap = 0;
4442 unsigned long max, min;
4443 unsigned char offset;
4444
4445 if (unlikely(mas_is_err(mas)))
4446 return true;
4447
4448 if (ma_is_dense(type)) {
4449 /* dense nodes. */
4450 mas->offset = (unsigned char)(mas->index - mas->min);
4451 return true;
4452 }
4453
4454 pivots = ma_pivots(node, type);
4455 slots = ma_slots(node, type);
4456 gaps = ma_gaps(node, type);
4457 offset = mas->offset;
4458 min = mas_safe_min(mas, pivots, offset);
4459 /* Skip out of bounds. */
4460 while (mas->last < min)
4461 min = mas_safe_min(mas, pivots, --offset);
4462
4463 max = mas_safe_pivot(mas, pivots, offset, type);
4464 while (mas->index <= max) {
4465 gap = 0;
4466 if (gaps)
4467 gap = gaps[offset];
4468 else if (!mas_slot(mas, slots, offset))
4469 gap = max - min + 1;
4470
4471 if (gap) {
4472 if ((size <= gap) && (size <= mas->last - min + 1))
4473 break;
4474
4475 if (!gaps) {
4476 /* Skip the next slot, it cannot be a gap. */
4477 if (offset < 2)
4478 goto ascend;
4479
4480 offset -= 2;
4481 max = pivots[offset];
4482 min = mas_safe_min(mas, pivots, offset);
4483 continue;
4484 }
4485 }
4486
4487 if (!offset)
4488 goto ascend;
4489
4490 offset--;
4491 max = min - 1;
4492 min = mas_safe_min(mas, pivots, offset);
4493 }
4494
4495 if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
4496 goto no_space;
4497
4498 if (unlikely(ma_is_leaf(type))) {
4499 mas->offset = offset;
4500 *gap_min = min;
4501 *gap_max = min + gap - 1;
4502 return true;
4503 }
4504
4505 /* descend, only happens under lock. */
4506 mas->node = mas_slot(mas, slots, offset);
4507 mas->min = min;
4508 mas->max = max;
4509 mas->offset = mas_data_end(mas);
4510 return false;
4511
4512 ascend:
4513 if (!mte_is_root(mas->node))
4514 return false;
4515
4516 no_space:
4517 mas_set_err(mas, -EBUSY);
4518 return false;
4519 }
4520
4521 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
4522 {
4523 enum maple_type type = mte_node_type(mas->node);
4524 unsigned long pivot, min, gap = 0;
4525 unsigned char offset, data_end;
4526 unsigned long *gaps, *pivots;
4527 void __rcu **slots;
4528 struct maple_node *node;
4529 bool found = false;
4530
4531 if (ma_is_dense(type)) {
4532 mas->offset = (unsigned char)(mas->index - mas->min);
4533 return true;
4534 }
4535
4536 node = mas_mn(mas);
4537 pivots = ma_pivots(node, type);
4538 slots = ma_slots(node, type);
4539 gaps = ma_gaps(node, type);
4540 offset = mas->offset;
4541 min = mas_safe_min(mas, pivots, offset);
4542 data_end = ma_data_end(node, type, pivots, mas->max);
4543 for (; offset <= data_end; offset++) {
4544 pivot = mas_safe_pivot(mas, pivots, offset, type);
4545
4546 /* Not within lower bounds */
4547 if (mas->index > pivot)
4548 goto next_slot;
4549
4550 if (gaps)
4551 gap = gaps[offset];
4552 else if (!mas_slot(mas, slots, offset))
4553 gap = min(pivot, mas->last) - max(mas->index, min) + 1;
4554 else
4555 goto next_slot;
4556
4557 if (gap >= size) {
4558 if (ma_is_leaf(type)) {
4559 found = true;
4560 break;
4561 }
4562
4563 mas->node = mas_slot(mas, slots, offset);
4564 mas->min = min;
4565 mas->max = pivot;
4566 offset = 0;
4567 break;
4568 }
4569 next_slot:
4570 min = pivot + 1;
4571 if (mas->last <= pivot) {
4572 mas_set_err(mas, -EBUSY);
4573 return true;
4574 }
4575 }
4576
4577 mas->offset = offset;
4578 return found;
4579 }
4580
4581 /**
4582 * mas_walk() - Search for @mas->index in the tree.
4583 * @mas: The maple state.
4584 *
4585 * mas->index and mas->last will be set to the range if there is a value. If
4586 * mas->status is ma_none, reset to ma_start
4587 *
4588 * Return: the entry at the location or %NULL.
4589 */
4590 void *mas_walk(struct ma_state *mas)
4591 {
4592 void *entry;
4593
4594 if (!mas_is_active(mas) && !mas_is_start(mas))
4595 mas->status = ma_start;
4596 retry:
4597 entry = mas_state_walk(mas);
4598 if (mas_is_start(mas)) {
4599 goto retry;
4600 } else if (mas_is_none(mas)) {
4601 mas->index = 0;
4602 mas->last = ULONG_MAX;
4603 } else if (mas_is_ptr(mas)) {
4604 if (!mas->index) {
4605 mas->last = 0;
4606 return entry;
4607 }
4608
4609 mas->index = 1;
4610 mas->last = ULONG_MAX;
4611 mas->status = ma_none;
4612 return NULL;
4613 }
4614
4615 return entry;
4616 }
4617 EXPORT_SYMBOL_GPL(mas_walk);
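
/*
 * Illustrative usage sketch (the tree "mt" is hypothetical): look up the
 * entry covering index 42 under the RCU read lock.
 *
 *	void *entry;
 *	MA_STATE(mas, mt, 42, 42);
 *
 *	rcu_read_lock();
 *	entry = mas_walk(&mas);
 *	rcu_read_unlock();
 *
 * On return, mas.index and mas.last describe the range containing index 42,
 * or the empty range around it with a NULL entry if nothing is stored there.
 */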
4618
4619 static inline bool mas_rewind_node(struct ma_state *mas)
4620 {
4621 unsigned char slot;
4622
4623 do {
4624 if (mte_is_root(mas->node)) {
4625 slot = mas->offset;
4626 if (!slot)
4627 return false;
4628 } else {
4629 mas_ascend(mas);
4630 slot = mas->offset;
4631 }
4632 } while (!slot);
4633
4634 mas->offset = --slot;
4635 return true;
4636 }
4637
4638 /*
4639 * mas_skip_node() - Internal function. Skip over a node.
4640 * @mas: The maple state.
4641 *
4642 * Return: true if there is another node, false otherwise.
4643 */
4644 static inline bool mas_skip_node(struct ma_state *mas)
4645 {
4646 if (mas_is_err(mas))
4647 return false;
4648
4649 do {
4650 if (mte_is_root(mas->node)) {
4651 if (mas->offset >= mas_data_end(mas)) {
4652 mas_set_err(mas, -EBUSY);
4653 return false;
4654 }
4655 } else {
4656 mas_ascend(mas);
4657 }
4658 } while (mas->offset >= mas_data_end(mas));
4659
4660 mas->offset++;
4661 return true;
4662 }
4663
4664 /*
4665 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of
4666 * @size
4667 * @mas: The maple state
4668 * @size: The size of the gap required
4669 *
4670 * Search between @mas->index and @mas->last for a gap of @size.
4671 */
4672 static inline void mas_awalk(struct ma_state *mas, unsigned long size)
4673 {
4674 struct maple_enode *last = NULL;
4675
4676 /*
4677 * There are 4 options:
4678 * go to child (descend)
4679 * go back to parent (ascend)
4680 * no gap found. (return, error == -EBUSY)
4681 * found the gap. (return)
4682 */
4683 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
4684 if (last == mas->node)
4685 mas_skip_node(mas);
4686 else
4687 last = mas->node;
4688 }
4689 }
4690
4691 /*
4692 * mas_sparse_area() - Internal function. Return upper or lower limit when
4693 * searching for a gap in an empty tree.
4694 * @mas: The maple state
4695 * @min: the minimum range
4696 * @max: The maximum range
4697 * @size: The size of the gap
4698 * @fwd: Searching forward or back
4699 */
4700 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min,
4701 unsigned long max, unsigned long size, bool fwd)
4702 {
4703 if (!unlikely(mas_is_none(mas)) && min == 0) {
4704 min++;
4705 /*
4706 * At this time, min is increased, we need to recheck whether
4707 * the size is satisfied.
4708 */
4709 if (min > max || max - min + 1 < size)
4710 return -EBUSY;
4711 }
4712 /* mas_is_ptr */
4713
4714 if (fwd) {
4715 mas->index = min;
4716 mas->last = min + size - 1;
4717 } else {
4718 mas->last = max;
4719 mas->index = max - size + 1;
4720 }
4721 return 0;
4722 }
4723
4724 /*
4725 * mas_empty_area() - Get the lowest address within the range that is
4726 * sufficient for the size requested.
4727 * @mas: The maple state
4728 * @min: The lowest value of the range
4729 * @max: The highest value of the range
4730 * @size: The size needed
4731 */
4732 int mas_empty_area(struct ma_state *mas, unsigned long min,
4733 unsigned long max, unsigned long size)
4734 {
4735 unsigned char offset;
4736 unsigned long *pivots;
4737 enum maple_type mt;
4738 struct maple_node *node;
4739
4740 if (min > max)
4741 return -EINVAL;
4742
4743 if (size == 0 || max - min < size - 1)
4744 return -EINVAL;
4745
4746 if (mas_is_start(mas))
4747 mas_start(mas);
4748 else if (mas->offset >= 2)
4749 mas->offset -= 2;
4750 else if (!mas_skip_node(mas))
4751 return -EBUSY;
4752
4753 /* Empty set */
4754 if (mas_is_none(mas) || mas_is_ptr(mas))
4755 return mas_sparse_area(mas, min, max, size, true);
4756
4757 /* The start of the window can only be within these values */
4758 mas->index = min;
4759 mas->last = max;
4760 mas_awalk(mas, size);
4761
4762 if (unlikely(mas_is_err(mas)))
4763 return xa_err(mas->node);
4764
4765 offset = mas->offset;
4766 node = mas_mn(mas);
4767 mt = mte_node_type(mas->node);
4768 pivots = ma_pivots(node, mt);
4769 min = mas_safe_min(mas, pivots, offset);
4770 if (mas->index < min)
4771 mas->index = min;
4772 mas->last = mas->index + size - 1;
4773 mas->end = ma_data_end(node, mt, pivots, mas->max);
4774 return 0;
4775 }
4776 EXPORT_SYMBOL_GPL(mas_empty_area);
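
/*
 * Illustrative usage sketch (the tree "mt" and the pointer "my_entry" are
 * hypothetical; the caller is assumed to hold the tree lock): find the
 * lowest run of 16 unused indices in [0, 1000] and claim it.
 *
 *	MA_STATE(mas, mt, 0, 0);
 *	int err;
 *
 *	err = mas_empty_area(&mas, 0, 1000, 16);
 *	if (err)
 *		return err;
 *	err = mas_store_gfp(&mas, my_entry, GFP_KERNEL);
 *
 * On success mas.index and mas.last already span the found gap, so the store
 * fills exactly those 16 indices.  mas_empty_area_rev() below is the same
 * search from the top of the range downwards.
 */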
4777
4778 /*
4779 * mas_empty_area_rev() - Get the highest address within the range that is
4780 * sufficient for the size requested.
4781 * @mas: The maple state
4782 * @min: The lowest value of the range
4783 * @max: The highest value of the range
4784 * @size: The size needed
4785 */
4786 int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
4787 unsigned long max, unsigned long size)
4788 {
4789 struct maple_enode *last = mas->node;
4790
4791 if (min > max)
4792 return -EINVAL;
4793
4794 if (size == 0 || max - min < size - 1)
4795 return -EINVAL;
4796
4797 if (mas_is_start(mas))
4798 mas_start(mas);
4799 else if ((mas->offset < 2) && (!mas_rewind_node(mas)))
4800 return -EBUSY;
4801
4802 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
4803 return mas_sparse_area(mas, min, max, size, false);
4804 else if (mas->offset >= 2)
4805 mas->offset -= 2;
4806 else
4807 mas->offset = mas_data_end(mas);
4808
4809
4810 /* The start of the window can only be within these values. */
4811 mas->index = min;
4812 mas->last = max;
4813
4814 while (!mas_rev_awalk(mas, size, &min, &max)) {
4815 if (last == mas->node) {
4816 if (!mas_rewind_node(mas))
4817 return -EBUSY;
4818 } else {
4819 last = mas->node;
4820 }
4821 }
4822
4823 if (mas_is_err(mas))
4824 return xa_err(mas->node);
4825
4826 if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
4827 return -EBUSY;
4828
4829 /* Trim the upper limit to the max. */
4830 if (max < mas->last)
4831 mas->last = max;
4832
4833 mas->index = mas->last - size + 1;
4834 mas->end = mas_data_end(mas);
4835 return 0;
4836 }
4837 EXPORT_SYMBOL_GPL(mas_empty_area_rev);
4838
4839 /*
4840 * mte_dead_leaves() - Mark all leaves of a node as dead.
4841 * @enode: the encoded node
4842 * @mt: the maple tree
4843 * @slots: Pointer to the slot array
4844 *
4845 * Must hold the write lock.
4846 *
4847 * Return: The number of leaves marked as dead.
4848 */
4849 static inline
4850 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
4851 void __rcu **slots)
4852 {
4853 struct maple_node *node;
4854 enum maple_type type;
4855 void *entry;
4856 int offset;
4857
4858 for (offset = 0; offset < mt_slot_count(enode); offset++) {
4859 entry = mt_slot(mt, slots, offset);
4860 type = mte_node_type(entry);
4861 node = mte_to_node(entry);
4862 /* Use both node and type to catch LE & BE metadata */
4863 if (!node || !type)
4864 break;
4865
4866 mte_set_node_dead(entry);
4867 node->type = type;
4868 rcu_assign_pointer(slots[offset], node);
4869 }
4870
4871 return offset;
4872 }
4873
4874 /**
4875 * mte_dead_walk() - Walk down a dead tree to just before the leaves
4876 * @enode: The maple encoded node
4877 * @offset: The starting offset
4878 *
4879 * Note: This can only be used from the RCU callback context.
4880 */
4881 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
4882 {
4883 struct maple_node *node, *next;
4884 void __rcu **slots = NULL;
4885
4886 next = mte_to_node(*enode);
4887 do {
4888 *enode = ma_enode_ptr(next);
4889 node = mte_to_node(*enode);
4890 slots = ma_slots(node, node->type);
4891 next = rcu_dereference_protected(slots[offset],
4892 lock_is_held(&rcu_callback_map));
4893 offset = 0;
4894 } while (!ma_is_leaf(next->type));
4895
4896 return slots;
4897 }
4898
4899 /**
4900 * mt_free_walk() - Walk & free a tree in the RCU callback context
4901 * @head: The RCU head that's within the node.
4902 *
4903 * Note: This can only be used from the RCU callback context.
4904 */
4905 static void mt_free_walk(struct rcu_head *head)
4906 {
4907 void __rcu **slots;
4908 struct maple_node *node, *start;
4909 struct maple_enode *enode;
4910 unsigned char offset;
4911 enum maple_type type;
4912
4913 node = container_of(head, struct maple_node, rcu);
4914
4915 if (ma_is_leaf(node->type))
4916 goto free_leaf;
4917
4918 start = node;
4919 enode = mt_mk_node(node, node->type);
4920 slots = mte_dead_walk(&enode, 0);
4921 node = mte_to_node(enode);
4922 do {
4923 mt_free_bulk(node->slot_len, slots);
4924 offset = node->parent_slot + 1;
4925 enode = node->piv_parent;
4926 if (mte_to_node(enode) == node)
4927 goto free_leaf;
4928
4929 type = mte_node_type(enode);
4930 slots = ma_slots(mte_to_node(enode), type);
4931 if ((offset < mt_slots[type]) &&
4932 rcu_dereference_protected(slots[offset],
4933 lock_is_held(&rcu_callback_map)))
4934 slots = mte_dead_walk(&enode, offset);
4935 node = mte_to_node(enode);
4936 } while ((node != start) || (node->slot_len < offset));
4937
4938 slots = ma_slots(node, node->type);
4939 mt_free_bulk(node->slot_len, slots);
4940
4941 free_leaf:
4942 kfree(node);
4943 }
4944
4945 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
4946 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
4947 {
4948 struct maple_node *node;
4949 struct maple_enode *next = *enode;
4950 void __rcu **slots = NULL;
4951 enum maple_type type;
4952 unsigned char next_offset = 0;
4953
4954 do {
4955 *enode = next;
4956 node = mte_to_node(*enode);
4957 type = mte_node_type(*enode);
4958 slots = ma_slots(node, type);
4959 next = mt_slot_locked(mt, slots, next_offset);
4960 if ((mte_dead_node(next)))
4961 next = mt_slot_locked(mt, slots, ++next_offset);
4962
4963 mte_set_node_dead(*enode);
4964 node->type = type;
4965 node->piv_parent = prev;
4966 node->parent_slot = offset;
4967 offset = next_offset;
4968 next_offset = 0;
4969 prev = *enode;
4970 } while (!mte_is_leaf(next));
4971
4972 return slots;
4973 }
4974
4975 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
4976 bool free)
4977 {
4978 void __rcu **slots;
4979 struct maple_node *node = mte_to_node(enode);
4980 struct maple_enode *start;
4981
4982 if (mte_is_leaf(enode)) {
4983 mte_set_node_dead(enode);
4984 node->type = mte_node_type(enode);
4985 goto free_leaf;
4986 }
4987
4988 start = enode;
4989 slots = mte_destroy_descend(&enode, mt, start, 0);
4990 node = mte_to_node(enode); // Updated in the above call.
4991 do {
4992 enum maple_type type;
4993 unsigned char offset;
4994 struct maple_enode *parent, *tmp;
4995
4996 node->slot_len = mte_dead_leaves(enode, mt, slots);
4997 if (free)
4998 mt_free_bulk(node->slot_len, slots);
4999 offset = node->parent_slot + 1;
5000 enode = node->piv_parent;
5001 if (mte_to_node(enode) == node)
5002 goto free_leaf;
5003
5004 type = mte_node_type(enode);
5005 slots = ma_slots(mte_to_node(enode), type);
5006 if (offset >= mt_slots[type])
5007 goto next;
5008
5009 tmp = mt_slot_locked(mt, slots, offset);
5010 if (mte_node_type(tmp) && mte_to_node(tmp)) {
5011 parent = enode;
5012 enode = tmp;
5013 slots = mte_destroy_descend(&enode, mt, parent, offset);
5014 }
5015 next:
5016 node = mte_to_node(enode);
5017 } while (start != enode);
5018
5019 node = mte_to_node(enode);
5020 node->slot_len = mte_dead_leaves(enode, mt, slots);
5021 if (free)
5022 mt_free_bulk(node->slot_len, slots);
5023
5024 free_leaf:
5025 if (free)
5026 kfree(node);
5027 else
5028 mt_clear_meta(mt, node, node->type);
5029 }
5030
5031 /*
5032 * mte_destroy_walk() - Free a tree or sub-tree.
5033 * @enode: the encoded maple node (maple_enode) to start
5034 * @mt: the tree to free - needed for node types.
5035 *
5036 * Must hold the write lock.
5037 */
5038 static inline void mte_destroy_walk(struct maple_enode *enode,
5039 struct maple_tree *mt)
5040 {
5041 struct maple_node *node = mte_to_node(enode);
5042
5043 if (mt_in_rcu(mt)) {
5044 mt_destroy_walk(enode, mt, false);
5045 call_rcu(&node->rcu, mt_free_walk);
5046 } else {
5047 mt_destroy_walk(enode, mt, true);
5048 }
5049 }
5050 /* Interface */
5051
5052 /**
5053 * mas_store() - Store an @entry.
5054 * @mas: The maple state.
5055 * @entry: The entry to store.
5056 *
5057  * The @mas->index and @mas->last are used to set the range for the @entry.
5058 *
5059 * Return: the first entry between mas->index and mas->last or %NULL.
5060 */
5061 void *mas_store(struct ma_state *mas, void *entry)
5062 {
5063 MA_WR_STATE(wr_mas, mas, entry);
5064
5065 trace_ma_write(__func__, mas, 0, entry);
5066 #ifdef CONFIG_DEBUG_MAPLE_TREE
5067 if (MAS_WARN_ON(mas, mas->index > mas->last))
5068 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
5069 entry);
5070
5071 if (mas->index > mas->last) {
5072 mas_set_err(mas, -EINVAL);
5073 return NULL;
5074 }
5075
5076 #endif
5077
5078 /*
5079 * Storing is the same operation as insert with the added caveat that it
5080 * can overwrite entries. Although this seems simple enough, one may
5081 * want to examine what happens if a single store operation was to
5082 * overwrite multiple entries within a self-balancing B-Tree.
5083 */
5084 mas_wr_prealloc_setup(&wr_mas);
5085 mas->store_type = mas_wr_store_type(&wr_mas);
5086 if (mas->mas_flags & MA_STATE_PREALLOC) {
5087 mas_wr_store_entry(&wr_mas);
5088 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
5089 return wr_mas.content;
5090 }
5091
5092 mas_prealloc_calc(&wr_mas, entry);
5093 if (!mas->node_request)
5094 goto store;
5095
5096 mas_alloc_nodes(mas, GFP_NOWAIT);
5097 if (mas_is_err(mas))
5098 return NULL;
5099
5100 store:
5101 mas_wr_store_entry(&wr_mas);
5102 mas_destroy(mas);
5103 return wr_mas.content;
5104 }
5105 EXPORT_SYMBOL_GPL(mas_store);
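
/*
 * Illustrative usage sketch (the tree "mt" and the pointer "new_entry" are
 * hypothetical): overwrite whatever is stored in [10, 19] while holding the
 * tree lock.  mas_store() allocates with GFP_NOWAIT internally, so callers
 * that may need to sleep for memory should prefer mas_store_gfp() below or
 * preallocate with mas_preallocate().
 *
 *	void *old;
 *	MA_STATE(mas, mt, 10, 19);
 *
 *	mtree_lock(mt);
 *	old = mas_store(&mas, new_entry);
 *	mtree_unlock(mt);
 */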
5106
5107 /**
5108 * mas_store_gfp() - Store a value into the tree.
5109 * @mas: The maple state
5110 * @entry: The entry to store
5111 * @gfp: The GFP_FLAGS to use for allocations if necessary.
5112 *
5113 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5114 * be allocated.
5115 */
5116 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
5117 {
5118 unsigned long index = mas->index;
5119 unsigned long last = mas->last;
5120 MA_WR_STATE(wr_mas, mas, entry);
5121 int ret = 0;
5122
5123 retry:
5124 mas_wr_preallocate(&wr_mas, entry);
5125 if (unlikely(mas_nomem(mas, gfp))) {
5126 if (!entry)
5127 __mas_set_range(mas, index, last);
5128 goto retry;
5129 }
5130
5131 if (mas_is_err(mas)) {
5132 ret = xa_err(mas->node);
5133 goto out;
5134 }
5135
5136 mas_wr_store_entry(&wr_mas);
5137 out:
5138 mas_destroy(mas);
5139 return ret;
5140 }
5141 EXPORT_SYMBOL_GPL(mas_store_gfp);
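
/*
 * Illustrative usage sketch (the tree "mt" is hypothetical): erase every
 * entry in [100, 199] by storing NULL over the range, letting the internal
 * retry path allocate with GFP_KERNEL if the first attempt runs out of
 * memory.
 *
 *	MA_STATE(mas, mt, 100, 199);
 *	int err;
 *
 *	mtree_lock(mt);
 *	err = mas_store_gfp(&mas, NULL, GFP_KERNEL);
 *	mtree_unlock(mt);
 */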
5142
5143 /**
5144 * mas_store_prealloc() - Store a value into the tree using memory
5145 * preallocated in the maple state.
5146 * @mas: The maple state
5147 * @entry: The entry to store.
5148 */
5149 void mas_store_prealloc(struct ma_state *mas, void *entry)
5150 {
5151 MA_WR_STATE(wr_mas, mas, entry);
5152
5153 if (mas->store_type == wr_store_root) {
5154 mas_wr_prealloc_setup(&wr_mas);
5155 goto store;
5156 }
5157
5158 mas_wr_walk_descend(&wr_mas);
5159 if (mas->store_type != wr_spanning_store) {
5160 /* set wr_mas->content to current slot */
5161 wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset);
5162 mas_wr_end_piv(&wr_mas);
5163 }
5164
5165 store:
5166 trace_ma_write(__func__, mas, 0, entry);
5167 mas_wr_store_entry(&wr_mas);
5168 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
5169 mas_destroy(mas);
5170 }
5171 EXPORT_SYMBOL_GPL(mas_store_prealloc);
5172
5173 /**
5174 * mas_preallocate() - Preallocate enough nodes for a store operation
5175 * @mas: The maple state
5176 * @entry: The entry that will be stored
5177 * @gfp: The GFP_FLAGS to use for allocations.
5178 *
5179 * Return: 0 on success, -ENOMEM if memory could not be allocated.
5180 */
5181 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
5182 {
5183 MA_WR_STATE(wr_mas, mas, entry);
5184
5185 mas_wr_prealloc_setup(&wr_mas);
5186 mas->store_type = mas_wr_store_type(&wr_mas);
5187 mas_prealloc_calc(&wr_mas, entry);
5188 if (!mas->node_request)
5189 goto set_flag;
5190
5191 mas->mas_flags &= ~MA_STATE_PREALLOC;
5192 mas_alloc_nodes(mas, gfp);
5193 if (mas_is_err(mas)) {
5194 int ret = xa_err(mas->node);
5195
5196 mas->node_request = 0;
5197 mas_destroy(mas);
5198 mas_reset(mas);
5199 return ret;
5200 }
5201
5202 set_flag:
5203 mas->mas_flags |= MA_STATE_PREALLOC;
5204 return 0;
5205 }
5206 EXPORT_SYMBOL_GPL(mas_preallocate);
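
/*
 * Illustrative usage sketch (an externally locked tree protected by a
 * sleepable lock is assumed, as in the mm code; "mt" and "new_entry" are
 * hypothetical): preallocate up front so the later store cannot fail.
 *
 *	MA_STATE(mas, mt, 0x1000, 0x1fff);
 *	int err;
 *
 *	err = mas_preallocate(&mas, new_entry, GFP_KERNEL);
 *	if (err)
 *		return err;
 *	mas_store_prealloc(&mas, new_entry);
 *
 * mas_store_prealloc() calls mas_destroy() itself, releasing any unused
 * preallocated nodes; a caller that abandons the store must call
 * mas_destroy() directly.
 */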
5207
5208 /*
5209 * mas_destroy() - destroy a maple state.
5210 * @mas: The maple state
5211 *
5212 * Upon completion, check the left-most node and rebalance against the node to
5213 * the right if necessary. Frees any allocated nodes associated with this maple
5214 * state.
5215 */
5216 void mas_destroy(struct ma_state *mas)
5217 {
5218 mas->mas_flags &= ~MA_STATE_PREALLOC;
5219 mas_empty_nodes(mas);
5220 }
5221 EXPORT_SYMBOL_GPL(mas_destroy);
5222
5223 static void mas_may_activate(struct ma_state *mas)
5224 {
5225 if (!mas->node) {
5226 mas->status = ma_start;
5227 } else if (mas->index > mas->max || mas->index < mas->min) {
5228 mas->status = ma_start;
5229 } else {
5230 mas->status = ma_active;
5231 }
5232 }
5233
5234 static bool mas_next_setup(struct ma_state *mas, unsigned long max,
5235 void **entry)
5236 {
5237 bool was_none = mas_is_none(mas);
5238
5239 if (unlikely(mas->last >= max)) {
5240 mas->status = ma_overflow;
5241 return true;
5242 }
5243
5244 switch (mas->status) {
5245 case ma_active:
5246 return false;
5247 case ma_none:
5248 fallthrough;
5249 case ma_pause:
5250 mas->status = ma_start;
5251 fallthrough;
5252 case ma_start:
5253 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
5254 break;
5255 case ma_overflow:
5256 /* Overflowed before, but the max changed */
5257 mas_may_activate(mas);
5258 break;
5259 case ma_underflow:
5260 /* The user expects the mas to be one before where it is */
5261 mas_may_activate(mas);
5262 *entry = mas_walk(mas);
5263 if (*entry)
5264 return true;
5265 break;
5266 case ma_root:
5267 break;
5268 case ma_error:
5269 return true;
5270 }
5271
5272 if (likely(mas_is_active(mas))) /* Fast path */
5273 return false;
5274
5275 if (mas_is_ptr(mas)) {
5276 *entry = NULL;
5277 if (was_none && mas->index == 0) {
5278 mas->index = mas->last = 0;
5279 return true;
5280 }
5281 mas->index = 1;
5282 mas->last = ULONG_MAX;
5283 mas->status = ma_none;
5284 return true;
5285 }
5286
5287 if (mas_is_none(mas))
5288 return true;
5289
5290 return false;
5291 }
5292
5293 /**
5294 * mas_next() - Get the next entry.
5295 * @mas: The maple state
5296 * @max: The maximum index to check.
5297 *
5298 * Returns the next entry after @mas->index.
5299 * Must hold rcu_read_lock or the write lock.
5300 * Can return the zero entry.
5301 *
5302 * Return: The next entry or %NULL
5303 */
5304 void *mas_next(struct ma_state *mas, unsigned long max)
5305 {
5306 void *entry = NULL;
5307
5308 if (mas_next_setup(mas, max, &entry))
5309 return entry;
5310
5311 /* Retries on dead nodes handled by mas_next_slot */
5312 return mas_next_slot(mas, max, false);
5313 }
5314 EXPORT_SYMBOL_GPL(mas_next);
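
/*
 * Illustrative usage sketch (the tree "mt" is hypothetical): position the
 * state at index 5 and then step through the entries that follow, up to
 * index 100, under the RCU read lock.
 *
 *	void *entry;
 *	MA_STATE(mas, mt, 5, 5);
 *
 *	rcu_read_lock();
 *	mas_walk(&mas);
 *	while ((entry = mas_next(&mas, 100)) != NULL)
 *		pr_debug("[%lu, %lu] -> %p\n", mas.index, mas.last, entry);
 *	rcu_read_unlock();
 */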
5315
5316 /**
5317 * mas_next_range() - Advance the maple state to the next range
5318 * @mas: The maple state
5319 * @max: The maximum index to check.
5320 *
5321 * Sets @mas->index and @mas->last to the range.
5322 * Must hold rcu_read_lock or the write lock.
5323 * Can return the zero entry.
5324 *
5325 * Return: The next entry or %NULL
5326 */
5327 void *mas_next_range(struct ma_state *mas, unsigned long max)
5328 {
5329 void *entry = NULL;
5330
5331 if (mas_next_setup(mas, max, &entry))
5332 return entry;
5333
5334 /* Retries on dead nodes handled by mas_next_slot */
5335 return mas_next_slot(mas, max, true);
5336 }
5337 EXPORT_SYMBOL_GPL(mas_next_range);
5338
5339 /**
5340 * mt_next() - get the next value in the maple tree
5341 * @mt: The maple tree
5342 * @index: The start index
5343 * @max: The maximum index to check
5344 *
5345 * Takes RCU read lock internally to protect the search, which does not
5346 * protect the returned pointer after dropping RCU read lock.
5347 * See also: Documentation/core-api/maple_tree.rst
5348 *
5349 * Return: The entry higher than @index or %NULL if nothing is found.
5350 */
5351 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
5352 {
5353 void *entry = NULL;
5354 MA_STATE(mas, mt, index, index);
5355
5356 rcu_read_lock();
5357 entry = mas_next(&mas, max);
5358 rcu_read_unlock();
5359 return entry;
5360 }
5361 EXPORT_SYMBOL_GPL(mt_next);
5362
5363 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry)
5364 {
5365 if (unlikely(mas->index <= min)) {
5366 mas->status = ma_underflow;
5367 return true;
5368 }
5369
5370 switch (mas->status) {
5371 case ma_active:
5372 return false;
5373 case ma_start:
5374 break;
5375 case ma_none:
5376 fallthrough;
5377 case ma_pause:
5378 mas->status = ma_start;
5379 break;
5380 case ma_underflow:
5381 /* underflowed before but the min changed */
5382 mas_may_activate(mas);
5383 break;
5384 case ma_overflow:
5385 /* User expects mas to be one after where it is */
5386 mas_may_activate(mas);
5387 *entry = mas_walk(mas);
5388 if (*entry)
5389 return true;
5390 break;
5391 case ma_root:
5392 break;
5393 case ma_error:
5394 return true;
5395 }
5396
5397 if (mas_is_start(mas))
5398 mas_walk(mas);
5399
5400 if (unlikely(mas_is_ptr(mas))) {
5401 if (!mas->index) {
5402 mas->status = ma_none;
5403 return true;
5404 }
5405 mas->index = mas->last = 0;
5406 *entry = mas_root(mas);
5407 return true;
5408 }
5409
5410 if (mas_is_none(mas)) {
5411 if (mas->index) {
5412 /* Walked to out-of-range pointer? */
5413 mas->index = mas->last = 0;
5414 mas->status = ma_root;
5415 *entry = mas_root(mas);
5416 return true;
5417 }
5418 return true;
5419 }
5420
5421 return false;
5422 }
5423
5424 /**
5425 * mas_prev() - Get the previous entry
5426 * @mas: The maple state
5427 * @min: The minimum value to check.
5428 *
5429 * Must hold rcu_read_lock or the write lock.
5430  * Will reset mas to ma_start if the status is ma_none. Will stop on
5431  * non-searchable nodes.
5432 *
5433 * Return: the previous value or %NULL.
5434 */
5435 void *mas_prev(struct ma_state *mas, unsigned long min)
5436 {
5437 void *entry = NULL;
5438
5439 if (mas_prev_setup(mas, min, &entry))
5440 return entry;
5441
5442 return mas_prev_slot(mas, min, false);
5443 }
5444 EXPORT_SYMBOL_GPL(mas_prev);
5445
5446 /**
5447 * mas_prev_range() - Advance to the previous range
5448 * @mas: The maple state
5449 * @min: The minimum value to check.
5450 *
5451 * Sets @mas->index and @mas->last to the range.
5452 * Must hold rcu_read_lock or the write lock.
5453  * Will reset mas to ma_start if the status is ma_none. Will stop on
5454  * non-searchable nodes.
5455 *
5456 * Return: the previous value or %NULL.
5457 */
5458 void *mas_prev_range(struct ma_state *mas, unsigned long min)
5459 {
5460 void *entry = NULL;
5461
5462 if (mas_prev_setup(mas, min, &entry))
5463 return entry;
5464
5465 return mas_prev_slot(mas, min, true);
5466 }
5467 EXPORT_SYMBOL_GPL(mas_prev_range);
5468
5469 /**
5470 * mt_prev() - get the previous value in the maple tree
5471 * @mt: The maple tree
5472 * @index: The start index
5473 * @min: The minimum index to check
5474 *
5475 * Takes RCU read lock internally to protect the search, which does not
5476 * protect the returned pointer after dropping RCU read lock.
5477 * See also: Documentation/core-api/maple_tree.rst
5478 *
5479 * Return: The entry before @index or %NULL if nothing is found.
5480 */
5481 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
5482 {
5483 void *entry = NULL;
5484 MA_STATE(mas, mt, index, index);
5485
5486 rcu_read_lock();
5487 entry = mas_prev(&mas, min);
5488 rcu_read_unlock();
5489 return entry;
5490 }
5491 EXPORT_SYMBOL_GPL(mt_prev);
5492
5493 /**
5494 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
5495 * @mas: The maple state to pause
5496 *
5497 * Some users need to pause a walk and drop the lock they're holding in
5498 * order to yield to a higher priority thread or carry out an operation
5499 * on an entry. Those users should call this function before they drop
5500 * the lock. It resets the @mas to be suitable for the next iteration
5501 * of the loop after the user has reacquired the lock. If most entries
5502 * found during a walk require you to call mas_pause(), the mt_for_each()
5503 * iterator may be more appropriate.
5504 *
5505 */
5506 void mas_pause(struct ma_state *mas)
5507 {
5508 mas->status = ma_pause;
5509 mas->node = NULL;
5510 }
5511 EXPORT_SYMBOL_GPL(mas_pause);
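/*
 * Illustrative sketch (not part of the kernel sources): dropping the RCU
 * read lock in the middle of a mas_for_each() walk, as described above.
 * "my_tree" and process() are hypothetical.
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	mas_for_each(&mas, entry, ULONG_MAX) {
 *		process(entry);
 *		if (need_resched()) {
 *			mas_pause(&mas);
 *			rcu_read_unlock();
 *			cond_resched();
 *			rcu_read_lock();
 *		}
 *	}
 *	rcu_read_unlock();
 */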
5512
5513 /**
5514 * mas_find_setup() - Internal function to set up mas_find*().
5515 * @mas: The maple state
5516 * @max: The maximum index
5517 * @entry: Pointer to the entry
5518 *
5519 * Returns: True if entry is the answer, false otherwise.
5520 */
5521 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry)
5522 {
5523 switch (mas->status) {
5524 case ma_active:
5525 if (mas->last < max)
5526 return false;
5527 return true;
5528 case ma_start:
5529 break;
5530 case ma_pause:
5531 if (unlikely(mas->last >= max))
5532 return true;
5533
5534 mas->index = ++mas->last;
5535 mas->status = ma_start;
5536 break;
5537 case ma_none:
5538 if (unlikely(mas->last >= max))
5539 return true;
5540
5541 mas->index = mas->last;
5542 mas->status = ma_start;
5543 break;
5544 case ma_underflow:
5545 /* mas is pointing at entry before unable to go lower */
5546 if (unlikely(mas->index >= max)) {
5547 mas->status = ma_overflow;
5548 return true;
5549 }
5550
5551 mas_may_activate(mas);
5552 *entry = mas_walk(mas);
5553 if (*entry)
5554 return true;
5555 break;
5556 case ma_overflow:
5557 if (unlikely(mas->last >= max))
5558 return true;
5559
5560 mas_may_activate(mas);
5561 *entry = mas_walk(mas);
5562 if (*entry)
5563 return true;
5564 break;
5565 case ma_root:
5566 break;
5567 case ma_error:
5568 return true;
5569 }
5570
5571 if (mas_is_start(mas)) {
5572 /* First run or continue */
5573 if (mas->index > max)
5574 return true;
5575
5576 *entry = mas_walk(mas);
5577 if (*entry)
5578 return true;
5579
5580 }
5581
5582 if (unlikely(mas_is_ptr(mas)))
5583 goto ptr_out_of_range;
5584
5585 if (unlikely(mas_is_none(mas)))
5586 return true;
5587
5588 if (mas->index == max)
5589 return true;
5590
5591 return false;
5592
5593 ptr_out_of_range:
5594 mas->status = ma_none;
5595 mas->index = 1;
5596 mas->last = ULONG_MAX;
5597 return true;
5598 }
5599
5600 /**
5601 * mas_find() - On the first call, find the entry at or after mas->index up to
5602 * %max. Otherwise, find the entry after mas->index.
5603 * @mas: The maple state
5604 * @max: The maximum value to check.
5605 *
5606 * Must hold rcu_read_lock or the write lock.
5607 * If an entry exists, last and index are updated accordingly.
5608 * May set @mas->status to ma_overflow.
5609 *
5610 * Return: The entry or %NULL.
5611 */
5612 void *mas_find(struct ma_state *mas, unsigned long max)
5613 {
5614 void *entry = NULL;
5615
5616 if (mas_find_setup(mas, max, &entry))
5617 return entry;
5618
5619 /* Retries on dead nodes handled by mas_next_slot */
5620 entry = mas_next_slot(mas, max, false);
5621 /* Ignore overflow */
5622 mas->status = ma_active;
5623 return entry;
5624 }
5625 EXPORT_SYMBOL_GPL(mas_find);
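/*
 * Illustrative sketch: mas_for_each() in include/linux/maple_tree.h is a
 * thin wrapper around mas_find(). An equivalent open-coded walk of a
 * hypothetical "my_tree" over the range [10, 20] would be:
 *
 *	MA_STATE(mas, &my_tree, 10, 10);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_find(&mas, 20)) != NULL)
 *		pr_debug("found %lu-%lu\n", mas.index, mas.last);
 *	rcu_read_unlock();
 */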
5626
5627 /**
5628 * mas_find_range() - On the first call, find the entry at or after
5629 * mas->index up to %max. Otherwise, advance to the next slot after mas->index.
5630 * @mas: The maple state
5631 * @max: The maximum value to check.
5632 *
5633 * Must hold rcu_read_lock or the write lock.
5634 * If an entry exists, last and index are updated accordingly.
5635 * May set @mas->status to ma_overflow.
5636 *
5637 * Return: The entry or %NULL.
5638 */
5639 void *mas_find_range(struct ma_state *mas, unsigned long max)
5640 {
5641 void *entry = NULL;
5642
5643 if (mas_find_setup(mas, max, &entry))
5644 return entry;
5645
5646 /* Retries on dead nodes handled by mas_next_slot */
5647 return mas_next_slot(mas, max, true);
5648 }
5649 EXPORT_SYMBOL_GPL(mas_find_range);
5650
5651 /**
5652 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev()
5653 * @mas: The maple state
5654 * @min: The minimum index
5655 * @entry: Pointer to the entry
5656 *
5657 * Returns: True if entry is the answer, false otherwise.
5658 */
5659 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min,
5660 void **entry)
5661 {
5662
5663 switch (mas->status) {
5664 case ma_active:
5665 goto active;
5666 case ma_start:
5667 break;
5668 case ma_pause:
5669 if (unlikely(mas->index <= min)) {
5670 mas->status = ma_underflow;
5671 return true;
5672 }
5673 mas->last = --mas->index;
5674 mas->status = ma_start;
5675 break;
5676 case ma_none:
5677 if (mas->index <= min)
5678 goto none;
5679
5680 mas->last = mas->index;
5681 mas->status = ma_start;
5682 break;
5683 case ma_overflow: /* user expects the mas to be one after where it is */
5684 if (unlikely(mas->index <= min)) {
5685 mas->status = ma_underflow;
5686 return true;
5687 }
5688
5689 mas->status = ma_active;
5690 break;
5691 case ma_underflow: /* user expects the mas to be one before where it is */
5692 if (unlikely(mas->index <= min))
5693 return true;
5694
5695 mas->status = ma_active;
5696 break;
5697 case ma_root:
5698 break;
5699 case ma_error:
5700 return true;
5701 }
5702
5703 if (mas_is_start(mas)) {
5704 /* First run or continue */
5705 if (mas->index < min)
5706 return true;
5707
5708 *entry = mas_walk(mas);
5709 if (*entry)
5710 return true;
5711 }
5712
5713 if (unlikely(mas_is_ptr(mas)))
5714 goto none;
5715
5716 if (unlikely(mas_is_none(mas))) {
5717 /*
5718 * Walked to the location, and there was nothing so the previous
5719 * location is 0.
5720 */
5721 mas->last = mas->index = 0;
5722 mas->status = ma_root;
5723 *entry = mas_root(mas);
5724 return true;
5725 }
5726
5727 active:
5728 if (mas->index < min)
5729 return true;
5730
5731 return false;
5732
5733 none:
5734 mas->status = ma_none;
5735 return true;
5736 }
5737
5738 /**
5739 * mas_find_rev: On the first call, find the first non-null entry at or below
5740 * mas->index down to %min. Otherwise find the first non-null entry below
5741 * mas->index down to %min.
5742 * @mas: The maple state
5743 * @min: The minimum value to check.
5744 *
5745 * Must hold rcu_read_lock or the write lock.
5746 * If an entry exists, last and index are updated accordingly.
5747 * May set @mas->status to ma_underflow.
5748 *
5749 * Return: The entry or %NULL.
5750 */
5751 void *mas_find_rev(struct ma_state *mas, unsigned long min)
5752 {
5753 void *entry = NULL;
5754
5755 if (mas_find_rev_setup(mas, min, &entry))
5756 return entry;
5757
5758 /* Retries on dead nodes handled by mas_prev_slot */
5759 return mas_prev_slot(mas, min, false);
5760
5761 }
5762 EXPORT_SYMBOL_GPL(mas_find_rev);
5763
5764 /**
5765 * mas_find_range_rev: On the first call, find the first non-null entry at or
5766 * below mas->index down to %min. Otherwise advance to the previous slot,
5767 * below mas->index, down to %min.
5768 * @mas: The maple state
5769 * @min: The minimum value to check.
5770 *
5771 * Must hold rcu_read_lock or the write lock.
5772 * If an entry exists, last and index are updated accordingly.
5773 * May set @mas->status to ma_underflow.
5774 *
5775 * Return: The entry or %NULL.
5776 */
5777 void *mas_find_range_rev(struct ma_state *mas, unsigned long min)
5778 {
5779 void *entry = NULL;
5780
5781 if (mas_find_rev_setup(mas, min, &entry))
5782 return entry;
5783
5784 /* Retries on dead nodes handled by mas_prev_slot */
5785 return mas_prev_slot(mas, min, true);
5786 }
5787 EXPORT_SYMBOL_GPL(mas_find_range_rev);
5788
5789 /**
5790 * mas_erase() - Find the range in which index resides and erase the entire
5791 * range.
5792 * @mas: The maple state
5793 *
5794 * Must hold the write lock.
5795 * Searches for @mas->index, sets @mas->index and @mas->last to the range and
5796 * erases that range.
5797 *
5798 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated.
5799 */
5800 void *mas_erase(struct ma_state *mas)
5801 {
5802 void *entry;
5803 unsigned long index = mas->index;
5804 MA_WR_STATE(wr_mas, mas, NULL);
5805
5806 if (!mas_is_active(mas) || !mas_is_start(mas))
5807 mas->status = ma_start;
5808
5809 write_retry:
5810 entry = mas_state_walk(mas);
5811 if (!entry)
5812 return NULL;
5813
5814 /* Must reset to ensure spanning writes of last slot are detected */
5815 mas_reset(mas);
5816 mas_wr_preallocate(&wr_mas, NULL);
5817 if (mas_nomem(mas, GFP_KERNEL)) {
5818 /* in case the range of entry changed when unlocked */
5819 mas->index = mas->last = index;
5820 goto write_retry;
5821 }
5822
5823 if (mas_is_err(mas))
5824 goto out;
5825
5826 mas_wr_store_entry(&wr_mas);
5827 out:
5828 mas_destroy(mas);
5829 return entry;
5830 }
5831 EXPORT_SYMBOL_GPL(mas_erase);
5832
5833 /**
5834 * mas_nomem() - Check if there was an error allocating and do the allocation
5835 * if necessary. If there are allocations, then free them.
5836 * @mas: The maple state
5837 * @gfp: The GFP_FLAGS to use for allocations
5838 * Return: true on allocation, false otherwise.
5839 */
5840 bool mas_nomem(struct ma_state *mas, gfp_t gfp)
5841 __must_hold(mas->tree->ma_lock)
5842 {
5843 if (likely(mas->node != MA_ERROR(-ENOMEM)))
5844 return false;
5845
5846 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
5847 mtree_unlock(mas->tree);
5848 mas_alloc_nodes(mas, gfp);
5849 mtree_lock(mas->tree);
5850 } else {
5851 mas_alloc_nodes(mas, gfp);
5852 }
5853
5854 if (!mas->sheaf && !mas->alloc)
5855 return false;
5856
5857 mas->status = ma_start;
5858 return true;
5859 }
5860
5861 void __init maple_tree_init(void)
5862 {
5863 struct kmem_cache_args args = {
5864 .align = sizeof(struct maple_node),
5865 .sheaf_capacity = 32,
5866 };
5867
5868 maple_node_cache = kmem_cache_create("maple_node",
5869 sizeof(struct maple_node), &args,
5870 SLAB_PANIC);
5871 }
5872
5873 /**
5874 * mtree_load() - Load a value stored in a maple tree
5875 * @mt: The maple tree
5876 * @index: The index to load
5877 *
5878 * Return: the entry or %NULL
5879 */
5880 void *mtree_load(struct maple_tree *mt, unsigned long index)
5881 {
5882 MA_STATE(mas, mt, index, index);
5883 void *entry;
5884
5885 trace_ma_read(__func__, &mas);
5886 rcu_read_lock();
5887 retry:
5888 entry = mas_start(&mas);
5889 if (unlikely(mas_is_none(&mas)))
5890 goto unlock;
5891
5892 if (unlikely(mas_is_ptr(&mas))) {
5893 if (index)
5894 entry = NULL;
5895
5896 goto unlock;
5897 }
5898
5899 entry = mtree_lookup_walk(&mas);
5900 if (!entry && unlikely(mas_is_start(&mas)))
5901 goto retry;
5902 unlock:
5903 rcu_read_unlock();
5904 if (xa_is_zero(entry))
5905 return NULL;
5906
5907 return entry;
5908 }
5909 EXPORT_SYMBOL(mtree_load);
5910
5911 /**
5912 * mtree_store_range() - Store an entry at a given range.
5913 * @mt: The maple tree
5914 * @index: The start of the range
5915 * @last: The end of the range
5916 * @entry: The entry to store
5917 * @gfp: The GFP_FLAGS to use for allocations
5918 *
5919 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5920 * be allocated.
5921 */
5922 int mtree_store_range(struct maple_tree *mt, unsigned long index,
5923 unsigned long last, void *entry, gfp_t gfp)
5924 {
5925 MA_STATE(mas, mt, index, last);
5926 int ret = 0;
5927
5928 trace_ma_write(__func__, &mas, 0, entry);
5929 if (WARN_ON_ONCE(xa_is_advanced(entry)))
5930 return -EINVAL;
5931
5932 if (index > last)
5933 return -EINVAL;
5934
5935 mtree_lock(mt);
5936 ret = mas_store_gfp(&mas, entry, gfp);
5937 mtree_unlock(mt);
5938
5939 return ret;
5940 }
5941 EXPORT_SYMBOL(mtree_store_range);
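/*
 * Illustrative sketch: one entry stored over a range of indices is visible
 * at every index in that range.
 *
 *	DEFINE_MTREE(tree);
 *	int ret;
 *
 *	ret = mtree_store_range(&tree, 10, 20, xa_mk_value(42), GFP_KERNEL);
 *	if (!ret)
 *		WARN_ON(mtree_load(&tree, 15) != xa_mk_value(42));
 *	mtree_destroy(&tree);
 */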
5942
5943 /**
5944 * mtree_store() - Store an entry at a given index.
5945 * @mt: The maple tree
5946 * @index: The index to store the value
5947 * @entry: The entry to store
5948 * @gfp: The GFP_FLAGS to use for allocations
5949 *
5950 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5951 * be allocated.
5952 */
5953 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
5954 gfp_t gfp)
5955 {
5956 return mtree_store_range(mt, index, index, entry, gfp);
5957 }
5958 EXPORT_SYMBOL(mtree_store);
5959
5960 /**
5961 * mtree_insert_range() - Insert an entry at a given range if there is no value.
5962 * @mt: The maple tree
5963 * @first: The start of the range
5964 * @last: The end of the range
5965 * @entry: The entry to store
5966 * @gfp: The GFP_FLAGS to use for allocations.
5967 *
5968 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
5969 * request, -ENOMEM if memory could not be allocated.
5970 */
5971 int mtree_insert_range(struct maple_tree *mt, unsigned long first,
5972 unsigned long last, void *entry, gfp_t gfp)
5973 {
5974 MA_STATE(ms, mt, first, last);
5975 int ret = 0;
5976
5977 if (WARN_ON_ONCE(xa_is_advanced(entry)))
5978 return -EINVAL;
5979
5980 if (first > last)
5981 return -EINVAL;
5982
5983 mtree_lock(mt);
5984 retry:
5985 mas_insert(&ms, entry);
5986 if (mas_nomem(&ms, gfp))
5987 goto retry;
5988
5989 mtree_unlock(mt);
5990 if (mas_is_err(&ms))
5991 ret = xa_err(ms.node);
5992
5993 mas_destroy(&ms);
5994 return ret;
5995 }
5996 EXPORT_SYMBOL(mtree_insert_range);
5997
5998 /**
5999 * mtree_insert() - Insert an entry at a given index if there is no value.
6000 * @mt: The maple tree
6001 * @index: The index to store the value
6002 * @entry: The entry to store
6003 * @gfp: The GFP_FLAGS to use for allocations.
6004 *
6005 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6006 * request, -ENOMEM if memory could not be allocated.
6007 */
6008 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
6009 gfp_t gfp)
6010 {
6011 return mtree_insert_range(mt, index, index, entry, gfp);
6012 }
6013 EXPORT_SYMBOL(mtree_insert);
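/*
 * Illustrative sketch: unlike mtree_store(), mtree_insert() refuses to
 * overwrite. A second insert at an occupied index fails with -EEXIST.
 *
 *	DEFINE_MTREE(tree);
 *
 *	WARN_ON(mtree_insert(&tree, 5, xa_mk_value(1), GFP_KERNEL));
 *	WARN_ON(mtree_insert(&tree, 5, xa_mk_value(2), GFP_KERNEL) != -EEXIST);
 *	mtree_destroy(&tree);
 */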
6014
6015 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
6016 void *entry, unsigned long size, unsigned long min,
6017 unsigned long max, gfp_t gfp)
6018 {
6019 int ret = 0;
6020
6021 MA_STATE(mas, mt, 0, 0);
6022 if (!mt_is_alloc(mt))
6023 return -EINVAL;
6024
6025 if (WARN_ON_ONCE(mt_is_reserved(entry)))
6026 return -EINVAL;
6027
6028 mtree_lock(mt);
6029 retry:
6030 ret = mas_empty_area(&mas, min, max, size);
6031 if (ret)
6032 goto unlock;
6033
6034 mas_insert(&mas, entry);
6035 /*
6036 * mas_nomem() may release the lock, causing the allocated area
6037 * to be unavailable, so try to allocate a free area again.
6038 */
6039 if (mas_nomem(&mas, gfp))
6040 goto retry;
6041
6042 if (mas_is_err(&mas))
6043 ret = xa_err(mas.node);
6044 else
6045 *startp = mas.index;
6046
6047 unlock:
6048 mtree_unlock(mt);
6049 mas_destroy(&mas);
6050 return ret;
6051 }
6052 EXPORT_SYMBOL(mtree_alloc_range);
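/*
 * Illustrative sketch: finding room for a 16-index-wide entry between 0 and
 * 1000 in an allocation tree. The tree must be initialized with
 * MT_FLAGS_ALLOC_RANGE for mtree_alloc_range() to succeed.
 *
 *	struct maple_tree tree;
 *	unsigned long start;
 *	int ret;
 *
 *	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
 *	ret = mtree_alloc_range(&tree, &start, xa_mk_value(1), 16, 0, 1000,
 *				GFP_KERNEL);
 *	if (!ret)
 *		pr_debug("allocated [%lu, %lu]\n", start, start + 15);
 *	mtree_destroy(&tree);
 */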
6053
6054 /**
6055 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
6056 * @mt: The maple tree.
6057 * @startp: Pointer to ID.
6058 * @range_lo: Lower bound of range to search.
6059 * @range_hi: Upper bound of range to search.
6060 * @entry: The entry to store.
6061 * @next: Pointer to next ID to allocate.
6062 * @gfp: The GFP_FLAGS to use for allocations.
6063 *
6064 * Finds an empty entry in @mt after @next, stores the new index into
6065 * the @startp pointer, stores the entry at that index, then updates @next.
6066 *
6067 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
6068 *
6069 * Context: Any context. Takes and releases the mt.lock. May sleep if
6070 * the @gfp flags permit.
6071 *
6072 * Return: 0 if the allocation succeeded without wrapping, 1 if the
6073 * allocation succeeded after wrapping, -ENOMEM if memory could not be
6074 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
6075 * free entries.
6076 */
6077 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
6078 void *entry, unsigned long range_lo, unsigned long range_hi,
6079 unsigned long *next, gfp_t gfp)
6080 {
6081 int ret;
6082
6083 MA_STATE(mas, mt, 0, 0);
6084
6085 if (!mt_is_alloc(mt))
6086 return -EINVAL;
6087 if (WARN_ON_ONCE(mt_is_reserved(entry)))
6088 return -EINVAL;
6089 mtree_lock(mt);
6090 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi,
6091 next, gfp);
6092 mtree_unlock(mt);
6093 return ret;
6094 }
6095 EXPORT_SYMBOL(mtree_alloc_cyclic);
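/*
 * Illustrative sketch: cyclic ID allocation. "next" persists across calls so
 * that IDs continue after the most recently assigned one and wrap back to
 * range_lo once range_hi is exhausted.
 *
 *	struct maple_tree tree;
 *	unsigned long id, next = 0;
 *	int ret;
 *
 *	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
 *	ret = mtree_alloc_cyclic(&tree, &id, xa_mk_value(1), 0, 127, &next,
 *				 GFP_KERNEL);
 *	if (ret >= 0)
 *		pr_debug("assigned id %lu\n", id);
 *	mtree_destroy(&tree);
 */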
6096
6097 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
6098 void *entry, unsigned long size, unsigned long min,
6099 unsigned long max, gfp_t gfp)
6100 {
6101 int ret = 0;
6102
6103 MA_STATE(mas, mt, 0, 0);
6104 if (!mt_is_alloc(mt))
6105 return -EINVAL;
6106
6107 if (WARN_ON_ONCE(mt_is_reserved(entry)))
6108 return -EINVAL;
6109
6110 mtree_lock(mt);
6111 retry:
6112 ret = mas_empty_area_rev(&mas, min, max, size);
6113 if (ret)
6114 goto unlock;
6115
6116 mas_insert(&mas, entry);
6117 /*
6118 * mas_nomem() may release the lock, causing the allocated area
6119 * to be unavailable, so try to allocate a free area again.
6120 */
6121 if (mas_nomem(&mas, gfp))
6122 goto retry;
6123
6124 if (mas_is_err(&mas))
6125 ret = xa_err(mas.node);
6126 else
6127 *startp = mas.index;
6128
6129 unlock:
6130 mtree_unlock(mt);
6131 mas_destroy(&mas);
6132 return ret;
6133 }
6134 EXPORT_SYMBOL(mtree_alloc_rrange);
6135
6136 /**
6137 * mtree_erase() - Find an index and erase the entire range.
6138 * @mt: The maple tree
6139 * @index: The index to erase
6140 *
6141 * Erasing is the same as a walk to an entry then a store of a NULL to that
6142 * ENTIRE range. In fact, it is implemented as such using the advanced API.
6143 *
6144 * Return: The entry stored at the @index or %NULL
6145 */
6146 void *mtree_erase(struct maple_tree *mt, unsigned long index)
6147 {
6148 void *entry = NULL;
6149
6150 MA_STATE(mas, mt, index, index);
6151 trace_ma_op(__func__, &mas);
6152
6153 mtree_lock(mt);
6154 entry = mas_erase(&mas);
6155 mtree_unlock(mt);
6156
6157 return entry;
6158 }
6159 EXPORT_SYMBOL(mtree_erase);
6160
6161 /*
6162 * mas_dup_free() - Free an incomplete duplication of a tree.
6163 * @mas: The maple state of an incomplete tree.
6164 *
6165 * The parameter @mas->node passed in indicates that the allocation failed on
6166 * this node. This function frees all nodes starting from @mas->node in the
6167 * reverse order of mas_dup_build(). There is no need to hold the source tree
6168 * lock at this time.
6169 */
6170 static void mas_dup_free(struct ma_state *mas)
6171 {
6172 struct maple_node *node;
6173 enum maple_type type;
6174 void __rcu **slots;
6175 unsigned char count, i;
6176
6177 /* Maybe the first node allocation failed. */
6178 if (mas_is_none(mas))
6179 return;
6180
6181 while (!mte_is_root(mas->node)) {
6182 mas_ascend(mas);
6183 if (mas->offset) {
6184 mas->offset--;
6185 do {
6186 mas_descend(mas);
6187 mas->offset = mas_data_end(mas);
6188 } while (!mte_is_leaf(mas->node));
6189
6190 mas_ascend(mas);
6191 }
6192
6193 node = mte_to_node(mas->node);
6194 type = mte_node_type(mas->node);
6195 slots = ma_slots(node, type);
6196 count = mas_data_end(mas) + 1;
6197 for (i = 0; i < count; i++)
6198 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
6199 mt_free_bulk(count, slots);
6200 }
6201
6202 node = mte_to_node(mas->node);
6203 kfree(node);
6204 }
6205
6206 /*
6207 * mas_copy_node() - Copy a maple node and replace the parent.
6208 * @mas: The maple state of source tree.
6209 * @new_mas: The maple state of new tree.
6210 * @parent: The parent of the new node.
6211 *
6212 * Copy @mas->node to @new_mas->node, set @parent to be the parent of
6213 * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM.
6214 */
6215 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas,
6216 struct maple_pnode *parent)
6217 {
6218 struct maple_node *node = mte_to_node(mas->node);
6219 struct maple_node *new_node = mte_to_node(new_mas->node);
6220 unsigned long val;
6221
6222 /* Copy the node completely. */
6223 memcpy(new_node, node, sizeof(struct maple_node));
6224 /* Update the parent node pointer. */
6225 val = (unsigned long)node->parent & MAPLE_NODE_MASK;
6226 new_node->parent = ma_parent_ptr(val | (unsigned long)parent);
6227 }
6228
6229 /*
6230 * mas_dup_alloc() - Allocate child nodes for a maple node.
6231 * @mas: The maple state of source tree.
6232 * @new_mas: The maple state of new tree.
6233 * @gfp: The GFP_FLAGS to use for allocations.
6234 *
6235 * This function allocates child nodes for @new_mas->node during the duplication
6236 * process. If memory allocation fails, @mas is set to -ENOMEM.
6237 */
6238 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
6239 gfp_t gfp)
6240 {
6241 struct maple_node *node = mte_to_node(mas->node);
6242 struct maple_node *new_node = mte_to_node(new_mas->node);
6243 enum maple_type type;
6244 unsigned char count, i;
6245 void __rcu **slots;
6246 void __rcu **new_slots;
6247 unsigned long val;
6248
6249 /* Allocate memory for child nodes. */
6250 type = mte_node_type(mas->node);
6251 new_slots = ma_slots(new_node, type);
6252 count = mas->node_request = mas_data_end(mas) + 1;
6253 mas_alloc_nodes(mas, gfp);
6254 if (unlikely(mas_is_err(mas)))
6255 return;
6256
6257 slots = ma_slots(node, type);
6258 for (i = 0; i < count; i++) {
6259 val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
6260 val &= MAPLE_NODE_MASK;
6261 new_slots[i] = ma_mnode_ptr((unsigned long)mas_pop_node(mas) |
6262 val);
6263 }
6264 }
6265
6266 /*
6267 * mas_dup_build() - Build a new maple tree from a source tree
6268 * @mas: The maple state of source tree, need to be in MAS_START state.
6269 * @new_mas: The maple state of new tree, need to be in MAS_START state.
6270 * @gfp: The GFP_FLAGS to use for allocations.
6271 *
6272 * This function builds a new tree in DFS preorder. If the memory allocation
6273 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the
6274 * last node. mas_dup_free() will free the incomplete duplication of a tree.
6275 *
6276 * Note that the attributes of the two trees need to be exactly the same, and the
6277 * new tree needs to be empty, otherwise -EINVAL will be set in @mas.
6278 */
6279 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
6280 gfp_t gfp)
6281 {
6282 struct maple_node *node;
6283 struct maple_pnode *parent = NULL;
6284 struct maple_enode *root;
6285 enum maple_type type;
6286
6287 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) ||
6288 unlikely(!mtree_empty(new_mas->tree))) {
6289 mas_set_err(mas, -EINVAL);
6290 return;
6291 }
6292
6293 root = mas_start(mas);
6294 if (mas_is_ptr(mas) || mas_is_none(mas))
6295 goto set_new_tree;
6296
6297 node = mt_alloc_one(gfp);
6298 if (!node) {
6299 new_mas->status = ma_none;
6300 mas_set_err(mas, -ENOMEM);
6301 return;
6302 }
6303
6304 type = mte_node_type(mas->node);
6305 root = mt_mk_node(node, type);
6306 new_mas->node = root;
6307 new_mas->min = 0;
6308 new_mas->max = ULONG_MAX;
6309 root = mte_mk_root(root);
6310 while (1) {
6311 mas_copy_node(mas, new_mas, parent);
6312 if (!mte_is_leaf(mas->node)) {
6313 /* Only allocate child nodes for non-leaf nodes. */
6314 mas_dup_alloc(mas, new_mas, gfp);
6315 if (unlikely(mas_is_err(mas)))
6316 goto empty_mas;
6317 } else {
6318 /*
6319 * This is the last leaf node and duplication is
6320 * completed.
6321 */
6322 if (mas->max == ULONG_MAX)
6323 goto done;
6324
6325 /* This is not the last leaf node and needs to go up. */
6326 do {
6327 mas_ascend(mas);
6328 mas_ascend(new_mas);
6329 } while (mas->offset == mas_data_end(mas));
6330
6331 /* Move to the next subtree. */
6332 mas->offset++;
6333 new_mas->offset++;
6334 }
6335
6336 mas_descend(mas);
6337 parent = ma_parent_ptr(mte_to_node(new_mas->node));
6338 mas_descend(new_mas);
6339 mas->offset = 0;
6340 new_mas->offset = 0;
6341 }
6342 done:
6343 /* Specially handle the parent of the root node. */
6344 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas));
6345 set_new_tree:
6346 /* Make them the same height */
6347 new_mas->tree->ma_flags = mas->tree->ma_flags;
6348 rcu_assign_pointer(new_mas->tree->ma_root, root);
6349 empty_mas:
6350 mas_empty_nodes(mas);
6351 }
6352
6353 /**
6354 * __mt_dup(): Duplicate an entire maple tree
6355 * @mt: The source maple tree
6356 * @new: The new maple tree
6357 * @gfp: The GFP_FLAGS to use for allocations
6358 *
6359 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
6360 * traversal. It uses memcpy() to copy nodes in the source tree and allocates
6361 * new child nodes for non-leaf nodes. Each new node is identical to its
6362 * source node except for the addresses stored in it. This is faster than
6363 * traversing all elements in the source tree and inserting them one by one
6364 * into the new tree.
6365 * The user needs to ensure that the attributes of the source tree and the new
6366 * tree are the same, and the new tree needs to be an empty tree, otherwise
6367 * -EINVAL will be returned.
6368 * Note that the user needs to manually lock the source tree and the new tree.
6369 *
6370 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
6371 * the attributes of the two trees are different or the new tree is not an empty
6372 * tree.
6373 */
6374 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
6375 {
6376 int ret = 0;
6377 MA_STATE(mas, mt, 0, 0);
6378 MA_STATE(new_mas, new, 0, 0);
6379
6380 mas_dup_build(&mas, &new_mas, gfp);
6381 if (unlikely(mas_is_err(&mas))) {
6382 ret = xa_err(mas.node);
6383 if (ret == -ENOMEM)
6384 mas_dup_free(&new_mas);
6385 }
6386
6387 return ret;
6388 }
6389 EXPORT_SYMBOL(__mt_dup);
6390
6391 /**
6392 * mtree_dup(): Duplicate an entire maple tree
6393 * @mt: The source maple tree
6394 * @new: The new maple tree
6395 * @gfp: The GFP_FLAGS to use for allocations
6396 *
6397 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
6398 * traversal. It uses memcpy() to copy nodes in the source tree and allocates
6399 * new child nodes for non-leaf nodes. Each new node is identical to its
6400 * source node except for the addresses stored in it. This is faster than
6401 * traversing all elements in the source tree and inserting them one by one
6402 * into the new tree.
6403 * The user needs to ensure that the attributes of the source tree and the new
6404 * tree are the same, and the new tree needs to be an empty tree, otherwise
6405 * -EINVAL will be returned.
6406 *
6407 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
6408 * the attributes of the two trees are different or the new tree is not an empty
6409 * tree.
6410 */
6411 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
6412 {
6413 int ret = 0;
6414 MA_STATE(mas, mt, 0, 0);
6415 MA_STATE(new_mas, new, 0, 0);
6416
6417 mas_lock(&new_mas);
6418 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING);
6419 mas_dup_build(&mas, &new_mas, gfp);
6420 mas_unlock(&mas);
6421 if (unlikely(mas_is_err(&mas))) {
6422 ret = xa_err(mas.node);
6423 if (ret == -ENOMEM)
6424 mas_dup_free(&new_mas);
6425 }
6426
6427 mas_unlock(&new_mas);
6428 return ret;
6429 }
6430 EXPORT_SYMBOL(mtree_dup);
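/*
 * Illustrative sketch: duplicating a populated tree into an empty tree with
 * the same flags. mtree_dup() takes both locks itself; __mt_dup() is for
 * callers that already hold them. Population of "old" is elided.
 *
 *	DEFINE_MTREE(old);
 *	DEFINE_MTREE(new);
 *	int ret;
 *
 *	ret = mtree_dup(&old, &new, GFP_KERNEL);
 *	if (!ret)
 *		WARN_ON(mtree_load(&new, 5) != mtree_load(&old, 5));
 *	mtree_destroy(&old);
 *	mtree_destroy(&new);
 */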
6431
6432 /**
6433 * __mt_destroy() - Walk and free all nodes of a locked maple tree.
6434 * @mt: The maple tree
6435 *
6436 * Note: Does not handle locking.
6437 */
6438 void __mt_destroy(struct maple_tree *mt)
6439 {
6440 void *root = mt_root_locked(mt);
6441
6442 rcu_assign_pointer(mt->ma_root, NULL);
6443 if (xa_is_node(root))
6444 mte_destroy_walk(root, mt);
6445
6446 mt->ma_flags = mt_attr(mt);
6447 }
6448 EXPORT_SYMBOL_GPL(__mt_destroy);
6449
6450 /**
6451 * mtree_destroy() - Destroy a maple tree
6452 * @mt: The maple tree
6453 *
6454 * Frees all resources used by the tree. Handles locking.
6455 */
6456 void mtree_destroy(struct maple_tree *mt)
6457 {
6458 mtree_lock(mt);
6459 __mt_destroy(mt);
6460 mtree_unlock(mt);
6461 }
6462 EXPORT_SYMBOL(mtree_destroy);
6463
6464 /**
6465 * mt_find() - Search from the start up until an entry is found.
6466 * @mt: The maple tree
6467 * @index: Pointer which contains the start location of the search
6468 * @max: The maximum value of the search range
6469 *
6470 * Takes RCU read lock internally to protect the search, which does not
6471 * protect the returned pointer after dropping RCU read lock.
6472 * See also: Documentation/core-api/maple_tree.rst
6473 *
6474 * If an entry is found, @index is updated to point to the next possible
6475 * entry, independent of whether the found entry occupies a single index
6476 * or a range of indices.
6477 *
6478 * Return: The entry at or after the @index or %NULL
6479 */
6480 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
6481 {
6482 MA_STATE(mas, mt, *index, *index);
6483 void *entry;
6484 #ifdef CONFIG_DEBUG_MAPLE_TREE
6485 unsigned long copy = *index;
6486 #endif
6487
6488 trace_ma_read(__func__, &mas);
6489
6490 if ((*index) > max)
6491 return NULL;
6492
6493 rcu_read_lock();
6494 retry:
6495 entry = mas_state_walk(&mas);
6496 if (mas_is_start(&mas))
6497 goto retry;
6498
6499 if (unlikely(xa_is_zero(entry)))
6500 entry = NULL;
6501
6502 if (entry)
6503 goto unlock;
6504
6505 while (mas_is_active(&mas) && (mas.last < max)) {
6506 entry = mas_next_slot(&mas, max, false);
6507 if (likely(entry && !xa_is_zero(entry)))
6508 break;
6509 }
6510
6511 if (unlikely(xa_is_zero(entry)))
6512 entry = NULL;
6513 unlock:
6514 rcu_read_unlock();
6515 if (likely(entry)) {
6516 *index = mas.last + 1;
6517 #ifdef CONFIG_DEBUG_MAPLE_TREE
6518 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy)))
6519 pr_err("index not increased! %lx <= %lx\n",
6520 *index, copy);
6521 #endif
6522 }
6523
6524 return entry;
6525 }
6526 EXPORT_SYMBOL(mt_find);
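/*
 * Illustrative sketch: mt_for_each() in include/linux/maple_tree.h is built
 * on mt_find()/mt_find_after(). An open-coded walk of a hypothetical
 * "my_tree" would be:
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	for (entry = mt_find(&my_tree, &index, ULONG_MAX); entry;
 *	     entry = mt_find_after(&my_tree, &index, ULONG_MAX))
 *		pr_debug("entry ends below index %lu\n", index);
 */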
6527
6528 /**
6529 * mt_find_after() - Search from the start up until an entry is found.
6530 * @mt: The maple tree
6531 * @index: Pointer which contains the start location of the search
6532 * @max: The maximum value to check
6533 *
6534 * Same as mt_find() except that it checks @index for 0 before
6535 * searching. If @index == 0, the search is aborted. This covers a wrap
6536 * around of @index to 0 in an iterator loop.
6537 *
6538 * Return: The entry at or after the @index or %NULL
6539 */
6540 void *mt_find_after(struct maple_tree *mt, unsigned long *index,
6541 unsigned long max)
6542 {
6543 if (!(*index))
6544 return NULL;
6545
6546 return mt_find(mt, index, max);
6547 }
6548 EXPORT_SYMBOL(mt_find_after);
6549
6550 #ifdef CONFIG_DEBUG_MAPLE_TREE
6551 atomic_t maple_tree_tests_run;
6552 EXPORT_SYMBOL_GPL(maple_tree_tests_run);
6553 atomic_t maple_tree_tests_passed;
6554 EXPORT_SYMBOL_GPL(maple_tree_tests_passed);
6555
6556 #ifndef __KERNEL__
6557 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
6558 void mt_set_non_kernel(unsigned int val)
6559 {
6560 kmem_cache_set_non_kernel(maple_node_cache, val);
6561 }
6562
6563 extern void kmem_cache_set_callback(struct kmem_cache *cachep,
6564 void (*callback)(void *));
6565 void mt_set_callback(void (*callback)(void *))
6566 {
6567 kmem_cache_set_callback(maple_node_cache, callback);
6568 }
6569
6570 extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private);
6571 void mt_set_private(void *private)
6572 {
6573 kmem_cache_set_private(maple_node_cache, private);
6574 }
6575
6576 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
6577 unsigned long mt_get_alloc_size(void)
6578 {
6579 return kmem_cache_get_alloc(maple_node_cache);
6580 }
6581
6582 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
6583 void mt_zero_nr_tallocated(void)
6584 {
6585 kmem_cache_zero_nr_tallocated(maple_node_cache);
6586 }
6587
6588 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
6589 unsigned int mt_nr_tallocated(void)
6590 {
6591 return kmem_cache_nr_tallocated(maple_node_cache);
6592 }
6593
6594 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
6595 unsigned int mt_nr_allocated(void)
6596 {
6597 return kmem_cache_nr_allocated(maple_node_cache);
6598 }
6599
6600 void mt_cache_shrink(void)
6601 {
6602 }
6603 #else
6604 /*
6605 * mt_cache_shrink() - For testing, don't use this.
6606 *
6607 * Certain testcases can trigger an OOM when combined with other memory
6608 * debugging configuration options. This function is used to reduce the
6609 * possibility of an out of memory event due to kmem_cache objects remaining
6610 * around for longer than usual.
6611 */
6612 void mt_cache_shrink(void)
6613 {
6614 kmem_cache_shrink(maple_node_cache);
6615
6616 }
6617 EXPORT_SYMBOL_GPL(mt_cache_shrink);
6618
6619 #endif /* not defined __KERNEL__ */
6620 /*
6621 * mas_get_slot() - Get the entry in the maple state node stored at @offset.
6622 * @mas: The maple state
6623 * @offset: The offset into the slot array to fetch.
6624 *
6625 * Return: The entry stored at @offset.
6626 */
6627 static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
6628 unsigned char offset)
6629 {
6630 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
6631 offset);
6632 }
6633
6634 /* Depth first search, post-order */
6635 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
6636 {
6637
6638 struct maple_enode *p, *mn = mas->node;
6639 unsigned long p_min, p_max;
6640
6641 mas_next_node(mas, mas_mn(mas), max);
6642 if (!mas_is_overflow(mas))
6643 return;
6644
6645 if (mte_is_root(mn))
6646 return;
6647
6648 mas->node = mn;
6649 mas_ascend(mas);
6650 do {
6651 p = mas->node;
6652 p_min = mas->min;
6653 p_max = mas->max;
6654 mas_prev_node(mas, 0);
6655 } while (!mas_is_underflow(mas));
6656
6657 mas->node = p;
6658 mas->max = p_max;
6659 mas->min = p_min;
6660 }
6661
6662 /* Tree validations */
6663 static void mt_dump_node(const struct maple_tree *mt, void *entry,
6664 unsigned long min, unsigned long max, unsigned int depth,
6665 enum mt_dump_format format);
6666 static void mt_dump_range(unsigned long min, unsigned long max,
6667 unsigned int depth, enum mt_dump_format format)
6668 {
6669 static const char spaces[] = " ";
6670
6671 switch(format) {
6672 case mt_dump_hex:
6673 if (min == max)
6674 pr_info("%.*s%lx: ", depth * 2, spaces, min);
6675 else
6676 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max);
6677 break;
6678 case mt_dump_dec:
6679 if (min == max)
6680 pr_info("%.*s%lu: ", depth * 2, spaces, min);
6681 else
6682 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
6683 }
6684 }
6685
6686 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
6687 unsigned int depth, enum mt_dump_format format)
6688 {
6689 mt_dump_range(min, max, depth, format);
6690
6691 if (xa_is_value(entry))
6692 pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry),
6693 xa_to_value(entry), entry);
6694 else if (xa_is_zero(entry))
6695 pr_cont("zero (%ld)\n", xa_to_internal(entry));
6696 else if (mt_is_reserved(entry))
6697 pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry);
6698 else
6699 pr_cont(PTR_FMT "\n", entry);
6700 }
6701
6702 static void mt_dump_range64(const struct maple_tree *mt, void *entry,
6703 unsigned long min, unsigned long max, unsigned int depth,
6704 enum mt_dump_format format)
6705 {
6706 struct maple_range_64 *node = &mte_to_node(entry)->mr64;
6707 bool leaf = mte_is_leaf(entry);
6708 unsigned long first = min;
6709 int i;
6710
6711 pr_cont(" contents: ");
6712 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) {
6713 switch(format) {
6714 case mt_dump_hex:
6715 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
6716 break;
6717 case mt_dump_dec:
6718 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
6719 }
6720 }
6721 pr_cont(PTR_FMT "\n", node->slot[i]);
6722 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
6723 unsigned long last = max;
6724
6725 if (i < (MAPLE_RANGE64_SLOTS - 1))
6726 last = node->pivot[i];
6727 else if (!node->slot[i] && max != mt_node_max(entry))
6728 break;
6729 if (last == 0 && i > 0)
6730 break;
6731 if (leaf)
6732 mt_dump_entry(mt_slot(mt, node->slot, i),
6733 first, last, depth + 1, format);
6734 else if (node->slot[i])
6735 mt_dump_node(mt, mt_slot(mt, node->slot, i),
6736 first, last, depth + 1, format);
6737
6738 if (last == max)
6739 break;
6740 if (last > max) {
6741 switch(format) {
6742 case mt_dump_hex:
6743 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
6744 node, last, max, i);
6745 break;
6746 case mt_dump_dec:
6747 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
6748 node, last, max, i);
6749 }
6750 }
6751 first = last + 1;
6752 }
6753 }
6754
6755 static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
6756 unsigned long min, unsigned long max, unsigned int depth,
6757 enum mt_dump_format format)
6758 {
6759 struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
6760 unsigned long first = min;
6761 int i;
6762
6763 pr_cont(" contents: ");
6764 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
6765 switch (format) {
6766 case mt_dump_hex:
6767 pr_cont("%lx ", node->gap[i]);
6768 break;
6769 case mt_dump_dec:
6770 pr_cont("%lu ", node->gap[i]);
6771 }
6772 }
6773 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
6774 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) {
6775 switch (format) {
6776 case mt_dump_hex:
6777 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
6778 break;
6779 case mt_dump_dec:
6780 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
6781 }
6782 }
6783 pr_cont(PTR_FMT "\n", node->slot[i]);
6784 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
6785 unsigned long last = max;
6786
6787 if (i < (MAPLE_ARANGE64_SLOTS - 1))
6788 last = node->pivot[i];
6789 else if (!node->slot[i])
6790 break;
6791 if (last == 0 && i > 0)
6792 break;
6793 if (node->slot[i])
6794 mt_dump_node(mt, mt_slot(mt, node->slot, i),
6795 first, last, depth + 1, format);
6796
6797 if (last == max)
6798 break;
6799 if (last > max) {
6800 switch(format) {
6801 case mt_dump_hex:
6802 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
6803 node, last, max, i);
6804 break;
6805 case mt_dump_dec:
6806 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
6807 node, last, max, i);
6808 }
6809 }
6810 first = last + 1;
6811 }
6812 }
6813
6814 static void mt_dump_node(const struct maple_tree *mt, void *entry,
6815 unsigned long min, unsigned long max, unsigned int depth,
6816 enum mt_dump_format format)
6817 {
6818 struct maple_node *node = mte_to_node(entry);
6819 unsigned int type = mte_node_type(entry);
6820 unsigned int i;
6821
6822 mt_dump_range(min, max, depth, format);
6823
6824 pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node,
6825 depth, type, node ? node->parent : NULL);
6826 switch (type) {
6827 case maple_dense:
6828 pr_cont("\n");
6829 for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
6830 if (min + i > max)
6831 pr_cont("OUT OF RANGE: ");
6832 mt_dump_entry(mt_slot(mt, node->slot, i),
6833 min + i, min + i, depth, format);
6834 }
6835 break;
6836 case maple_leaf_64:
6837 case maple_range_64:
6838 mt_dump_range64(mt, entry, min, max, depth, format);
6839 break;
6840 case maple_arange_64:
6841 mt_dump_arange64(mt, entry, min, max, depth, format);
6842 break;
6843
6844 default:
6845 pr_cont(" UNKNOWN TYPE\n");
6846 }
6847 }
6848
6849 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format)
6850 {
6851 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
6852
6853 pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n",
6854 mt, mt->ma_flags, mt_height(mt), entry);
6855 if (xa_is_node(entry))
6856 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format);
6857 else if (entry)
6858 mt_dump_entry(entry, 0, 0, 0, format);
6859 else
6860 pr_info("(empty)\n");
6861 }
6862 EXPORT_SYMBOL_GPL(mt_dump);
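/*
 * Illustrative sketch: dumping the node layout of a hypothetical "my_tree"
 * while debugging (CONFIG_DEBUG_MAPLE_TREE only):
 *
 *	mt_dump(&my_tree, mt_dump_hex);
 */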
6863
6864 /*
6865 * Calculate the maximum gap in a node and check if that's what is reported in
6866 * the parent (unless root).
6867 */
6868 static void mas_validate_gaps(struct ma_state *mas)
6869 {
6870 struct maple_enode *mte = mas->node;
6871 struct maple_node *p_mn, *node = mte_to_node(mte);
6872 enum maple_type mt = mte_node_type(mas->node);
6873 unsigned long gap = 0, max_gap = 0;
6874 unsigned long p_end, p_start = mas->min;
6875 unsigned char p_slot, offset;
6876 unsigned long *gaps = NULL;
6877 unsigned long *pivots = ma_pivots(node, mt);
6878 unsigned int i;
6879
6880 if (ma_is_dense(mt)) {
6881 for (i = 0; i < mt_slot_count(mte); i++) {
6882 if (mas_get_slot(mas, i)) {
6883 if (gap > max_gap)
6884 max_gap = gap;
6885 gap = 0;
6886 continue;
6887 }
6888 gap++;
6889 }
6890 goto counted;
6891 }
6892
6893 gaps = ma_gaps(node, mt);
6894 for (i = 0; i < mt_slot_count(mte); i++) {
6895 p_end = mas_safe_pivot(mas, pivots, i, mt);
6896
6897 if (!gaps) {
6898 if (!mas_get_slot(mas, i))
6899 gap = p_end - p_start + 1;
6900 } else {
6901 void *entry = mas_get_slot(mas, i);
6902
6903 gap = gaps[i];
6904 MT_BUG_ON(mas->tree, !entry);
6905
6906 if (gap > p_end - p_start + 1) {
6907 pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n",
6908 mas_mn(mas), i, gap, p_end, p_start,
6909 p_end - p_start + 1);
6910 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1);
6911 }
6912 }
6913
6914 if (gap > max_gap)
6915 max_gap = gap;
6916
6917 p_start = p_end + 1;
6918 if (p_end >= mas->max)
6919 break;
6920 }
6921
6922 counted:
6923 if (mt == maple_arange_64) {
6924 MT_BUG_ON(mas->tree, !gaps);
6925 offset = ma_meta_gap(node);
6926 if (offset > i) {
6927 pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset);
6928 MT_BUG_ON(mas->tree, 1);
6929 }
6930
6931 if (gaps[offset] != max_gap) {
6932 pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n",
6933 node, offset, max_gap);
6934 MT_BUG_ON(mas->tree, 1);
6935 }
6936
6937 for (i++ ; i < mt_slot_count(mte); i++) {
6938 if (gaps[i] != 0) {
6939 pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n",
6940 node, i);
6941 MT_BUG_ON(mas->tree, 1);
6942 }
6943 }
6944 }
6945
6946 if (mte_is_root(mte))
6947 return;
6948
6949 p_slot = mte_parent_slot(mas->node);
6950 p_mn = mte_parent(mte);
6951 MT_BUG_ON(mas->tree, max_gap > mas->max);
6952 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) {
6953 pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap);
6954 mt_dump(mas->tree, mt_dump_hex);
6955 MT_BUG_ON(mas->tree, 1);
6956 }
6957 }
6958
6959 static void mas_validate_parent_slot(struct ma_state *mas)
6960 {
6961 struct maple_node *parent;
6962 struct maple_enode *node;
6963 enum maple_type p_type;
6964 unsigned char p_slot;
6965 void __rcu **slots;
6966 int i;
6967
6968 if (mte_is_root(mas->node))
6969 return;
6970
6971 p_slot = mte_parent_slot(mas->node);
6972 p_type = mas_parent_type(mas, mas->node);
6973 parent = mte_parent(mas->node);
6974 slots = ma_slots(parent, p_type);
6975 MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
6976
6977 /* Check prev/next parent slot for duplicate node entry */
6978
6979 for (i = 0; i < mt_slots[p_type]; i++) {
6980 node = mas_slot(mas, slots, i);
6981 if (i == p_slot) {
6982 if (node != mas->node)
6983 pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n",
6984 parent, i, mas_mn(mas));
6985 MT_BUG_ON(mas->tree, node != mas->node);
6986 } else if (node == mas->node) {
6987 pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n",
6988 mas_mn(mas), parent, i, p_slot);
6989 MT_BUG_ON(mas->tree, node == mas->node);
6990 }
6991 }
6992 }
6993
6994 static void mas_validate_child_slot(struct ma_state *mas)
6995 {
6996 enum maple_type type = mte_node_type(mas->node);
6997 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
6998 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
6999 struct maple_enode *child;
7000 unsigned char i;
7001
7002 if (mte_is_leaf(mas->node))
7003 return;
7004
7005 for (i = 0; i < mt_slots[type]; i++) {
7006 child = mas_slot(mas, slots, i);
7007
7008 if (!child) {
7009 pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n",
7010 mas_mn(mas), i);
7011 MT_BUG_ON(mas->tree, 1);
7012 }
7013
7014 if (mte_parent_slot(child) != i) {
7015 pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n",
7016 mas_mn(mas), i, mte_to_node(child),
7017 mte_parent_slot(child));
7018 MT_BUG_ON(mas->tree, 1);
7019 }
7020
7021 if (mte_parent(child) != mte_to_node(mas->node)) {
7022 pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n",
7023 mte_to_node(child), mte_parent(child),
7024 mte_to_node(mas->node));
7025 MT_BUG_ON(mas->tree, 1);
7026 }
7027
7028 if (i < mt_pivots[type] && pivots[i] == mas->max)
7029 break;
7030 }
7031 }
7032
7033 /*
7034 * Validate all pivots are within mas->min and mas->max, check metadata ends
7035 * where the maximum ends and ensure there are no slots or pivots set outside of
7036 * the end of the data.
7037 */
7038 static void mas_validate_limits(struct ma_state *mas)
7039 {
7040 int i;
7041 unsigned long prev_piv = 0;
7042 enum maple_type type = mte_node_type(mas->node);
7043 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7044 unsigned long *pivots = ma_pivots(mas_mn(mas), type);
7045
7046 for (i = 0; i < mt_slots[type]; i++) {
7047 unsigned long piv;
7048
7049 piv = mas_safe_pivot(mas, pivots, i, type);
7050
7051 if (!piv && (i != 0)) {
7052 pr_err("Missing node limit pivot at " PTR_FMT "[%u]",
7053 mas_mn(mas), i);
7054 MAS_WARN_ON(mas, 1);
7055 }
7056
7057 if (prev_piv > piv) {
7058 pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n",
7059 mas_mn(mas), i, piv, prev_piv);
7060 MAS_WARN_ON(mas, piv < prev_piv);
7061 }
7062
7063 if (piv < mas->min) {
7064 pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i,
7065 piv, mas->min);
7066 MAS_WARN_ON(mas, piv < mas->min);
7067 }
7068 if (piv > mas->max) {
7069 pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i,
7070 piv, mas->max);
7071 MAS_WARN_ON(mas, piv > mas->max);
7072 }
7073 prev_piv = piv;
7074 if (piv == mas->max)
7075 break;
7076 }
7077
7078 if (mas_data_end(mas) != i) {
7079 pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n",
7080 mas_mn(mas), mas_data_end(mas), i);
7081 MT_BUG_ON(mas->tree, 1);
7082 }
7083
7084 for (i += 1; i < mt_slots[type]; i++) {
7085 void *entry = mas_slot(mas, slots, i);
7086
7087 if (entry && (i != mt_slots[type] - 1)) {
7088 pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n",
7089 mas_mn(mas), i, entry);
7090 MT_BUG_ON(mas->tree, entry != NULL);
7091 }
7092
7093 if (i < mt_pivots[type]) {
7094 unsigned long piv = pivots[i];
7095
7096 if (!piv)
7097 continue;
7098
7099 pr_err(PTR_FMT "[%u] should not have piv %lu\n",
7100 mas_mn(mas), i, piv);
7101 MAS_WARN_ON(mas, i < mt_pivots[type] - 1);
7102 }
7103 }
7104 }
7105
7106 static void mt_validate_nulls(struct maple_tree *mt)
7107 {
7108 void *entry, *last = (void *)1;
7109 unsigned char offset = 0;
7110 void __rcu **slots;
7111 MA_STATE(mas, mt, 0, 0);
7112
7113 mas_start(&mas);
7114 if (mas_is_none(&mas) || (mas_is_ptr(&mas)))
7115 return;
7116
7117 while (!mte_is_leaf(mas.node))
7118 mas_descend(&mas);
7119
7120 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
7121 do {
7122 entry = mas_slot(&mas, slots, offset);
7123 if (!last && !entry) {
7124 pr_err("Sequential nulls end at " PTR_FMT "[%u]\n",
7125 mas_mn(&mas), offset);
7126 }
7127 MT_BUG_ON(mt, !last && !entry);
7128 last = entry;
7129 if (offset == mas_data_end(&mas)) {
7130 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
7131 if (mas_is_overflow(&mas))
7132 return;
7133 offset = 0;
7134 slots = ma_slots(mte_to_node(mas.node),
7135 mte_node_type(mas.node));
7136 } else {
7137 offset++;
7138 }
7139
7140 } while (!mas_is_overflow(&mas));
7141 }
7142
7143 /*
7144 * validate a maple tree by checking:
7145 * 1. The limits (pivots are within mas->min to mas->max)
7146 * 2. The gap is correctly set in the parents
7147 */
7148 void mt_validate(struct maple_tree *mt)
7149 __must_hold(mas->tree->ma_lock)
7150 {
7151 unsigned char end;
7152
7153 MA_STATE(mas, mt, 0, 0);
7154 mas_start(&mas);
7155 if (!mas_is_active(&mas))
7156 return;
7157
7158 while (!mte_is_leaf(mas.node))
7159 mas_descend(&mas);
7160
7161 while (!mas_is_overflow(&mas)) {
7162 MAS_WARN_ON(&mas, mte_dead_node(mas.node));
7163 end = mas_data_end(&mas);
7164 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) &&
7165 (!mte_is_root(mas.node)))) {
7166 pr_err("Invalid size %u of " PTR_FMT "\n",
7167 end, mas_mn(&mas));
7168 }
7169
7170 mas_validate_parent_slot(&mas);
7171 mas_validate_limits(&mas);
7172 mas_validate_child_slot(&mas);
7173 if (mt_is_alloc(mt))
7174 mas_validate_gaps(&mas);
7175 mas_dfs_postorder(&mas, ULONG_MAX);
7176 }
7177 mt_validate_nulls(mt);
7178 }
7179 EXPORT_SYMBOL_GPL(mt_validate);
7180
7181 void mas_dump(const struct ma_state *mas)
7182 {
7183 pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ",
7184 mas->tree, mas->node);
7185 switch (mas->status) {
7186 case ma_active:
7187 pr_err("(ma_active)");
7188 break;
7189 case ma_none:
7190 pr_err("(ma_none)");
7191 break;
7192 case ma_root:
7193 pr_err("(ma_root)");
7194 break;
7195 case ma_start:
7196 pr_err("(ma_start) ");
7197 break;
7198 case ma_pause:
7199 pr_err("(ma_pause) ");
7200 break;
7201 case ma_overflow:
7202 pr_err("(ma_overflow) ");
7203 break;
7204 case ma_underflow:
7205 pr_err("(ma_underflow) ");
7206 break;
7207 case ma_error:
7208 pr_err("(ma_error) ");
7209 break;
7210 }
7211
7212 pr_err("Store Type: ");
7213 switch (mas->store_type) {
7214 case wr_invalid:
7215 pr_err("invalid store type\n");
7216 break;
7217 case wr_new_root:
7218 pr_err("new_root\n");
7219 break;
7220 case wr_store_root:
7221 pr_err("store_root\n");
7222 break;
7223 case wr_exact_fit:
7224 pr_err("exact_fit\n");
7225 break;
7226 case wr_split_store:
7227 pr_err("split_store\n");
7228 break;
7229 case wr_slot_store:
7230 pr_err("slot_store\n");
7231 break;
7232 case wr_append:
7233 pr_err("append\n");
7234 break;
7235 case wr_node_store:
7236 pr_err("node_store\n");
7237 break;
7238 case wr_spanning_store:
7239 pr_err("spanning_store\n");
7240 break;
7241 case wr_rebalance:
7242 pr_err("rebalance\n");
7243 break;
7244 }
7245
7246 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end,
7247 mas->index, mas->last);
7248 pr_err(" min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n",
7249 mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth,
7250 mas->mas_flags);
7251 if (mas->index > mas->last)
7252 pr_err("Check index & last\n");
7253 }
7254 EXPORT_SYMBOL_GPL(mas_dump);
7255
7256 void mas_wr_dump(const struct ma_wr_state *wr_mas)
7257 {
7258 pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n",
7259 wr_mas->node, wr_mas->r_min, wr_mas->r_max);
7260 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n",
7261 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end,
7262 wr_mas->end_piv);
7263 }
7264 EXPORT_SYMBOL_GPL(mas_wr_dump);
7265
7266 #endif /* CONFIG_DEBUG_MAPLE_TREE */
7267