1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Maple Tree implementation 4 * Copyright (c) 2018-2022 Oracle Corporation 5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com> 6 * Matthew Wilcox <willy@infradead.org> 7 * Copyright (c) 2023 ByteDance 8 * Author: Peng Zhang <zhangpeng.00@bytedance.com> 9 */ 10 11 /* 12 * DOC: Interesting implementation details of the Maple Tree 13 * 14 * Each node type has a number of slots for entries and a number of slots for 15 * pivots. In the case of dense nodes, the pivots are implied by the position 16 * and are simply the slot index + the minimum of the node. 17 * 18 * In regular B-Tree terms, pivots are called keys. The term pivot is used to 19 * indicate that the tree is specifying ranges. Pivots may appear in the 20 * subtree with an entry attached to the value whereas keys are unique to a 21 * specific position of a B-tree. Pivot values are inclusive of the slot with 22 * the same index. 23 * 24 * 25 * The following illustrates the layout of a range64 nodes slots and pivots. 26 * 27 * 28 * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 | 29 * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ 30 * │ │ │ │ │ │ │ │ └─ Implied maximum 31 * │ │ │ │ │ │ │ └─ Pivot 14 32 * │ │ │ │ │ │ └─ Pivot 13 33 * │ │ │ │ │ └─ Pivot 12 34 * │ │ │ │ └─ Pivot 11 35 * │ │ │ └─ Pivot 2 36 * │ │ └─ Pivot 1 37 * │ └─ Pivot 0 38 * └─ Implied minimum 39 * 40 * Slot contents: 41 * Internal (non-leaf) nodes contain pointers to other nodes. 42 * Leaf nodes contain entries. 43 * 44 * The location of interest is often referred to as an offset. All offsets have 45 * a slot, but the last offset has an implied pivot from the node above (or 46 * UINT_MAX for the root node. 47 * 48 * Ranges complicate certain write activities. When modifying any of 49 * the B-tree variants, it is known that one entry will either be added or 50 * deleted. When modifying the Maple Tree, one store operation may overwrite 51 * the entire data set, or one half of the tree, or the middle half of the tree. 52 * 53 */ 54 55 56 #include <linux/maple_tree.h> 57 #include <linux/xarray.h> 58 #include <linux/types.h> 59 #include <linux/export.h> 60 #include <linux/slab.h> 61 #include <linux/limits.h> 62 #include <asm/barrier.h> 63 64 #define CREATE_TRACE_POINTS 65 #include <trace/events/maple_tree.h> 66 67 /* 68 * Kernel pointer hashing renders much of the maple tree dump useless as tagged 69 * pointers get hashed to arbitrary values. 70 * 71 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is 72 * permissible to bypass this. Otherwise remain cautious and retain the hashing. 73 * 74 * Userland doesn't know about %px so also use %p there. 75 */ 76 #if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE) 77 #define PTR_FMT "%px" 78 #else 79 #define PTR_FMT "%p" 80 #endif 81 82 #define MA_ROOT_PARENT 1 83 84 /* 85 * Maple state flags 86 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation 87 */ 88 #define MA_STATE_PREALLOC 1 89 90 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) 91 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) 92 #define ma_mnode_ptr(x) ((struct maple_node *)(x)) 93 #define ma_enode_ptr(x) ((struct maple_enode *)(x)) 94 static struct kmem_cache *maple_node_cache; 95 96 #ifdef CONFIG_DEBUG_MAPLE_TREE 97 static const unsigned long mt_max[] = { 98 [maple_dense] = MAPLE_NODE_SLOTS, 99 [maple_leaf_64] = ULONG_MAX, 100 [maple_range_64] = ULONG_MAX, 101 [maple_arange_64] = ULONG_MAX, 102 }; 103 #define mt_node_max(x) mt_max[mte_node_type(x)] 104 #endif 105 106 static const unsigned char mt_slots[] = { 107 [maple_dense] = MAPLE_NODE_SLOTS, 108 [maple_leaf_64] = MAPLE_RANGE64_SLOTS, 109 [maple_range_64] = MAPLE_RANGE64_SLOTS, 110 [maple_arange_64] = MAPLE_ARANGE64_SLOTS, 111 }; 112 #define mt_slot_count(x) mt_slots[mte_node_type(x)] 113 114 static const unsigned char mt_pivots[] = { 115 [maple_dense] = 0, 116 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1, 117 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1, 118 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1, 119 }; 120 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)] 121 122 static const unsigned char mt_min_slots[] = { 123 [maple_dense] = MAPLE_NODE_SLOTS / 2, 124 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 125 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 126 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1, 127 }; 128 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)] 129 130 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2) 131 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) 132 133 struct maple_big_node { 134 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; 135 union { 136 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; 137 struct { 138 unsigned long padding[MAPLE_BIG_NODE_GAPS]; 139 unsigned long gap[MAPLE_BIG_NODE_GAPS]; 140 }; 141 }; 142 unsigned char b_end; 143 enum maple_type type; 144 }; 145 146 /* 147 * The maple_subtree_state is used to build a tree to replace a segment of an 148 * existing tree in a more atomic way. Any walkers of the older tree will hit a 149 * dead node and restart on updates. 150 */ 151 struct maple_subtree_state { 152 struct ma_state *orig_l; /* Original left side of subtree */ 153 struct ma_state *orig_r; /* Original right side of subtree */ 154 struct ma_state *l; /* New left side of subtree */ 155 struct ma_state *m; /* New middle of subtree (rare) */ 156 struct ma_state *r; /* New right side of subtree */ 157 struct ma_topiary *free; /* nodes to be freed */ 158 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 159 struct maple_big_node *bn; 160 }; 161 162 #ifdef CONFIG_KASAN_STACK 163 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 164 #define noinline_for_kasan noinline_for_stack 165 #else 166 #define noinline_for_kasan inline 167 #endif 168 169 /* Functions */ 170 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 171 { 172 return kmem_cache_alloc(maple_node_cache, gfp); 173 } 174 175 static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) 176 { 177 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); 178 } 179 180 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 181 { 182 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 183 } 184 185 static void mt_return_sheaf(struct slab_sheaf *sheaf) 186 { 187 kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf); 188 } 189 190 static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count) 191 { 192 return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count); 193 } 194 195 static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf, 196 unsigned int size) 197 { 198 return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size); 199 } 200 201 /* 202 * ma_free_rcu() - Use rcu callback to free a maple node 203 * @node: The node to free 204 * 205 * The maple tree uses the parent pointer to indicate this node is no longer in 206 * use and will be freed. 207 */ 208 static void ma_free_rcu(struct maple_node *node) 209 { 210 WARN_ON(node->parent != ma_parent_ptr(node)); 211 kfree_rcu(node, rcu); 212 } 213 214 static void mt_set_height(struct maple_tree *mt, unsigned char height) 215 { 216 unsigned int new_flags = mt->ma_flags; 217 218 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 219 MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX); 220 new_flags |= height << MT_FLAGS_HEIGHT_OFFSET; 221 mt->ma_flags = new_flags; 222 } 223 224 static unsigned int mas_mt_height(struct ma_state *mas) 225 { 226 return mt_height(mas->tree); 227 } 228 229 static inline unsigned int mt_attr(struct maple_tree *mt) 230 { 231 return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK; 232 } 233 234 static __always_inline enum maple_type mte_node_type( 235 const struct maple_enode *entry) 236 { 237 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 238 MAPLE_NODE_TYPE_MASK; 239 } 240 241 static __always_inline bool ma_is_dense(const enum maple_type type) 242 { 243 return type < maple_leaf_64; 244 } 245 246 static __always_inline bool ma_is_leaf(const enum maple_type type) 247 { 248 return type < maple_range_64; 249 } 250 251 static __always_inline bool mte_is_leaf(const struct maple_enode *entry) 252 { 253 return ma_is_leaf(mte_node_type(entry)); 254 } 255 256 /* 257 * We also reserve values with the bottom two bits set to '10' which are 258 * below 4096 259 */ 260 static __always_inline bool mt_is_reserved(const void *entry) 261 { 262 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 263 xa_is_internal(entry); 264 } 265 266 static __always_inline void mas_set_err(struct ma_state *mas, long err) 267 { 268 mas->node = MA_ERROR(err); 269 mas->status = ma_error; 270 } 271 272 static __always_inline bool mas_is_ptr(const struct ma_state *mas) 273 { 274 return mas->status == ma_root; 275 } 276 277 static __always_inline bool mas_is_start(const struct ma_state *mas) 278 { 279 return mas->status == ma_start; 280 } 281 282 static __always_inline bool mas_is_none(const struct ma_state *mas) 283 { 284 return mas->status == ma_none; 285 } 286 287 static __always_inline bool mas_is_paused(const struct ma_state *mas) 288 { 289 return mas->status == ma_pause; 290 } 291 292 static __always_inline bool mas_is_overflow(struct ma_state *mas) 293 { 294 return mas->status == ma_overflow; 295 } 296 297 static inline bool mas_is_underflow(struct ma_state *mas) 298 { 299 return mas->status == ma_underflow; 300 } 301 302 static __always_inline struct maple_node *mte_to_node( 303 const struct maple_enode *entry) 304 { 305 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 306 } 307 308 /* 309 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 310 * @entry: The maple encoded node 311 * 312 * Return: a maple topiary pointer 313 */ 314 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 315 { 316 return (struct maple_topiary *) 317 ((unsigned long)entry & ~MAPLE_NODE_MASK); 318 } 319 320 /* 321 * mas_mn() - Get the maple state node. 322 * @mas: The maple state 323 * 324 * Return: the maple node (not encoded - bare pointer). 325 */ 326 static inline struct maple_node *mas_mn(const struct ma_state *mas) 327 { 328 return mte_to_node(mas->node); 329 } 330 331 /* 332 * mte_set_node_dead() - Set a maple encoded node as dead. 333 * @mn: The maple encoded node. 334 */ 335 static inline void mte_set_node_dead(struct maple_enode *mn) 336 { 337 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 338 smp_wmb(); /* Needed for RCU */ 339 } 340 341 /* Bit 1 indicates the root is a node */ 342 #define MAPLE_ROOT_NODE 0x02 343 /* maple_type stored bit 3-6 */ 344 #define MAPLE_ENODE_TYPE_SHIFT 0x03 345 /* Bit 2 means a NULL somewhere below */ 346 #define MAPLE_ENODE_NULL 0x04 347 348 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 349 enum maple_type type) 350 { 351 return (void *)((unsigned long)node | 352 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 353 } 354 355 static inline void *mte_mk_root(const struct maple_enode *node) 356 { 357 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 358 } 359 360 static inline void *mte_safe_root(const struct maple_enode *node) 361 { 362 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 363 } 364 365 static inline void __maybe_unused *mte_set_full(const struct maple_enode *node) 366 { 367 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 368 } 369 370 static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node) 371 { 372 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 373 } 374 375 static inline bool __maybe_unused mte_has_null(const struct maple_enode *node) 376 { 377 return (unsigned long)node & MAPLE_ENODE_NULL; 378 } 379 380 static __always_inline bool ma_is_root(struct maple_node *node) 381 { 382 return ((unsigned long)node->parent & MA_ROOT_PARENT); 383 } 384 385 static __always_inline bool mte_is_root(const struct maple_enode *node) 386 { 387 return ma_is_root(mte_to_node(node)); 388 } 389 390 static inline bool mas_is_root_limits(const struct ma_state *mas) 391 { 392 return !mas->min && mas->max == ULONG_MAX; 393 } 394 395 static __always_inline bool mt_is_alloc(struct maple_tree *mt) 396 { 397 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 398 } 399 400 /* 401 * The Parent Pointer 402 * Excluding root, the parent pointer is 256B aligned like all other tree nodes. 403 * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 404 * bit values need an extra bit to store the offset. This extra bit comes from 405 * a reuse of the last bit in the node type. This is possible by using bit 1 to 406 * indicate if bit 2 is part of the type or the slot. 407 * 408 * Note types: 409 * 0x??1 = Root 410 * 0x?00 = 16 bit nodes 411 * 0x010 = 32 bit nodes 412 * 0x110 = 64 bit nodes 413 * 414 * Slot size and alignment 415 * 0b??1 : Root 416 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7 417 * 0b010 : 32 bit values, type in 0-2, slot in 3-7 418 * 0b110 : 64 bit values, type in 0-2, slot in 3-7 419 */ 420 421 #define MAPLE_PARENT_ROOT 0x01 422 423 #define MAPLE_PARENT_SLOT_SHIFT 0x03 424 #define MAPLE_PARENT_SLOT_MASK 0xF8 425 426 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 427 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC 428 429 #define MAPLE_PARENT_RANGE64 0x06 430 #define MAPLE_PARENT_RANGE32 0x04 431 #define MAPLE_PARENT_NOT_RANGE16 0x02 432 433 /* 434 * mte_parent_shift() - Get the parent shift for the slot storage. 435 * @parent: The parent pointer cast as an unsigned long 436 * Return: The shift into that pointer to the star to of the slot 437 */ 438 static inline unsigned long mte_parent_shift(unsigned long parent) 439 { 440 /* Note bit 1 == 0 means 16B */ 441 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 442 return MAPLE_PARENT_SLOT_SHIFT; 443 444 return MAPLE_PARENT_16B_SLOT_SHIFT; 445 } 446 447 /* 448 * mte_parent_slot_mask() - Get the slot mask for the parent. 449 * @parent: The parent pointer cast as an unsigned long. 450 * Return: The slot mask for that parent. 451 */ 452 static inline unsigned long mte_parent_slot_mask(unsigned long parent) 453 { 454 /* Note bit 1 == 0 means 16B */ 455 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 456 return MAPLE_PARENT_SLOT_MASK; 457 458 return MAPLE_PARENT_16B_SLOT_MASK; 459 } 460 461 /* 462 * mas_parent_type() - Return the maple_type of the parent from the stored 463 * parent type. 464 * @mas: The maple state 465 * @enode: The maple_enode to extract the parent's enum 466 * Return: The node->parent maple_type 467 */ 468 static inline 469 enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) 470 { 471 unsigned long p_type; 472 473 p_type = (unsigned long)mte_to_node(enode)->parent; 474 if (WARN_ON(p_type & MAPLE_PARENT_ROOT)) 475 return 0; 476 477 p_type &= MAPLE_NODE_MASK; 478 p_type &= ~mte_parent_slot_mask(p_type); 479 switch (p_type) { 480 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ 481 if (mt_is_alloc(mas->tree)) 482 return maple_arange_64; 483 return maple_range_64; 484 } 485 486 return 0; 487 } 488 489 /* 490 * mas_set_parent() - Set the parent node and encode the slot 491 * @mas: The maple state 492 * @enode: The encoded maple node. 493 * @parent: The encoded maple node that is the parent of @enode. 494 * @slot: The slot that @enode resides in @parent. 495 * 496 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the 497 * parent type. 498 */ 499 static inline 500 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 501 const struct maple_enode *parent, unsigned char slot) 502 { 503 unsigned long val = (unsigned long)parent; 504 unsigned long shift; 505 unsigned long type; 506 enum maple_type p_type = mte_node_type(parent); 507 508 MAS_BUG_ON(mas, p_type == maple_dense); 509 MAS_BUG_ON(mas, p_type == maple_leaf_64); 510 511 switch (p_type) { 512 case maple_range_64: 513 case maple_arange_64: 514 shift = MAPLE_PARENT_SLOT_SHIFT; 515 type = MAPLE_PARENT_RANGE64; 516 break; 517 default: 518 case maple_dense: 519 case maple_leaf_64: 520 shift = type = 0; 521 break; 522 } 523 524 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 525 val |= (slot << shift) | type; 526 mte_to_node(enode)->parent = ma_parent_ptr(val); 527 } 528 529 /* 530 * mte_parent_slot() - get the parent slot of @enode. 531 * @enode: The encoded maple node. 532 * 533 * Return: The slot in the parent node where @enode resides. 534 */ 535 static __always_inline 536 unsigned int mte_parent_slot(const struct maple_enode *enode) 537 { 538 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 539 540 if (unlikely(val & MA_ROOT_PARENT)) 541 return 0; 542 543 /* 544 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 545 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 546 */ 547 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 548 } 549 550 /* 551 * mte_parent() - Get the parent of @node. 552 * @enode: The encoded maple node. 553 * 554 * Return: The parent maple node. 555 */ 556 static __always_inline 557 struct maple_node *mte_parent(const struct maple_enode *enode) 558 { 559 return (void *)((unsigned long) 560 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 561 } 562 563 /* 564 * ma_dead_node() - check if the @enode is dead. 565 * @enode: The encoded maple node 566 * 567 * Return: true if dead, false otherwise. 568 */ 569 static __always_inline bool ma_dead_node(const struct maple_node *node) 570 { 571 struct maple_node *parent; 572 573 /* Do not reorder reads from the node prior to the parent check */ 574 smp_rmb(); 575 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 576 return (parent == node); 577 } 578 579 /* 580 * mte_dead_node() - check if the @enode is dead. 581 * @enode: The encoded maple node 582 * 583 * Return: true if dead, false otherwise. 584 */ 585 static __always_inline bool mte_dead_node(const struct maple_enode *enode) 586 { 587 struct maple_node *node; 588 589 node = mte_to_node(enode); 590 return ma_dead_node(node); 591 } 592 593 /* 594 * ma_pivots() - Get a pointer to the maple node pivots. 595 * @node: the maple node 596 * @type: the node type 597 * 598 * In the event of a dead node, this array may be %NULL 599 * 600 * Return: A pointer to the maple node pivots 601 */ 602 static inline unsigned long *ma_pivots(struct maple_node *node, 603 enum maple_type type) 604 { 605 switch (type) { 606 case maple_arange_64: 607 return node->ma64.pivot; 608 case maple_range_64: 609 case maple_leaf_64: 610 return node->mr64.pivot; 611 case maple_dense: 612 return NULL; 613 } 614 return NULL; 615 } 616 617 /* 618 * ma_gaps() - Get a pointer to the maple node gaps. 619 * @node: the maple node 620 * @type: the node type 621 * 622 * Return: A pointer to the maple node gaps 623 */ 624 static inline unsigned long *ma_gaps(struct maple_node *node, 625 enum maple_type type) 626 { 627 switch (type) { 628 case maple_arange_64: 629 return node->ma64.gap; 630 case maple_range_64: 631 case maple_leaf_64: 632 case maple_dense: 633 return NULL; 634 } 635 return NULL; 636 } 637 638 /* 639 * mas_safe_pivot() - get the pivot at @piv or mas->max. 640 * @mas: The maple state 641 * @pivots: The pointer to the maple node pivots 642 * @piv: The pivot to fetch 643 * @type: The maple node type 644 * 645 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 646 * otherwise. 647 */ 648 static __always_inline unsigned long 649 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 650 unsigned char piv, enum maple_type type) 651 { 652 if (piv >= mt_pivots[type]) 653 return mas->max; 654 655 return pivots[piv]; 656 } 657 658 /* 659 * mas_safe_min() - Return the minimum for a given offset. 660 * @mas: The maple state 661 * @pivots: The pointer to the maple node pivots 662 * @offset: The offset into the pivot array 663 * 664 * Return: The minimum range value that is contained in @offset. 665 */ 666 static inline unsigned long 667 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 668 { 669 if (likely(offset)) 670 return pivots[offset - 1] + 1; 671 672 return mas->min; 673 } 674 675 /* 676 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 677 * @mn: The encoded maple node 678 * @piv: The pivot offset 679 * @val: The value of the pivot 680 */ 681 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 682 unsigned long val) 683 { 684 struct maple_node *node = mte_to_node(mn); 685 enum maple_type type = mte_node_type(mn); 686 687 BUG_ON(piv >= mt_pivots[type]); 688 switch (type) { 689 case maple_range_64: 690 case maple_leaf_64: 691 node->mr64.pivot[piv] = val; 692 break; 693 case maple_arange_64: 694 node->ma64.pivot[piv] = val; 695 break; 696 case maple_dense: 697 break; 698 } 699 700 } 701 702 /* 703 * ma_slots() - Get a pointer to the maple node slots. 704 * @mn: The maple node 705 * @mt: The maple node type 706 * 707 * Return: A pointer to the maple node slots 708 */ 709 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 710 { 711 switch (mt) { 712 case maple_arange_64: 713 return mn->ma64.slot; 714 case maple_range_64: 715 case maple_leaf_64: 716 return mn->mr64.slot; 717 case maple_dense: 718 return mn->slot; 719 } 720 721 return NULL; 722 } 723 724 static inline bool mt_write_locked(const struct maple_tree *mt) 725 { 726 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 727 lockdep_is_held(&mt->ma_lock); 728 } 729 730 static __always_inline bool mt_locked(const struct maple_tree *mt) 731 { 732 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 733 lockdep_is_held(&mt->ma_lock); 734 } 735 736 static __always_inline void *mt_slot(const struct maple_tree *mt, 737 void __rcu **slots, unsigned char offset) 738 { 739 return rcu_dereference_check(slots[offset], mt_locked(mt)); 740 } 741 742 static __always_inline void *mt_slot_locked(struct maple_tree *mt, 743 void __rcu **slots, unsigned char offset) 744 { 745 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 746 } 747 /* 748 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 749 * @mas: The maple state 750 * @slots: The pointer to the slots 751 * @offset: The offset into the slots array to fetch 752 * 753 * Return: The entry stored in @slots at the @offset. 754 */ 755 static __always_inline void *mas_slot_locked(struct ma_state *mas, 756 void __rcu **slots, unsigned char offset) 757 { 758 return mt_slot_locked(mas->tree, slots, offset); 759 } 760 761 /* 762 * mas_slot() - Get the slot value when not holding the maple tree lock. 763 * @mas: The maple state 764 * @slots: The pointer to the slots 765 * @offset: The offset into the slots array to fetch 766 * 767 * Return: The entry stored in @slots at the @offset 768 */ 769 static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 770 unsigned char offset) 771 { 772 return mt_slot(mas->tree, slots, offset); 773 } 774 775 /* 776 * mas_root() - Get the maple tree root. 777 * @mas: The maple state. 778 * 779 * Return: The pointer to the root of the tree 780 */ 781 static __always_inline void *mas_root(struct ma_state *mas) 782 { 783 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 784 } 785 786 static inline void *mt_root_locked(struct maple_tree *mt) 787 { 788 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 789 } 790 791 /* 792 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 793 * @mas: The maple state. 794 * 795 * Return: The pointer to the root of the tree 796 */ 797 static inline void *mas_root_locked(struct ma_state *mas) 798 { 799 return mt_root_locked(mas->tree); 800 } 801 802 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 803 enum maple_type mt) 804 { 805 switch (mt) { 806 case maple_arange_64: 807 return &mn->ma64.meta; 808 default: 809 return &mn->mr64.meta; 810 } 811 } 812 813 /* 814 * ma_set_meta() - Set the metadata information of a node. 815 * @mn: The maple node 816 * @mt: The maple node type 817 * @offset: The offset of the highest sub-gap in this node. 818 * @end: The end of the data in this node. 819 */ 820 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 821 unsigned char offset, unsigned char end) 822 { 823 struct maple_metadata *meta = ma_meta(mn, mt); 824 825 meta->gap = offset; 826 meta->end = end; 827 } 828 829 /* 830 * mt_clear_meta() - clear the metadata information of a node, if it exists 831 * @mt: The maple tree 832 * @mn: The maple node 833 * @type: The maple node type 834 */ 835 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 836 enum maple_type type) 837 { 838 struct maple_metadata *meta; 839 unsigned long *pivots; 840 void __rcu **slots; 841 void *next; 842 843 switch (type) { 844 case maple_range_64: 845 pivots = mn->mr64.pivot; 846 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 847 slots = mn->mr64.slot; 848 next = mt_slot_locked(mt, slots, 849 MAPLE_RANGE64_SLOTS - 1); 850 if (unlikely((mte_to_node(next) && 851 mte_node_type(next)))) 852 return; /* no metadata, could be node */ 853 } 854 fallthrough; 855 case maple_arange_64: 856 meta = ma_meta(mn, type); 857 break; 858 default: 859 return; 860 } 861 862 meta->gap = 0; 863 meta->end = 0; 864 } 865 866 /* 867 * ma_meta_end() - Get the data end of a node from the metadata 868 * @mn: The maple node 869 * @mt: The maple node type 870 */ 871 static inline unsigned char ma_meta_end(struct maple_node *mn, 872 enum maple_type mt) 873 { 874 struct maple_metadata *meta = ma_meta(mn, mt); 875 876 return meta->end; 877 } 878 879 /* 880 * ma_meta_gap() - Get the largest gap location of a node from the metadata 881 * @mn: The maple node 882 */ 883 static inline unsigned char ma_meta_gap(struct maple_node *mn) 884 { 885 return mn->ma64.meta.gap; 886 } 887 888 /* 889 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 890 * @mn: The maple node 891 * @mt: The maple node type 892 * @offset: The location of the largest gap. 893 */ 894 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 895 unsigned char offset) 896 { 897 898 struct maple_metadata *meta = ma_meta(mn, mt); 899 900 meta->gap = offset; 901 } 902 903 /* 904 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 905 * @mat: the ma_topiary, a linked list of dead nodes. 906 * @dead_enode: the node to be marked as dead and added to the tail of the list 907 * 908 * Add the @dead_enode to the linked list in @mat. 909 */ 910 static inline void mat_add(struct ma_topiary *mat, 911 struct maple_enode *dead_enode) 912 { 913 mte_set_node_dead(dead_enode); 914 mte_to_mat(dead_enode)->next = NULL; 915 if (!mat->tail) { 916 mat->tail = mat->head = dead_enode; 917 return; 918 } 919 920 mte_to_mat(mat->tail)->next = dead_enode; 921 mat->tail = dead_enode; 922 } 923 924 static void mt_free_walk(struct rcu_head *head); 925 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 926 bool free); 927 /* 928 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 929 * @mas: the maple state 930 * @mat: the ma_topiary linked list of dead nodes to free. 931 * 932 * Destroy walk a dead list. 933 */ 934 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 935 { 936 struct maple_enode *next; 937 struct maple_node *node; 938 bool in_rcu = mt_in_rcu(mas->tree); 939 940 while (mat->head) { 941 next = mte_to_mat(mat->head)->next; 942 node = mte_to_node(mat->head); 943 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 944 if (in_rcu) 945 call_rcu(&node->rcu, mt_free_walk); 946 mat->head = next; 947 } 948 } 949 /* 950 * mas_descend() - Descend into the slot stored in the ma_state. 951 * @mas: the maple state. 952 * 953 * Note: Not RCU safe, only use in write side or debug code. 954 */ 955 static inline void mas_descend(struct ma_state *mas) 956 { 957 enum maple_type type; 958 unsigned long *pivots; 959 struct maple_node *node; 960 void __rcu **slots; 961 962 node = mas_mn(mas); 963 type = mte_node_type(mas->node); 964 pivots = ma_pivots(node, type); 965 slots = ma_slots(node, type); 966 967 if (mas->offset) 968 mas->min = pivots[mas->offset - 1] + 1; 969 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 970 mas->node = mas_slot(mas, slots, mas->offset); 971 } 972 973 /* 974 * mas_ascend() - Walk up a level of the tree. 975 * @mas: The maple state 976 * 977 * Sets the @mas->max and @mas->min for the parent node of mas->node. This 978 * may cause several levels of walking up to find the correct min and max. 979 * May find a dead node which will cause a premature return. 980 * Return: 1 on dead node, 0 otherwise 981 */ 982 static int mas_ascend(struct ma_state *mas) 983 { 984 struct maple_enode *p_enode; /* parent enode. */ 985 struct maple_enode *a_enode; /* ancestor enode. */ 986 struct maple_node *a_node; /* ancestor node. */ 987 struct maple_node *p_node; /* parent node. */ 988 unsigned char a_slot; 989 enum maple_type a_type; 990 unsigned long min, max; 991 unsigned long *pivots; 992 bool set_max = false, set_min = false; 993 994 a_node = mas_mn(mas); 995 if (ma_is_root(a_node)) { 996 mas->offset = 0; 997 return 0; 998 } 999 1000 p_node = mte_parent(mas->node); 1001 if (unlikely(a_node == p_node)) 1002 return 1; 1003 1004 a_type = mas_parent_type(mas, mas->node); 1005 mas->offset = mte_parent_slot(mas->node); 1006 a_enode = mt_mk_node(p_node, a_type); 1007 1008 /* Check to make sure all parent information is still accurate */ 1009 if (p_node != mte_parent(mas->node)) 1010 return 1; 1011 1012 mas->node = a_enode; 1013 1014 if (mte_is_root(a_enode)) { 1015 mas->max = ULONG_MAX; 1016 mas->min = 0; 1017 return 0; 1018 } 1019 1020 min = 0; 1021 max = ULONG_MAX; 1022 1023 /* 1024 * !mas->offset implies that parent node min == mas->min. 1025 * mas->offset > 0 implies that we need to walk up to find the 1026 * implied pivot min. 1027 */ 1028 if (!mas->offset) { 1029 min = mas->min; 1030 set_min = true; 1031 } 1032 1033 if (mas->max == ULONG_MAX) 1034 set_max = true; 1035 1036 do { 1037 p_enode = a_enode; 1038 a_type = mas_parent_type(mas, p_enode); 1039 a_node = mte_parent(p_enode); 1040 a_slot = mte_parent_slot(p_enode); 1041 a_enode = mt_mk_node(a_node, a_type); 1042 pivots = ma_pivots(a_node, a_type); 1043 1044 if (unlikely(ma_dead_node(a_node))) 1045 return 1; 1046 1047 if (!set_min && a_slot) { 1048 set_min = true; 1049 min = pivots[a_slot - 1] + 1; 1050 } 1051 1052 if (!set_max && a_slot < mt_pivots[a_type]) { 1053 set_max = true; 1054 max = pivots[a_slot]; 1055 } 1056 1057 if (unlikely(ma_dead_node(a_node))) 1058 return 1; 1059 1060 if (unlikely(ma_is_root(a_node))) 1061 break; 1062 1063 } while (!set_min || !set_max); 1064 1065 mas->max = max; 1066 mas->min = min; 1067 return 0; 1068 } 1069 1070 /* 1071 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1072 * @mas: The maple state 1073 * 1074 * Return: A pointer to a maple node. 1075 */ 1076 static inline struct maple_node *mas_pop_node(struct ma_state *mas) 1077 { 1078 struct maple_node *ret; 1079 1080 if (WARN_ON_ONCE(!mas->sheaf)) 1081 return NULL; 1082 1083 ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf); 1084 memset(ret, 0, sizeof(*ret)); 1085 1086 return ret; 1087 } 1088 1089 /* 1090 * mas_alloc_nodes() - Allocate nodes into a maple state 1091 * @mas: The maple state 1092 * @gfp: The GFP Flags 1093 */ 1094 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1095 { 1096 if (unlikely(mas->sheaf)) { 1097 unsigned long refill = mas->node_request; 1098 1099 if (kmem_cache_sheaf_size(mas->sheaf) >= refill) { 1100 mas->node_request = 0; 1101 return; 1102 } 1103 1104 if (mt_refill_sheaf(gfp, &mas->sheaf, refill)) 1105 goto error; 1106 1107 mas->node_request = 0; 1108 return; 1109 } 1110 1111 mas->sheaf = mt_get_sheaf(gfp, mas->node_request); 1112 if (likely(mas->sheaf)) { 1113 mas->node_request = 0; 1114 return; 1115 } 1116 1117 error: 1118 mas_set_err(mas, -ENOMEM); 1119 } 1120 1121 1122 /* 1123 * mas_free() - Free an encoded maple node 1124 * @mas: The maple state 1125 * @used: The encoded maple node to free. 1126 * 1127 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1128 * otherwise. 1129 */ 1130 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1131 { 1132 ma_free_rcu(mte_to_node(used)); 1133 } 1134 1135 /* 1136 * mas_start() - Sets up maple state for operations. 1137 * @mas: The maple state. 1138 * 1139 * If mas->status == ma_start, then set the min, max and depth to 1140 * defaults. 1141 * 1142 * Return: 1143 * - If mas->node is an error or not mas_start, return NULL. 1144 * - If it's an empty tree: NULL & mas->status == ma_none 1145 * - If it's a single entry: The entry & mas->status == ma_root 1146 * - If it's a tree: NULL & mas->status == ma_active 1147 */ 1148 static inline struct maple_enode *mas_start(struct ma_state *mas) 1149 { 1150 if (likely(mas_is_start(mas))) { 1151 struct maple_enode *root; 1152 1153 mas->min = 0; 1154 mas->max = ULONG_MAX; 1155 1156 retry: 1157 mas->depth = 0; 1158 root = mas_root(mas); 1159 /* Tree with nodes */ 1160 if (likely(xa_is_node(root))) { 1161 mas->depth = 0; 1162 mas->status = ma_active; 1163 mas->node = mte_safe_root(root); 1164 mas->offset = 0; 1165 if (mte_dead_node(mas->node)) 1166 goto retry; 1167 1168 return NULL; 1169 } 1170 1171 mas->node = NULL; 1172 /* empty tree */ 1173 if (unlikely(!root)) { 1174 mas->status = ma_none; 1175 mas->offset = MAPLE_NODE_SLOTS; 1176 return NULL; 1177 } 1178 1179 /* Single entry tree */ 1180 mas->status = ma_root; 1181 mas->offset = MAPLE_NODE_SLOTS; 1182 1183 /* Single entry tree. */ 1184 if (mas->index > 0) 1185 return NULL; 1186 1187 return root; 1188 } 1189 1190 return NULL; 1191 } 1192 1193 /* 1194 * ma_data_end() - Find the end of the data in a node. 1195 * @node: The maple node 1196 * @type: The maple node type 1197 * @pivots: The array of pivots in the node 1198 * @max: The maximum value in the node 1199 * 1200 * Uses metadata to find the end of the data when possible. 1201 * Return: The zero indexed last slot with data (may be null). 1202 */ 1203 static __always_inline unsigned char ma_data_end(struct maple_node *node, 1204 enum maple_type type, unsigned long *pivots, unsigned long max) 1205 { 1206 unsigned char offset; 1207 1208 if (!pivots) 1209 return 0; 1210 1211 if (type == maple_arange_64) 1212 return ma_meta_end(node, type); 1213 1214 offset = mt_pivots[type] - 1; 1215 if (likely(!pivots[offset])) 1216 return ma_meta_end(node, type); 1217 1218 if (likely(pivots[offset] == max)) 1219 return offset; 1220 1221 return mt_pivots[type]; 1222 } 1223 1224 /* 1225 * mas_data_end() - Find the end of the data (slot). 1226 * @mas: the maple state 1227 * 1228 * This method is optimized to check the metadata of a node if the node type 1229 * supports data end metadata. 1230 * 1231 * Return: The zero indexed last slot with data (may be null). 1232 */ 1233 static inline unsigned char mas_data_end(struct ma_state *mas) 1234 { 1235 enum maple_type type; 1236 struct maple_node *node; 1237 unsigned char offset; 1238 unsigned long *pivots; 1239 1240 type = mte_node_type(mas->node); 1241 node = mas_mn(mas); 1242 if (type == maple_arange_64) 1243 return ma_meta_end(node, type); 1244 1245 pivots = ma_pivots(node, type); 1246 if (unlikely(ma_dead_node(node))) 1247 return 0; 1248 1249 offset = mt_pivots[type] - 1; 1250 if (likely(!pivots[offset])) 1251 return ma_meta_end(node, type); 1252 1253 if (likely(pivots[offset] == mas->max)) 1254 return offset; 1255 1256 return mt_pivots[type]; 1257 } 1258 1259 /* 1260 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1261 * @mas: the maple state 1262 * 1263 * Return: The maximum gap in the leaf. 1264 */ 1265 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1266 { 1267 enum maple_type mt; 1268 unsigned long pstart, gap, max_gap; 1269 struct maple_node *mn; 1270 unsigned long *pivots; 1271 void __rcu **slots; 1272 unsigned char i; 1273 unsigned char max_piv; 1274 1275 mt = mte_node_type(mas->node); 1276 mn = mas_mn(mas); 1277 slots = ma_slots(mn, mt); 1278 max_gap = 0; 1279 if (unlikely(ma_is_dense(mt))) { 1280 gap = 0; 1281 for (i = 0; i < mt_slots[mt]; i++) { 1282 if (slots[i]) { 1283 if (gap > max_gap) 1284 max_gap = gap; 1285 gap = 0; 1286 } else { 1287 gap++; 1288 } 1289 } 1290 if (gap > max_gap) 1291 max_gap = gap; 1292 return max_gap; 1293 } 1294 1295 /* 1296 * Check the first implied pivot optimizes the loop below and slot 1 may 1297 * be skipped if there is a gap in slot 0. 1298 */ 1299 pivots = ma_pivots(mn, mt); 1300 if (likely(!slots[0])) { 1301 max_gap = pivots[0] - mas->min + 1; 1302 i = 2; 1303 } else { 1304 i = 1; 1305 } 1306 1307 /* reduce max_piv as the special case is checked before the loop */ 1308 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1309 /* 1310 * Check end implied pivot which can only be a gap on the right most 1311 * node. 1312 */ 1313 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1314 gap = ULONG_MAX - pivots[max_piv]; 1315 if (gap > max_gap) 1316 max_gap = gap; 1317 1318 if (max_gap > pivots[max_piv] - mas->min) 1319 return max_gap; 1320 } 1321 1322 for (; i <= max_piv; i++) { 1323 /* data == no gap. */ 1324 if (likely(slots[i])) 1325 continue; 1326 1327 pstart = pivots[i - 1]; 1328 gap = pivots[i] - pstart; 1329 if (gap > max_gap) 1330 max_gap = gap; 1331 1332 /* There cannot be two gaps in a row. */ 1333 i++; 1334 } 1335 return max_gap; 1336 } 1337 1338 /* 1339 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1340 * @node: The maple node 1341 * @gaps: The pointer to the gaps 1342 * @mt: The maple node type 1343 * @off: Pointer to store the offset location of the gap. 1344 * 1345 * Uses the metadata data end to scan backwards across set gaps. 1346 * 1347 * Return: The maximum gap value 1348 */ 1349 static inline unsigned long 1350 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1351 unsigned char *off) 1352 { 1353 unsigned char offset, i; 1354 unsigned long max_gap = 0; 1355 1356 i = offset = ma_meta_end(node, mt); 1357 do { 1358 if (gaps[i] > max_gap) { 1359 max_gap = gaps[i]; 1360 offset = i; 1361 } 1362 } while (i--); 1363 1364 *off = offset; 1365 return max_gap; 1366 } 1367 1368 /* 1369 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1370 * @mas: The maple state. 1371 * 1372 * Return: The gap value. 1373 */ 1374 static inline unsigned long mas_max_gap(struct ma_state *mas) 1375 { 1376 unsigned long *gaps; 1377 unsigned char offset; 1378 enum maple_type mt; 1379 struct maple_node *node; 1380 1381 mt = mte_node_type(mas->node); 1382 if (ma_is_leaf(mt)) 1383 return mas_leaf_max_gap(mas); 1384 1385 node = mas_mn(mas); 1386 MAS_BUG_ON(mas, mt != maple_arange_64); 1387 offset = ma_meta_gap(node); 1388 gaps = ma_gaps(node, mt); 1389 return gaps[offset]; 1390 } 1391 1392 /* 1393 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1394 * @mas: The maple state 1395 * @offset: The gap offset in the parent to set 1396 * @new: The new gap value. 1397 * 1398 * Set the parent gap then continue to set the gap upwards, using the metadata 1399 * of the parent to see if it is necessary to check the node above. 1400 */ 1401 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1402 unsigned long new) 1403 { 1404 unsigned long meta_gap = 0; 1405 struct maple_node *pnode; 1406 struct maple_enode *penode; 1407 unsigned long *pgaps; 1408 unsigned char meta_offset; 1409 enum maple_type pmt; 1410 1411 pnode = mte_parent(mas->node); 1412 pmt = mas_parent_type(mas, mas->node); 1413 penode = mt_mk_node(pnode, pmt); 1414 pgaps = ma_gaps(pnode, pmt); 1415 1416 ascend: 1417 MAS_BUG_ON(mas, pmt != maple_arange_64); 1418 meta_offset = ma_meta_gap(pnode); 1419 meta_gap = pgaps[meta_offset]; 1420 1421 pgaps[offset] = new; 1422 1423 if (meta_gap == new) 1424 return; 1425 1426 if (offset != meta_offset) { 1427 if (meta_gap > new) 1428 return; 1429 1430 ma_set_meta_gap(pnode, pmt, offset); 1431 } else if (new < meta_gap) { 1432 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1433 ma_set_meta_gap(pnode, pmt, meta_offset); 1434 } 1435 1436 if (ma_is_root(pnode)) 1437 return; 1438 1439 /* Go to the parent node. */ 1440 pnode = mte_parent(penode); 1441 pmt = mas_parent_type(mas, penode); 1442 pgaps = ma_gaps(pnode, pmt); 1443 offset = mte_parent_slot(penode); 1444 penode = mt_mk_node(pnode, pmt); 1445 goto ascend; 1446 } 1447 1448 /* 1449 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1450 * @mas: the maple state. 1451 */ 1452 static inline void mas_update_gap(struct ma_state *mas) 1453 { 1454 unsigned char pslot; 1455 unsigned long p_gap; 1456 unsigned long max_gap; 1457 1458 if (!mt_is_alloc(mas->tree)) 1459 return; 1460 1461 if (mte_is_root(mas->node)) 1462 return; 1463 1464 max_gap = mas_max_gap(mas); 1465 1466 pslot = mte_parent_slot(mas->node); 1467 p_gap = ma_gaps(mte_parent(mas->node), 1468 mas_parent_type(mas, mas->node))[pslot]; 1469 1470 if (p_gap != max_gap) 1471 mas_parent_gap(mas, pslot, max_gap); 1472 } 1473 1474 /* 1475 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1476 * @parent with the slot encoded. 1477 * @mas: the maple state (for the tree) 1478 * @parent: the maple encoded node containing the children. 1479 */ 1480 static inline void mas_adopt_children(struct ma_state *mas, 1481 struct maple_enode *parent) 1482 { 1483 enum maple_type type = mte_node_type(parent); 1484 struct maple_node *node = mte_to_node(parent); 1485 void __rcu **slots = ma_slots(node, type); 1486 unsigned long *pivots = ma_pivots(node, type); 1487 struct maple_enode *child; 1488 unsigned char offset; 1489 1490 offset = ma_data_end(node, type, pivots, mas->max); 1491 do { 1492 child = mas_slot_locked(mas, slots, offset); 1493 mas_set_parent(mas, child, parent, offset); 1494 } while (offset--); 1495 } 1496 1497 /* 1498 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1499 * node as dead. 1500 * @mas: the maple state with the new node 1501 * @old_enode: The old maple encoded node to replace. 1502 * @new_height: if we are inserting a root node, update the height of the tree 1503 */ 1504 static inline void mas_put_in_tree(struct ma_state *mas, 1505 struct maple_enode *old_enode, char new_height) 1506 __must_hold(mas->tree->ma_lock) 1507 { 1508 unsigned char offset; 1509 void __rcu **slots; 1510 1511 if (mte_is_root(mas->node)) { 1512 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1513 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1514 mt_set_height(mas->tree, new_height); 1515 } else { 1516 1517 offset = mte_parent_slot(mas->node); 1518 slots = ma_slots(mte_parent(mas->node), 1519 mas_parent_type(mas, mas->node)); 1520 rcu_assign_pointer(slots[offset], mas->node); 1521 } 1522 1523 mte_set_node_dead(old_enode); 1524 } 1525 1526 /* 1527 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1528 * dead, and freeing it. 1529 * the parent encoding to locate the maple node in the tree. 1530 * @mas: the ma_state with @mas->node pointing to the new node. 1531 * @old_enode: The old maple encoded node. 1532 * @new_height: The new height of the tree as a result of the operation 1533 */ 1534 static inline void mas_replace_node(struct ma_state *mas, 1535 struct maple_enode *old_enode, unsigned char new_height) 1536 __must_hold(mas->tree->ma_lock) 1537 { 1538 mas_put_in_tree(mas, old_enode, new_height); 1539 mas_free(mas, old_enode); 1540 } 1541 1542 /* 1543 * mas_find_child() - Find a child who has the parent @mas->node. 1544 * @mas: the maple state with the parent. 1545 * @child: the maple state to store the child. 1546 */ 1547 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) 1548 __must_hold(mas->tree->ma_lock) 1549 { 1550 enum maple_type mt; 1551 unsigned char offset; 1552 unsigned char end; 1553 unsigned long *pivots; 1554 struct maple_enode *entry; 1555 struct maple_node *node; 1556 void __rcu **slots; 1557 1558 mt = mte_node_type(mas->node); 1559 node = mas_mn(mas); 1560 slots = ma_slots(node, mt); 1561 pivots = ma_pivots(node, mt); 1562 end = ma_data_end(node, mt, pivots, mas->max); 1563 for (offset = mas->offset; offset <= end; offset++) { 1564 entry = mas_slot_locked(mas, slots, offset); 1565 if (mte_parent(entry) == node) { 1566 *child = *mas; 1567 mas->offset = offset + 1; 1568 child->offset = offset; 1569 mas_descend(child); 1570 child->offset = 0; 1571 return true; 1572 } 1573 } 1574 return false; 1575 } 1576 1577 /* 1578 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1579 * old data or set b_node->b_end. 1580 * @b_node: the maple_big_node 1581 * @shift: the shift count 1582 */ 1583 static inline void mab_shift_right(struct maple_big_node *b_node, 1584 unsigned char shift) 1585 { 1586 unsigned long size = b_node->b_end * sizeof(unsigned long); 1587 1588 memmove(b_node->pivot + shift, b_node->pivot, size); 1589 memmove(b_node->slot + shift, b_node->slot, size); 1590 if (b_node->type == maple_arange_64) 1591 memmove(b_node->gap + shift, b_node->gap, size); 1592 } 1593 1594 /* 1595 * mab_middle_node() - Check if a middle node is needed (unlikely) 1596 * @b_node: the maple_big_node that contains the data. 1597 * @split: the potential split location 1598 * @slot_count: the size that can be stored in a single node being considered. 1599 * 1600 * Return: true if a middle node is required. 1601 */ 1602 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1603 unsigned char slot_count) 1604 { 1605 unsigned char size = b_node->b_end; 1606 1607 if (size >= 2 * slot_count) 1608 return true; 1609 1610 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1611 return true; 1612 1613 return false; 1614 } 1615 1616 /* 1617 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1618 * @b_node: the maple_big_node with the data 1619 * @split: the suggested split location 1620 * @slot_count: the number of slots in the node being considered. 1621 * 1622 * Return: the split location. 1623 */ 1624 static inline int mab_no_null_split(struct maple_big_node *b_node, 1625 unsigned char split, unsigned char slot_count) 1626 { 1627 if (!b_node->slot[split]) { 1628 /* 1629 * If the split is less than the max slot && the right side will 1630 * still be sufficient, then increment the split on NULL. 1631 */ 1632 if ((split < slot_count - 1) && 1633 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1634 split++; 1635 else 1636 split--; 1637 } 1638 return split; 1639 } 1640 1641 /* 1642 * mab_calc_split() - Calculate the split location and if there needs to be two 1643 * splits. 1644 * @mas: The maple state 1645 * @bn: The maple_big_node with the data 1646 * @mid_split: The second split, if required. 0 otherwise. 1647 * 1648 * Return: The first split location. The middle split is set in @mid_split. 1649 */ 1650 static inline int mab_calc_split(struct ma_state *mas, 1651 struct maple_big_node *bn, unsigned char *mid_split) 1652 { 1653 unsigned char b_end = bn->b_end; 1654 int split = b_end / 2; /* Assume equal split. */ 1655 unsigned char slot_count = mt_slots[bn->type]; 1656 1657 /* 1658 * To support gap tracking, all NULL entries are kept together and a node cannot 1659 * end on a NULL entry, with the exception of the left-most leaf. The 1660 * limitation means that the split of a node must be checked for this condition 1661 * and be able to put more data in one direction or the other. 1662 * 1663 * Although extremely rare, it is possible to enter what is known as the 3-way 1664 * split scenario. The 3-way split comes about by means of a store of a range 1665 * that overwrites the end and beginning of two full nodes. The result is a set 1666 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can 1667 * also be located in different parent nodes which are also full. This can 1668 * carry upwards all the way to the root in the worst case. 1669 */ 1670 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1671 split = b_end / 3; 1672 *mid_split = split * 2; 1673 } else { 1674 *mid_split = 0; 1675 } 1676 1677 /* Avoid ending a node on a NULL entry */ 1678 split = mab_no_null_split(bn, split, slot_count); 1679 1680 if (unlikely(*mid_split)) 1681 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1682 1683 return split; 1684 } 1685 1686 /* 1687 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1688 * and set @b_node->b_end to the next free slot. 1689 * @mas: The maple state 1690 * @mas_start: The starting slot to copy 1691 * @mas_end: The end slot to copy (inclusively) 1692 * @b_node: The maple_big_node to place the data 1693 * @mab_start: The starting location in maple_big_node to store the data. 1694 */ 1695 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1696 unsigned char mas_end, struct maple_big_node *b_node, 1697 unsigned char mab_start) 1698 { 1699 enum maple_type mt; 1700 struct maple_node *node; 1701 void __rcu **slots; 1702 unsigned long *pivots, *gaps; 1703 int i = mas_start, j = mab_start; 1704 unsigned char piv_end; 1705 1706 node = mas_mn(mas); 1707 mt = mte_node_type(mas->node); 1708 pivots = ma_pivots(node, mt); 1709 if (!i) { 1710 b_node->pivot[j] = pivots[i++]; 1711 if (unlikely(i > mas_end)) 1712 goto complete; 1713 j++; 1714 } 1715 1716 piv_end = min(mas_end, mt_pivots[mt]); 1717 for (; i < piv_end; i++, j++) { 1718 b_node->pivot[j] = pivots[i]; 1719 if (unlikely(!b_node->pivot[j])) 1720 goto complete; 1721 1722 if (unlikely(mas->max == b_node->pivot[j])) 1723 goto complete; 1724 } 1725 1726 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1727 1728 complete: 1729 b_node->b_end = ++j; 1730 j -= mab_start; 1731 slots = ma_slots(node, mt); 1732 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1733 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1734 gaps = ma_gaps(node, mt); 1735 memcpy(b_node->gap + mab_start, gaps + mas_start, 1736 sizeof(unsigned long) * j); 1737 } 1738 } 1739 1740 /* 1741 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1742 * @node: The maple node 1743 * @mt: The maple type 1744 * @end: The node end 1745 */ 1746 static inline void mas_leaf_set_meta(struct maple_node *node, 1747 enum maple_type mt, unsigned char end) 1748 { 1749 if (end < mt_slots[mt] - 1) 1750 ma_set_meta(node, mt, 0, end); 1751 } 1752 1753 /* 1754 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 1755 * @b_node: the maple_big_node that has the data 1756 * @mab_start: the start location in @b_node. 1757 * @mab_end: The end location in @b_node (inclusively) 1758 * @mas: The maple state with the maple encoded node. 1759 */ 1760 static inline void mab_mas_cp(struct maple_big_node *b_node, 1761 unsigned char mab_start, unsigned char mab_end, 1762 struct ma_state *mas, bool new_max) 1763 { 1764 int i, j = 0; 1765 enum maple_type mt = mte_node_type(mas->node); 1766 struct maple_node *node = mte_to_node(mas->node); 1767 void __rcu **slots = ma_slots(node, mt); 1768 unsigned long *pivots = ma_pivots(node, mt); 1769 unsigned long *gaps = NULL; 1770 unsigned char end; 1771 1772 if (mab_end - mab_start > mt_pivots[mt]) 1773 mab_end--; 1774 1775 if (!pivots[mt_pivots[mt] - 1]) 1776 slots[mt_pivots[mt]] = NULL; 1777 1778 i = mab_start; 1779 do { 1780 pivots[j++] = b_node->pivot[i++]; 1781 } while (i <= mab_end && likely(b_node->pivot[i])); 1782 1783 memcpy(slots, b_node->slot + mab_start, 1784 sizeof(void *) * (i - mab_start)); 1785 1786 if (new_max) 1787 mas->max = b_node->pivot[i - 1]; 1788 1789 end = j - 1; 1790 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 1791 unsigned long max_gap = 0; 1792 unsigned char offset = 0; 1793 1794 gaps = ma_gaps(node, mt); 1795 do { 1796 gaps[--j] = b_node->gap[--i]; 1797 if (gaps[j] > max_gap) { 1798 offset = j; 1799 max_gap = gaps[j]; 1800 } 1801 } while (j); 1802 1803 ma_set_meta(node, mt, offset, end); 1804 } else { 1805 mas_leaf_set_meta(node, mt, end); 1806 } 1807 } 1808 1809 /* 1810 * mas_store_b_node() - Store an @entry into the b_node while also copying the 1811 * data from a maple encoded node. 1812 * @wr_mas: the maple write state 1813 * @b_node: the maple_big_node to fill with data 1814 * @offset_end: the offset to end copying 1815 * 1816 * Return: The actual end of the data stored in @b_node 1817 */ 1818 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 1819 struct maple_big_node *b_node, unsigned char offset_end) 1820 { 1821 unsigned char slot; 1822 unsigned char b_end; 1823 /* Possible underflow of piv will wrap back to 0 before use. */ 1824 unsigned long piv; 1825 struct ma_state *mas = wr_mas->mas; 1826 1827 b_node->type = wr_mas->type; 1828 b_end = 0; 1829 slot = mas->offset; 1830 if (slot) { 1831 /* Copy start data up to insert. */ 1832 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 1833 b_end = b_node->b_end; 1834 piv = b_node->pivot[b_end - 1]; 1835 } else 1836 piv = mas->min - 1; 1837 1838 if (piv + 1 < mas->index) { 1839 /* Handle range starting after old range */ 1840 b_node->slot[b_end] = wr_mas->content; 1841 if (!wr_mas->content) 1842 b_node->gap[b_end] = mas->index - 1 - piv; 1843 b_node->pivot[b_end++] = mas->index - 1; 1844 } 1845 1846 /* Store the new entry. */ 1847 mas->offset = b_end; 1848 b_node->slot[b_end] = wr_mas->entry; 1849 b_node->pivot[b_end] = mas->last; 1850 1851 /* Appended. */ 1852 if (mas->last >= mas->max) 1853 goto b_end; 1854 1855 /* Handle new range ending before old range ends */ 1856 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 1857 if (piv > mas->last) { 1858 if (offset_end != slot) 1859 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 1860 offset_end); 1861 1862 b_node->slot[++b_end] = wr_mas->content; 1863 if (!wr_mas->content) 1864 b_node->gap[b_end] = piv - mas->last + 1; 1865 b_node->pivot[b_end] = piv; 1866 } 1867 1868 slot = offset_end + 1; 1869 if (slot > mas->end) 1870 goto b_end; 1871 1872 /* Copy end data to the end of the node. */ 1873 mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end); 1874 b_node->b_end--; 1875 return; 1876 1877 b_end: 1878 b_node->b_end = b_end; 1879 } 1880 1881 /* 1882 * mas_prev_sibling() - Find the previous node with the same parent. 1883 * @mas: the maple state 1884 * 1885 * Return: True if there is a previous sibling, false otherwise. 1886 */ 1887 static inline bool mas_prev_sibling(struct ma_state *mas) 1888 { 1889 unsigned int p_slot = mte_parent_slot(mas->node); 1890 1891 /* For root node, p_slot is set to 0 by mte_parent_slot(). */ 1892 if (!p_slot) 1893 return false; 1894 1895 mas_ascend(mas); 1896 mas->offset = p_slot - 1; 1897 mas_descend(mas); 1898 return true; 1899 } 1900 1901 /* 1902 * mas_next_sibling() - Find the next node with the same parent. 1903 * @mas: the maple state 1904 * 1905 * Return: true if there is a next sibling, false otherwise. 1906 */ 1907 static inline bool mas_next_sibling(struct ma_state *mas) 1908 { 1909 MA_STATE(parent, mas->tree, mas->index, mas->last); 1910 1911 if (mte_is_root(mas->node)) 1912 return false; 1913 1914 parent = *mas; 1915 mas_ascend(&parent); 1916 parent.offset = mte_parent_slot(mas->node) + 1; 1917 if (parent.offset > mas_data_end(&parent)) 1918 return false; 1919 1920 *mas = parent; 1921 mas_descend(mas); 1922 return true; 1923 } 1924 1925 /* 1926 * mas_node_or_none() - Set the enode and state. 1927 * @mas: the maple state 1928 * @enode: The encoded maple node. 1929 * 1930 * Set the node to the enode and the status. 1931 */ 1932 static inline void mas_node_or_none(struct ma_state *mas, 1933 struct maple_enode *enode) 1934 { 1935 if (enode) { 1936 mas->node = enode; 1937 mas->status = ma_active; 1938 } else { 1939 mas->node = NULL; 1940 mas->status = ma_none; 1941 } 1942 } 1943 1944 /* 1945 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 1946 * If @mas->index cannot be found within the containing 1947 * node, we traverse to the last entry in the node. 1948 * @wr_mas: The maple write state 1949 * 1950 * Uses mas_slot_locked() and does not need to worry about dead nodes. 1951 */ 1952 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 1953 { 1954 struct ma_state *mas = wr_mas->mas; 1955 unsigned char count, offset; 1956 1957 if (unlikely(ma_is_dense(wr_mas->type))) { 1958 wr_mas->r_max = wr_mas->r_min = mas->index; 1959 mas->offset = mas->index = mas->min; 1960 return; 1961 } 1962 1963 wr_mas->node = mas_mn(wr_mas->mas); 1964 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 1965 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, 1966 wr_mas->pivots, mas->max); 1967 offset = mas->offset; 1968 1969 while (offset < count && mas->index > wr_mas->pivots[offset]) 1970 offset++; 1971 1972 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; 1973 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 1974 wr_mas->offset_end = mas->offset = offset; 1975 } 1976 1977 /* 1978 * mast_rebalance_next() - Rebalance against the next node 1979 * @mast: The maple subtree state 1980 */ 1981 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 1982 { 1983 unsigned char b_end = mast->bn->b_end; 1984 1985 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 1986 mast->bn, b_end); 1987 mast->orig_r->last = mast->orig_r->max; 1988 } 1989 1990 /* 1991 * mast_rebalance_prev() - Rebalance against the previous node 1992 * @mast: The maple subtree state 1993 */ 1994 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 1995 { 1996 unsigned char end = mas_data_end(mast->orig_l) + 1; 1997 unsigned char b_end = mast->bn->b_end; 1998 1999 mab_shift_right(mast->bn, end); 2000 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2001 mast->l->min = mast->orig_l->min; 2002 mast->orig_l->index = mast->orig_l->min; 2003 mast->bn->b_end = end + b_end; 2004 mast->l->offset += end; 2005 } 2006 2007 /* 2008 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2009 * the node to the right. Checking the nodes to the right then the left at each 2010 * level upwards until root is reached. 2011 * Data is copied into the @mast->bn. 2012 * @mast: The maple_subtree_state. 2013 */ 2014 static inline 2015 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2016 { 2017 struct ma_state r_tmp = *mast->orig_r; 2018 struct ma_state l_tmp = *mast->orig_l; 2019 unsigned char depth = 0; 2020 2021 do { 2022 mas_ascend(mast->orig_r); 2023 mas_ascend(mast->orig_l); 2024 depth++; 2025 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2026 mast->orig_r->offset++; 2027 do { 2028 mas_descend(mast->orig_r); 2029 mast->orig_r->offset = 0; 2030 } while (--depth); 2031 2032 mast_rebalance_next(mast); 2033 *mast->orig_l = l_tmp; 2034 return true; 2035 } else if (mast->orig_l->offset != 0) { 2036 mast->orig_l->offset--; 2037 do { 2038 mas_descend(mast->orig_l); 2039 mast->orig_l->offset = 2040 mas_data_end(mast->orig_l); 2041 } while (--depth); 2042 2043 mast_rebalance_prev(mast); 2044 *mast->orig_r = r_tmp; 2045 return true; 2046 } 2047 } while (!mte_is_root(mast->orig_r->node)); 2048 2049 *mast->orig_r = r_tmp; 2050 *mast->orig_l = l_tmp; 2051 return false; 2052 } 2053 2054 /* 2055 * mast_ascend() - Ascend the original left and right maple states. 2056 * @mast: the maple subtree state. 2057 * 2058 * Ascend the original left and right sides. Set the offsets to point to the 2059 * data already in the new tree (@mast->l and @mast->r). 2060 */ 2061 static inline void mast_ascend(struct maple_subtree_state *mast) 2062 { 2063 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2064 mas_ascend(mast->orig_l); 2065 mas_ascend(mast->orig_r); 2066 2067 mast->orig_r->offset = 0; 2068 mast->orig_r->index = mast->r->max; 2069 /* last should be larger than or equal to index */ 2070 if (mast->orig_r->last < mast->orig_r->index) 2071 mast->orig_r->last = mast->orig_r->index; 2072 2073 wr_mas.type = mte_node_type(mast->orig_r->node); 2074 mas_wr_node_walk(&wr_mas); 2075 /* Set up the left side of things */ 2076 mast->orig_l->offset = 0; 2077 mast->orig_l->index = mast->l->min; 2078 wr_mas.mas = mast->orig_l; 2079 wr_mas.type = mte_node_type(mast->orig_l->node); 2080 mas_wr_node_walk(&wr_mas); 2081 2082 mast->bn->type = wr_mas.type; 2083 } 2084 2085 /* 2086 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2087 * @mas: the maple state with the allocations. 2088 * @b_node: the maple_big_node with the type encoding. 2089 * 2090 * Use the node type from the maple_big_node to allocate a new node from the 2091 * ma_state. This function exists mainly for code readability. 2092 * 2093 * Return: A new maple encoded node 2094 */ 2095 static inline struct maple_enode 2096 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2097 { 2098 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2099 } 2100 2101 /* 2102 * mas_mab_to_node() - Set up right and middle nodes 2103 * 2104 * @mas: the maple state that contains the allocations. 2105 * @b_node: the node which contains the data. 2106 * @left: The pointer which will have the left node 2107 * @right: The pointer which may have the right node 2108 * @middle: the pointer which may have the middle node (rare) 2109 * @mid_split: the split location for the middle node 2110 * 2111 * Return: the split of left. 2112 */ 2113 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2114 struct maple_big_node *b_node, struct maple_enode **left, 2115 struct maple_enode **right, struct maple_enode **middle, 2116 unsigned char *mid_split) 2117 { 2118 unsigned char split = 0; 2119 unsigned char slot_count = mt_slots[b_node->type]; 2120 2121 *left = mas_new_ma_node(mas, b_node); 2122 *right = NULL; 2123 *middle = NULL; 2124 *mid_split = 0; 2125 2126 if (b_node->b_end < slot_count) { 2127 split = b_node->b_end; 2128 } else { 2129 split = mab_calc_split(mas, b_node, mid_split); 2130 *right = mas_new_ma_node(mas, b_node); 2131 } 2132 2133 if (*mid_split) 2134 *middle = mas_new_ma_node(mas, b_node); 2135 2136 return split; 2137 2138 } 2139 2140 /* 2141 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2142 * pointer. 2143 * @b_node: the big node to add the entry 2144 * @mas: the maple state to get the pivot (mas->max) 2145 * @entry: the entry to add, if NULL nothing happens. 2146 */ 2147 static inline void mab_set_b_end(struct maple_big_node *b_node, 2148 struct ma_state *mas, 2149 void *entry) 2150 { 2151 if (!entry) 2152 return; 2153 2154 b_node->slot[b_node->b_end] = entry; 2155 if (mt_is_alloc(mas->tree)) 2156 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2157 b_node->pivot[b_node->b_end++] = mas->max; 2158 } 2159 2160 /* 2161 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent 2162 * of @mas->node to either @left or @right, depending on @slot and @split 2163 * 2164 * @mas: the maple state with the node that needs a parent 2165 * @left: possible parent 1 2166 * @right: possible parent 2 2167 * @slot: the slot the mas->node was placed 2168 * @split: the split location between @left and @right 2169 */ 2170 static inline void mas_set_split_parent(struct ma_state *mas, 2171 struct maple_enode *left, 2172 struct maple_enode *right, 2173 unsigned char *slot, unsigned char split) 2174 { 2175 if (mas_is_none(mas)) 2176 return; 2177 2178 if ((*slot) <= split) 2179 mas_set_parent(mas, mas->node, left, *slot); 2180 else if (right) 2181 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2182 2183 (*slot)++; 2184 } 2185 2186 /* 2187 * mte_mid_split_check() - Check if the next node passes the mid-split 2188 * @l: Pointer to left encoded maple node. 2189 * @m: Pointer to middle encoded maple node. 2190 * @r: Pointer to right encoded maple node. 2191 * @slot: The offset 2192 * @split: The split location. 2193 * @mid_split: The middle split. 2194 */ 2195 static inline void mte_mid_split_check(struct maple_enode **l, 2196 struct maple_enode **r, 2197 struct maple_enode *right, 2198 unsigned char slot, 2199 unsigned char *split, 2200 unsigned char mid_split) 2201 { 2202 if (*r == right) 2203 return; 2204 2205 if (slot < mid_split) 2206 return; 2207 2208 *l = *r; 2209 *r = right; 2210 *split = mid_split; 2211 } 2212 2213 /* 2214 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2215 * is taken from @mast->l. 2216 * @mast: the maple subtree state 2217 * @left: the left node 2218 * @right: the right node 2219 * @split: the split location. 2220 */ 2221 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2222 struct maple_enode *left, 2223 struct maple_enode *middle, 2224 struct maple_enode *right, 2225 unsigned char split, 2226 unsigned char mid_split) 2227 { 2228 unsigned char slot; 2229 struct maple_enode *l = left; 2230 struct maple_enode *r = right; 2231 2232 if (mas_is_none(mast->l)) 2233 return; 2234 2235 if (middle) 2236 r = middle; 2237 2238 slot = mast->l->offset; 2239 2240 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2241 mas_set_split_parent(mast->l, l, r, &slot, split); 2242 2243 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2244 mas_set_split_parent(mast->m, l, r, &slot, split); 2245 2246 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2247 mas_set_split_parent(mast->r, l, r, &slot, split); 2248 } 2249 2250 /* 2251 * mas_topiary_node() - Dispose of a single node 2252 * @mas: The maple state for pushing nodes 2253 * @in_rcu: If the tree is in rcu mode 2254 * 2255 * The node will either be RCU freed or pushed back on the maple state. 2256 */ 2257 static inline void mas_topiary_node(struct ma_state *mas, 2258 struct ma_state *tmp_mas, bool in_rcu) 2259 { 2260 struct maple_node *tmp; 2261 struct maple_enode *enode; 2262 2263 if (mas_is_none(tmp_mas)) 2264 return; 2265 2266 enode = tmp_mas->node; 2267 tmp = mte_to_node(enode); 2268 mte_set_node_dead(enode); 2269 ma_free_rcu(tmp); 2270 } 2271 2272 /* 2273 * mas_topiary_replace() - Replace the data with new data, then repair the 2274 * parent links within the new tree. Iterate over the dead sub-tree and collect 2275 * the dead subtrees and topiary the nodes that are no longer of use. 2276 * 2277 * The new tree will have up to three children with the correct parent. Keep 2278 * track of the new entries as they need to be followed to find the next level 2279 * of new entries. 2280 * 2281 * The old tree will have up to three children with the old parent. Keep track 2282 * of the old entries as they may have more nodes below replaced. Nodes within 2283 * [index, last] are dead subtrees, others need to be freed and followed. 2284 * 2285 * @mas: The maple state pointing at the new data 2286 * @old_enode: The maple encoded node being replaced 2287 * @new_height: The new height of the tree as a result of the operation 2288 * 2289 */ 2290 static inline void mas_topiary_replace(struct ma_state *mas, 2291 struct maple_enode *old_enode, unsigned char new_height) 2292 { 2293 struct ma_state tmp[3], tmp_next[3]; 2294 MA_TOPIARY(subtrees, mas->tree); 2295 bool in_rcu; 2296 int i, n; 2297 2298 /* Place data in tree & then mark node as old */ 2299 mas_put_in_tree(mas, old_enode, new_height); 2300 2301 /* Update the parent pointers in the tree */ 2302 tmp[0] = *mas; 2303 tmp[0].offset = 0; 2304 tmp[1].status = ma_none; 2305 tmp[2].status = ma_none; 2306 while (!mte_is_leaf(tmp[0].node)) { 2307 n = 0; 2308 for (i = 0; i < 3; i++) { 2309 if (mas_is_none(&tmp[i])) 2310 continue; 2311 2312 while (n < 3) { 2313 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2314 break; 2315 n++; 2316 } 2317 2318 mas_adopt_children(&tmp[i], tmp[i].node); 2319 } 2320 2321 if (MAS_WARN_ON(mas, n == 0)) 2322 break; 2323 2324 while (n < 3) 2325 tmp_next[n++].status = ma_none; 2326 2327 for (i = 0; i < 3; i++) 2328 tmp[i] = tmp_next[i]; 2329 } 2330 2331 /* Collect the old nodes that need to be discarded */ 2332 if (mte_is_leaf(old_enode)) 2333 return mas_free(mas, old_enode); 2334 2335 tmp[0] = *mas; 2336 tmp[0].offset = 0; 2337 tmp[0].node = old_enode; 2338 tmp[1].status = ma_none; 2339 tmp[2].status = ma_none; 2340 in_rcu = mt_in_rcu(mas->tree); 2341 do { 2342 n = 0; 2343 for (i = 0; i < 3; i++) { 2344 if (mas_is_none(&tmp[i])) 2345 continue; 2346 2347 while (n < 3) { 2348 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2349 break; 2350 2351 if ((tmp_next[n].min >= tmp_next->index) && 2352 (tmp_next[n].max <= tmp_next->last)) { 2353 mat_add(&subtrees, tmp_next[n].node); 2354 tmp_next[n].status = ma_none; 2355 } else { 2356 n++; 2357 } 2358 } 2359 } 2360 2361 if (MAS_WARN_ON(mas, n == 0)) 2362 break; 2363 2364 while (n < 3) 2365 tmp_next[n++].status = ma_none; 2366 2367 for (i = 0; i < 3; i++) { 2368 mas_topiary_node(mas, &tmp[i], in_rcu); 2369 tmp[i] = tmp_next[i]; 2370 } 2371 } while (!mte_is_leaf(tmp[0].node)); 2372 2373 for (i = 0; i < 3; i++) 2374 mas_topiary_node(mas, &tmp[i], in_rcu); 2375 2376 mas_mat_destroy(mas, &subtrees); 2377 } 2378 2379 /* 2380 * mas_wmb_replace() - Write memory barrier and replace 2381 * @mas: The maple state 2382 * @old_enode: The old maple encoded node that is being replaced. 2383 * @new_height: The new height of the tree as a result of the operation 2384 * 2385 * Updates gap as necessary. 2386 */ 2387 static inline void mas_wmb_replace(struct ma_state *mas, 2388 struct maple_enode *old_enode, unsigned char new_height) 2389 { 2390 /* Insert the new data in the tree */ 2391 mas_topiary_replace(mas, old_enode, new_height); 2392 2393 if (mte_is_leaf(mas->node)) 2394 return; 2395 2396 mas_update_gap(mas); 2397 } 2398 2399 /* 2400 * mast_cp_to_nodes() - Copy data out to nodes. 2401 * @mast: The maple subtree state 2402 * @left: The left encoded maple node 2403 * @middle: The middle encoded maple node 2404 * @right: The right encoded maple node 2405 * @split: The location to split between left and (middle ? middle : right) 2406 * @mid_split: The location to split between middle and right. 2407 */ 2408 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2409 struct maple_enode *left, struct maple_enode *middle, 2410 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2411 { 2412 bool new_lmax = true; 2413 2414 mas_node_or_none(mast->l, left); 2415 mas_node_or_none(mast->m, middle); 2416 mas_node_or_none(mast->r, right); 2417 2418 mast->l->min = mast->orig_l->min; 2419 if (split == mast->bn->b_end) { 2420 mast->l->max = mast->orig_r->max; 2421 new_lmax = false; 2422 } 2423 2424 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2425 2426 if (middle) { 2427 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2428 mast->m->min = mast->bn->pivot[split] + 1; 2429 split = mid_split; 2430 } 2431 2432 mast->r->max = mast->orig_r->max; 2433 if (right) { 2434 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2435 mast->r->min = mast->bn->pivot[split] + 1; 2436 } 2437 } 2438 2439 /* 2440 * mast_combine_cp_left - Copy in the original left side of the tree into the 2441 * combined data set in the maple subtree state big node. 2442 * @mast: The maple subtree state 2443 */ 2444 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2445 { 2446 unsigned char l_slot = mast->orig_l->offset; 2447 2448 if (!l_slot) 2449 return; 2450 2451 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2452 } 2453 2454 /* 2455 * mast_combine_cp_right: Copy in the original right side of the tree into the 2456 * combined data set in the maple subtree state big node. 2457 * @mast: The maple subtree state 2458 */ 2459 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2460 { 2461 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2462 return; 2463 2464 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2465 mt_slot_count(mast->orig_r->node), mast->bn, 2466 mast->bn->b_end); 2467 mast->orig_r->last = mast->orig_r->max; 2468 } 2469 2470 /* 2471 * mast_sufficient: Check if the maple subtree state has enough data in the big 2472 * node to create at least one sufficient node 2473 * @mast: the maple subtree state 2474 */ 2475 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2476 { 2477 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2478 return true; 2479 2480 return false; 2481 } 2482 2483 /* 2484 * mast_overflow: Check if there is too much data in the subtree state for a 2485 * single node. 2486 * @mast: The maple subtree state 2487 */ 2488 static inline bool mast_overflow(struct maple_subtree_state *mast) 2489 { 2490 if (mast->bn->b_end > mt_slot_count(mast->orig_l->node)) 2491 return true; 2492 2493 return false; 2494 } 2495 2496 static inline void *mtree_range_walk(struct ma_state *mas) 2497 { 2498 unsigned long *pivots; 2499 unsigned char offset; 2500 struct maple_node *node; 2501 struct maple_enode *next, *last; 2502 enum maple_type type; 2503 void __rcu **slots; 2504 unsigned char end; 2505 unsigned long max, min; 2506 unsigned long prev_max, prev_min; 2507 2508 next = mas->node; 2509 min = mas->min; 2510 max = mas->max; 2511 do { 2512 last = next; 2513 node = mte_to_node(next); 2514 type = mte_node_type(next); 2515 pivots = ma_pivots(node, type); 2516 end = ma_data_end(node, type, pivots, max); 2517 prev_min = min; 2518 prev_max = max; 2519 if (pivots[0] >= mas->index) { 2520 offset = 0; 2521 max = pivots[0]; 2522 goto next; 2523 } 2524 2525 offset = 1; 2526 while (offset < end) { 2527 if (pivots[offset] >= mas->index) { 2528 max = pivots[offset]; 2529 break; 2530 } 2531 offset++; 2532 } 2533 2534 min = pivots[offset - 1] + 1; 2535 next: 2536 slots = ma_slots(node, type); 2537 next = mt_slot(mas->tree, slots, offset); 2538 if (unlikely(ma_dead_node(node))) 2539 goto dead_node; 2540 } while (!ma_is_leaf(type)); 2541 2542 mas->end = end; 2543 mas->offset = offset; 2544 mas->index = min; 2545 mas->last = max; 2546 mas->min = prev_min; 2547 mas->max = prev_max; 2548 mas->node = last; 2549 return (void *)next; 2550 2551 dead_node: 2552 mas_reset(mas); 2553 return NULL; 2554 } 2555 2556 /* 2557 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2558 * @mas: The starting maple state 2559 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2560 * @count: The estimated count of iterations needed. 2561 * 2562 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2563 * is hit. First @b_node is split into two entries which are inserted into the 2564 * next iteration of the loop. @b_node is returned populated with the final 2565 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2566 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2567 * to account of what has been copied into the new sub-tree. The update of 2568 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2569 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2570 * the new sub-tree in case the sub-tree becomes the full tree. 2571 */ 2572 static void mas_spanning_rebalance(struct ma_state *mas, 2573 struct maple_subtree_state *mast, unsigned char count) 2574 { 2575 unsigned char split, mid_split; 2576 unsigned char slot = 0; 2577 unsigned char new_height = 0; /* used if node is a new root */ 2578 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2579 struct maple_enode *old_enode; 2580 2581 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2582 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2583 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2584 2585 /* 2586 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2587 * Rebalancing is done by use of the ``struct maple_topiary``. 2588 */ 2589 mast->l = &l_mas; 2590 mast->m = &m_mas; 2591 mast->r = &r_mas; 2592 l_mas.status = r_mas.status = m_mas.status = ma_none; 2593 2594 /* Check if this is not root and has sufficient data. */ 2595 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2596 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2597 mast_spanning_rebalance(mast); 2598 2599 /* 2600 * Each level of the tree is examined and balanced, pushing data to the left or 2601 * right, or rebalancing against left or right nodes is employed to avoid 2602 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2603 * the tree is created, there may be a mix of new and old nodes. The old nodes 2604 * will have the incorrect parent pointers and currently be in two trees: the 2605 * original tree and the partially new tree. To remedy the parent pointers in 2606 * the old tree, the new data is swapped into the active tree and a walk down 2607 * the tree is performed and the parent pointers are updated. 2608 * See mas_topiary_replace() for more information. 2609 */ 2610 while (count--) { 2611 mast->bn->b_end--; 2612 mast->bn->type = mte_node_type(mast->orig_l->node); 2613 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2614 &mid_split); 2615 mast_set_split_parents(mast, left, middle, right, split, 2616 mid_split); 2617 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2618 new_height++; 2619 2620 /* 2621 * Copy data from next level in the tree to mast->bn from next 2622 * iteration 2623 */ 2624 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2625 mast->bn->type = mte_node_type(left); 2626 2627 /* Root already stored in l->node. */ 2628 if (mas_is_root_limits(mast->l)) 2629 goto new_root; 2630 2631 mast_ascend(mast); 2632 mast_combine_cp_left(mast); 2633 l_mas.offset = mast->bn->b_end; 2634 mab_set_b_end(mast->bn, &l_mas, left); 2635 mab_set_b_end(mast->bn, &m_mas, middle); 2636 mab_set_b_end(mast->bn, &r_mas, right); 2637 2638 /* Copy anything necessary out of the right node. */ 2639 mast_combine_cp_right(mast); 2640 mast->orig_l->last = mast->orig_l->max; 2641 2642 if (mast_sufficient(mast)) { 2643 if (mast_overflow(mast)) 2644 continue; 2645 2646 if (mast->orig_l->node == mast->orig_r->node) { 2647 /* 2648 * The data in b_node should be stored in one 2649 * node and in the tree 2650 */ 2651 slot = mast->l->offset; 2652 break; 2653 } 2654 2655 continue; 2656 } 2657 2658 /* May be a new root stored in mast->bn */ 2659 if (mas_is_root_limits(mast->orig_l)) 2660 break; 2661 2662 mast_spanning_rebalance(mast); 2663 2664 /* rebalancing from other nodes may require another loop. */ 2665 if (!count) 2666 count++; 2667 } 2668 2669 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2670 mte_node_type(mast->orig_l->node)); 2671 2672 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2673 new_height++; 2674 mas_set_parent(mas, left, l_mas.node, slot); 2675 if (middle) 2676 mas_set_parent(mas, middle, l_mas.node, ++slot); 2677 2678 if (right) 2679 mas_set_parent(mas, right, l_mas.node, ++slot); 2680 2681 if (mas_is_root_limits(mast->l)) { 2682 new_root: 2683 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2684 while (!mte_is_root(mast->orig_l->node)) 2685 mast_ascend(mast); 2686 } else { 2687 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2688 } 2689 2690 old_enode = mast->orig_l->node; 2691 mas->depth = l_mas.depth; 2692 mas->node = l_mas.node; 2693 mas->min = l_mas.min; 2694 mas->max = l_mas.max; 2695 mas->offset = l_mas.offset; 2696 mas_wmb_replace(mas, old_enode, new_height); 2697 mtree_range_walk(mas); 2698 return; 2699 } 2700 2701 /* 2702 * mas_rebalance() - Rebalance a given node. 2703 * @mas: The maple state 2704 * @b_node: The big maple node. 2705 * 2706 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2707 * Continue upwards until tree is sufficient. 2708 */ 2709 static inline void mas_rebalance(struct ma_state *mas, 2710 struct maple_big_node *b_node) 2711 { 2712 char empty_count = mas_mt_height(mas); 2713 struct maple_subtree_state mast; 2714 unsigned char shift, b_end = ++b_node->b_end; 2715 2716 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2717 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2718 2719 trace_ma_op(__func__, mas); 2720 2721 /* 2722 * Rebalancing occurs if a node is insufficient. Data is rebalanced 2723 * against the node to the right if it exists, otherwise the node to the 2724 * left of this node is rebalanced against this node. If rebalancing 2725 * causes just one node to be produced instead of two, then the parent 2726 * is also examined and rebalanced if it is insufficient. Every level 2727 * tries to combine the data in the same way. If one node contains the 2728 * entire range of the tree, then that node is used as a new root node. 2729 */ 2730 2731 mast.orig_l = &l_mas; 2732 mast.orig_r = &r_mas; 2733 mast.bn = b_node; 2734 mast.bn->type = mte_node_type(mas->node); 2735 2736 l_mas = r_mas = *mas; 2737 2738 if (mas_next_sibling(&r_mas)) { 2739 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 2740 r_mas.last = r_mas.index = r_mas.max; 2741 } else { 2742 mas_prev_sibling(&l_mas); 2743 shift = mas_data_end(&l_mas) + 1; 2744 mab_shift_right(b_node, shift); 2745 mas->offset += shift; 2746 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 2747 b_node->b_end = shift + b_end; 2748 l_mas.index = l_mas.last = l_mas.min; 2749 } 2750 2751 return mas_spanning_rebalance(mas, &mast, empty_count); 2752 } 2753 2754 /* 2755 * mas_split_final_node() - Split the final node in a subtree operation. 2756 * @mast: the maple subtree state 2757 * @mas: The maple state 2758 */ 2759 static inline void mas_split_final_node(struct maple_subtree_state *mast, 2760 struct ma_state *mas) 2761 { 2762 struct maple_enode *ancestor; 2763 2764 if (mte_is_root(mas->node)) { 2765 if (mt_is_alloc(mas->tree)) 2766 mast->bn->type = maple_arange_64; 2767 else 2768 mast->bn->type = maple_range_64; 2769 } 2770 /* 2771 * Only a single node is used here, could be root. 2772 * The Big_node data should just fit in a single node. 2773 */ 2774 ancestor = mas_new_ma_node(mas, mast->bn); 2775 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 2776 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 2777 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 2778 2779 mast->l->node = ancestor; 2780 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 2781 mas->offset = mast->bn->b_end - 1; 2782 } 2783 2784 /* 2785 * mast_fill_bnode() - Copy data into the big node in the subtree state 2786 * @mast: The maple subtree state 2787 * @mas: the maple state 2788 * @skip: The number of entries to skip for new nodes insertion. 2789 */ 2790 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 2791 struct ma_state *mas, 2792 unsigned char skip) 2793 { 2794 bool cp = true; 2795 unsigned char split; 2796 2797 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2798 2799 if (mte_is_root(mas->node)) { 2800 cp = false; 2801 } else { 2802 mas_ascend(mas); 2803 mas->offset = mte_parent_slot(mas->node); 2804 } 2805 2806 if (cp && mast->l->offset) 2807 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 2808 2809 split = mast->bn->b_end; 2810 mab_set_b_end(mast->bn, mast->l, mast->l->node); 2811 mast->r->offset = mast->bn->b_end; 2812 mab_set_b_end(mast->bn, mast->r, mast->r->node); 2813 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 2814 cp = false; 2815 2816 if (cp) 2817 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 2818 mast->bn, mast->bn->b_end); 2819 2820 mast->bn->b_end--; 2821 mast->bn->type = mte_node_type(mas->node); 2822 } 2823 2824 /* 2825 * mast_split_data() - Split the data in the subtree state big node into regular 2826 * nodes. 2827 * @mast: The maple subtree state 2828 * @mas: The maple state 2829 * @split: The location to split the big node 2830 */ 2831 static inline void mast_split_data(struct maple_subtree_state *mast, 2832 struct ma_state *mas, unsigned char split) 2833 { 2834 unsigned char p_slot; 2835 2836 mab_mas_cp(mast->bn, 0, split, mast->l, true); 2837 mte_set_pivot(mast->r->node, 0, mast->r->max); 2838 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 2839 mast->l->offset = mte_parent_slot(mas->node); 2840 mast->l->max = mast->bn->pivot[split]; 2841 mast->r->min = mast->l->max + 1; 2842 if (mte_is_leaf(mas->node)) 2843 return; 2844 2845 p_slot = mast->orig_l->offset; 2846 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 2847 &p_slot, split); 2848 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 2849 &p_slot, split); 2850 } 2851 2852 /* 2853 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 2854 * data to the right or left node if there is room. 2855 * @mas: The maple state 2856 * @mast: The maple subtree state 2857 * @left: Push left or not. 2858 * 2859 * Keeping the height of the tree low means faster lookups. 2860 * 2861 * Return: True if pushed, false otherwise. 2862 */ 2863 static inline bool mas_push_data(struct ma_state *mas, 2864 struct maple_subtree_state *mast, bool left) 2865 { 2866 unsigned char slot_total = mast->bn->b_end; 2867 unsigned char end, space, split; 2868 2869 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 2870 tmp_mas = *mas; 2871 tmp_mas.depth = mast->l->depth; 2872 2873 if (left && !mas_prev_sibling(&tmp_mas)) 2874 return false; 2875 else if (!left && !mas_next_sibling(&tmp_mas)) 2876 return false; 2877 2878 end = mas_data_end(&tmp_mas); 2879 slot_total += end; 2880 space = 2 * mt_slot_count(mas->node) - 2; 2881 /* -2 instead of -1 to ensure there isn't a triple split */ 2882 if (ma_is_leaf(mast->bn->type)) 2883 space--; 2884 2885 if (mas->max == ULONG_MAX) 2886 space--; 2887 2888 if (slot_total >= space) 2889 return false; 2890 2891 /* Get the data; Fill mast->bn */ 2892 mast->bn->b_end++; 2893 if (left) { 2894 mab_shift_right(mast->bn, end + 1); 2895 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 2896 mast->bn->b_end = slot_total + 1; 2897 } else { 2898 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 2899 } 2900 2901 /* Configure mast for splitting of mast->bn */ 2902 split = mt_slots[mast->bn->type] - 2; 2903 if (left) { 2904 /* Switch mas to prev node */ 2905 *mas = tmp_mas; 2906 /* Start using mast->l for the left side. */ 2907 tmp_mas.node = mast->l->node; 2908 *mast->l = tmp_mas; 2909 } else { 2910 tmp_mas.node = mast->r->node; 2911 *mast->r = tmp_mas; 2912 split = slot_total - split; 2913 } 2914 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 2915 /* Update parent slot for split calculation. */ 2916 if (left) 2917 mast->orig_l->offset += end + 1; 2918 2919 mast_split_data(mast, mas, split); 2920 mast_fill_bnode(mast, mas, 2); 2921 mas_split_final_node(mast, mas); 2922 return true; 2923 } 2924 2925 /* 2926 * mas_split() - Split data that is too big for one node into two. 2927 * @mas: The maple state 2928 * @b_node: The maple big node 2929 */ 2930 static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) 2931 { 2932 struct maple_subtree_state mast; 2933 int height = 0; 2934 unsigned int orig_height = mas_mt_height(mas); 2935 unsigned char mid_split, split = 0; 2936 struct maple_enode *old; 2937 2938 /* 2939 * Splitting is handled differently from any other B-tree; the Maple 2940 * Tree splits upwards. Splitting up means that the split operation 2941 * occurs when the walk of the tree hits the leaves and not on the way 2942 * down. The reason for splitting up is that it is impossible to know 2943 * how much space will be needed until the leaf is (or leaves are) 2944 * reached. Since overwriting data is allowed and a range could 2945 * overwrite more than one range or result in changing one entry into 3 2946 * entries, it is impossible to know if a split is required until the 2947 * data is examined. 2948 * 2949 * Splitting is a balancing act between keeping allocations to a minimum 2950 * and avoiding a 'jitter' event where a tree is expanded to make room 2951 * for an entry followed by a contraction when the entry is removed. To 2952 * accomplish the balance, there are empty slots remaining in both left 2953 * and right nodes after a split. 2954 */ 2955 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2956 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2957 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 2958 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 2959 2960 trace_ma_op(__func__, mas); 2961 2962 mast.l = &l_mas; 2963 mast.r = &r_mas; 2964 mast.orig_l = &prev_l_mas; 2965 mast.orig_r = &prev_r_mas; 2966 mast.bn = b_node; 2967 2968 while (height++ <= orig_height) { 2969 if (mt_slots[b_node->type] > b_node->b_end) { 2970 mas_split_final_node(&mast, mas); 2971 break; 2972 } 2973 2974 l_mas = r_mas = *mas; 2975 l_mas.node = mas_new_ma_node(mas, b_node); 2976 r_mas.node = mas_new_ma_node(mas, b_node); 2977 /* 2978 * Another way that 'jitter' is avoided is to terminate a split up early if the 2979 * left or right node has space to spare. This is referred to as "pushing left" 2980 * or "pushing right" and is similar to the B* tree, except the nodes left or 2981 * right can rarely be reused due to RCU, but the ripple upwards is halted which 2982 * is a significant savings. 2983 */ 2984 /* Try to push left. */ 2985 if (mas_push_data(mas, &mast, true)) { 2986 height++; 2987 break; 2988 } 2989 /* Try to push right. */ 2990 if (mas_push_data(mas, &mast, false)) { 2991 height++; 2992 break; 2993 } 2994 2995 split = mab_calc_split(mas, b_node, &mid_split); 2996 mast_split_data(&mast, mas, split); 2997 /* 2998 * Usually correct, mab_mas_cp in the above call overwrites 2999 * r->max. 3000 */ 3001 mast.r->max = mas->max; 3002 mast_fill_bnode(&mast, mas, 1); 3003 prev_l_mas = *mast.l; 3004 prev_r_mas = *mast.r; 3005 } 3006 3007 /* Set the original node as dead */ 3008 old = mas->node; 3009 mas->node = l_mas.node; 3010 mas_wmb_replace(mas, old, height); 3011 mtree_range_walk(mas); 3012 return; 3013 } 3014 3015 /* 3016 * mas_commit_b_node() - Commit the big node into the tree. 3017 * @wr_mas: The maple write state 3018 * @b_node: The maple big node 3019 */ 3020 static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, 3021 struct maple_big_node *b_node) 3022 { 3023 enum store_type type = wr_mas->mas->store_type; 3024 3025 WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store); 3026 3027 if (type == wr_rebalance) 3028 return mas_rebalance(wr_mas->mas, b_node); 3029 3030 return mas_split(wr_mas->mas, b_node); 3031 } 3032 3033 /* 3034 * mas_root_expand() - Expand a root to a node 3035 * @mas: The maple state 3036 * @entry: The entry to store into the tree 3037 */ 3038 static inline void mas_root_expand(struct ma_state *mas, void *entry) 3039 { 3040 void *contents = mas_root_locked(mas); 3041 enum maple_type type = maple_leaf_64; 3042 struct maple_node *node; 3043 void __rcu **slots; 3044 unsigned long *pivots; 3045 int slot = 0; 3046 3047 node = mas_pop_node(mas); 3048 pivots = ma_pivots(node, type); 3049 slots = ma_slots(node, type); 3050 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3051 mas->node = mt_mk_node(node, type); 3052 mas->status = ma_active; 3053 3054 if (mas->index) { 3055 if (contents) { 3056 rcu_assign_pointer(slots[slot], contents); 3057 if (likely(mas->index > 1)) 3058 slot++; 3059 } 3060 pivots[slot++] = mas->index - 1; 3061 } 3062 3063 rcu_assign_pointer(slots[slot], entry); 3064 mas->offset = slot; 3065 pivots[slot] = mas->last; 3066 if (mas->last != ULONG_MAX) 3067 pivots[++slot] = ULONG_MAX; 3068 3069 mt_set_height(mas->tree, 1); 3070 ma_set_meta(node, maple_leaf_64, 0, slot); 3071 /* swap the new root into the tree */ 3072 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3073 return; 3074 } 3075 3076 /* 3077 * mas_store_root() - Storing value into root. 3078 * @mas: The maple state 3079 * @entry: The entry to store. 3080 * 3081 * There is no root node now and we are storing a value into the root - this 3082 * function either assigns the pointer or expands into a node. 3083 */ 3084 static inline void mas_store_root(struct ma_state *mas, void *entry) 3085 { 3086 if (!entry) { 3087 if (!mas->index) 3088 rcu_assign_pointer(mas->tree->ma_root, NULL); 3089 } else if (likely((mas->last != 0) || (mas->index != 0))) 3090 mas_root_expand(mas, entry); 3091 else if (((unsigned long) (entry) & 3) == 2) 3092 mas_root_expand(mas, entry); 3093 else { 3094 rcu_assign_pointer(mas->tree->ma_root, entry); 3095 mas->status = ma_start; 3096 } 3097 } 3098 3099 /* 3100 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3101 * spans the node. 3102 * @wr_mas: The maple write state 3103 * 3104 * Spanning writes are writes that start in one node and end in another OR if 3105 * the write of a %NULL will cause the node to end with a %NULL. 3106 * 3107 * Return: True if this is a spanning write, false otherwise. 3108 */ 3109 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3110 { 3111 unsigned long max = wr_mas->r_max; 3112 unsigned long last = wr_mas->mas->last; 3113 enum maple_type type = wr_mas->type; 3114 void *entry = wr_mas->entry; 3115 3116 /* Contained in this pivot, fast path */ 3117 if (last < max) 3118 return false; 3119 3120 if (ma_is_leaf(type)) { 3121 max = wr_mas->mas->max; 3122 if (last < max) 3123 return false; 3124 } 3125 3126 if (last == max) { 3127 /* 3128 * The last entry of leaf node cannot be NULL unless it is the 3129 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 3130 */ 3131 if (entry || last == ULONG_MAX) 3132 return false; 3133 } 3134 3135 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3136 return true; 3137 } 3138 3139 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3140 { 3141 wr_mas->type = mte_node_type(wr_mas->mas->node); 3142 mas_wr_node_walk(wr_mas); 3143 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3144 } 3145 3146 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3147 { 3148 wr_mas->mas->max = wr_mas->r_max; 3149 wr_mas->mas->min = wr_mas->r_min; 3150 wr_mas->mas->node = wr_mas->content; 3151 wr_mas->mas->offset = 0; 3152 wr_mas->mas->depth++; 3153 } 3154 /* 3155 * mas_wr_walk() - Walk the tree for a write. 3156 * @wr_mas: The maple write state 3157 * 3158 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3159 * 3160 * Return: True if it's contained in a node, false on spanning write. 3161 */ 3162 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3163 { 3164 struct ma_state *mas = wr_mas->mas; 3165 3166 while (true) { 3167 mas_wr_walk_descend(wr_mas); 3168 if (unlikely(mas_is_span_wr(wr_mas))) 3169 return false; 3170 3171 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3172 mas->offset); 3173 if (ma_is_leaf(wr_mas->type)) 3174 return true; 3175 3176 if (mas->end < mt_slots[wr_mas->type] - 1) 3177 wr_mas->vacant_height = mas->depth + 1; 3178 3179 if (ma_is_root(mas_mn(mas))) { 3180 /* root needs more than 2 entries to be sufficient + 1 */ 3181 if (mas->end > 2) 3182 wr_mas->sufficient_height = 1; 3183 } else if (mas->end > mt_min_slots[wr_mas->type] + 1) 3184 wr_mas->sufficient_height = mas->depth + 1; 3185 3186 mas_wr_walk_traverse(wr_mas); 3187 } 3188 3189 return true; 3190 } 3191 3192 static void mas_wr_walk_index(struct ma_wr_state *wr_mas) 3193 { 3194 struct ma_state *mas = wr_mas->mas; 3195 3196 while (true) { 3197 mas_wr_walk_descend(wr_mas); 3198 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3199 mas->offset); 3200 if (ma_is_leaf(wr_mas->type)) 3201 return; 3202 mas_wr_walk_traverse(wr_mas); 3203 } 3204 } 3205 /* 3206 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 3207 * @l_wr_mas: The left maple write state 3208 * @r_wr_mas: The right maple write state 3209 */ 3210 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3211 struct ma_wr_state *r_wr_mas) 3212 { 3213 struct ma_state *r_mas = r_wr_mas->mas; 3214 struct ma_state *l_mas = l_wr_mas->mas; 3215 unsigned char l_slot; 3216 3217 l_slot = l_mas->offset; 3218 if (!l_wr_mas->content) 3219 l_mas->index = l_wr_mas->r_min; 3220 3221 if ((l_mas->index == l_wr_mas->r_min) && 3222 (l_slot && 3223 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3224 if (l_slot > 1) 3225 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3226 else 3227 l_mas->index = l_mas->min; 3228 3229 l_mas->offset = l_slot - 1; 3230 } 3231 3232 if (!r_wr_mas->content) { 3233 if (r_mas->last < r_wr_mas->r_max) 3234 r_mas->last = r_wr_mas->r_max; 3235 r_mas->offset++; 3236 } else if ((r_mas->last == r_wr_mas->r_max) && 3237 (r_mas->last < r_mas->max) && 3238 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3239 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3240 r_wr_mas->type, r_mas->offset + 1); 3241 r_mas->offset++; 3242 } 3243 } 3244 3245 static inline void *mas_state_walk(struct ma_state *mas) 3246 { 3247 void *entry; 3248 3249 entry = mas_start(mas); 3250 if (mas_is_none(mas)) 3251 return NULL; 3252 3253 if (mas_is_ptr(mas)) 3254 return entry; 3255 3256 return mtree_range_walk(mas); 3257 } 3258 3259 /* 3260 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3261 * to date. 3262 * 3263 * @mas: The maple state. 3264 * 3265 * Note: Leaves mas in undesirable state. 3266 * Return: The entry for @mas->index or %NULL on dead node. 3267 */ 3268 static inline void *mtree_lookup_walk(struct ma_state *mas) 3269 { 3270 unsigned long *pivots; 3271 unsigned char offset; 3272 struct maple_node *node; 3273 struct maple_enode *next; 3274 enum maple_type type; 3275 void __rcu **slots; 3276 unsigned char end; 3277 3278 next = mas->node; 3279 do { 3280 node = mte_to_node(next); 3281 type = mte_node_type(next); 3282 pivots = ma_pivots(node, type); 3283 end = mt_pivots[type]; 3284 offset = 0; 3285 do { 3286 if (pivots[offset] >= mas->index) 3287 break; 3288 } while (++offset < end); 3289 3290 slots = ma_slots(node, type); 3291 next = mt_slot(mas->tree, slots, offset); 3292 if (unlikely(ma_dead_node(node))) 3293 goto dead_node; 3294 } while (!ma_is_leaf(type)); 3295 3296 return (void *)next; 3297 3298 dead_node: 3299 mas_reset(mas); 3300 return NULL; 3301 } 3302 3303 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 3304 /* 3305 * mas_new_root() - Create a new root node that only contains the entry passed 3306 * in. 3307 * @mas: The maple state 3308 * @entry: The entry to store. 3309 * 3310 * Only valid when the index == 0 and the last == ULONG_MAX 3311 */ 3312 static inline void mas_new_root(struct ma_state *mas, void *entry) 3313 { 3314 struct maple_enode *root = mas_root_locked(mas); 3315 enum maple_type type = maple_leaf_64; 3316 struct maple_node *node; 3317 void __rcu **slots; 3318 unsigned long *pivots; 3319 3320 WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); 3321 3322 if (!entry) { 3323 mt_set_height(mas->tree, 0); 3324 rcu_assign_pointer(mas->tree->ma_root, entry); 3325 mas->status = ma_start; 3326 goto done; 3327 } 3328 3329 node = mas_pop_node(mas); 3330 pivots = ma_pivots(node, type); 3331 slots = ma_slots(node, type); 3332 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3333 mas->node = mt_mk_node(node, type); 3334 mas->status = ma_active; 3335 rcu_assign_pointer(slots[0], entry); 3336 pivots[0] = mas->last; 3337 mt_set_height(mas->tree, 1); 3338 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3339 3340 done: 3341 if (xa_is_node(root)) 3342 mte_destroy_walk(root, mas->tree); 3343 3344 return; 3345 } 3346 /* 3347 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3348 * and new nodes where necessary, then place the sub-tree in the actual tree. 3349 * Note that mas is expected to point to the node which caused the store to 3350 * span. 3351 * @wr_mas: The maple write state 3352 */ 3353 static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3354 { 3355 struct maple_subtree_state mast; 3356 struct maple_big_node b_node; 3357 struct ma_state *mas; 3358 unsigned char height; 3359 3360 /* Left and Right side of spanning store */ 3361 MA_STATE(l_mas, NULL, 0, 0); 3362 MA_STATE(r_mas, NULL, 0, 0); 3363 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3364 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3365 3366 /* 3367 * A store operation that spans multiple nodes is called a spanning 3368 * store and is handled early in the store call stack by the function 3369 * mas_is_span_wr(). When a spanning store is identified, the maple 3370 * state is duplicated. The first maple state walks the left tree path 3371 * to ``index``, the duplicate walks the right tree path to ``last``. 3372 * The data in the two nodes are combined into a single node, two nodes, 3373 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3374 * written to the last entry of a node is considered a spanning store as 3375 * a rebalance is required for the operation to complete and an overflow 3376 * of data may happen. 3377 */ 3378 mas = wr_mas->mas; 3379 trace_ma_op(__func__, mas); 3380 3381 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3382 return mas_new_root(mas, wr_mas->entry); 3383 /* 3384 * Node rebalancing may occur due to this store, so there may be three new 3385 * entries per level plus a new root. 3386 */ 3387 height = mas_mt_height(mas); 3388 3389 /* 3390 * Set up right side. Need to get to the next offset after the spanning 3391 * store to ensure it's not NULL and to combine both the next node and 3392 * the node with the start together. 3393 */ 3394 r_mas = *mas; 3395 /* Avoid overflow, walk to next slot in the tree. */ 3396 if (r_mas.last + 1) 3397 r_mas.last++; 3398 3399 r_mas.index = r_mas.last; 3400 mas_wr_walk_index(&r_wr_mas); 3401 r_mas.last = r_mas.index = mas->last; 3402 3403 /* Set up left side. */ 3404 l_mas = *mas; 3405 mas_wr_walk_index(&l_wr_mas); 3406 3407 if (!wr_mas->entry) { 3408 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3409 mas->offset = l_mas.offset; 3410 mas->index = l_mas.index; 3411 mas->last = l_mas.last = r_mas.last; 3412 } 3413 3414 /* expanding NULLs may make this cover the entire range */ 3415 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3416 mas_set_range(mas, 0, ULONG_MAX); 3417 return mas_new_root(mas, wr_mas->entry); 3418 } 3419 3420 memset(&b_node, 0, sizeof(struct maple_big_node)); 3421 /* Copy l_mas and store the value in b_node. */ 3422 mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); 3423 /* Copy r_mas into b_node if there is anything to copy. */ 3424 if (r_mas.max > r_mas.last) 3425 mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, 3426 &b_node, b_node.b_end + 1); 3427 else 3428 b_node.b_end++; 3429 3430 /* Stop spanning searches by searching for just index. */ 3431 l_mas.index = l_mas.last = mas->index; 3432 3433 mast.bn = &b_node; 3434 mast.orig_l = &l_mas; 3435 mast.orig_r = &r_mas; 3436 /* Combine l_mas and r_mas and split them up evenly again. */ 3437 return mas_spanning_rebalance(mas, &mast, height + 1); 3438 } 3439 3440 /* 3441 * mas_wr_node_store() - Attempt to store the value in a node 3442 * @wr_mas: The maple write state 3443 * 3444 * Attempts to reuse the node, but may allocate. 3445 */ 3446 static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, 3447 unsigned char new_end) 3448 { 3449 struct ma_state *mas = wr_mas->mas; 3450 void __rcu **dst_slots; 3451 unsigned long *dst_pivots; 3452 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3453 struct maple_node reuse, *newnode; 3454 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3455 bool in_rcu = mt_in_rcu(mas->tree); 3456 unsigned char height = mas_mt_height(mas); 3457 3458 if (mas->last == wr_mas->end_piv) 3459 offset_end++; /* don't copy this offset */ 3460 3461 /* set up node. */ 3462 if (in_rcu) { 3463 newnode = mas_pop_node(mas); 3464 } else { 3465 memset(&reuse, 0, sizeof(struct maple_node)); 3466 newnode = &reuse; 3467 } 3468 3469 newnode->parent = mas_mn(mas)->parent; 3470 dst_pivots = ma_pivots(newnode, wr_mas->type); 3471 dst_slots = ma_slots(newnode, wr_mas->type); 3472 /* Copy from start to insert point */ 3473 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3474 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3475 3476 /* Handle insert of new range starting after old range */ 3477 if (wr_mas->r_min < mas->index) { 3478 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3479 dst_pivots[mas->offset++] = mas->index - 1; 3480 } 3481 3482 /* Store the new entry and range end. */ 3483 if (mas->offset < node_pivots) 3484 dst_pivots[mas->offset] = mas->last; 3485 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3486 3487 /* 3488 * this range wrote to the end of the node or it overwrote the rest of 3489 * the data 3490 */ 3491 if (offset_end > mas->end) 3492 goto done; 3493 3494 dst_offset = mas->offset + 1; 3495 /* Copy to the end of node if necessary. */ 3496 copy_size = mas->end - offset_end + 1; 3497 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3498 sizeof(void *) * copy_size); 3499 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3500 sizeof(unsigned long) * (copy_size - 1)); 3501 3502 if (new_end < node_pivots) 3503 dst_pivots[new_end] = mas->max; 3504 3505 done: 3506 mas_leaf_set_meta(newnode, maple_leaf_64, new_end); 3507 if (in_rcu) { 3508 struct maple_enode *old_enode = mas->node; 3509 3510 mas->node = mt_mk_node(newnode, wr_mas->type); 3511 mas_replace_node(mas, old_enode, height); 3512 } else { 3513 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3514 } 3515 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3516 mas_update_gap(mas); 3517 mas->end = new_end; 3518 return; 3519 } 3520 3521 /* 3522 * mas_wr_slot_store: Attempt to store a value in a slot. 3523 * @wr_mas: the maple write state 3524 */ 3525 static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) 3526 { 3527 struct ma_state *mas = wr_mas->mas; 3528 unsigned char offset = mas->offset; 3529 void __rcu **slots = wr_mas->slots; 3530 bool gap = false; 3531 3532 gap |= !mt_slot_locked(mas->tree, slots, offset); 3533 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 3534 3535 if (wr_mas->offset_end - offset == 1) { 3536 if (mas->index == wr_mas->r_min) { 3537 /* Overwriting the range and a part of the next one */ 3538 rcu_assign_pointer(slots[offset], wr_mas->entry); 3539 wr_mas->pivots[offset] = mas->last; 3540 } else { 3541 /* Overwriting a part of the range and the next one */ 3542 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3543 wr_mas->pivots[offset] = mas->index - 1; 3544 mas->offset++; /* Keep mas accurate. */ 3545 } 3546 } else { 3547 WARN_ON_ONCE(mt_in_rcu(mas->tree)); 3548 /* 3549 * Expand the range, only partially overwriting the previous and 3550 * next ranges 3551 */ 3552 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 3553 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3554 wr_mas->pivots[offset] = mas->index - 1; 3555 wr_mas->pivots[offset + 1] = mas->last; 3556 mas->offset++; /* Keep mas accurate. */ 3557 } 3558 3559 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3560 /* 3561 * Only update gap when the new entry is empty or there is an empty 3562 * entry in the original two ranges. 3563 */ 3564 if (!wr_mas->entry || gap) 3565 mas_update_gap(mas); 3566 3567 return; 3568 } 3569 3570 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 3571 { 3572 struct ma_state *mas = wr_mas->mas; 3573 3574 if (!wr_mas->slots[wr_mas->offset_end]) { 3575 /* If this one is null, the next and prev are not */ 3576 mas->last = wr_mas->end_piv; 3577 } else { 3578 /* Check next slot(s) if we are overwriting the end */ 3579 if ((mas->last == wr_mas->end_piv) && 3580 (mas->end != wr_mas->offset_end) && 3581 !wr_mas->slots[wr_mas->offset_end + 1]) { 3582 wr_mas->offset_end++; 3583 if (wr_mas->offset_end == mas->end) 3584 mas->last = mas->max; 3585 else 3586 mas->last = wr_mas->pivots[wr_mas->offset_end]; 3587 wr_mas->end_piv = mas->last; 3588 } 3589 } 3590 3591 if (!wr_mas->content) { 3592 /* If this one is null, the next and prev are not */ 3593 mas->index = wr_mas->r_min; 3594 } else { 3595 /* Check prev slot if we are overwriting the start */ 3596 if (mas->index == wr_mas->r_min && mas->offset && 3597 !wr_mas->slots[mas->offset - 1]) { 3598 mas->offset--; 3599 wr_mas->r_min = mas->index = 3600 mas_safe_min(mas, wr_mas->pivots, mas->offset); 3601 wr_mas->r_max = wr_mas->pivots[mas->offset]; 3602 } 3603 } 3604 } 3605 3606 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 3607 { 3608 while ((wr_mas->offset_end < wr_mas->mas->end) && 3609 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 3610 wr_mas->offset_end++; 3611 3612 if (wr_mas->offset_end < wr_mas->mas->end) 3613 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 3614 else 3615 wr_mas->end_piv = wr_mas->mas->max; 3616 } 3617 3618 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 3619 { 3620 struct ma_state *mas = wr_mas->mas; 3621 unsigned char new_end = mas->end + 2; 3622 3623 new_end -= wr_mas->offset_end - mas->offset; 3624 if (wr_mas->r_min == mas->index) 3625 new_end--; 3626 3627 if (wr_mas->end_piv == mas->last) 3628 new_end--; 3629 3630 return new_end; 3631 } 3632 3633 /* 3634 * mas_wr_append: Attempt to append 3635 * @wr_mas: the maple write state 3636 * @new_end: The end of the node after the modification 3637 * 3638 * This is currently unsafe in rcu mode since the end of the node may be cached 3639 * by readers while the node contents may be updated which could result in 3640 * inaccurate information. 3641 */ 3642 static inline void mas_wr_append(struct ma_wr_state *wr_mas, 3643 unsigned char new_end) 3644 { 3645 struct ma_state *mas = wr_mas->mas; 3646 void __rcu **slots; 3647 unsigned char end = mas->end; 3648 3649 if (new_end < mt_pivots[wr_mas->type]) { 3650 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 3651 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 3652 } 3653 3654 slots = wr_mas->slots; 3655 if (new_end == end + 1) { 3656 if (mas->last == wr_mas->r_max) { 3657 /* Append to end of range */ 3658 rcu_assign_pointer(slots[new_end], wr_mas->entry); 3659 wr_mas->pivots[end] = mas->index - 1; 3660 mas->offset = new_end; 3661 } else { 3662 /* Append to start of range */ 3663 rcu_assign_pointer(slots[new_end], wr_mas->content); 3664 wr_mas->pivots[end] = mas->last; 3665 rcu_assign_pointer(slots[end], wr_mas->entry); 3666 } 3667 } else { 3668 /* Append to the range without touching any boundaries. */ 3669 rcu_assign_pointer(slots[new_end], wr_mas->content); 3670 wr_mas->pivots[end + 1] = mas->last; 3671 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 3672 wr_mas->pivots[end] = mas->index - 1; 3673 mas->offset = end + 1; 3674 } 3675 3676 if (!wr_mas->content || !wr_mas->entry) 3677 mas_update_gap(mas); 3678 3679 mas->end = new_end; 3680 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 3681 return; 3682 } 3683 3684 /* 3685 * mas_wr_bnode() - Slow path for a modification. 3686 * @wr_mas: The write maple state 3687 * 3688 * This is where split, rebalance end up. 3689 */ 3690 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 3691 { 3692 struct maple_big_node b_node; 3693 3694 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 3695 memset(&b_node, 0, sizeof(struct maple_big_node)); 3696 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 3697 mas_commit_b_node(wr_mas, &b_node); 3698 } 3699 3700 /* 3701 * mas_wr_store_entry() - Internal call to store a value 3702 * @wr_mas: The maple write state 3703 */ 3704 static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas) 3705 { 3706 struct ma_state *mas = wr_mas->mas; 3707 unsigned char new_end = mas_wr_new_end(wr_mas); 3708 3709 switch (mas->store_type) { 3710 case wr_exact_fit: 3711 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 3712 if (!!wr_mas->entry ^ !!wr_mas->content) 3713 mas_update_gap(mas); 3714 break; 3715 case wr_append: 3716 mas_wr_append(wr_mas, new_end); 3717 break; 3718 case wr_slot_store: 3719 mas_wr_slot_store(wr_mas); 3720 break; 3721 case wr_node_store: 3722 mas_wr_node_store(wr_mas, new_end); 3723 break; 3724 case wr_spanning_store: 3725 mas_wr_spanning_store(wr_mas); 3726 break; 3727 case wr_split_store: 3728 case wr_rebalance: 3729 mas_wr_bnode(wr_mas); 3730 break; 3731 case wr_new_root: 3732 mas_new_root(mas, wr_mas->entry); 3733 break; 3734 case wr_store_root: 3735 mas_store_root(mas, wr_mas->entry); 3736 break; 3737 case wr_invalid: 3738 MT_BUG_ON(mas->tree, 1); 3739 } 3740 3741 return; 3742 } 3743 3744 static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas) 3745 { 3746 struct ma_state *mas = wr_mas->mas; 3747 3748 if (!mas_is_active(mas)) { 3749 if (mas_is_start(mas)) 3750 goto set_content; 3751 3752 if (unlikely(mas_is_paused(mas))) 3753 goto reset; 3754 3755 if (unlikely(mas_is_none(mas))) 3756 goto reset; 3757 3758 if (unlikely(mas_is_overflow(mas))) 3759 goto reset; 3760 3761 if (unlikely(mas_is_underflow(mas))) 3762 goto reset; 3763 } 3764 3765 /* 3766 * A less strict version of mas_is_span_wr() where we allow spanning 3767 * writes within this node. This is to stop partial walks in 3768 * mas_prealloc() from being reset. 3769 */ 3770 if (mas->last > mas->max) 3771 goto reset; 3772 3773 if (wr_mas->entry) 3774 goto set_content; 3775 3776 if (mte_is_leaf(mas->node) && mas->last == mas->max) 3777 goto reset; 3778 3779 goto set_content; 3780 3781 reset: 3782 mas_reset(mas); 3783 set_content: 3784 wr_mas->content = mas_start(mas); 3785 } 3786 3787 /** 3788 * mas_prealloc_calc() - Calculate number of nodes needed for a 3789 * given store oepration 3790 * @wr_mas: The maple write state 3791 * @entry: The entry to store into the tree 3792 * 3793 * Return: Number of nodes required for preallocation. 3794 */ 3795 static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry) 3796 { 3797 struct ma_state *mas = wr_mas->mas; 3798 unsigned char height = mas_mt_height(mas); 3799 int ret = height * 3 + 1; 3800 unsigned char delta = height - wr_mas->vacant_height; 3801 3802 switch (mas->store_type) { 3803 case wr_exact_fit: 3804 case wr_append: 3805 case wr_slot_store: 3806 ret = 0; 3807 break; 3808 case wr_spanning_store: 3809 if (wr_mas->sufficient_height < wr_mas->vacant_height) 3810 ret = (height - wr_mas->sufficient_height) * 3 + 1; 3811 else 3812 ret = delta * 3 + 1; 3813 break; 3814 case wr_split_store: 3815 ret = delta * 2 + 1; 3816 break; 3817 case wr_rebalance: 3818 if (wr_mas->sufficient_height < wr_mas->vacant_height) 3819 ret = (height - wr_mas->sufficient_height) * 2 + 1; 3820 else 3821 ret = delta * 2 + 1; 3822 break; 3823 case wr_node_store: 3824 ret = mt_in_rcu(mas->tree) ? 1 : 0; 3825 break; 3826 case wr_new_root: 3827 ret = 1; 3828 break; 3829 case wr_store_root: 3830 if (likely((mas->last != 0) || (mas->index != 0))) 3831 ret = 1; 3832 else if (((unsigned long) (entry) & 3) == 2) 3833 ret = 1; 3834 else 3835 ret = 0; 3836 break; 3837 case wr_invalid: 3838 WARN_ON_ONCE(1); 3839 } 3840 3841 mas->node_request = ret; 3842 } 3843 3844 /* 3845 * mas_wr_store_type() - Determine the store type for a given 3846 * store operation. 3847 * @wr_mas: The maple write state 3848 * 3849 * Return: the type of store needed for the operation 3850 */ 3851 static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) 3852 { 3853 struct ma_state *mas = wr_mas->mas; 3854 unsigned char new_end; 3855 3856 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 3857 return wr_store_root; 3858 3859 if (unlikely(!mas_wr_walk(wr_mas))) 3860 return wr_spanning_store; 3861 3862 /* At this point, we are at the leaf node that needs to be altered. */ 3863 mas_wr_end_piv(wr_mas); 3864 if (!wr_mas->entry) 3865 mas_wr_extend_null(wr_mas); 3866 3867 if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) 3868 return wr_exact_fit; 3869 3870 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3871 return wr_new_root; 3872 3873 new_end = mas_wr_new_end(wr_mas); 3874 /* Potential spanning rebalance collapsing a node */ 3875 if (new_end < mt_min_slots[wr_mas->type]) { 3876 if (!mte_is_root(mas->node)) 3877 return wr_rebalance; 3878 return wr_node_store; 3879 } 3880 3881 if (new_end >= mt_slots[wr_mas->type]) 3882 return wr_split_store; 3883 3884 if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) 3885 return wr_append; 3886 3887 if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || 3888 (wr_mas->offset_end - mas->offset == 1))) 3889 return wr_slot_store; 3890 3891 return wr_node_store; 3892 } 3893 3894 /** 3895 * mas_wr_preallocate() - Preallocate enough nodes for a store operation 3896 * @wr_mas: The maple write state 3897 * @entry: The entry that will be stored 3898 * 3899 */ 3900 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) 3901 { 3902 struct ma_state *mas = wr_mas->mas; 3903 3904 mas_wr_prealloc_setup(wr_mas); 3905 mas->store_type = mas_wr_store_type(wr_mas); 3906 mas_prealloc_calc(wr_mas, entry); 3907 if (!mas->node_request) 3908 return; 3909 3910 mas_alloc_nodes(mas, GFP_NOWAIT); 3911 } 3912 3913 /** 3914 * mas_insert() - Internal call to insert a value 3915 * @mas: The maple state 3916 * @entry: The entry to store 3917 * 3918 * Return: %NULL or the contents that already exists at the requested index 3919 * otherwise. The maple state needs to be checked for error conditions. 3920 */ 3921 static inline void *mas_insert(struct ma_state *mas, void *entry) 3922 { 3923 MA_WR_STATE(wr_mas, mas, entry); 3924 3925 /* 3926 * Inserting a new range inserts either 0, 1, or 2 pivots within the 3927 * tree. If the insert fits exactly into an existing gap with a value 3928 * of NULL, then the slot only needs to be written with the new value. 3929 * If the range being inserted is adjacent to another range, then only a 3930 * single pivot needs to be inserted (as well as writing the entry). If 3931 * the new range is within a gap but does not touch any other ranges, 3932 * then two pivots need to be inserted: the start - 1, and the end. As 3933 * usual, the entry must be written. Most operations require a new node 3934 * to be allocated and replace an existing node to ensure RCU safety, 3935 * when in RCU mode. The exception to requiring a newly allocated node 3936 * is when inserting at the end of a node (appending). When done 3937 * carefully, appending can reuse the node in place. 3938 */ 3939 wr_mas.content = mas_start(mas); 3940 if (wr_mas.content) 3941 goto exists; 3942 3943 mas_wr_preallocate(&wr_mas, entry); 3944 if (mas_is_err(mas)) 3945 return NULL; 3946 3947 /* spanning writes always overwrite something */ 3948 if (mas->store_type == wr_spanning_store) 3949 goto exists; 3950 3951 /* At this point, we are at the leaf node that needs to be altered. */ 3952 if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) { 3953 wr_mas.offset_end = mas->offset; 3954 wr_mas.end_piv = wr_mas.r_max; 3955 3956 if (wr_mas.content || (mas->last > wr_mas.r_max)) 3957 goto exists; 3958 } 3959 3960 mas_wr_store_entry(&wr_mas); 3961 return wr_mas.content; 3962 3963 exists: 3964 mas_set_err(mas, -EEXIST); 3965 return wr_mas.content; 3966 3967 } 3968 3969 /** 3970 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry 3971 * @mas: The maple state. 3972 * @startp: Pointer to ID. 3973 * @range_lo: Lower bound of range to search. 3974 * @range_hi: Upper bound of range to search. 3975 * @entry: The entry to store. 3976 * @next: Pointer to next ID to allocate. 3977 * @gfp: The GFP_FLAGS to use for allocations. 3978 * 3979 * Return: 0 if the allocation succeeded without wrapping, 1 if the 3980 * allocation succeeded after wrapping, or -EBUSY if there are no 3981 * free entries. 3982 */ 3983 int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, 3984 void *entry, unsigned long range_lo, unsigned long range_hi, 3985 unsigned long *next, gfp_t gfp) 3986 { 3987 unsigned long min = range_lo; 3988 int ret = 0; 3989 3990 range_lo = max(min, *next); 3991 ret = mas_empty_area(mas, range_lo, range_hi, 1); 3992 if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) { 3993 mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED; 3994 ret = 1; 3995 } 3996 if (ret < 0 && range_lo > min) { 3997 mas_reset(mas); 3998 ret = mas_empty_area(mas, min, range_hi, 1); 3999 if (ret == 0) 4000 ret = 1; 4001 } 4002 if (ret < 0) 4003 return ret; 4004 4005 do { 4006 mas_insert(mas, entry); 4007 } while (mas_nomem(mas, gfp)); 4008 if (mas_is_err(mas)) 4009 return xa_err(mas->node); 4010 4011 *startp = mas->index; 4012 *next = *startp + 1; 4013 if (*next == 0) 4014 mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; 4015 4016 mas_destroy(mas); 4017 return ret; 4018 } 4019 EXPORT_SYMBOL(mas_alloc_cyclic); 4020 4021 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4022 { 4023 retry: 4024 mas_set(mas, index); 4025 mas_state_walk(mas); 4026 if (mas_is_start(mas)) 4027 goto retry; 4028 } 4029 4030 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, 4031 struct maple_node *node, const unsigned long index) 4032 { 4033 if (unlikely(ma_dead_node(node))) { 4034 mas_rewalk(mas, index); 4035 return true; 4036 } 4037 return false; 4038 } 4039 4040 /* 4041 * mas_prev_node() - Find the prev non-null entry at the same level in the 4042 * tree. The prev value will be mas->node[mas->offset] or the status will be 4043 * ma_none. 4044 * @mas: The maple state 4045 * @min: The lower limit to search 4046 * 4047 * The prev node value will be mas->node[mas->offset] or the status will be 4048 * ma_none. 4049 * Return: 1 if the node is dead, 0 otherwise. 4050 */ 4051 static int mas_prev_node(struct ma_state *mas, unsigned long min) 4052 { 4053 enum maple_type mt; 4054 int offset, level; 4055 void __rcu **slots; 4056 struct maple_node *node; 4057 unsigned long *pivots; 4058 unsigned long max; 4059 4060 node = mas_mn(mas); 4061 if (!mas->min) 4062 goto no_entry; 4063 4064 max = mas->min - 1; 4065 if (max < min) 4066 goto no_entry; 4067 4068 level = 0; 4069 do { 4070 if (ma_is_root(node)) 4071 goto no_entry; 4072 4073 /* Walk up. */ 4074 if (unlikely(mas_ascend(mas))) 4075 return 1; 4076 offset = mas->offset; 4077 level++; 4078 node = mas_mn(mas); 4079 } while (!offset); 4080 4081 offset--; 4082 mt = mte_node_type(mas->node); 4083 while (level > 1) { 4084 level--; 4085 slots = ma_slots(node, mt); 4086 mas->node = mas_slot(mas, slots, offset); 4087 if (unlikely(ma_dead_node(node))) 4088 return 1; 4089 4090 mt = mte_node_type(mas->node); 4091 node = mas_mn(mas); 4092 pivots = ma_pivots(node, mt); 4093 offset = ma_data_end(node, mt, pivots, max); 4094 if (unlikely(ma_dead_node(node))) 4095 return 1; 4096 } 4097 4098 slots = ma_slots(node, mt); 4099 mas->node = mas_slot(mas, slots, offset); 4100 pivots = ma_pivots(node, mt); 4101 if (unlikely(ma_dead_node(node))) 4102 return 1; 4103 4104 if (likely(offset)) 4105 mas->min = pivots[offset - 1] + 1; 4106 mas->max = max; 4107 mas->offset = mas_data_end(mas); 4108 if (unlikely(mte_dead_node(mas->node))) 4109 return 1; 4110 4111 mas->end = mas->offset; 4112 return 0; 4113 4114 no_entry: 4115 if (unlikely(ma_dead_node(node))) 4116 return 1; 4117 4118 mas->status = ma_underflow; 4119 return 0; 4120 } 4121 4122 /* 4123 * mas_prev_slot() - Get the entry in the previous slot 4124 * 4125 * @mas: The maple state 4126 * @min: The minimum starting range 4127 * @empty: Can be empty 4128 * 4129 * Return: The entry in the previous slot which is possibly NULL 4130 */ 4131 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4132 { 4133 void *entry; 4134 void __rcu **slots; 4135 unsigned long pivot; 4136 enum maple_type type; 4137 unsigned long *pivots; 4138 struct maple_node *node; 4139 unsigned long save_point = mas->index; 4140 4141 retry: 4142 node = mas_mn(mas); 4143 type = mte_node_type(mas->node); 4144 pivots = ma_pivots(node, type); 4145 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4146 goto retry; 4147 4148 if (mas->min <= min) { 4149 pivot = mas_safe_min(mas, pivots, mas->offset); 4150 4151 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4152 goto retry; 4153 4154 if (pivot <= min) 4155 goto underflow; 4156 } 4157 4158 again: 4159 if (likely(mas->offset)) { 4160 mas->offset--; 4161 mas->last = mas->index - 1; 4162 mas->index = mas_safe_min(mas, pivots, mas->offset); 4163 } else { 4164 if (mas->index <= min) 4165 goto underflow; 4166 4167 if (mas_prev_node(mas, min)) { 4168 mas_rewalk(mas, save_point); 4169 goto retry; 4170 } 4171 4172 if (WARN_ON_ONCE(mas_is_underflow(mas))) 4173 return NULL; 4174 4175 mas->last = mas->max; 4176 node = mas_mn(mas); 4177 type = mte_node_type(mas->node); 4178 pivots = ma_pivots(node, type); 4179 mas->index = pivots[mas->offset - 1] + 1; 4180 } 4181 4182 slots = ma_slots(node, type); 4183 entry = mas_slot(mas, slots, mas->offset); 4184 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4185 goto retry; 4186 4187 if (likely(entry)) 4188 return entry; 4189 4190 if (!empty) { 4191 if (mas->index <= min) 4192 goto underflow; 4193 4194 goto again; 4195 } 4196 4197 return entry; 4198 4199 underflow: 4200 mas->status = ma_underflow; 4201 return NULL; 4202 } 4203 4204 /* 4205 * mas_next_node() - Get the next node at the same level in the tree. 4206 * @mas: The maple state 4207 * @node: The maple node 4208 * @max: The maximum pivot value to check. 4209 * 4210 * The next value will be mas->node[mas->offset] or the status will have 4211 * overflowed. 4212 * Return: 1 on dead node, 0 otherwise. 4213 */ 4214 static int mas_next_node(struct ma_state *mas, struct maple_node *node, 4215 unsigned long max) 4216 { 4217 unsigned long min; 4218 unsigned long *pivots; 4219 struct maple_enode *enode; 4220 struct maple_node *tmp; 4221 int level = 0; 4222 unsigned char node_end; 4223 enum maple_type mt; 4224 void __rcu **slots; 4225 4226 if (mas->max >= max) 4227 goto overflow; 4228 4229 min = mas->max + 1; 4230 level = 0; 4231 do { 4232 if (ma_is_root(node)) 4233 goto overflow; 4234 4235 /* Walk up. */ 4236 if (unlikely(mas_ascend(mas))) 4237 return 1; 4238 4239 level++; 4240 node = mas_mn(mas); 4241 mt = mte_node_type(mas->node); 4242 pivots = ma_pivots(node, mt); 4243 node_end = ma_data_end(node, mt, pivots, mas->max); 4244 if (unlikely(ma_dead_node(node))) 4245 return 1; 4246 4247 } while (unlikely(mas->offset == node_end)); 4248 4249 slots = ma_slots(node, mt); 4250 mas->offset++; 4251 enode = mas_slot(mas, slots, mas->offset); 4252 if (unlikely(ma_dead_node(node))) 4253 return 1; 4254 4255 if (level > 1) 4256 mas->offset = 0; 4257 4258 while (unlikely(level > 1)) { 4259 level--; 4260 mas->node = enode; 4261 node = mas_mn(mas); 4262 mt = mte_node_type(mas->node); 4263 slots = ma_slots(node, mt); 4264 enode = mas_slot(mas, slots, 0); 4265 if (unlikely(ma_dead_node(node))) 4266 return 1; 4267 } 4268 4269 if (!mas->offset) 4270 pivots = ma_pivots(node, mt); 4271 4272 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4273 tmp = mte_to_node(enode); 4274 mt = mte_node_type(enode); 4275 pivots = ma_pivots(tmp, mt); 4276 mas->end = ma_data_end(tmp, mt, pivots, mas->max); 4277 if (unlikely(ma_dead_node(node))) 4278 return 1; 4279 4280 mas->node = enode; 4281 mas->min = min; 4282 return 0; 4283 4284 overflow: 4285 if (unlikely(ma_dead_node(node))) 4286 return 1; 4287 4288 mas->status = ma_overflow; 4289 return 0; 4290 } 4291 4292 /* 4293 * mas_next_slot() - Get the entry in the next slot 4294 * 4295 * @mas: The maple state 4296 * @max: The maximum starting range 4297 * @empty: Can be empty 4298 * 4299 * Return: The entry in the next slot which is possibly NULL 4300 */ 4301 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4302 { 4303 void __rcu **slots; 4304 unsigned long *pivots; 4305 unsigned long pivot; 4306 enum maple_type type; 4307 struct maple_node *node; 4308 unsigned long save_point = mas->last; 4309 void *entry; 4310 4311 retry: 4312 node = mas_mn(mas); 4313 type = mte_node_type(mas->node); 4314 pivots = ma_pivots(node, type); 4315 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4316 goto retry; 4317 4318 if (mas->max >= max) { 4319 if (likely(mas->offset < mas->end)) 4320 pivot = pivots[mas->offset]; 4321 else 4322 pivot = mas->max; 4323 4324 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4325 goto retry; 4326 4327 if (pivot >= max) { /* Was at the limit, next will extend beyond */ 4328 mas->status = ma_overflow; 4329 return NULL; 4330 } 4331 } 4332 4333 if (likely(mas->offset < mas->end)) { 4334 mas->index = pivots[mas->offset] + 1; 4335 again: 4336 mas->offset++; 4337 if (likely(mas->offset < mas->end)) 4338 mas->last = pivots[mas->offset]; 4339 else 4340 mas->last = mas->max; 4341 } else { 4342 if (mas->last >= max) { 4343 mas->status = ma_overflow; 4344 return NULL; 4345 } 4346 4347 if (mas_next_node(mas, node, max)) { 4348 mas_rewalk(mas, save_point); 4349 goto retry; 4350 } 4351 4352 if (WARN_ON_ONCE(mas_is_overflow(mas))) 4353 return NULL; 4354 4355 mas->offset = 0; 4356 mas->index = mas->min; 4357 node = mas_mn(mas); 4358 type = mte_node_type(mas->node); 4359 pivots = ma_pivots(node, type); 4360 mas->last = pivots[0]; 4361 } 4362 4363 slots = ma_slots(node, type); 4364 entry = mt_slot(mas->tree, slots, mas->offset); 4365 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4366 goto retry; 4367 4368 if (entry) 4369 return entry; 4370 4371 4372 if (!empty) { 4373 if (mas->last >= max) { 4374 mas->status = ma_overflow; 4375 return NULL; 4376 } 4377 4378 mas->index = mas->last + 1; 4379 goto again; 4380 } 4381 4382 return entry; 4383 } 4384 4385 /* 4386 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4387 * highest gap address of a given size in a given node and descend. 4388 * @mas: The maple state 4389 * @size: The needed size. 4390 * 4391 * Return: True if found in a leaf, false otherwise. 4392 * 4393 */ 4394 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4395 unsigned long *gap_min, unsigned long *gap_max) 4396 { 4397 enum maple_type type = mte_node_type(mas->node); 4398 struct maple_node *node = mas_mn(mas); 4399 unsigned long *pivots, *gaps; 4400 void __rcu **slots; 4401 unsigned long gap = 0; 4402 unsigned long max, min; 4403 unsigned char offset; 4404 4405 if (unlikely(mas_is_err(mas))) 4406 return true; 4407 4408 if (ma_is_dense(type)) { 4409 /* dense nodes. */ 4410 mas->offset = (unsigned char)(mas->index - mas->min); 4411 return true; 4412 } 4413 4414 pivots = ma_pivots(node, type); 4415 slots = ma_slots(node, type); 4416 gaps = ma_gaps(node, type); 4417 offset = mas->offset; 4418 min = mas_safe_min(mas, pivots, offset); 4419 /* Skip out of bounds. */ 4420 while (mas->last < min) 4421 min = mas_safe_min(mas, pivots, --offset); 4422 4423 max = mas_safe_pivot(mas, pivots, offset, type); 4424 while (mas->index <= max) { 4425 gap = 0; 4426 if (gaps) 4427 gap = gaps[offset]; 4428 else if (!mas_slot(mas, slots, offset)) 4429 gap = max - min + 1; 4430 4431 if (gap) { 4432 if ((size <= gap) && (size <= mas->last - min + 1)) 4433 break; 4434 4435 if (!gaps) { 4436 /* Skip the next slot, it cannot be a gap. */ 4437 if (offset < 2) 4438 goto ascend; 4439 4440 offset -= 2; 4441 max = pivots[offset]; 4442 min = mas_safe_min(mas, pivots, offset); 4443 continue; 4444 } 4445 } 4446 4447 if (!offset) 4448 goto ascend; 4449 4450 offset--; 4451 max = min - 1; 4452 min = mas_safe_min(mas, pivots, offset); 4453 } 4454 4455 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4456 goto no_space; 4457 4458 if (unlikely(ma_is_leaf(type))) { 4459 mas->offset = offset; 4460 *gap_min = min; 4461 *gap_max = min + gap - 1; 4462 return true; 4463 } 4464 4465 /* descend, only happens under lock. */ 4466 mas->node = mas_slot(mas, slots, offset); 4467 mas->min = min; 4468 mas->max = max; 4469 mas->offset = mas_data_end(mas); 4470 return false; 4471 4472 ascend: 4473 if (!mte_is_root(mas->node)) 4474 return false; 4475 4476 no_space: 4477 mas_set_err(mas, -EBUSY); 4478 return false; 4479 } 4480 4481 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4482 { 4483 enum maple_type type = mte_node_type(mas->node); 4484 unsigned long pivot, min, gap = 0; 4485 unsigned char offset, data_end; 4486 unsigned long *gaps, *pivots; 4487 void __rcu **slots; 4488 struct maple_node *node; 4489 bool found = false; 4490 4491 if (ma_is_dense(type)) { 4492 mas->offset = (unsigned char)(mas->index - mas->min); 4493 return true; 4494 } 4495 4496 node = mas_mn(mas); 4497 pivots = ma_pivots(node, type); 4498 slots = ma_slots(node, type); 4499 gaps = ma_gaps(node, type); 4500 offset = mas->offset; 4501 min = mas_safe_min(mas, pivots, offset); 4502 data_end = ma_data_end(node, type, pivots, mas->max); 4503 for (; offset <= data_end; offset++) { 4504 pivot = mas_safe_pivot(mas, pivots, offset, type); 4505 4506 /* Not within lower bounds */ 4507 if (mas->index > pivot) 4508 goto next_slot; 4509 4510 if (gaps) 4511 gap = gaps[offset]; 4512 else if (!mas_slot(mas, slots, offset)) 4513 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4514 else 4515 goto next_slot; 4516 4517 if (gap >= size) { 4518 if (ma_is_leaf(type)) { 4519 found = true; 4520 break; 4521 } 4522 4523 mas->node = mas_slot(mas, slots, offset); 4524 mas->min = min; 4525 mas->max = pivot; 4526 offset = 0; 4527 break; 4528 } 4529 next_slot: 4530 min = pivot + 1; 4531 if (mas->last <= pivot) { 4532 mas_set_err(mas, -EBUSY); 4533 return true; 4534 } 4535 } 4536 4537 mas->offset = offset; 4538 return found; 4539 } 4540 4541 /** 4542 * mas_walk() - Search for @mas->index in the tree. 4543 * @mas: The maple state. 4544 * 4545 * mas->index and mas->last will be set to the range if there is a value. If 4546 * mas->status is ma_none, reset to ma_start 4547 * 4548 * Return: the entry at the location or %NULL. 4549 */ 4550 void *mas_walk(struct ma_state *mas) 4551 { 4552 void *entry; 4553 4554 if (!mas_is_active(mas) && !mas_is_start(mas)) 4555 mas->status = ma_start; 4556 retry: 4557 entry = mas_state_walk(mas); 4558 if (mas_is_start(mas)) { 4559 goto retry; 4560 } else if (mas_is_none(mas)) { 4561 mas->index = 0; 4562 mas->last = ULONG_MAX; 4563 } else if (mas_is_ptr(mas)) { 4564 if (!mas->index) { 4565 mas->last = 0; 4566 return entry; 4567 } 4568 4569 mas->index = 1; 4570 mas->last = ULONG_MAX; 4571 mas->status = ma_none; 4572 return NULL; 4573 } 4574 4575 return entry; 4576 } 4577 EXPORT_SYMBOL_GPL(mas_walk); 4578 4579 static inline bool mas_rewind_node(struct ma_state *mas) 4580 { 4581 unsigned char slot; 4582 4583 do { 4584 if (mte_is_root(mas->node)) { 4585 slot = mas->offset; 4586 if (!slot) 4587 return false; 4588 } else { 4589 mas_ascend(mas); 4590 slot = mas->offset; 4591 } 4592 } while (!slot); 4593 4594 mas->offset = --slot; 4595 return true; 4596 } 4597 4598 /* 4599 * mas_skip_node() - Internal function. Skip over a node. 4600 * @mas: The maple state. 4601 * 4602 * Return: true if there is another node, false otherwise. 4603 */ 4604 static inline bool mas_skip_node(struct ma_state *mas) 4605 { 4606 if (mas_is_err(mas)) 4607 return false; 4608 4609 do { 4610 if (mte_is_root(mas->node)) { 4611 if (mas->offset >= mas_data_end(mas)) { 4612 mas_set_err(mas, -EBUSY); 4613 return false; 4614 } 4615 } else { 4616 mas_ascend(mas); 4617 } 4618 } while (mas->offset >= mas_data_end(mas)); 4619 4620 mas->offset++; 4621 return true; 4622 } 4623 4624 /* 4625 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4626 * @size 4627 * @mas: The maple state 4628 * @size: The size of the gap required 4629 * 4630 * Search between @mas->index and @mas->last for a gap of @size. 4631 */ 4632 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4633 { 4634 struct maple_enode *last = NULL; 4635 4636 /* 4637 * There are 4 options: 4638 * go to child (descend) 4639 * go back to parent (ascend) 4640 * no gap found. (return, error == -EBUSY) 4641 * found the gap. (return) 4642 */ 4643 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4644 if (last == mas->node) 4645 mas_skip_node(mas); 4646 else 4647 last = mas->node; 4648 } 4649 } 4650 4651 /* 4652 * mas_sparse_area() - Internal function. Return upper or lower limit when 4653 * searching for a gap in an empty tree. 4654 * @mas: The maple state 4655 * @min: the minimum range 4656 * @max: The maximum range 4657 * @size: The size of the gap 4658 * @fwd: Searching forward or back 4659 */ 4660 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 4661 unsigned long max, unsigned long size, bool fwd) 4662 { 4663 if (!unlikely(mas_is_none(mas)) && min == 0) { 4664 min++; 4665 /* 4666 * At this time, min is increased, we need to recheck whether 4667 * the size is satisfied. 4668 */ 4669 if (min > max || max - min + 1 < size) 4670 return -EBUSY; 4671 } 4672 /* mas_is_ptr */ 4673 4674 if (fwd) { 4675 mas->index = min; 4676 mas->last = min + size - 1; 4677 } else { 4678 mas->last = max; 4679 mas->index = max - size + 1; 4680 } 4681 return 0; 4682 } 4683 4684 /* 4685 * mas_empty_area() - Get the lowest address within the range that is 4686 * sufficient for the size requested. 4687 * @mas: The maple state 4688 * @min: The lowest value of the range 4689 * @max: The highest value of the range 4690 * @size: The size needed 4691 */ 4692 int mas_empty_area(struct ma_state *mas, unsigned long min, 4693 unsigned long max, unsigned long size) 4694 { 4695 unsigned char offset; 4696 unsigned long *pivots; 4697 enum maple_type mt; 4698 struct maple_node *node; 4699 4700 if (min > max) 4701 return -EINVAL; 4702 4703 if (size == 0 || max - min < size - 1) 4704 return -EINVAL; 4705 4706 if (mas_is_start(mas)) 4707 mas_start(mas); 4708 else if (mas->offset >= 2) 4709 mas->offset -= 2; 4710 else if (!mas_skip_node(mas)) 4711 return -EBUSY; 4712 4713 /* Empty set */ 4714 if (mas_is_none(mas) || mas_is_ptr(mas)) 4715 return mas_sparse_area(mas, min, max, size, true); 4716 4717 /* The start of the window can only be within these values */ 4718 mas->index = min; 4719 mas->last = max; 4720 mas_awalk(mas, size); 4721 4722 if (unlikely(mas_is_err(mas))) 4723 return xa_err(mas->node); 4724 4725 offset = mas->offset; 4726 node = mas_mn(mas); 4727 mt = mte_node_type(mas->node); 4728 pivots = ma_pivots(node, mt); 4729 min = mas_safe_min(mas, pivots, offset); 4730 if (mas->index < min) 4731 mas->index = min; 4732 mas->last = mas->index + size - 1; 4733 mas->end = ma_data_end(node, mt, pivots, mas->max); 4734 return 0; 4735 } 4736 EXPORT_SYMBOL_GPL(mas_empty_area); 4737 4738 /* 4739 * mas_empty_area_rev() - Get the highest address within the range that is 4740 * sufficient for the size requested. 4741 * @mas: The maple state 4742 * @min: The lowest value of the range 4743 * @max: The highest value of the range 4744 * @size: The size needed 4745 */ 4746 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 4747 unsigned long max, unsigned long size) 4748 { 4749 struct maple_enode *last = mas->node; 4750 4751 if (min > max) 4752 return -EINVAL; 4753 4754 if (size == 0 || max - min < size - 1) 4755 return -EINVAL; 4756 4757 if (mas_is_start(mas)) 4758 mas_start(mas); 4759 else if ((mas->offset < 2) && (!mas_rewind_node(mas))) 4760 return -EBUSY; 4761 4762 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 4763 return mas_sparse_area(mas, min, max, size, false); 4764 else if (mas->offset >= 2) 4765 mas->offset -= 2; 4766 else 4767 mas->offset = mas_data_end(mas); 4768 4769 4770 /* The start of the window can only be within these values. */ 4771 mas->index = min; 4772 mas->last = max; 4773 4774 while (!mas_rev_awalk(mas, size, &min, &max)) { 4775 if (last == mas->node) { 4776 if (!mas_rewind_node(mas)) 4777 return -EBUSY; 4778 } else { 4779 last = mas->node; 4780 } 4781 } 4782 4783 if (mas_is_err(mas)) 4784 return xa_err(mas->node); 4785 4786 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 4787 return -EBUSY; 4788 4789 /* Trim the upper limit to the max. */ 4790 if (max < mas->last) 4791 mas->last = max; 4792 4793 mas->index = mas->last - size + 1; 4794 mas->end = mas_data_end(mas); 4795 return 0; 4796 } 4797 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 4798 4799 /* 4800 * mte_dead_leaves() - Mark all leaves of a node as dead. 4801 * @enode: the encoded node 4802 * @mt: the maple tree 4803 * @slots: Pointer to the slot array 4804 * 4805 * Must hold the write lock. 4806 * 4807 * Return: The number of leaves marked as dead. 4808 */ 4809 static inline 4810 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 4811 void __rcu **slots) 4812 { 4813 struct maple_node *node; 4814 enum maple_type type; 4815 void *entry; 4816 int offset; 4817 4818 for (offset = 0; offset < mt_slot_count(enode); offset++) { 4819 entry = mt_slot(mt, slots, offset); 4820 type = mte_node_type(entry); 4821 node = mte_to_node(entry); 4822 /* Use both node and type to catch LE & BE metadata */ 4823 if (!node || !type) 4824 break; 4825 4826 mte_set_node_dead(entry); 4827 node->type = type; 4828 rcu_assign_pointer(slots[offset], node); 4829 } 4830 4831 return offset; 4832 } 4833 4834 /** 4835 * mte_dead_walk() - Walk down a dead tree to just before the leaves 4836 * @enode: The maple encoded node 4837 * @offset: The starting offset 4838 * 4839 * Note: This can only be used from the RCU callback context. 4840 */ 4841 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 4842 { 4843 struct maple_node *node, *next; 4844 void __rcu **slots = NULL; 4845 4846 next = mte_to_node(*enode); 4847 do { 4848 *enode = ma_enode_ptr(next); 4849 node = mte_to_node(*enode); 4850 slots = ma_slots(node, node->type); 4851 next = rcu_dereference_protected(slots[offset], 4852 lock_is_held(&rcu_callback_map)); 4853 offset = 0; 4854 } while (!ma_is_leaf(next->type)); 4855 4856 return slots; 4857 } 4858 4859 /** 4860 * mt_free_walk() - Walk & free a tree in the RCU callback context 4861 * @head: The RCU head that's within the node. 4862 * 4863 * Note: This can only be used from the RCU callback context. 4864 */ 4865 static void mt_free_walk(struct rcu_head *head) 4866 { 4867 void __rcu **slots; 4868 struct maple_node *node, *start; 4869 struct maple_enode *enode; 4870 unsigned char offset; 4871 enum maple_type type; 4872 4873 node = container_of(head, struct maple_node, rcu); 4874 4875 if (ma_is_leaf(node->type)) 4876 goto free_leaf; 4877 4878 start = node; 4879 enode = mt_mk_node(node, node->type); 4880 slots = mte_dead_walk(&enode, 0); 4881 node = mte_to_node(enode); 4882 do { 4883 mt_free_bulk(node->slot_len, slots); 4884 offset = node->parent_slot + 1; 4885 enode = node->piv_parent; 4886 if (mte_to_node(enode) == node) 4887 goto free_leaf; 4888 4889 type = mte_node_type(enode); 4890 slots = ma_slots(mte_to_node(enode), type); 4891 if ((offset < mt_slots[type]) && 4892 rcu_dereference_protected(slots[offset], 4893 lock_is_held(&rcu_callback_map))) 4894 slots = mte_dead_walk(&enode, offset); 4895 node = mte_to_node(enode); 4896 } while ((node != start) || (node->slot_len < offset)); 4897 4898 slots = ma_slots(node, node->type); 4899 mt_free_bulk(node->slot_len, slots); 4900 4901 free_leaf: 4902 kfree(node); 4903 } 4904 4905 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 4906 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 4907 { 4908 struct maple_node *node; 4909 struct maple_enode *next = *enode; 4910 void __rcu **slots = NULL; 4911 enum maple_type type; 4912 unsigned char next_offset = 0; 4913 4914 do { 4915 *enode = next; 4916 node = mte_to_node(*enode); 4917 type = mte_node_type(*enode); 4918 slots = ma_slots(node, type); 4919 next = mt_slot_locked(mt, slots, next_offset); 4920 if ((mte_dead_node(next))) 4921 next = mt_slot_locked(mt, slots, ++next_offset); 4922 4923 mte_set_node_dead(*enode); 4924 node->type = type; 4925 node->piv_parent = prev; 4926 node->parent_slot = offset; 4927 offset = next_offset; 4928 next_offset = 0; 4929 prev = *enode; 4930 } while (!mte_is_leaf(next)); 4931 4932 return slots; 4933 } 4934 4935 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 4936 bool free) 4937 { 4938 void __rcu **slots; 4939 struct maple_node *node = mte_to_node(enode); 4940 struct maple_enode *start; 4941 4942 if (mte_is_leaf(enode)) { 4943 mte_set_node_dead(enode); 4944 node->type = mte_node_type(enode); 4945 goto free_leaf; 4946 } 4947 4948 start = enode; 4949 slots = mte_destroy_descend(&enode, mt, start, 0); 4950 node = mte_to_node(enode); // Updated in the above call. 4951 do { 4952 enum maple_type type; 4953 unsigned char offset; 4954 struct maple_enode *parent, *tmp; 4955 4956 node->slot_len = mte_dead_leaves(enode, mt, slots); 4957 if (free) 4958 mt_free_bulk(node->slot_len, slots); 4959 offset = node->parent_slot + 1; 4960 enode = node->piv_parent; 4961 if (mte_to_node(enode) == node) 4962 goto free_leaf; 4963 4964 type = mte_node_type(enode); 4965 slots = ma_slots(mte_to_node(enode), type); 4966 if (offset >= mt_slots[type]) 4967 goto next; 4968 4969 tmp = mt_slot_locked(mt, slots, offset); 4970 if (mte_node_type(tmp) && mte_to_node(tmp)) { 4971 parent = enode; 4972 enode = tmp; 4973 slots = mte_destroy_descend(&enode, mt, parent, offset); 4974 } 4975 next: 4976 node = mte_to_node(enode); 4977 } while (start != enode); 4978 4979 node = mte_to_node(enode); 4980 node->slot_len = mte_dead_leaves(enode, mt, slots); 4981 if (free) 4982 mt_free_bulk(node->slot_len, slots); 4983 4984 free_leaf: 4985 if (free) 4986 kfree(node); 4987 else 4988 mt_clear_meta(mt, node, node->type); 4989 } 4990 4991 /* 4992 * mte_destroy_walk() - Free a tree or sub-tree. 4993 * @enode: the encoded maple node (maple_enode) to start 4994 * @mt: the tree to free - needed for node types. 4995 * 4996 * Must hold the write lock. 4997 */ 4998 static inline void mte_destroy_walk(struct maple_enode *enode, 4999 struct maple_tree *mt) 5000 { 5001 struct maple_node *node = mte_to_node(enode); 5002 5003 if (mt_in_rcu(mt)) { 5004 mt_destroy_walk(enode, mt, false); 5005 call_rcu(&node->rcu, mt_free_walk); 5006 } else { 5007 mt_destroy_walk(enode, mt, true); 5008 } 5009 } 5010 /* Interface */ 5011 5012 /** 5013 * mas_store() - Store an @entry. 5014 * @mas: The maple state. 5015 * @entry: The entry to store. 5016 * 5017 * The @mas->index and @mas->last is used to set the range for the @entry. 5018 * 5019 * Return: the first entry between mas->index and mas->last or %NULL. 5020 */ 5021 void *mas_store(struct ma_state *mas, void *entry) 5022 { 5023 MA_WR_STATE(wr_mas, mas, entry); 5024 5025 trace_ma_write(__func__, mas, 0, entry); 5026 #ifdef CONFIG_DEBUG_MAPLE_TREE 5027 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5028 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, 5029 entry); 5030 5031 if (mas->index > mas->last) { 5032 mas_set_err(mas, -EINVAL); 5033 return NULL; 5034 } 5035 5036 #endif 5037 5038 /* 5039 * Storing is the same operation as insert with the added caveat that it 5040 * can overwrite entries. Although this seems simple enough, one may 5041 * want to examine what happens if a single store operation was to 5042 * overwrite multiple entries within a self-balancing B-Tree. 5043 */ 5044 mas_wr_prealloc_setup(&wr_mas); 5045 mas->store_type = mas_wr_store_type(&wr_mas); 5046 if (mas->mas_flags & MA_STATE_PREALLOC) { 5047 mas_wr_store_entry(&wr_mas); 5048 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5049 return wr_mas.content; 5050 } 5051 5052 mas_prealloc_calc(&wr_mas, entry); 5053 if (!mas->node_request) 5054 goto store; 5055 5056 mas_alloc_nodes(mas, GFP_NOWAIT); 5057 if (mas_is_err(mas)) 5058 return NULL; 5059 5060 store: 5061 mas_wr_store_entry(&wr_mas); 5062 mas_destroy(mas); 5063 return wr_mas.content; 5064 } 5065 EXPORT_SYMBOL_GPL(mas_store); 5066 5067 /** 5068 * mas_store_gfp() - Store a value into the tree. 5069 * @mas: The maple state 5070 * @entry: The entry to store 5071 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5072 * 5073 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5074 * be allocated. 5075 */ 5076 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5077 { 5078 unsigned long index = mas->index; 5079 unsigned long last = mas->last; 5080 MA_WR_STATE(wr_mas, mas, entry); 5081 int ret = 0; 5082 5083 retry: 5084 mas_wr_preallocate(&wr_mas, entry); 5085 if (unlikely(mas_nomem(mas, gfp))) { 5086 if (!entry) 5087 __mas_set_range(mas, index, last); 5088 goto retry; 5089 } 5090 5091 if (mas_is_err(mas)) { 5092 ret = xa_err(mas->node); 5093 goto out; 5094 } 5095 5096 mas_wr_store_entry(&wr_mas); 5097 out: 5098 mas_destroy(mas); 5099 return ret; 5100 } 5101 EXPORT_SYMBOL_GPL(mas_store_gfp); 5102 5103 /** 5104 * mas_store_prealloc() - Store a value into the tree using memory 5105 * preallocated in the maple state. 5106 * @mas: The maple state 5107 * @entry: The entry to store. 5108 */ 5109 void mas_store_prealloc(struct ma_state *mas, void *entry) 5110 { 5111 MA_WR_STATE(wr_mas, mas, entry); 5112 5113 if (mas->store_type == wr_store_root) { 5114 mas_wr_prealloc_setup(&wr_mas); 5115 goto store; 5116 } 5117 5118 mas_wr_walk_descend(&wr_mas); 5119 if (mas->store_type != wr_spanning_store) { 5120 /* set wr_mas->content to current slot */ 5121 wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset); 5122 mas_wr_end_piv(&wr_mas); 5123 } 5124 5125 store: 5126 trace_ma_write(__func__, mas, 0, entry); 5127 mas_wr_store_entry(&wr_mas); 5128 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5129 mas_destroy(mas); 5130 } 5131 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5132 5133 /** 5134 * mas_preallocate() - Preallocate enough nodes for a store operation 5135 * @mas: The maple state 5136 * @entry: The entry that will be stored 5137 * @gfp: The GFP_FLAGS to use for allocations. 5138 * 5139 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5140 */ 5141 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5142 { 5143 MA_WR_STATE(wr_mas, mas, entry); 5144 5145 mas_wr_prealloc_setup(&wr_mas); 5146 mas->store_type = mas_wr_store_type(&wr_mas); 5147 mas_prealloc_calc(&wr_mas, entry); 5148 if (!mas->node_request) 5149 goto set_flag; 5150 5151 mas->mas_flags &= ~MA_STATE_PREALLOC; 5152 mas_alloc_nodes(mas, gfp); 5153 if (mas_is_err(mas)) { 5154 int ret = xa_err(mas->node); 5155 5156 mas->node_request = 0; 5157 mas_destroy(mas); 5158 mas_reset(mas); 5159 return ret; 5160 } 5161 5162 set_flag: 5163 mas->mas_flags |= MA_STATE_PREALLOC; 5164 return 0; 5165 } 5166 EXPORT_SYMBOL_GPL(mas_preallocate); 5167 5168 /* 5169 * mas_destroy() - destroy a maple state. 5170 * @mas: The maple state 5171 * 5172 * Upon completion, check the left-most node and rebalance against the node to 5173 * the right if necessary. Frees any allocated nodes associated with this maple 5174 * state. 5175 */ 5176 void mas_destroy(struct ma_state *mas) 5177 { 5178 mas->mas_flags &= ~MA_STATE_PREALLOC; 5179 5180 mas->node_request = 0; 5181 if (mas->sheaf) 5182 mt_return_sheaf(mas->sheaf); 5183 5184 mas->sheaf = NULL; 5185 } 5186 EXPORT_SYMBOL_GPL(mas_destroy); 5187 5188 static void mas_may_activate(struct ma_state *mas) 5189 { 5190 if (!mas->node) { 5191 mas->status = ma_start; 5192 } else if (mas->index > mas->max || mas->index < mas->min) { 5193 mas->status = ma_start; 5194 } else { 5195 mas->status = ma_active; 5196 } 5197 } 5198 5199 static bool mas_next_setup(struct ma_state *mas, unsigned long max, 5200 void **entry) 5201 { 5202 bool was_none = mas_is_none(mas); 5203 5204 if (unlikely(mas->last >= max)) { 5205 mas->status = ma_overflow; 5206 return true; 5207 } 5208 5209 switch (mas->status) { 5210 case ma_active: 5211 return false; 5212 case ma_none: 5213 fallthrough; 5214 case ma_pause: 5215 mas->status = ma_start; 5216 fallthrough; 5217 case ma_start: 5218 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5219 break; 5220 case ma_overflow: 5221 /* Overflowed before, but the max changed */ 5222 mas_may_activate(mas); 5223 break; 5224 case ma_underflow: 5225 /* The user expects the mas to be one before where it is */ 5226 mas_may_activate(mas); 5227 *entry = mas_walk(mas); 5228 if (*entry) 5229 return true; 5230 break; 5231 case ma_root: 5232 break; 5233 case ma_error: 5234 return true; 5235 } 5236 5237 if (likely(mas_is_active(mas))) /* Fast path */ 5238 return false; 5239 5240 if (mas_is_ptr(mas)) { 5241 *entry = NULL; 5242 if (was_none && mas->index == 0) { 5243 mas->index = mas->last = 0; 5244 return true; 5245 } 5246 mas->index = 1; 5247 mas->last = ULONG_MAX; 5248 mas->status = ma_none; 5249 return true; 5250 } 5251 5252 if (mas_is_none(mas)) 5253 return true; 5254 5255 return false; 5256 } 5257 5258 /** 5259 * mas_next() - Get the next entry. 5260 * @mas: The maple state 5261 * @max: The maximum index to check. 5262 * 5263 * Returns the next entry after @mas->index. 5264 * Must hold rcu_read_lock or the write lock. 5265 * Can return the zero entry. 5266 * 5267 * Return: The next entry or %NULL 5268 */ 5269 void *mas_next(struct ma_state *mas, unsigned long max) 5270 { 5271 void *entry = NULL; 5272 5273 if (mas_next_setup(mas, max, &entry)) 5274 return entry; 5275 5276 /* Retries on dead nodes handled by mas_next_slot */ 5277 return mas_next_slot(mas, max, false); 5278 } 5279 EXPORT_SYMBOL_GPL(mas_next); 5280 5281 /** 5282 * mas_next_range() - Advance the maple state to the next range 5283 * @mas: The maple state 5284 * @max: The maximum index to check. 5285 * 5286 * Sets @mas->index and @mas->last to the range. 5287 * Must hold rcu_read_lock or the write lock. 5288 * Can return the zero entry. 5289 * 5290 * Return: The next entry or %NULL 5291 */ 5292 void *mas_next_range(struct ma_state *mas, unsigned long max) 5293 { 5294 void *entry = NULL; 5295 5296 if (mas_next_setup(mas, max, &entry)) 5297 return entry; 5298 5299 /* Retries on dead nodes handled by mas_next_slot */ 5300 return mas_next_slot(mas, max, true); 5301 } 5302 EXPORT_SYMBOL_GPL(mas_next_range); 5303 5304 /** 5305 * mt_next() - get the next value in the maple tree 5306 * @mt: The maple tree 5307 * @index: The start index 5308 * @max: The maximum index to check 5309 * 5310 * Takes RCU read lock internally to protect the search, which does not 5311 * protect the returned pointer after dropping RCU read lock. 5312 * See also: Documentation/core-api/maple_tree.rst 5313 * 5314 * Return: The entry higher than @index or %NULL if nothing is found. 5315 */ 5316 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5317 { 5318 void *entry = NULL; 5319 MA_STATE(mas, mt, index, index); 5320 5321 rcu_read_lock(); 5322 entry = mas_next(&mas, max); 5323 rcu_read_unlock(); 5324 return entry; 5325 } 5326 EXPORT_SYMBOL_GPL(mt_next); 5327 5328 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) 5329 { 5330 if (unlikely(mas->index <= min)) { 5331 mas->status = ma_underflow; 5332 return true; 5333 } 5334 5335 switch (mas->status) { 5336 case ma_active: 5337 return false; 5338 case ma_start: 5339 break; 5340 case ma_none: 5341 fallthrough; 5342 case ma_pause: 5343 mas->status = ma_start; 5344 break; 5345 case ma_underflow: 5346 /* underflowed before but the min changed */ 5347 mas_may_activate(mas); 5348 break; 5349 case ma_overflow: 5350 /* User expects mas to be one after where it is */ 5351 mas_may_activate(mas); 5352 *entry = mas_walk(mas); 5353 if (*entry) 5354 return true; 5355 break; 5356 case ma_root: 5357 break; 5358 case ma_error: 5359 return true; 5360 } 5361 5362 if (mas_is_start(mas)) 5363 mas_walk(mas); 5364 5365 if (unlikely(mas_is_ptr(mas))) { 5366 if (!mas->index) { 5367 mas->status = ma_none; 5368 return true; 5369 } 5370 mas->index = mas->last = 0; 5371 *entry = mas_root(mas); 5372 return true; 5373 } 5374 5375 if (mas_is_none(mas)) { 5376 if (mas->index) { 5377 /* Walked to out-of-range pointer? */ 5378 mas->index = mas->last = 0; 5379 mas->status = ma_root; 5380 *entry = mas_root(mas); 5381 return true; 5382 } 5383 return true; 5384 } 5385 5386 return false; 5387 } 5388 5389 /** 5390 * mas_prev() - Get the previous entry 5391 * @mas: The maple state 5392 * @min: The minimum value to check. 5393 * 5394 * Must hold rcu_read_lock or the write lock. 5395 * Will reset mas to ma_start if the status is ma_none. Will stop on not 5396 * searchable nodes. 5397 * 5398 * Return: the previous value or %NULL. 5399 */ 5400 void *mas_prev(struct ma_state *mas, unsigned long min) 5401 { 5402 void *entry = NULL; 5403 5404 if (mas_prev_setup(mas, min, &entry)) 5405 return entry; 5406 5407 return mas_prev_slot(mas, min, false); 5408 } 5409 EXPORT_SYMBOL_GPL(mas_prev); 5410 5411 /** 5412 * mas_prev_range() - Advance to the previous range 5413 * @mas: The maple state 5414 * @min: The minimum value to check. 5415 * 5416 * Sets @mas->index and @mas->last to the range. 5417 * Must hold rcu_read_lock or the write lock. 5418 * Will reset mas to ma_start if the node is ma_none. Will stop on not 5419 * searchable nodes. 5420 * 5421 * Return: the previous value or %NULL. 5422 */ 5423 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5424 { 5425 void *entry = NULL; 5426 5427 if (mas_prev_setup(mas, min, &entry)) 5428 return entry; 5429 5430 return mas_prev_slot(mas, min, true); 5431 } 5432 EXPORT_SYMBOL_GPL(mas_prev_range); 5433 5434 /** 5435 * mt_prev() - get the previous value in the maple tree 5436 * @mt: The maple tree 5437 * @index: The start index 5438 * @min: The minimum index to check 5439 * 5440 * Takes RCU read lock internally to protect the search, which does not 5441 * protect the returned pointer after dropping RCU read lock. 5442 * See also: Documentation/core-api/maple_tree.rst 5443 * 5444 * Return: The entry before @index or %NULL if nothing is found. 5445 */ 5446 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5447 { 5448 void *entry = NULL; 5449 MA_STATE(mas, mt, index, index); 5450 5451 rcu_read_lock(); 5452 entry = mas_prev(&mas, min); 5453 rcu_read_unlock(); 5454 return entry; 5455 } 5456 EXPORT_SYMBOL_GPL(mt_prev); 5457 5458 /** 5459 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5460 * @mas: The maple state to pause 5461 * 5462 * Some users need to pause a walk and drop the lock they're holding in 5463 * order to yield to a higher priority thread or carry out an operation 5464 * on an entry. Those users should call this function before they drop 5465 * the lock. It resets the @mas to be suitable for the next iteration 5466 * of the loop after the user has reacquired the lock. If most entries 5467 * found during a walk require you to call mas_pause(), the mt_for_each() 5468 * iterator may be more appropriate. 5469 * 5470 */ 5471 void mas_pause(struct ma_state *mas) 5472 { 5473 mas->status = ma_pause; 5474 mas->node = NULL; 5475 } 5476 EXPORT_SYMBOL_GPL(mas_pause); 5477 5478 /** 5479 * mas_find_setup() - Internal function to set up mas_find*(). 5480 * @mas: The maple state 5481 * @max: The maximum index 5482 * @entry: Pointer to the entry 5483 * 5484 * Returns: True if entry is the answer, false otherwise. 5485 */ 5486 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) 5487 { 5488 switch (mas->status) { 5489 case ma_active: 5490 if (mas->last < max) 5491 return false; 5492 return true; 5493 case ma_start: 5494 break; 5495 case ma_pause: 5496 if (unlikely(mas->last >= max)) 5497 return true; 5498 5499 mas->index = ++mas->last; 5500 mas->status = ma_start; 5501 break; 5502 case ma_none: 5503 if (unlikely(mas->last >= max)) 5504 return true; 5505 5506 mas->index = mas->last; 5507 mas->status = ma_start; 5508 break; 5509 case ma_underflow: 5510 /* mas is pointing at entry before unable to go lower */ 5511 if (unlikely(mas->index >= max)) { 5512 mas->status = ma_overflow; 5513 return true; 5514 } 5515 5516 mas_may_activate(mas); 5517 *entry = mas_walk(mas); 5518 if (*entry) 5519 return true; 5520 break; 5521 case ma_overflow: 5522 if (unlikely(mas->last >= max)) 5523 return true; 5524 5525 mas_may_activate(mas); 5526 *entry = mas_walk(mas); 5527 if (*entry) 5528 return true; 5529 break; 5530 case ma_root: 5531 break; 5532 case ma_error: 5533 return true; 5534 } 5535 5536 if (mas_is_start(mas)) { 5537 /* First run or continue */ 5538 if (mas->index > max) 5539 return true; 5540 5541 *entry = mas_walk(mas); 5542 if (*entry) 5543 return true; 5544 5545 } 5546 5547 if (unlikely(mas_is_ptr(mas))) 5548 goto ptr_out_of_range; 5549 5550 if (unlikely(mas_is_none(mas))) 5551 return true; 5552 5553 if (mas->index == max) 5554 return true; 5555 5556 return false; 5557 5558 ptr_out_of_range: 5559 mas->status = ma_none; 5560 mas->index = 1; 5561 mas->last = ULONG_MAX; 5562 return true; 5563 } 5564 5565 /** 5566 * mas_find() - On the first call, find the entry at or after mas->index up to 5567 * %max. Otherwise, find the entry after mas->index. 5568 * @mas: The maple state 5569 * @max: The maximum value to check. 5570 * 5571 * Must hold rcu_read_lock or the write lock. 5572 * If an entry exists, last and index are updated accordingly. 5573 * May set @mas->status to ma_overflow. 5574 * 5575 * Return: The entry or %NULL. 5576 */ 5577 void *mas_find(struct ma_state *mas, unsigned long max) 5578 { 5579 void *entry = NULL; 5580 5581 if (mas_find_setup(mas, max, &entry)) 5582 return entry; 5583 5584 /* Retries on dead nodes handled by mas_next_slot */ 5585 entry = mas_next_slot(mas, max, false); 5586 /* Ignore overflow */ 5587 mas->status = ma_active; 5588 return entry; 5589 } 5590 EXPORT_SYMBOL_GPL(mas_find); 5591 5592 /** 5593 * mas_find_range() - On the first call, find the entry at or after 5594 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 5595 * @mas: The maple state 5596 * @max: The maximum value to check. 5597 * 5598 * Must hold rcu_read_lock or the write lock. 5599 * If an entry exists, last and index are updated accordingly. 5600 * May set @mas->status to ma_overflow. 5601 * 5602 * Return: The entry or %NULL. 5603 */ 5604 void *mas_find_range(struct ma_state *mas, unsigned long max) 5605 { 5606 void *entry = NULL; 5607 5608 if (mas_find_setup(mas, max, &entry)) 5609 return entry; 5610 5611 /* Retries on dead nodes handled by mas_next_slot */ 5612 return mas_next_slot(mas, max, true); 5613 } 5614 EXPORT_SYMBOL_GPL(mas_find_range); 5615 5616 /** 5617 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 5618 * @mas: The maple state 5619 * @min: The minimum index 5620 * @entry: Pointer to the entry 5621 * 5622 * Returns: True if entry is the answer, false otherwise. 5623 */ 5624 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 5625 void **entry) 5626 { 5627 5628 switch (mas->status) { 5629 case ma_active: 5630 goto active; 5631 case ma_start: 5632 break; 5633 case ma_pause: 5634 if (unlikely(mas->index <= min)) { 5635 mas->status = ma_underflow; 5636 return true; 5637 } 5638 mas->last = --mas->index; 5639 mas->status = ma_start; 5640 break; 5641 case ma_none: 5642 if (mas->index <= min) 5643 goto none; 5644 5645 mas->last = mas->index; 5646 mas->status = ma_start; 5647 break; 5648 case ma_overflow: /* user expects the mas to be one after where it is */ 5649 if (unlikely(mas->index <= min)) { 5650 mas->status = ma_underflow; 5651 return true; 5652 } 5653 5654 mas->status = ma_active; 5655 break; 5656 case ma_underflow: /* user expects the mas to be one before where it is */ 5657 if (unlikely(mas->index <= min)) 5658 return true; 5659 5660 mas->status = ma_active; 5661 break; 5662 case ma_root: 5663 break; 5664 case ma_error: 5665 return true; 5666 } 5667 5668 if (mas_is_start(mas)) { 5669 /* First run or continue */ 5670 if (mas->index < min) 5671 return true; 5672 5673 *entry = mas_walk(mas); 5674 if (*entry) 5675 return true; 5676 } 5677 5678 if (unlikely(mas_is_ptr(mas))) 5679 goto none; 5680 5681 if (unlikely(mas_is_none(mas))) { 5682 /* 5683 * Walked to the location, and there was nothing so the previous 5684 * location is 0. 5685 */ 5686 mas->last = mas->index = 0; 5687 mas->status = ma_root; 5688 *entry = mas_root(mas); 5689 return true; 5690 } 5691 5692 active: 5693 if (mas->index < min) 5694 return true; 5695 5696 return false; 5697 5698 none: 5699 mas->status = ma_none; 5700 return true; 5701 } 5702 5703 /** 5704 * mas_find_rev: On the first call, find the first non-null entry at or below 5705 * mas->index down to %min. Otherwise find the first non-null entry below 5706 * mas->index down to %min. 5707 * @mas: The maple state 5708 * @min: The minimum value to check. 5709 * 5710 * Must hold rcu_read_lock or the write lock. 5711 * If an entry exists, last and index are updated accordingly. 5712 * May set @mas->status to ma_underflow. 5713 * 5714 * Return: The entry or %NULL. 5715 */ 5716 void *mas_find_rev(struct ma_state *mas, unsigned long min) 5717 { 5718 void *entry = NULL; 5719 5720 if (mas_find_rev_setup(mas, min, &entry)) 5721 return entry; 5722 5723 /* Retries on dead nodes handled by mas_prev_slot */ 5724 return mas_prev_slot(mas, min, false); 5725 5726 } 5727 EXPORT_SYMBOL_GPL(mas_find_rev); 5728 5729 /** 5730 * mas_find_range_rev: On the first call, find the first non-null entry at or 5731 * below mas->index down to %min. Otherwise advance to the previous slot after 5732 * mas->index down to %min. 5733 * @mas: The maple state 5734 * @min: The minimum value to check. 5735 * 5736 * Must hold rcu_read_lock or the write lock. 5737 * If an entry exists, last and index are updated accordingly. 5738 * May set @mas->status to ma_underflow. 5739 * 5740 * Return: The entry or %NULL. 5741 */ 5742 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 5743 { 5744 void *entry = NULL; 5745 5746 if (mas_find_rev_setup(mas, min, &entry)) 5747 return entry; 5748 5749 /* Retries on dead nodes handled by mas_prev_slot */ 5750 return mas_prev_slot(mas, min, true); 5751 } 5752 EXPORT_SYMBOL_GPL(mas_find_range_rev); 5753 5754 /** 5755 * mas_erase() - Find the range in which index resides and erase the entire 5756 * range. 5757 * @mas: The maple state 5758 * 5759 * Must hold the write lock. 5760 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 5761 * erases that range. 5762 * 5763 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 5764 */ 5765 void *mas_erase(struct ma_state *mas) 5766 { 5767 void *entry; 5768 unsigned long index = mas->index; 5769 MA_WR_STATE(wr_mas, mas, NULL); 5770 5771 if (!mas_is_active(mas) || !mas_is_start(mas)) 5772 mas->status = ma_start; 5773 5774 write_retry: 5775 entry = mas_state_walk(mas); 5776 if (!entry) 5777 return NULL; 5778 5779 /* Must reset to ensure spanning writes of last slot are detected */ 5780 mas_reset(mas); 5781 mas_wr_preallocate(&wr_mas, NULL); 5782 if (mas_nomem(mas, GFP_KERNEL)) { 5783 /* in case the range of entry changed when unlocked */ 5784 mas->index = mas->last = index; 5785 goto write_retry; 5786 } 5787 5788 if (mas_is_err(mas)) 5789 goto out; 5790 5791 mas_wr_store_entry(&wr_mas); 5792 out: 5793 mas_destroy(mas); 5794 return entry; 5795 } 5796 EXPORT_SYMBOL_GPL(mas_erase); 5797 5798 /** 5799 * mas_nomem() - Check if there was an error allocating and do the allocation 5800 * if necessary If there are allocations, then free them. 5801 * @mas: The maple state 5802 * @gfp: The GFP_FLAGS to use for allocations 5803 * Return: true on allocation, false otherwise. 5804 */ 5805 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 5806 __must_hold(mas->tree->ma_lock) 5807 { 5808 if (likely(mas->node != MA_ERROR(-ENOMEM))) 5809 return false; 5810 5811 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 5812 mtree_unlock(mas->tree); 5813 mas_alloc_nodes(mas, gfp); 5814 mtree_lock(mas->tree); 5815 } else { 5816 mas_alloc_nodes(mas, gfp); 5817 } 5818 5819 if (!mas->sheaf) 5820 return false; 5821 5822 mas->status = ma_start; 5823 return true; 5824 } 5825 5826 void __init maple_tree_init(void) 5827 { 5828 struct kmem_cache_args args = { 5829 .align = sizeof(struct maple_node), 5830 .sheaf_capacity = 32, 5831 }; 5832 5833 maple_node_cache = kmem_cache_create("maple_node", 5834 sizeof(struct maple_node), &args, 5835 SLAB_PANIC); 5836 } 5837 5838 /** 5839 * mtree_load() - Load a value stored in a maple tree 5840 * @mt: The maple tree 5841 * @index: The index to load 5842 * 5843 * Return: the entry or %NULL 5844 */ 5845 void *mtree_load(struct maple_tree *mt, unsigned long index) 5846 { 5847 MA_STATE(mas, mt, index, index); 5848 void *entry; 5849 5850 trace_ma_read(__func__, &mas); 5851 rcu_read_lock(); 5852 retry: 5853 entry = mas_start(&mas); 5854 if (unlikely(mas_is_none(&mas))) 5855 goto unlock; 5856 5857 if (unlikely(mas_is_ptr(&mas))) { 5858 if (index) 5859 entry = NULL; 5860 5861 goto unlock; 5862 } 5863 5864 entry = mtree_lookup_walk(&mas); 5865 if (!entry && unlikely(mas_is_start(&mas))) 5866 goto retry; 5867 unlock: 5868 rcu_read_unlock(); 5869 if (xa_is_zero(entry)) 5870 return NULL; 5871 5872 return entry; 5873 } 5874 EXPORT_SYMBOL(mtree_load); 5875 5876 /** 5877 * mtree_store_range() - Store an entry at a given range. 5878 * @mt: The maple tree 5879 * @index: The start of the range 5880 * @last: The end of the range 5881 * @entry: The entry to store 5882 * @gfp: The GFP_FLAGS to use for allocations 5883 * 5884 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5885 * be allocated. 5886 */ 5887 int mtree_store_range(struct maple_tree *mt, unsigned long index, 5888 unsigned long last, void *entry, gfp_t gfp) 5889 { 5890 MA_STATE(mas, mt, index, last); 5891 int ret = 0; 5892 5893 trace_ma_write(__func__, &mas, 0, entry); 5894 if (WARN_ON_ONCE(xa_is_advanced(entry))) 5895 return -EINVAL; 5896 5897 if (index > last) 5898 return -EINVAL; 5899 5900 mtree_lock(mt); 5901 ret = mas_store_gfp(&mas, entry, gfp); 5902 mtree_unlock(mt); 5903 5904 return ret; 5905 } 5906 EXPORT_SYMBOL(mtree_store_range); 5907 5908 /** 5909 * mtree_store() - Store an entry at a given index. 5910 * @mt: The maple tree 5911 * @index: The index to store the value 5912 * @entry: The entry to store 5913 * @gfp: The GFP_FLAGS to use for allocations 5914 * 5915 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5916 * be allocated. 5917 */ 5918 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 5919 gfp_t gfp) 5920 { 5921 return mtree_store_range(mt, index, index, entry, gfp); 5922 } 5923 EXPORT_SYMBOL(mtree_store); 5924 5925 /** 5926 * mtree_insert_range() - Insert an entry at a given range if there is no value. 5927 * @mt: The maple tree 5928 * @first: The start of the range 5929 * @last: The end of the range 5930 * @entry: The entry to store 5931 * @gfp: The GFP_FLAGS to use for allocations. 5932 * 5933 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 5934 * request, -ENOMEM if memory could not be allocated. 5935 */ 5936 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 5937 unsigned long last, void *entry, gfp_t gfp) 5938 { 5939 MA_STATE(ms, mt, first, last); 5940 int ret = 0; 5941 5942 if (WARN_ON_ONCE(xa_is_advanced(entry))) 5943 return -EINVAL; 5944 5945 if (first > last) 5946 return -EINVAL; 5947 5948 mtree_lock(mt); 5949 retry: 5950 mas_insert(&ms, entry); 5951 if (mas_nomem(&ms, gfp)) 5952 goto retry; 5953 5954 mtree_unlock(mt); 5955 if (mas_is_err(&ms)) 5956 ret = xa_err(ms.node); 5957 5958 mas_destroy(&ms); 5959 return ret; 5960 } 5961 EXPORT_SYMBOL(mtree_insert_range); 5962 5963 /** 5964 * mtree_insert() - Insert an entry at a given index if there is no value. 5965 * @mt: The maple tree 5966 * @index : The index to store the value 5967 * @entry: The entry to store 5968 * @gfp: The GFP_FLAGS to use for allocations. 5969 * 5970 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 5971 * request, -ENOMEM if memory could not be allocated. 5972 */ 5973 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 5974 gfp_t gfp) 5975 { 5976 return mtree_insert_range(mt, index, index, entry, gfp); 5977 } 5978 EXPORT_SYMBOL(mtree_insert); 5979 5980 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 5981 void *entry, unsigned long size, unsigned long min, 5982 unsigned long max, gfp_t gfp) 5983 { 5984 int ret = 0; 5985 5986 MA_STATE(mas, mt, 0, 0); 5987 if (!mt_is_alloc(mt)) 5988 return -EINVAL; 5989 5990 if (WARN_ON_ONCE(mt_is_reserved(entry))) 5991 return -EINVAL; 5992 5993 mtree_lock(mt); 5994 retry: 5995 ret = mas_empty_area(&mas, min, max, size); 5996 if (ret) 5997 goto unlock; 5998 5999 mas_insert(&mas, entry); 6000 /* 6001 * mas_nomem() may release the lock, causing the allocated area 6002 * to be unavailable, so try to allocate a free area again. 6003 */ 6004 if (mas_nomem(&mas, gfp)) 6005 goto retry; 6006 6007 if (mas_is_err(&mas)) 6008 ret = xa_err(mas.node); 6009 else 6010 *startp = mas.index; 6011 6012 unlock: 6013 mtree_unlock(mt); 6014 mas_destroy(&mas); 6015 return ret; 6016 } 6017 EXPORT_SYMBOL(mtree_alloc_range); 6018 6019 /** 6020 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree. 6021 * @mt: The maple tree. 6022 * @startp: Pointer to ID. 6023 * @range_lo: Lower bound of range to search. 6024 * @range_hi: Upper bound of range to search. 6025 * @entry: The entry to store. 6026 * @next: Pointer to next ID to allocate. 6027 * @gfp: The GFP_FLAGS to use for allocations. 6028 * 6029 * Finds an empty entry in @mt after @next, stores the new index into 6030 * the @id pointer, stores the entry at that index, then updates @next. 6031 * 6032 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag. 6033 * 6034 * Context: Any context. Takes and releases the mt.lock. May sleep if 6035 * the @gfp flags permit. 6036 * 6037 * Return: 0 if the allocation succeeded without wrapping, 1 if the 6038 * allocation succeeded after wrapping, -ENOMEM if memory could not be 6039 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no 6040 * free entries. 6041 */ 6042 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, 6043 void *entry, unsigned long range_lo, unsigned long range_hi, 6044 unsigned long *next, gfp_t gfp) 6045 { 6046 int ret; 6047 6048 MA_STATE(mas, mt, 0, 0); 6049 6050 if (!mt_is_alloc(mt)) 6051 return -EINVAL; 6052 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6053 return -EINVAL; 6054 mtree_lock(mt); 6055 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, 6056 next, gfp); 6057 mtree_unlock(mt); 6058 return ret; 6059 } 6060 EXPORT_SYMBOL(mtree_alloc_cyclic); 6061 6062 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6063 void *entry, unsigned long size, unsigned long min, 6064 unsigned long max, gfp_t gfp) 6065 { 6066 int ret = 0; 6067 6068 MA_STATE(mas, mt, 0, 0); 6069 if (!mt_is_alloc(mt)) 6070 return -EINVAL; 6071 6072 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6073 return -EINVAL; 6074 6075 mtree_lock(mt); 6076 retry: 6077 ret = mas_empty_area_rev(&mas, min, max, size); 6078 if (ret) 6079 goto unlock; 6080 6081 mas_insert(&mas, entry); 6082 /* 6083 * mas_nomem() may release the lock, causing the allocated area 6084 * to be unavailable, so try to allocate a free area again. 6085 */ 6086 if (mas_nomem(&mas, gfp)) 6087 goto retry; 6088 6089 if (mas_is_err(&mas)) 6090 ret = xa_err(mas.node); 6091 else 6092 *startp = mas.index; 6093 6094 unlock: 6095 mtree_unlock(mt); 6096 mas_destroy(&mas); 6097 return ret; 6098 } 6099 EXPORT_SYMBOL(mtree_alloc_rrange); 6100 6101 /** 6102 * mtree_erase() - Find an index and erase the entire range. 6103 * @mt: The maple tree 6104 * @index: The index to erase 6105 * 6106 * Erasing is the same as a walk to an entry then a store of a NULL to that 6107 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6108 * 6109 * Return: The entry stored at the @index or %NULL 6110 */ 6111 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6112 { 6113 void *entry = NULL; 6114 6115 MA_STATE(mas, mt, index, index); 6116 trace_ma_op(__func__, &mas); 6117 6118 mtree_lock(mt); 6119 entry = mas_erase(&mas); 6120 mtree_unlock(mt); 6121 6122 return entry; 6123 } 6124 EXPORT_SYMBOL(mtree_erase); 6125 6126 /* 6127 * mas_dup_free() - Free an incomplete duplication of a tree. 6128 * @mas: The maple state of a incomplete tree. 6129 * 6130 * The parameter @mas->node passed in indicates that the allocation failed on 6131 * this node. This function frees all nodes starting from @mas->node in the 6132 * reverse order of mas_dup_build(). There is no need to hold the source tree 6133 * lock at this time. 6134 */ 6135 static void mas_dup_free(struct ma_state *mas) 6136 { 6137 struct maple_node *node; 6138 enum maple_type type; 6139 void __rcu **slots; 6140 unsigned char count, i; 6141 6142 /* Maybe the first node allocation failed. */ 6143 if (mas_is_none(mas)) 6144 return; 6145 6146 while (!mte_is_root(mas->node)) { 6147 mas_ascend(mas); 6148 if (mas->offset) { 6149 mas->offset--; 6150 do { 6151 mas_descend(mas); 6152 mas->offset = mas_data_end(mas); 6153 } while (!mte_is_leaf(mas->node)); 6154 6155 mas_ascend(mas); 6156 } 6157 6158 node = mte_to_node(mas->node); 6159 type = mte_node_type(mas->node); 6160 slots = ma_slots(node, type); 6161 count = mas_data_end(mas) + 1; 6162 for (i = 0; i < count; i++) 6163 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; 6164 mt_free_bulk(count, slots); 6165 } 6166 6167 node = mte_to_node(mas->node); 6168 kfree(node); 6169 } 6170 6171 /* 6172 * mas_copy_node() - Copy a maple node and replace the parent. 6173 * @mas: The maple state of source tree. 6174 * @new_mas: The maple state of new tree. 6175 * @parent: The parent of the new node. 6176 * 6177 * Copy @mas->node to @new_mas->node, set @parent to be the parent of 6178 * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM. 6179 */ 6180 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, 6181 struct maple_pnode *parent) 6182 { 6183 struct maple_node *node = mte_to_node(mas->node); 6184 struct maple_node *new_node = mte_to_node(new_mas->node); 6185 unsigned long val; 6186 6187 /* Copy the node completely. */ 6188 memcpy(new_node, node, sizeof(struct maple_node)); 6189 /* Update the parent node pointer. */ 6190 val = (unsigned long)node->parent & MAPLE_NODE_MASK; 6191 new_node->parent = ma_parent_ptr(val | (unsigned long)parent); 6192 } 6193 6194 /* 6195 * mas_dup_alloc() - Allocate child nodes for a maple node. 6196 * @mas: The maple state of source tree. 6197 * @new_mas: The maple state of new tree. 6198 * @gfp: The GFP_FLAGS to use for allocations. 6199 * 6200 * This function allocates child nodes for @new_mas->node during the duplication 6201 * process. If memory allocation fails, @mas is set to -ENOMEM. 6202 */ 6203 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, 6204 gfp_t gfp) 6205 { 6206 struct maple_node *node = mte_to_node(mas->node); 6207 struct maple_node *new_node = mte_to_node(new_mas->node); 6208 enum maple_type type; 6209 unsigned char request, count, i; 6210 void __rcu **slots; 6211 void __rcu **new_slots; 6212 unsigned long val; 6213 6214 /* Allocate memory for child nodes. */ 6215 type = mte_node_type(mas->node); 6216 new_slots = ma_slots(new_node, type); 6217 request = mas_data_end(mas) + 1; 6218 count = mt_alloc_bulk(gfp, request, (void **)new_slots); 6219 if (unlikely(count < request)) { 6220 memset(new_slots, 0, request * sizeof(void *)); 6221 mas_set_err(mas, -ENOMEM); 6222 return; 6223 } 6224 6225 /* Restore node type information in slots. */ 6226 slots = ma_slots(node, type); 6227 for (i = 0; i < count; i++) { 6228 val = (unsigned long)mt_slot_locked(mas->tree, slots, i); 6229 val &= MAPLE_NODE_MASK; 6230 ((unsigned long *)new_slots)[i] |= val; 6231 } 6232 } 6233 6234 /* 6235 * mas_dup_build() - Build a new maple tree from a source tree 6236 * @mas: The maple state of source tree, need to be in MAS_START state. 6237 * @new_mas: The maple state of new tree, need to be in MAS_START state. 6238 * @gfp: The GFP_FLAGS to use for allocations. 6239 * 6240 * This function builds a new tree in DFS preorder. If the memory allocation 6241 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the 6242 * last node. mas_dup_free() will free the incomplete duplication of a tree. 6243 * 6244 * Note that the attributes of the two trees need to be exactly the same, and the 6245 * new tree needs to be empty, otherwise -EINVAL will be set in @mas. 6246 */ 6247 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, 6248 gfp_t gfp) 6249 { 6250 struct maple_node *node; 6251 struct maple_pnode *parent = NULL; 6252 struct maple_enode *root; 6253 enum maple_type type; 6254 6255 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || 6256 unlikely(!mtree_empty(new_mas->tree))) { 6257 mas_set_err(mas, -EINVAL); 6258 return; 6259 } 6260 6261 root = mas_start(mas); 6262 if (mas_is_ptr(mas) || mas_is_none(mas)) 6263 goto set_new_tree; 6264 6265 node = mt_alloc_one(gfp); 6266 if (!node) { 6267 new_mas->status = ma_none; 6268 mas_set_err(mas, -ENOMEM); 6269 return; 6270 } 6271 6272 type = mte_node_type(mas->node); 6273 root = mt_mk_node(node, type); 6274 new_mas->node = root; 6275 new_mas->min = 0; 6276 new_mas->max = ULONG_MAX; 6277 root = mte_mk_root(root); 6278 while (1) { 6279 mas_copy_node(mas, new_mas, parent); 6280 if (!mte_is_leaf(mas->node)) { 6281 /* Only allocate child nodes for non-leaf nodes. */ 6282 mas_dup_alloc(mas, new_mas, gfp); 6283 if (unlikely(mas_is_err(mas))) 6284 return; 6285 } else { 6286 /* 6287 * This is the last leaf node and duplication is 6288 * completed. 6289 */ 6290 if (mas->max == ULONG_MAX) 6291 goto done; 6292 6293 /* This is not the last leaf node and needs to go up. */ 6294 do { 6295 mas_ascend(mas); 6296 mas_ascend(new_mas); 6297 } while (mas->offset == mas_data_end(mas)); 6298 6299 /* Move to the next subtree. */ 6300 mas->offset++; 6301 new_mas->offset++; 6302 } 6303 6304 mas_descend(mas); 6305 parent = ma_parent_ptr(mte_to_node(new_mas->node)); 6306 mas_descend(new_mas); 6307 mas->offset = 0; 6308 new_mas->offset = 0; 6309 } 6310 done: 6311 /* Specially handle the parent of the root node. */ 6312 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); 6313 set_new_tree: 6314 /* Make them the same height */ 6315 new_mas->tree->ma_flags = mas->tree->ma_flags; 6316 rcu_assign_pointer(new_mas->tree->ma_root, root); 6317 } 6318 6319 /** 6320 * __mt_dup(): Duplicate an entire maple tree 6321 * @mt: The source maple tree 6322 * @new: The new maple tree 6323 * @gfp: The GFP_FLAGS to use for allocations 6324 * 6325 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6326 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6327 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6328 * source node except for all the addresses stored in it. It will be faster than 6329 * traversing all elements in the source tree and inserting them one by one into 6330 * the new tree. 6331 * The user needs to ensure that the attributes of the source tree and the new 6332 * tree are the same, and the new tree needs to be an empty tree, otherwise 6333 * -EINVAL will be returned. 6334 * Note that the user needs to manually lock the source tree and the new tree. 6335 * 6336 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6337 * the attributes of the two trees are different or the new tree is not an empty 6338 * tree. 6339 */ 6340 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6341 { 6342 int ret = 0; 6343 MA_STATE(mas, mt, 0, 0); 6344 MA_STATE(new_mas, new, 0, 0); 6345 6346 mas_dup_build(&mas, &new_mas, gfp); 6347 if (unlikely(mas_is_err(&mas))) { 6348 ret = xa_err(mas.node); 6349 if (ret == -ENOMEM) 6350 mas_dup_free(&new_mas); 6351 } 6352 6353 return ret; 6354 } 6355 EXPORT_SYMBOL(__mt_dup); 6356 6357 /** 6358 * mtree_dup(): Duplicate an entire maple tree 6359 * @mt: The source maple tree 6360 * @new: The new maple tree 6361 * @gfp: The GFP_FLAGS to use for allocations 6362 * 6363 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6364 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6365 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6366 * source node except for all the addresses stored in it. It will be faster than 6367 * traversing all elements in the source tree and inserting them one by one into 6368 * the new tree. 6369 * The user needs to ensure that the attributes of the source tree and the new 6370 * tree are the same, and the new tree needs to be an empty tree, otherwise 6371 * -EINVAL will be returned. 6372 * 6373 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6374 * the attributes of the two trees are different or the new tree is not an empty 6375 * tree. 6376 */ 6377 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6378 { 6379 int ret = 0; 6380 MA_STATE(mas, mt, 0, 0); 6381 MA_STATE(new_mas, new, 0, 0); 6382 6383 mas_lock(&new_mas); 6384 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); 6385 mas_dup_build(&mas, &new_mas, gfp); 6386 mas_unlock(&mas); 6387 if (unlikely(mas_is_err(&mas))) { 6388 ret = xa_err(mas.node); 6389 if (ret == -ENOMEM) 6390 mas_dup_free(&new_mas); 6391 } 6392 6393 mas_unlock(&new_mas); 6394 return ret; 6395 } 6396 EXPORT_SYMBOL(mtree_dup); 6397 6398 /** 6399 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6400 * @mt: The maple tree 6401 * 6402 * Note: Does not handle locking. 6403 */ 6404 void __mt_destroy(struct maple_tree *mt) 6405 { 6406 void *root = mt_root_locked(mt); 6407 6408 rcu_assign_pointer(mt->ma_root, NULL); 6409 if (xa_is_node(root)) 6410 mte_destroy_walk(root, mt); 6411 6412 mt->ma_flags = mt_attr(mt); 6413 } 6414 EXPORT_SYMBOL_GPL(__mt_destroy); 6415 6416 /** 6417 * mtree_destroy() - Destroy a maple tree 6418 * @mt: The maple tree 6419 * 6420 * Frees all resources used by the tree. Handles locking. 6421 */ 6422 void mtree_destroy(struct maple_tree *mt) 6423 { 6424 mtree_lock(mt); 6425 __mt_destroy(mt); 6426 mtree_unlock(mt); 6427 } 6428 EXPORT_SYMBOL(mtree_destroy); 6429 6430 /** 6431 * mt_find() - Search from the start up until an entry is found. 6432 * @mt: The maple tree 6433 * @index: Pointer which contains the start location of the search 6434 * @max: The maximum value of the search range 6435 * 6436 * Takes RCU read lock internally to protect the search, which does not 6437 * protect the returned pointer after dropping RCU read lock. 6438 * See also: Documentation/core-api/maple_tree.rst 6439 * 6440 * In case that an entry is found @index is updated to point to the next 6441 * possible entry independent whether the found entry is occupying a 6442 * single index or a range if indices. 6443 * 6444 * Return: The entry at or after the @index or %NULL 6445 */ 6446 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6447 { 6448 MA_STATE(mas, mt, *index, *index); 6449 void *entry; 6450 #ifdef CONFIG_DEBUG_MAPLE_TREE 6451 unsigned long copy = *index; 6452 #endif 6453 6454 trace_ma_read(__func__, &mas); 6455 6456 if ((*index) > max) 6457 return NULL; 6458 6459 rcu_read_lock(); 6460 retry: 6461 entry = mas_state_walk(&mas); 6462 if (mas_is_start(&mas)) 6463 goto retry; 6464 6465 if (unlikely(xa_is_zero(entry))) 6466 entry = NULL; 6467 6468 if (entry) 6469 goto unlock; 6470 6471 while (mas_is_active(&mas) && (mas.last < max)) { 6472 entry = mas_next_slot(&mas, max, false); 6473 if (likely(entry && !xa_is_zero(entry))) 6474 break; 6475 } 6476 6477 if (unlikely(xa_is_zero(entry))) 6478 entry = NULL; 6479 unlock: 6480 rcu_read_unlock(); 6481 if (likely(entry)) { 6482 *index = mas.last + 1; 6483 #ifdef CONFIG_DEBUG_MAPLE_TREE 6484 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6485 pr_err("index not increased! %lx <= %lx\n", 6486 *index, copy); 6487 #endif 6488 } 6489 6490 return entry; 6491 } 6492 EXPORT_SYMBOL(mt_find); 6493 6494 /** 6495 * mt_find_after() - Search from the start up until an entry is found. 6496 * @mt: The maple tree 6497 * @index: Pointer which contains the start location of the search 6498 * @max: The maximum value to check 6499 * 6500 * Same as mt_find() except that it checks @index for 0 before 6501 * searching. If @index == 0, the search is aborted. This covers a wrap 6502 * around of @index to 0 in an iterator loop. 6503 * 6504 * Return: The entry at or after the @index or %NULL 6505 */ 6506 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6507 unsigned long max) 6508 { 6509 if (!(*index)) 6510 return NULL; 6511 6512 return mt_find(mt, index, max); 6513 } 6514 EXPORT_SYMBOL(mt_find_after); 6515 6516 #ifdef CONFIG_DEBUG_MAPLE_TREE 6517 atomic_t maple_tree_tests_run; 6518 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6519 atomic_t maple_tree_tests_passed; 6520 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6521 6522 #ifndef __KERNEL__ 6523 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6524 void mt_set_non_kernel(unsigned int val) 6525 { 6526 kmem_cache_set_non_kernel(maple_node_cache, val); 6527 } 6528 6529 extern void kmem_cache_set_callback(struct kmem_cache *cachep, 6530 void (*callback)(void *)); 6531 void mt_set_callback(void (*callback)(void *)) 6532 { 6533 kmem_cache_set_callback(maple_node_cache, callback); 6534 } 6535 6536 extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); 6537 void mt_set_private(void *private) 6538 { 6539 kmem_cache_set_private(maple_node_cache, private); 6540 } 6541 6542 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6543 unsigned long mt_get_alloc_size(void) 6544 { 6545 return kmem_cache_get_alloc(maple_node_cache); 6546 } 6547 6548 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6549 void mt_zero_nr_tallocated(void) 6550 { 6551 kmem_cache_zero_nr_tallocated(maple_node_cache); 6552 } 6553 6554 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6555 unsigned int mt_nr_tallocated(void) 6556 { 6557 return kmem_cache_nr_tallocated(maple_node_cache); 6558 } 6559 6560 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6561 unsigned int mt_nr_allocated(void) 6562 { 6563 return kmem_cache_nr_allocated(maple_node_cache); 6564 } 6565 6566 void mt_cache_shrink(void) 6567 { 6568 } 6569 #else 6570 /* 6571 * mt_cache_shrink() - For testing, don't use this. 6572 * 6573 * Certain testcases can trigger an OOM when combined with other memory 6574 * debugging configuration options. This function is used to reduce the 6575 * possibility of an out of memory even due to kmem_cache objects remaining 6576 * around for longer than usual. 6577 */ 6578 void mt_cache_shrink(void) 6579 { 6580 kmem_cache_shrink(maple_node_cache); 6581 6582 } 6583 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6584 6585 #endif /* not defined __KERNEL__ */ 6586 /* 6587 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6588 * @mas: The maple state 6589 * @offset: The offset into the slot array to fetch. 6590 * 6591 * Return: The entry stored at @offset. 6592 */ 6593 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6594 unsigned char offset) 6595 { 6596 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6597 offset); 6598 } 6599 6600 /* Depth first search, post-order */ 6601 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6602 { 6603 6604 struct maple_enode *p, *mn = mas->node; 6605 unsigned long p_min, p_max; 6606 6607 mas_next_node(mas, mas_mn(mas), max); 6608 if (!mas_is_overflow(mas)) 6609 return; 6610 6611 if (mte_is_root(mn)) 6612 return; 6613 6614 mas->node = mn; 6615 mas_ascend(mas); 6616 do { 6617 p = mas->node; 6618 p_min = mas->min; 6619 p_max = mas->max; 6620 mas_prev_node(mas, 0); 6621 } while (!mas_is_underflow(mas)); 6622 6623 mas->node = p; 6624 mas->max = p_max; 6625 mas->min = p_min; 6626 } 6627 6628 /* Tree validations */ 6629 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6630 unsigned long min, unsigned long max, unsigned int depth, 6631 enum mt_dump_format format); 6632 static void mt_dump_range(unsigned long min, unsigned long max, 6633 unsigned int depth, enum mt_dump_format format) 6634 { 6635 static const char spaces[] = " "; 6636 6637 switch(format) { 6638 case mt_dump_hex: 6639 if (min == max) 6640 pr_info("%.*s%lx: ", depth * 2, spaces, min); 6641 else 6642 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 6643 break; 6644 case mt_dump_dec: 6645 if (min == max) 6646 pr_info("%.*s%lu: ", depth * 2, spaces, min); 6647 else 6648 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 6649 } 6650 } 6651 6652 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 6653 unsigned int depth, enum mt_dump_format format) 6654 { 6655 mt_dump_range(min, max, depth, format); 6656 6657 if (xa_is_value(entry)) 6658 pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), 6659 xa_to_value(entry), entry); 6660 else if (xa_is_zero(entry)) 6661 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 6662 else if (mt_is_reserved(entry)) 6663 pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); 6664 else 6665 pr_cont(PTR_FMT "\n", entry); 6666 } 6667 6668 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 6669 unsigned long min, unsigned long max, unsigned int depth, 6670 enum mt_dump_format format) 6671 { 6672 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 6673 bool leaf = mte_is_leaf(entry); 6674 unsigned long first = min; 6675 int i; 6676 6677 pr_cont(" contents: "); 6678 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 6679 switch(format) { 6680 case mt_dump_hex: 6681 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 6682 break; 6683 case mt_dump_dec: 6684 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 6685 } 6686 } 6687 pr_cont(PTR_FMT "\n", node->slot[i]); 6688 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 6689 unsigned long last = max; 6690 6691 if (i < (MAPLE_RANGE64_SLOTS - 1)) 6692 last = node->pivot[i]; 6693 else if (!node->slot[i] && max != mt_node_max(entry)) 6694 break; 6695 if (last == 0 && i > 0) 6696 break; 6697 if (leaf) 6698 mt_dump_entry(mt_slot(mt, node->slot, i), 6699 first, last, depth + 1, format); 6700 else if (node->slot[i]) 6701 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6702 first, last, depth + 1, format); 6703 6704 if (last == max) 6705 break; 6706 if (last > max) { 6707 switch(format) { 6708 case mt_dump_hex: 6709 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 6710 node, last, max, i); 6711 break; 6712 case mt_dump_dec: 6713 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 6714 node, last, max, i); 6715 } 6716 } 6717 first = last + 1; 6718 } 6719 } 6720 6721 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 6722 unsigned long min, unsigned long max, unsigned int depth, 6723 enum mt_dump_format format) 6724 { 6725 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 6726 unsigned long first = min; 6727 int i; 6728 6729 pr_cont(" contents: "); 6730 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6731 switch (format) { 6732 case mt_dump_hex: 6733 pr_cont("%lx ", node->gap[i]); 6734 break; 6735 case mt_dump_dec: 6736 pr_cont("%lu ", node->gap[i]); 6737 } 6738 } 6739 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 6740 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 6741 switch (format) { 6742 case mt_dump_hex: 6743 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 6744 break; 6745 case mt_dump_dec: 6746 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 6747 } 6748 } 6749 pr_cont(PTR_FMT "\n", node->slot[i]); 6750 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6751 unsigned long last = max; 6752 6753 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 6754 last = node->pivot[i]; 6755 else if (!node->slot[i]) 6756 break; 6757 if (last == 0 && i > 0) 6758 break; 6759 if (node->slot[i]) 6760 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6761 first, last, depth + 1, format); 6762 6763 if (last == max) 6764 break; 6765 if (last > max) { 6766 switch(format) { 6767 case mt_dump_hex: 6768 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 6769 node, last, max, i); 6770 break; 6771 case mt_dump_dec: 6772 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 6773 node, last, max, i); 6774 } 6775 } 6776 first = last + 1; 6777 } 6778 } 6779 6780 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6781 unsigned long min, unsigned long max, unsigned int depth, 6782 enum mt_dump_format format) 6783 { 6784 struct maple_node *node = mte_to_node(entry); 6785 unsigned int type = mte_node_type(entry); 6786 unsigned int i; 6787 6788 mt_dump_range(min, max, depth, format); 6789 6790 pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, 6791 depth, type, node ? node->parent : NULL); 6792 switch (type) { 6793 case maple_dense: 6794 pr_cont("\n"); 6795 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 6796 if (min + i > max) 6797 pr_cont("OUT OF RANGE: "); 6798 mt_dump_entry(mt_slot(mt, node->slot, i), 6799 min + i, min + i, depth, format); 6800 } 6801 break; 6802 case maple_leaf_64: 6803 case maple_range_64: 6804 mt_dump_range64(mt, entry, min, max, depth, format); 6805 break; 6806 case maple_arange_64: 6807 mt_dump_arange64(mt, entry, min, max, depth, format); 6808 break; 6809 6810 default: 6811 pr_cont(" UNKNOWN TYPE\n"); 6812 } 6813 } 6814 6815 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 6816 { 6817 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 6818 6819 pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", 6820 mt, mt->ma_flags, mt_height(mt), entry); 6821 if (xa_is_node(entry)) 6822 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 6823 else if (entry) 6824 mt_dump_entry(entry, 0, 0, 0, format); 6825 else 6826 pr_info("(empty)\n"); 6827 } 6828 EXPORT_SYMBOL_GPL(mt_dump); 6829 6830 /* 6831 * Calculate the maximum gap in a node and check if that's what is reported in 6832 * the parent (unless root). 6833 */ 6834 static void mas_validate_gaps(struct ma_state *mas) 6835 { 6836 struct maple_enode *mte = mas->node; 6837 struct maple_node *p_mn, *node = mte_to_node(mte); 6838 enum maple_type mt = mte_node_type(mas->node); 6839 unsigned long gap = 0, max_gap = 0; 6840 unsigned long p_end, p_start = mas->min; 6841 unsigned char p_slot, offset; 6842 unsigned long *gaps = NULL; 6843 unsigned long *pivots = ma_pivots(node, mt); 6844 unsigned int i; 6845 6846 if (ma_is_dense(mt)) { 6847 for (i = 0; i < mt_slot_count(mte); i++) { 6848 if (mas_get_slot(mas, i)) { 6849 if (gap > max_gap) 6850 max_gap = gap; 6851 gap = 0; 6852 continue; 6853 } 6854 gap++; 6855 } 6856 goto counted; 6857 } 6858 6859 gaps = ma_gaps(node, mt); 6860 for (i = 0; i < mt_slot_count(mte); i++) { 6861 p_end = mas_safe_pivot(mas, pivots, i, mt); 6862 6863 if (!gaps) { 6864 if (!mas_get_slot(mas, i)) 6865 gap = p_end - p_start + 1; 6866 } else { 6867 void *entry = mas_get_slot(mas, i); 6868 6869 gap = gaps[i]; 6870 MT_BUG_ON(mas->tree, !entry); 6871 6872 if (gap > p_end - p_start + 1) { 6873 pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", 6874 mas_mn(mas), i, gap, p_end, p_start, 6875 p_end - p_start + 1); 6876 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 6877 } 6878 } 6879 6880 if (gap > max_gap) 6881 max_gap = gap; 6882 6883 p_start = p_end + 1; 6884 if (p_end >= mas->max) 6885 break; 6886 } 6887 6888 counted: 6889 if (mt == maple_arange_64) { 6890 MT_BUG_ON(mas->tree, !gaps); 6891 offset = ma_meta_gap(node); 6892 if (offset > i) { 6893 pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); 6894 MT_BUG_ON(mas->tree, 1); 6895 } 6896 6897 if (gaps[offset] != max_gap) { 6898 pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", 6899 node, offset, max_gap); 6900 MT_BUG_ON(mas->tree, 1); 6901 } 6902 6903 for (i++ ; i < mt_slot_count(mte); i++) { 6904 if (gaps[i] != 0) { 6905 pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", 6906 node, i); 6907 MT_BUG_ON(mas->tree, 1); 6908 } 6909 } 6910 } 6911 6912 if (mte_is_root(mte)) 6913 return; 6914 6915 p_slot = mte_parent_slot(mas->node); 6916 p_mn = mte_parent(mte); 6917 MT_BUG_ON(mas->tree, max_gap > mas->max); 6918 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 6919 pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); 6920 mt_dump(mas->tree, mt_dump_hex); 6921 MT_BUG_ON(mas->tree, 1); 6922 } 6923 } 6924 6925 static void mas_validate_parent_slot(struct ma_state *mas) 6926 { 6927 struct maple_node *parent; 6928 struct maple_enode *node; 6929 enum maple_type p_type; 6930 unsigned char p_slot; 6931 void __rcu **slots; 6932 int i; 6933 6934 if (mte_is_root(mas->node)) 6935 return; 6936 6937 p_slot = mte_parent_slot(mas->node); 6938 p_type = mas_parent_type(mas, mas->node); 6939 parent = mte_parent(mas->node); 6940 slots = ma_slots(parent, p_type); 6941 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 6942 6943 /* Check prev/next parent slot for duplicate node entry */ 6944 6945 for (i = 0; i < mt_slots[p_type]; i++) { 6946 node = mas_slot(mas, slots, i); 6947 if (i == p_slot) { 6948 if (node != mas->node) 6949 pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", 6950 parent, i, mas_mn(mas)); 6951 MT_BUG_ON(mas->tree, node != mas->node); 6952 } else if (node == mas->node) { 6953 pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", 6954 mas_mn(mas), parent, i, p_slot); 6955 MT_BUG_ON(mas->tree, node == mas->node); 6956 } 6957 } 6958 } 6959 6960 static void mas_validate_child_slot(struct ma_state *mas) 6961 { 6962 enum maple_type type = mte_node_type(mas->node); 6963 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 6964 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 6965 struct maple_enode *child; 6966 unsigned char i; 6967 6968 if (mte_is_leaf(mas->node)) 6969 return; 6970 6971 for (i = 0; i < mt_slots[type]; i++) { 6972 child = mas_slot(mas, slots, i); 6973 6974 if (!child) { 6975 pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", 6976 mas_mn(mas), i); 6977 MT_BUG_ON(mas->tree, 1); 6978 } 6979 6980 if (mte_parent_slot(child) != i) { 6981 pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", 6982 mas_mn(mas), i, mte_to_node(child), 6983 mte_parent_slot(child)); 6984 MT_BUG_ON(mas->tree, 1); 6985 } 6986 6987 if (mte_parent(child) != mte_to_node(mas->node)) { 6988 pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", 6989 mte_to_node(child), mte_parent(child), 6990 mte_to_node(mas->node)); 6991 MT_BUG_ON(mas->tree, 1); 6992 } 6993 6994 if (i < mt_pivots[type] && pivots[i] == mas->max) 6995 break; 6996 } 6997 } 6998 6999 /* 7000 * Validate all pivots are within mas->min and mas->max, check metadata ends 7001 * where the maximum ends and ensure there is no slots or pivots set outside of 7002 * the end of the data. 7003 */ 7004 static void mas_validate_limits(struct ma_state *mas) 7005 { 7006 int i; 7007 unsigned long prev_piv = 0; 7008 enum maple_type type = mte_node_type(mas->node); 7009 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7010 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7011 7012 for (i = 0; i < mt_slots[type]; i++) { 7013 unsigned long piv; 7014 7015 piv = mas_safe_pivot(mas, pivots, i, type); 7016 7017 if (!piv && (i != 0)) { 7018 pr_err("Missing node limit pivot at " PTR_FMT "[%u]", 7019 mas_mn(mas), i); 7020 MAS_WARN_ON(mas, 1); 7021 } 7022 7023 if (prev_piv > piv) { 7024 pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", 7025 mas_mn(mas), i, piv, prev_piv); 7026 MAS_WARN_ON(mas, piv < prev_piv); 7027 } 7028 7029 if (piv < mas->min) { 7030 pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, 7031 piv, mas->min); 7032 MAS_WARN_ON(mas, piv < mas->min); 7033 } 7034 if (piv > mas->max) { 7035 pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, 7036 piv, mas->max); 7037 MAS_WARN_ON(mas, piv > mas->max); 7038 } 7039 prev_piv = piv; 7040 if (piv == mas->max) 7041 break; 7042 } 7043 7044 if (mas_data_end(mas) != i) { 7045 pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", 7046 mas_mn(mas), mas_data_end(mas), i); 7047 MT_BUG_ON(mas->tree, 1); 7048 } 7049 7050 for (i += 1; i < mt_slots[type]; i++) { 7051 void *entry = mas_slot(mas, slots, i); 7052 7053 if (entry && (i != mt_slots[type] - 1)) { 7054 pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", 7055 mas_mn(mas), i, entry); 7056 MT_BUG_ON(mas->tree, entry != NULL); 7057 } 7058 7059 if (i < mt_pivots[type]) { 7060 unsigned long piv = pivots[i]; 7061 7062 if (!piv) 7063 continue; 7064 7065 pr_err(PTR_FMT "[%u] should not have piv %lu\n", 7066 mas_mn(mas), i, piv); 7067 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7068 } 7069 } 7070 } 7071 7072 static void mt_validate_nulls(struct maple_tree *mt) 7073 { 7074 void *entry, *last = (void *)1; 7075 unsigned char offset = 0; 7076 void __rcu **slots; 7077 MA_STATE(mas, mt, 0, 0); 7078 7079 mas_start(&mas); 7080 if (mas_is_none(&mas) || (mas_is_ptr(&mas))) 7081 return; 7082 7083 while (!mte_is_leaf(mas.node)) 7084 mas_descend(&mas); 7085 7086 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7087 do { 7088 entry = mas_slot(&mas, slots, offset); 7089 if (!last && !entry) { 7090 pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", 7091 mas_mn(&mas), offset); 7092 } 7093 MT_BUG_ON(mt, !last && !entry); 7094 last = entry; 7095 if (offset == mas_data_end(&mas)) { 7096 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7097 if (mas_is_overflow(&mas)) 7098 return; 7099 offset = 0; 7100 slots = ma_slots(mte_to_node(mas.node), 7101 mte_node_type(mas.node)); 7102 } else { 7103 offset++; 7104 } 7105 7106 } while (!mas_is_overflow(&mas)); 7107 } 7108 7109 /* 7110 * validate a maple tree by checking: 7111 * 1. The limits (pivots are within mas->min to mas->max) 7112 * 2. The gap is correctly set in the parents 7113 */ 7114 void mt_validate(struct maple_tree *mt) 7115 __must_hold(mas->tree->ma_lock) 7116 { 7117 unsigned char end; 7118 7119 MA_STATE(mas, mt, 0, 0); 7120 mas_start(&mas); 7121 if (!mas_is_active(&mas)) 7122 return; 7123 7124 while (!mte_is_leaf(mas.node)) 7125 mas_descend(&mas); 7126 7127 while (!mas_is_overflow(&mas)) { 7128 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7129 end = mas_data_end(&mas); 7130 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7131 (!mte_is_root(mas.node)))) { 7132 pr_err("Invalid size %u of " PTR_FMT "\n", 7133 end, mas_mn(&mas)); 7134 } 7135 7136 mas_validate_parent_slot(&mas); 7137 mas_validate_limits(&mas); 7138 mas_validate_child_slot(&mas); 7139 if (mt_is_alloc(mt)) 7140 mas_validate_gaps(&mas); 7141 mas_dfs_postorder(&mas, ULONG_MAX); 7142 } 7143 mt_validate_nulls(mt); 7144 } 7145 EXPORT_SYMBOL_GPL(mt_validate); 7146 7147 void mas_dump(const struct ma_state *mas) 7148 { 7149 pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", 7150 mas->tree, mas->node); 7151 switch (mas->status) { 7152 case ma_active: 7153 pr_err("(ma_active)"); 7154 break; 7155 case ma_none: 7156 pr_err("(ma_none)"); 7157 break; 7158 case ma_root: 7159 pr_err("(ma_root)"); 7160 break; 7161 case ma_start: 7162 pr_err("(ma_start) "); 7163 break; 7164 case ma_pause: 7165 pr_err("(ma_pause) "); 7166 break; 7167 case ma_overflow: 7168 pr_err("(ma_overflow) "); 7169 break; 7170 case ma_underflow: 7171 pr_err("(ma_underflow) "); 7172 break; 7173 case ma_error: 7174 pr_err("(ma_error) "); 7175 break; 7176 } 7177 7178 pr_err("Store Type: "); 7179 switch (mas->store_type) { 7180 case wr_invalid: 7181 pr_err("invalid store type\n"); 7182 break; 7183 case wr_new_root: 7184 pr_err("new_root\n"); 7185 break; 7186 case wr_store_root: 7187 pr_err("store_root\n"); 7188 break; 7189 case wr_exact_fit: 7190 pr_err("exact_fit\n"); 7191 break; 7192 case wr_split_store: 7193 pr_err("split_store\n"); 7194 break; 7195 case wr_slot_store: 7196 pr_err("slot_store\n"); 7197 break; 7198 case wr_append: 7199 pr_err("append\n"); 7200 break; 7201 case wr_node_store: 7202 pr_err("node_store\n"); 7203 break; 7204 case wr_spanning_store: 7205 pr_err("spanning_store\n"); 7206 break; 7207 case wr_rebalance: 7208 pr_err("rebalance\n"); 7209 break; 7210 } 7211 7212 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, 7213 mas->index, mas->last); 7214 pr_err(" min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n", 7215 mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth, 7216 mas->mas_flags); 7217 if (mas->index > mas->last) 7218 pr_err("Check index & last\n"); 7219 } 7220 EXPORT_SYMBOL_GPL(mas_dump); 7221 7222 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7223 { 7224 pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", 7225 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7226 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7227 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, 7228 wr_mas->end_piv); 7229 } 7230 EXPORT_SYMBOL_GPL(mas_wr_dump); 7231 7232 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7233