// SPDX-License-Identifier: GPL-2.0+
/*
 * Maple Tree implementation
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 * Copyright (c) 2023 ByteDance
 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
 */

/*
 * DOC: Interesting implementation details of the Maple Tree
 *
 * Each node type has a number of slots for entries and a number of slots for
 * pivots. In the case of dense nodes, the pivots are implied by the position
 * and are simply the slot index + the minimum of the node.
 *
 * In regular B-Tree terms, pivots are called keys. The term pivot is used to
 * indicate that the tree is specifying ranges. Pivots may appear in the
 * subtree with an entry attached to the value whereas keys are unique to a
 * specific position of a B-tree. Pivot values are inclusive of the slot with
 * the same index.
 *
 *
 * The following illustrates the layout of a range64 node's slots and pivots.
 *
 *
 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
 *           │   │   │   │     │    │    │    │    └─ Implied maximum
 *           │   │   │   │     │    │    │    └─ Pivot 14
 *           │   │   │   │     │    │    └─ Pivot 13
 *           │   │   │   │     │    └─ Pivot 12
 *           │   │   │   │     └─ Pivot 11
 *           │   │   │   └─ Pivot 2
 *           │   │   └─ Pivot 1
 *           │   └─ Pivot 0
 *           └─  Implied minimum
 *
 * Slot contents:
 *  Internal (non-leaf) nodes contain pointers to other nodes.
 *  Leaf nodes contain entries.
 *
 * The location of interest is often referred to as an offset. All offsets have
 * a slot, but the last offset has an implied pivot from the node above (or
 * ULONG_MAX for the root node).
 *
 * Ranges complicate certain write activities. When modifying any of
 * the B-tree variants, it is known that one entry will either be added or
 * deleted. When modifying the Maple Tree, one store operation may overwrite
 * the entire data set, or one half of the tree, or the middle half of the tree.
 *
 */


#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

#define TP_FCT tracepoint_string(__func__)

/*
 * Kernel pointer hashing renders much of the maple tree dump useless as tagged
 * pointers get hashed to arbitrary values.
 *
 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
 * permissible to bypass this. Otherwise remain cautious and retain the hashing.
 *
 * Userland doesn't know about %px so also use %p there.
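 *
 * As an illustrative sketch (not a specific dump site in this file), a line
 * printed as
 *
 *	pr_info("node " PTR_FMT "\n", node);
 *
 * shows the raw node address only when the debug configuration selects %px;
 * with %p the pointer is hashed, so repeated dumps of the same tree will not
 * produce matching addresses.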
77 */ 78 #if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE) 79 #define PTR_FMT "%px" 80 #else 81 #define PTR_FMT "%p" 82 #endif 83 84 #define MA_ROOT_PARENT 1 85 86 /* 87 * Maple state flags 88 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation 89 */ 90 #define MA_STATE_PREALLOC 1 91 92 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) 93 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) 94 #define ma_mnode_ptr(x) ((struct maple_node *)(x)) 95 #define ma_enode_ptr(x) ((struct maple_enode *)(x)) 96 static struct kmem_cache *maple_node_cache; 97 98 #ifdef CONFIG_DEBUG_MAPLE_TREE 99 static const unsigned long mt_max[] = { 100 [maple_dense] = MAPLE_NODE_SLOTS, 101 [maple_leaf_64] = ULONG_MAX, 102 [maple_range_64] = ULONG_MAX, 103 [maple_arange_64] = ULONG_MAX, 104 }; 105 #define mt_node_max(x) mt_max[mte_node_type(x)] 106 #endif 107 108 static const unsigned char mt_slots[] = { 109 [maple_dense] = MAPLE_NODE_SLOTS, 110 [maple_leaf_64] = MAPLE_RANGE64_SLOTS, 111 [maple_range_64] = MAPLE_RANGE64_SLOTS, 112 [maple_arange_64] = MAPLE_ARANGE64_SLOTS, 113 }; 114 #define mt_slot_count(x) mt_slots[mte_node_type(x)] 115 116 static const unsigned char mt_pivots[] = { 117 [maple_dense] = 0, 118 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1, 119 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1, 120 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1, 121 }; 122 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)] 123 124 static const unsigned char mt_min_slots[] = { 125 [maple_dense] = MAPLE_NODE_SLOTS / 2, 126 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 127 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 128 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1, 129 }; 130 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)] 131 132 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2) 133 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) 134 135 struct maple_big_node { 136 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; 137 union { 138 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; 139 struct { 140 unsigned long padding[MAPLE_BIG_NODE_GAPS]; 141 unsigned long gap[MAPLE_BIG_NODE_GAPS]; 142 }; 143 }; 144 unsigned char b_end; 145 enum maple_type type; 146 }; 147 148 /* 149 * The maple_subtree_state is used to build a tree to replace a segment of an 150 * existing tree in a more atomic way. Any walkers of the older tree will hit a 151 * dead node and restart on updates. 
152 */ 153 struct maple_subtree_state { 154 struct ma_state *orig_l; /* Original left side of subtree */ 155 struct ma_state *orig_r; /* Original right side of subtree */ 156 struct ma_state *l; /* New left side of subtree */ 157 struct ma_state *m; /* New middle of subtree (rare) */ 158 struct ma_state *r; /* New right side of subtree */ 159 struct ma_topiary *free; /* nodes to be freed */ 160 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 161 struct maple_big_node *bn; 162 }; 163 164 #ifdef CONFIG_KASAN_STACK 165 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 166 #define noinline_for_kasan noinline_for_stack 167 #else 168 #define noinline_for_kasan inline 169 #endif 170 171 /* Functions */ 172 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 173 { 174 return kmem_cache_alloc(maple_node_cache, gfp); 175 } 176 177 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 178 { 179 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 180 } 181 182 static void mt_return_sheaf(struct slab_sheaf *sheaf) 183 { 184 kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf); 185 } 186 187 static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count) 188 { 189 return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count); 190 } 191 192 static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf, 193 unsigned int size) 194 { 195 return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size); 196 } 197 198 /* 199 * ma_free_rcu() - Use rcu callback to free a maple node 200 * @node: The node to free 201 * 202 * The maple tree uses the parent pointer to indicate this node is no longer in 203 * use and will be freed. 204 */ 205 static void ma_free_rcu(struct maple_node *node) 206 { 207 WARN_ON(node->parent != ma_parent_ptr(node)); 208 kfree_rcu(node, rcu); 209 } 210 211 static void mt_set_height(struct maple_tree *mt, unsigned char height) 212 { 213 unsigned int new_flags = mt->ma_flags; 214 215 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 216 MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX); 217 new_flags |= height << MT_FLAGS_HEIGHT_OFFSET; 218 mt->ma_flags = new_flags; 219 } 220 221 static unsigned int mas_mt_height(struct ma_state *mas) 222 { 223 return mt_height(mas->tree); 224 } 225 226 static inline unsigned int mt_attr(struct maple_tree *mt) 227 { 228 return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK; 229 } 230 231 static __always_inline enum maple_type mte_node_type( 232 const struct maple_enode *entry) 233 { 234 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 235 MAPLE_NODE_TYPE_MASK; 236 } 237 238 static __always_inline bool ma_is_dense(const enum maple_type type) 239 { 240 return type < maple_leaf_64; 241 } 242 243 static __always_inline bool ma_is_leaf(const enum maple_type type) 244 { 245 return type < maple_range_64; 246 } 247 248 static __always_inline bool mte_is_leaf(const struct maple_enode *entry) 249 { 250 return ma_is_leaf(mte_node_type(entry)); 251 } 252 253 /* 254 * We also reserve values with the bottom two bits set to '10' which are 255 * below 4096 256 */ 257 static __always_inline bool mt_is_reserved(const void *entry) 258 { 259 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 260 xa_is_internal(entry); 261 } 262 263 static __always_inline void mas_set_err(struct ma_state *mas, long err) 264 { 265 mas->node = MA_ERROR(err); 266 mas->status = ma_error; 267 } 268 269 static __always_inline bool mas_is_ptr(const struct ma_state *mas) 270 { 271 return mas->status == ma_root; 272 } 273 274 static 
__always_inline bool mas_is_start(const struct ma_state *mas) 275 { 276 return mas->status == ma_start; 277 } 278 279 static __always_inline bool mas_is_none(const struct ma_state *mas) 280 { 281 return mas->status == ma_none; 282 } 283 284 static __always_inline bool mas_is_paused(const struct ma_state *mas) 285 { 286 return mas->status == ma_pause; 287 } 288 289 static __always_inline bool mas_is_overflow(struct ma_state *mas) 290 { 291 return mas->status == ma_overflow; 292 } 293 294 static inline bool mas_is_underflow(struct ma_state *mas) 295 { 296 return mas->status == ma_underflow; 297 } 298 299 static __always_inline struct maple_node *mte_to_node( 300 const struct maple_enode *entry) 301 { 302 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 303 } 304 305 /* 306 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 307 * @entry: The maple encoded node 308 * 309 * Return: a maple topiary pointer 310 */ 311 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 312 { 313 return (struct maple_topiary *) 314 ((unsigned long)entry & ~MAPLE_NODE_MASK); 315 } 316 317 /* 318 * mas_mn() - Get the maple state node. 319 * @mas: The maple state 320 * 321 * Return: the maple node (not encoded - bare pointer). 322 */ 323 static inline struct maple_node *mas_mn(const struct ma_state *mas) 324 { 325 return mte_to_node(mas->node); 326 } 327 328 /* 329 * mte_set_node_dead() - Set a maple encoded node as dead. 330 * @mn: The maple encoded node. 331 */ 332 static inline void mte_set_node_dead(struct maple_enode *mn) 333 { 334 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 335 smp_wmb(); /* Needed for RCU */ 336 } 337 338 /* Bit 1 indicates the root is a node */ 339 #define MAPLE_ROOT_NODE 0x02 340 /* maple_type stored bit 3-6 */ 341 #define MAPLE_ENODE_TYPE_SHIFT 0x03 342 /* Bit 2 means a NULL somewhere below */ 343 #define MAPLE_ENODE_NULL 0x04 344 345 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 346 enum maple_type type) 347 { 348 return (void *)((unsigned long)node | 349 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 350 } 351 352 static inline void *mte_mk_root(const struct maple_enode *node) 353 { 354 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 355 } 356 357 static inline void *mte_safe_root(const struct maple_enode *node) 358 { 359 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 360 } 361 362 static inline void __maybe_unused *mte_set_full(const struct maple_enode *node) 363 { 364 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 365 } 366 367 static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node) 368 { 369 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 370 } 371 372 static inline bool __maybe_unused mte_has_null(const struct maple_enode *node) 373 { 374 return (unsigned long)node & MAPLE_ENODE_NULL; 375 } 376 377 static __always_inline bool ma_is_root(struct maple_node *node) 378 { 379 return ((unsigned long)node->parent & MA_ROOT_PARENT); 380 } 381 382 static __always_inline bool mte_is_root(const struct maple_enode *node) 383 { 384 return ma_is_root(mte_to_node(node)); 385 } 386 387 static inline bool mas_is_root_limits(const struct ma_state *mas) 388 { 389 return !mas->min && mas->max == ULONG_MAX; 390 } 391 392 static __always_inline bool mt_is_alloc(struct maple_tree *mt) 393 { 394 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 395 } 396 397 /* 398 * The Parent Pointer 399 * Excluding root, the parent pointer is 
256B aligned like all other tree nodes.
 * When storing a 32 or 64 bit value, the offset can fit into 5 bits. The 16
 * bit values need an extra bit to store the offset. This extra bit comes from
 * a reuse of the last bit in the node type. This is possible by using bit 1 to
 * indicate if bit 2 is part of the type or the slot.
 *
 * Node types:
 *  0b??1 = Root
 *  0b?00 = 16 bit nodes
 *  0b010 = 32 bit nodes
 *  0b110 = 64 bit nodes
 *
 * Slot size and alignment
 *  0b??1 : Root
 *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
 *  0b010 : 32 bit values, type in 0-2, slot in 3-7
 *  0b110 : 64 bit values, type in 0-2, slot in 3-7
 */

#define MAPLE_PARENT_ROOT		0x01

#define MAPLE_PARENT_SLOT_SHIFT		0x03
#define MAPLE_PARENT_SLOT_MASK		0xF8

#define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
#define MAPLE_PARENT_16B_SLOT_MASK	0xFC

#define MAPLE_PARENT_RANGE64		0x06
#define MAPLE_PARENT_RANGE32		0x02
#define MAPLE_PARENT_NOT_RANGE16	0x02

/*
 * mte_parent_shift() - Get the parent shift for the slot storage.
 * @parent: The parent pointer cast as an unsigned long
 * Return: The shift into that pointer to the start of the slot
 */
static inline unsigned long mte_parent_shift(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_SHIFT;

	return MAPLE_PARENT_16B_SLOT_SHIFT;
}

/*
 * mte_parent_slot_mask() - Get the slot mask for the parent.
 * @parent: The parent pointer cast as an unsigned long.
 * Return: The slot mask for that parent.
 */
static inline unsigned long mte_parent_slot_mask(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_MASK;

	return MAPLE_PARENT_16B_SLOT_MASK;
}

/*
 * mas_parent_type() - Return the maple_type of the parent from the stored
 * parent type.
 * @mas: The maple state
 * @enode: The maple_enode to extract the parent's enum
 * Return: The node->parent maple_type
 */
static inline
enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
{
	unsigned long p_type;

	p_type = (unsigned long)mte_to_node(enode)->parent;
	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
		return 0;

	p_type &= MAPLE_NODE_MASK;
	p_type &= ~mte_parent_slot_mask(p_type);
	switch (p_type) {
	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
		if (mt_is_alloc(mas->tree))
			return maple_arange_64;
		return maple_range_64;
	}

	return 0;
}

/*
 * mas_set_parent() - Set the parent node and encode the slot
 * @mas: The maple state
 * @enode: The encoded maple node.
 * @parent: The encoded maple node that is the parent of @enode.
 * @slot: The slot that @enode resides in @parent.
 *
 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
 * parent type.
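 *
 * As an illustrative example (not taken from a caller): placing a child at
 * slot 3 of a maple_range_64 parent stores
 *
 *	((unsigned long)parent & ~MAPLE_NODE_MASK)
 *		| (3 << MAPLE_PARENT_SLOT_SHIFT) | MAPLE_PARENT_RANGE64
 *
 * in the child's parent pointer, so the low bits become 0b11110 and
 * mte_parent_slot()/mas_parent_type() can later decode slot 3 and the
 * range64 parent type from it.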
495 */ 496 static inline 497 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 498 const struct maple_enode *parent, unsigned char slot) 499 { 500 unsigned long val = (unsigned long)parent; 501 unsigned long shift; 502 unsigned long type; 503 enum maple_type p_type = mte_node_type(parent); 504 505 MAS_BUG_ON(mas, p_type == maple_dense); 506 MAS_BUG_ON(mas, p_type == maple_leaf_64); 507 508 switch (p_type) { 509 case maple_range_64: 510 case maple_arange_64: 511 shift = MAPLE_PARENT_SLOT_SHIFT; 512 type = MAPLE_PARENT_RANGE64; 513 break; 514 default: 515 case maple_dense: 516 case maple_leaf_64: 517 shift = type = 0; 518 break; 519 } 520 521 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 522 val |= (slot << shift) | type; 523 mte_to_node(enode)->parent = ma_parent_ptr(val); 524 } 525 526 /* 527 * mte_parent_slot() - get the parent slot of @enode. 528 * @enode: The encoded maple node. 529 * 530 * Return: The slot in the parent node where @enode resides. 531 */ 532 static __always_inline 533 unsigned int mte_parent_slot(const struct maple_enode *enode) 534 { 535 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 536 537 if (unlikely(val & MA_ROOT_PARENT)) 538 return 0; 539 540 /* 541 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 542 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 543 */ 544 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 545 } 546 547 /* 548 * mte_parent() - Get the parent of @node. 549 * @enode: The encoded maple node. 550 * 551 * Return: The parent maple node. 552 */ 553 static __always_inline 554 struct maple_node *mte_parent(const struct maple_enode *enode) 555 { 556 return (void *)((unsigned long) 557 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 558 } 559 560 /* 561 * ma_dead_node() - check if the @enode is dead. 562 * @enode: The encoded maple node 563 * 564 * Return: true if dead, false otherwise. 565 */ 566 static __always_inline bool ma_dead_node(const struct maple_node *node) 567 { 568 struct maple_node *parent; 569 570 /* Do not reorder reads from the node prior to the parent check */ 571 smp_rmb(); 572 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 573 return (parent == node); 574 } 575 576 /* 577 * mte_dead_node() - check if the @enode is dead. 578 * @enode: The encoded maple node 579 * 580 * Return: true if dead, false otherwise. 581 */ 582 static __always_inline bool mte_dead_node(const struct maple_enode *enode) 583 { 584 struct maple_node *node; 585 586 node = mte_to_node(enode); 587 return ma_dead_node(node); 588 } 589 590 /* 591 * ma_pivots() - Get a pointer to the maple node pivots. 592 * @node: the maple node 593 * @type: the node type 594 * 595 * In the event of a dead node, this array may be %NULL 596 * 597 * Return: A pointer to the maple node pivots 598 */ 599 static inline unsigned long *ma_pivots(struct maple_node *node, 600 enum maple_type type) 601 { 602 switch (type) { 603 case maple_arange_64: 604 return node->ma64.pivot; 605 case maple_range_64: 606 case maple_leaf_64: 607 return node->mr64.pivot; 608 case maple_dense: 609 return NULL; 610 } 611 return NULL; 612 } 613 614 /* 615 * ma_gaps() - Get a pointer to the maple node gaps. 
616 * @node: the maple node 617 * @type: the node type 618 * 619 * Return: A pointer to the maple node gaps 620 */ 621 static inline unsigned long *ma_gaps(struct maple_node *node, 622 enum maple_type type) 623 { 624 switch (type) { 625 case maple_arange_64: 626 return node->ma64.gap; 627 case maple_range_64: 628 case maple_leaf_64: 629 case maple_dense: 630 return NULL; 631 } 632 return NULL; 633 } 634 635 /* 636 * mas_safe_pivot() - get the pivot at @piv or mas->max. 637 * @mas: The maple state 638 * @pivots: The pointer to the maple node pivots 639 * @piv: The pivot to fetch 640 * @type: The maple node type 641 * 642 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 643 * otherwise. 644 */ 645 static __always_inline unsigned long 646 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 647 unsigned char piv, enum maple_type type) 648 { 649 if (piv >= mt_pivots[type]) 650 return mas->max; 651 652 return pivots[piv]; 653 } 654 655 /* 656 * mas_safe_min() - Return the minimum for a given offset. 657 * @mas: The maple state 658 * @pivots: The pointer to the maple node pivots 659 * @offset: The offset into the pivot array 660 * 661 * Return: The minimum range value that is contained in @offset. 662 */ 663 static inline unsigned long 664 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 665 { 666 if (likely(offset)) 667 return pivots[offset - 1] + 1; 668 669 return mas->min; 670 } 671 672 /* 673 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 674 * @mn: The encoded maple node 675 * @piv: The pivot offset 676 * @val: The value of the pivot 677 */ 678 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 679 unsigned long val) 680 { 681 struct maple_node *node = mte_to_node(mn); 682 enum maple_type type = mte_node_type(mn); 683 684 BUG_ON(piv >= mt_pivots[type]); 685 switch (type) { 686 case maple_range_64: 687 case maple_leaf_64: 688 node->mr64.pivot[piv] = val; 689 break; 690 case maple_arange_64: 691 node->ma64.pivot[piv] = val; 692 break; 693 case maple_dense: 694 break; 695 } 696 697 } 698 699 /* 700 * ma_slots() - Get a pointer to the maple node slots. 701 * @mn: The maple node 702 * @mt: The maple node type 703 * 704 * Return: A pointer to the maple node slots 705 */ 706 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 707 { 708 switch (mt) { 709 case maple_arange_64: 710 return mn->ma64.slot; 711 case maple_range_64: 712 case maple_leaf_64: 713 return mn->mr64.slot; 714 case maple_dense: 715 return mn->slot; 716 } 717 718 return NULL; 719 } 720 721 static inline bool mt_write_locked(const struct maple_tree *mt) 722 { 723 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 724 lockdep_is_held(&mt->ma_lock); 725 } 726 727 static __always_inline bool mt_locked(const struct maple_tree *mt) 728 { 729 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 730 lockdep_is_held(&mt->ma_lock); 731 } 732 733 static __always_inline void *mt_slot(const struct maple_tree *mt, 734 void __rcu **slots, unsigned char offset) 735 { 736 return rcu_dereference_check(slots[offset], mt_locked(mt)); 737 } 738 739 static __always_inline void *mt_slot_locked(struct maple_tree *mt, 740 void __rcu **slots, unsigned char offset) 741 { 742 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 743 } 744 /* 745 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 
746 * @mas: The maple state 747 * @slots: The pointer to the slots 748 * @offset: The offset into the slots array to fetch 749 * 750 * Return: The entry stored in @slots at the @offset. 751 */ 752 static __always_inline void *mas_slot_locked(struct ma_state *mas, 753 void __rcu **slots, unsigned char offset) 754 { 755 return mt_slot_locked(mas->tree, slots, offset); 756 } 757 758 /* 759 * mas_slot() - Get the slot value when not holding the maple tree lock. 760 * @mas: The maple state 761 * @slots: The pointer to the slots 762 * @offset: The offset into the slots array to fetch 763 * 764 * Return: The entry stored in @slots at the @offset 765 */ 766 static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 767 unsigned char offset) 768 { 769 return mt_slot(mas->tree, slots, offset); 770 } 771 772 /* 773 * mas_root() - Get the maple tree root. 774 * @mas: The maple state. 775 * 776 * Return: The pointer to the root of the tree 777 */ 778 static __always_inline void *mas_root(struct ma_state *mas) 779 { 780 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 781 } 782 783 static inline void *mt_root_locked(struct maple_tree *mt) 784 { 785 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 786 } 787 788 /* 789 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 790 * @mas: The maple state. 791 * 792 * Return: The pointer to the root of the tree 793 */ 794 static inline void *mas_root_locked(struct ma_state *mas) 795 { 796 return mt_root_locked(mas->tree); 797 } 798 799 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 800 enum maple_type mt) 801 { 802 switch (mt) { 803 case maple_arange_64: 804 return &mn->ma64.meta; 805 default: 806 return &mn->mr64.meta; 807 } 808 } 809 810 /* 811 * ma_set_meta() - Set the metadata information of a node. 812 * @mn: The maple node 813 * @mt: The maple node type 814 * @offset: The offset of the highest sub-gap in this node. 815 * @end: The end of the data in this node. 
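 *
 * For example (illustrative): a leaf with entries in slots 0-4 of a 16 slot
 * node can record its end with ma_set_meta(node, maple_leaf_64, 0, 4), so
 * later reads find the data end without scanning the pivots; the gap offset
 * is meaningless for leaves and is simply stored as 0.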
816 */ 817 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 818 unsigned char offset, unsigned char end) 819 { 820 struct maple_metadata *meta = ma_meta(mn, mt); 821 822 meta->gap = offset; 823 meta->end = end; 824 } 825 826 /* 827 * mt_clear_meta() - clear the metadata information of a node, if it exists 828 * @mt: The maple tree 829 * @mn: The maple node 830 * @type: The maple node type 831 */ 832 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 833 enum maple_type type) 834 { 835 struct maple_metadata *meta; 836 unsigned long *pivots; 837 void __rcu **slots; 838 void *next; 839 840 switch (type) { 841 case maple_range_64: 842 pivots = mn->mr64.pivot; 843 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 844 slots = mn->mr64.slot; 845 next = mt_slot_locked(mt, slots, 846 MAPLE_RANGE64_SLOTS - 1); 847 if (unlikely((mte_to_node(next) && 848 mte_node_type(next)))) 849 return; /* no metadata, could be node */ 850 } 851 fallthrough; 852 case maple_arange_64: 853 meta = ma_meta(mn, type); 854 break; 855 default: 856 return; 857 } 858 859 meta->gap = 0; 860 meta->end = 0; 861 } 862 863 /* 864 * ma_meta_end() - Get the data end of a node from the metadata 865 * @mn: The maple node 866 * @mt: The maple node type 867 */ 868 static inline unsigned char ma_meta_end(struct maple_node *mn, 869 enum maple_type mt) 870 { 871 struct maple_metadata *meta = ma_meta(mn, mt); 872 873 return meta->end; 874 } 875 876 /* 877 * ma_meta_gap() - Get the largest gap location of a node from the metadata 878 * @mn: The maple node 879 */ 880 static inline unsigned char ma_meta_gap(struct maple_node *mn) 881 { 882 return mn->ma64.meta.gap; 883 } 884 885 /* 886 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 887 * @mn: The maple node 888 * @mt: The maple node type 889 * @offset: The location of the largest gap. 890 */ 891 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 892 unsigned char offset) 893 { 894 895 struct maple_metadata *meta = ma_meta(mn, mt); 896 897 meta->gap = offset; 898 } 899 900 /* 901 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 902 * @mat: the ma_topiary, a linked list of dead nodes. 903 * @dead_enode: the node to be marked as dead and added to the tail of the list 904 * 905 * Add the @dead_enode to the linked list in @mat. 906 */ 907 static inline void mat_add(struct ma_topiary *mat, 908 struct maple_enode *dead_enode) 909 { 910 mte_set_node_dead(dead_enode); 911 mte_to_mat(dead_enode)->next = NULL; 912 if (!mat->tail) { 913 mat->tail = mat->head = dead_enode; 914 return; 915 } 916 917 mte_to_mat(mat->tail)->next = dead_enode; 918 mat->tail = dead_enode; 919 } 920 921 static void mt_free_walk(struct rcu_head *head); 922 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 923 bool free); 924 /* 925 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 926 * @mas: the maple state 927 * @mat: the ma_topiary linked list of dead nodes to free. 928 * 929 * Destroy walk a dead list. 
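 *
 * A minimal usage sketch, assuming the MA_TOPIARY() initializer from the
 * header:
 *
 *	MA_TOPIARY(mat, mas->tree);
 *	mat_add(&mat, old_enode);	(mark dead, queue for freeing)
 *	...
 *	mas_mat_destroy(mas, &mat);	(walk and free, via RCU if enabled)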
930 */ 931 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 932 { 933 struct maple_enode *next; 934 struct maple_node *node; 935 bool in_rcu = mt_in_rcu(mas->tree); 936 937 while (mat->head) { 938 next = mte_to_mat(mat->head)->next; 939 node = mte_to_node(mat->head); 940 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 941 if (in_rcu) 942 call_rcu(&node->rcu, mt_free_walk); 943 mat->head = next; 944 } 945 } 946 /* 947 * mas_descend() - Descend into the slot stored in the ma_state. 948 * @mas: the maple state. 949 * 950 * Note: Not RCU safe, only use in write side or debug code. 951 */ 952 static inline void mas_descend(struct ma_state *mas) 953 { 954 enum maple_type type; 955 unsigned long *pivots; 956 struct maple_node *node; 957 void __rcu **slots; 958 959 node = mas_mn(mas); 960 type = mte_node_type(mas->node); 961 pivots = ma_pivots(node, type); 962 slots = ma_slots(node, type); 963 964 if (mas->offset) 965 mas->min = pivots[mas->offset - 1] + 1; 966 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 967 mas->node = mas_slot(mas, slots, mas->offset); 968 } 969 970 /* 971 * mas_ascend() - Walk up a level of the tree. 972 * @mas: The maple state 973 * 974 * Sets the @mas->max and @mas->min for the parent node of mas->node. This 975 * may cause several levels of walking up to find the correct min and max. 976 * May find a dead node which will cause a premature return. 977 * Return: 1 on dead node, 0 otherwise 978 */ 979 static int mas_ascend(struct ma_state *mas) 980 { 981 struct maple_enode *p_enode; /* parent enode. */ 982 struct maple_enode *a_enode; /* ancestor enode. */ 983 struct maple_node *a_node; /* ancestor node. */ 984 struct maple_node *p_node; /* parent node. */ 985 unsigned char a_slot; 986 enum maple_type a_type; 987 unsigned long min, max; 988 unsigned long *pivots; 989 bool set_max = false, set_min = false; 990 991 a_node = mas_mn(mas); 992 if (ma_is_root(a_node)) { 993 mas->offset = 0; 994 return 0; 995 } 996 997 p_node = mte_parent(mas->node); 998 if (unlikely(a_node == p_node)) 999 return 1; 1000 1001 a_type = mas_parent_type(mas, mas->node); 1002 mas->offset = mte_parent_slot(mas->node); 1003 a_enode = mt_mk_node(p_node, a_type); 1004 1005 /* Check to make sure all parent information is still accurate */ 1006 if (p_node != mte_parent(mas->node)) 1007 return 1; 1008 1009 mas->node = a_enode; 1010 1011 if (mte_is_root(a_enode)) { 1012 mas->max = ULONG_MAX; 1013 mas->min = 0; 1014 return 0; 1015 } 1016 1017 min = 0; 1018 max = ULONG_MAX; 1019 1020 /* 1021 * !mas->offset implies that parent node min == mas->min. 1022 * mas->offset > 0 implies that we need to walk up to find the 1023 * implied pivot min. 
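	 *
	 * For example, when mas->node is the left-most child of its parent
	 * (offset 0), the parent shares the same minimum, so min can be taken
	 * directly from mas->min and only max may need to be found above.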
1024 */ 1025 if (!mas->offset) { 1026 min = mas->min; 1027 set_min = true; 1028 } 1029 1030 if (mas->max == ULONG_MAX) 1031 set_max = true; 1032 1033 do { 1034 p_enode = a_enode; 1035 a_type = mas_parent_type(mas, p_enode); 1036 a_node = mte_parent(p_enode); 1037 a_slot = mte_parent_slot(p_enode); 1038 a_enode = mt_mk_node(a_node, a_type); 1039 pivots = ma_pivots(a_node, a_type); 1040 1041 if (unlikely(ma_dead_node(a_node))) 1042 return 1; 1043 1044 if (!set_min && a_slot) { 1045 set_min = true; 1046 min = pivots[a_slot - 1] + 1; 1047 } 1048 1049 if (!set_max && a_slot < mt_pivots[a_type]) { 1050 set_max = true; 1051 max = pivots[a_slot]; 1052 } 1053 1054 if (unlikely(ma_dead_node(a_node))) 1055 return 1; 1056 1057 if (unlikely(ma_is_root(a_node))) 1058 break; 1059 1060 } while (!set_min || !set_max); 1061 1062 mas->max = max; 1063 mas->min = min; 1064 return 0; 1065 } 1066 1067 /* 1068 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1069 * @mas: The maple state 1070 * 1071 * Return: A pointer to a maple node. 1072 */ 1073 static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas) 1074 { 1075 struct maple_node *ret; 1076 1077 if (mas->alloc) { 1078 ret = mas->alloc; 1079 mas->alloc = NULL; 1080 goto out; 1081 } 1082 1083 if (WARN_ON_ONCE(!mas->sheaf)) 1084 return NULL; 1085 1086 ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf); 1087 1088 out: 1089 memset(ret, 0, sizeof(*ret)); 1090 return ret; 1091 } 1092 1093 /* 1094 * mas_alloc_nodes() - Allocate nodes into a maple state 1095 * @mas: The maple state 1096 * @gfp: The GFP Flags 1097 */ 1098 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1099 { 1100 if (!mas->node_request) 1101 return; 1102 1103 if (mas->node_request == 1) { 1104 if (mas->sheaf) 1105 goto use_sheaf; 1106 1107 if (mas->alloc) 1108 return; 1109 1110 mas->alloc = mt_alloc_one(gfp); 1111 if (!mas->alloc) 1112 goto error; 1113 1114 mas->node_request = 0; 1115 return; 1116 } 1117 1118 use_sheaf: 1119 if (unlikely(mas->alloc)) { 1120 kfree(mas->alloc); 1121 mas->alloc = NULL; 1122 } 1123 1124 if (mas->sheaf) { 1125 unsigned long refill; 1126 1127 refill = mas->node_request; 1128 if (kmem_cache_sheaf_size(mas->sheaf) >= refill) { 1129 mas->node_request = 0; 1130 return; 1131 } 1132 1133 if (mt_refill_sheaf(gfp, &mas->sheaf, refill)) 1134 goto error; 1135 1136 mas->node_request = 0; 1137 return; 1138 } 1139 1140 mas->sheaf = mt_get_sheaf(gfp, mas->node_request); 1141 if (likely(mas->sheaf)) { 1142 mas->node_request = 0; 1143 return; 1144 } 1145 1146 error: 1147 mas_set_err(mas, -ENOMEM); 1148 } 1149 1150 static inline void mas_empty_nodes(struct ma_state *mas) 1151 { 1152 mas->node_request = 0; 1153 if (mas->sheaf) { 1154 mt_return_sheaf(mas->sheaf); 1155 mas->sheaf = NULL; 1156 } 1157 1158 if (mas->alloc) { 1159 kfree(mas->alloc); 1160 mas->alloc = NULL; 1161 } 1162 } 1163 1164 /* 1165 * mas_free() - Free an encoded maple node 1166 * @mas: The maple state 1167 * @used: The encoded maple node to free. 1168 * 1169 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1170 * otherwise. 1171 */ 1172 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1173 { 1174 ma_free_rcu(mte_to_node(used)); 1175 } 1176 1177 /* 1178 * mas_start() - Sets up maple state for operations. 1179 * @mas: The maple state. 1180 * 1181 * If mas->status == ma_start, then set the min, max and depth to 1182 * defaults. 
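 *
 * A typical (illustrative) entry: MA_STATE(mas, &tree, 5, 10) leaves the
 * state in ma_start, and the first walk on it (e.g. mas_walk()) comes
 * through here to load the root before descending.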
1183 * 1184 * Return: 1185 * - If mas->node is an error or not mas_start, return NULL. 1186 * - If it's an empty tree: NULL & mas->status == ma_none 1187 * - If it's a single entry: The entry & mas->status == ma_root 1188 * - If it's a tree: NULL & mas->status == ma_active 1189 */ 1190 static inline struct maple_enode *mas_start(struct ma_state *mas) 1191 { 1192 if (likely(mas_is_start(mas))) { 1193 struct maple_enode *root; 1194 1195 mas->min = 0; 1196 mas->max = ULONG_MAX; 1197 1198 retry: 1199 mas->depth = 0; 1200 root = mas_root(mas); 1201 /* Tree with nodes */ 1202 if (likely(xa_is_node(root))) { 1203 mas->depth = 0; 1204 mas->status = ma_active; 1205 mas->node = mte_safe_root(root); 1206 mas->offset = 0; 1207 if (mte_dead_node(mas->node)) 1208 goto retry; 1209 1210 return NULL; 1211 } 1212 1213 mas->node = NULL; 1214 /* empty tree */ 1215 if (unlikely(!root)) { 1216 mas->status = ma_none; 1217 mas->offset = MAPLE_NODE_SLOTS; 1218 return NULL; 1219 } 1220 1221 /* Single entry tree */ 1222 mas->status = ma_root; 1223 mas->offset = MAPLE_NODE_SLOTS; 1224 1225 /* Single entry tree. */ 1226 if (mas->index > 0) 1227 return NULL; 1228 1229 return root; 1230 } 1231 1232 return NULL; 1233 } 1234 1235 /* 1236 * ma_data_end() - Find the end of the data in a node. 1237 * @node: The maple node 1238 * @type: The maple node type 1239 * @pivots: The array of pivots in the node 1240 * @max: The maximum value in the node 1241 * 1242 * Uses metadata to find the end of the data when possible. 1243 * Return: The zero indexed last slot with data (may be null). 1244 */ 1245 static __always_inline unsigned char ma_data_end(struct maple_node *node, 1246 enum maple_type type, unsigned long *pivots, unsigned long max) 1247 { 1248 unsigned char offset; 1249 1250 if (!pivots) 1251 return 0; 1252 1253 if (type == maple_arange_64) 1254 return ma_meta_end(node, type); 1255 1256 offset = mt_pivots[type] - 1; 1257 if (likely(!pivots[offset])) 1258 return ma_meta_end(node, type); 1259 1260 if (likely(pivots[offset] == max)) 1261 return offset; 1262 1263 return mt_pivots[type]; 1264 } 1265 1266 /* 1267 * mas_data_end() - Find the end of the data (slot). 1268 * @mas: the maple state 1269 * 1270 * This method is optimized to check the metadata of a node if the node type 1271 * supports data end metadata. 1272 * 1273 * Return: The zero indexed last slot with data (may be null). 1274 */ 1275 static inline unsigned char mas_data_end(struct ma_state *mas) 1276 { 1277 enum maple_type type; 1278 struct maple_node *node; 1279 unsigned char offset; 1280 unsigned long *pivots; 1281 1282 type = mte_node_type(mas->node); 1283 node = mas_mn(mas); 1284 if (type == maple_arange_64) 1285 return ma_meta_end(node, type); 1286 1287 pivots = ma_pivots(node, type); 1288 if (unlikely(ma_dead_node(node))) 1289 return 0; 1290 1291 offset = mt_pivots[type] - 1; 1292 if (likely(!pivots[offset])) 1293 return ma_meta_end(node, type); 1294 1295 if (likely(pivots[offset] == mas->max)) 1296 return offset; 1297 1298 return mt_pivots[type]; 1299 } 1300 1301 /* 1302 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1303 * @mas: the maple state 1304 * 1305 * Return: The maximum gap in the leaf. 
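 *
 * Illustrative example: a leaf spanning [0, 100] with slots {A, NULL, B, C}
 * and pivots {9, 49, 59} has one gap covering the range [10, 49], so 40 is
 * returned.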
1306 */ 1307 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1308 { 1309 enum maple_type mt; 1310 unsigned long pstart, gap, max_gap; 1311 struct maple_node *mn; 1312 unsigned long *pivots; 1313 void __rcu **slots; 1314 unsigned char i; 1315 unsigned char max_piv; 1316 1317 mt = mte_node_type(mas->node); 1318 mn = mas_mn(mas); 1319 slots = ma_slots(mn, mt); 1320 max_gap = 0; 1321 if (unlikely(ma_is_dense(mt))) { 1322 gap = 0; 1323 for (i = 0; i < mt_slots[mt]; i++) { 1324 if (slots[i]) { 1325 if (gap > max_gap) 1326 max_gap = gap; 1327 gap = 0; 1328 } else { 1329 gap++; 1330 } 1331 } 1332 if (gap > max_gap) 1333 max_gap = gap; 1334 return max_gap; 1335 } 1336 1337 /* 1338 * Check the first implied pivot optimizes the loop below and slot 1 may 1339 * be skipped if there is a gap in slot 0. 1340 */ 1341 pivots = ma_pivots(mn, mt); 1342 if (likely(!slots[0])) { 1343 max_gap = pivots[0] - mas->min + 1; 1344 i = 2; 1345 } else { 1346 i = 1; 1347 } 1348 1349 /* reduce max_piv as the special case is checked before the loop */ 1350 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1351 /* 1352 * Check end implied pivot which can only be a gap on the right most 1353 * node. 1354 */ 1355 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1356 gap = ULONG_MAX - pivots[max_piv]; 1357 if (gap > max_gap) 1358 max_gap = gap; 1359 1360 if (max_gap > pivots[max_piv] - mas->min) 1361 return max_gap; 1362 } 1363 1364 for (; i <= max_piv; i++) { 1365 /* data == no gap. */ 1366 if (likely(slots[i])) 1367 continue; 1368 1369 pstart = pivots[i - 1]; 1370 gap = pivots[i] - pstart; 1371 if (gap > max_gap) 1372 max_gap = gap; 1373 1374 /* There cannot be two gaps in a row. */ 1375 i++; 1376 } 1377 return max_gap; 1378 } 1379 1380 /* 1381 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1382 * @node: The maple node 1383 * @gaps: The pointer to the gaps 1384 * @mt: The maple node type 1385 * @off: Pointer to store the offset location of the gap. 1386 * 1387 * Uses the metadata data end to scan backwards across set gaps. 1388 * 1389 * Return: The maximum gap value 1390 */ 1391 static inline unsigned long 1392 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1393 unsigned char *off) 1394 { 1395 unsigned char offset, i; 1396 unsigned long max_gap = 0; 1397 1398 i = offset = ma_meta_end(node, mt); 1399 do { 1400 if (gaps[i] > max_gap) { 1401 max_gap = gaps[i]; 1402 offset = i; 1403 } 1404 } while (i--); 1405 1406 *off = offset; 1407 return max_gap; 1408 } 1409 1410 /* 1411 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1412 * @mas: The maple state. 1413 * 1414 * Return: The gap value. 1415 */ 1416 static inline unsigned long mas_max_gap(struct ma_state *mas) 1417 { 1418 unsigned long *gaps; 1419 unsigned char offset; 1420 enum maple_type mt; 1421 struct maple_node *node; 1422 1423 mt = mte_node_type(mas->node); 1424 if (ma_is_leaf(mt)) 1425 return mas_leaf_max_gap(mas); 1426 1427 node = mas_mn(mas); 1428 MAS_BUG_ON(mas, mt != maple_arange_64); 1429 offset = ma_meta_gap(node); 1430 gaps = ma_gaps(node, mt); 1431 return gaps[offset]; 1432 } 1433 1434 /* 1435 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1436 * @mas: The maple state 1437 * @offset: The gap offset in the parent to set 1438 * @new: The new gap value. 1439 * 1440 * Set the parent gap then continue to set the gap upwards, using the metadata 1441 * of the parent to see if it is necessary to check the node above. 
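 *
 * For example, if a store shrinks this node's largest gap from 100 to 20 and
 * that slot held the parent's recorded maximum, the parent rescans its gap
 * array with ma_max_gap(), records the new winner in its metadata, and the
 * walk continues upward; it stops early at any ancestor whose recorded
 * maximum is unaffected.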
1442 */ 1443 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1444 unsigned long new) 1445 { 1446 unsigned long meta_gap = 0; 1447 struct maple_node *pnode; 1448 struct maple_enode *penode; 1449 unsigned long *pgaps; 1450 unsigned char meta_offset; 1451 enum maple_type pmt; 1452 1453 pnode = mte_parent(mas->node); 1454 pmt = mas_parent_type(mas, mas->node); 1455 penode = mt_mk_node(pnode, pmt); 1456 pgaps = ma_gaps(pnode, pmt); 1457 1458 ascend: 1459 MAS_BUG_ON(mas, pmt != maple_arange_64); 1460 meta_offset = ma_meta_gap(pnode); 1461 meta_gap = pgaps[meta_offset]; 1462 1463 pgaps[offset] = new; 1464 1465 if (meta_gap == new) 1466 return; 1467 1468 if (offset != meta_offset) { 1469 if (meta_gap > new) 1470 return; 1471 1472 ma_set_meta_gap(pnode, pmt, offset); 1473 } else if (new < meta_gap) { 1474 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1475 ma_set_meta_gap(pnode, pmt, meta_offset); 1476 } 1477 1478 if (ma_is_root(pnode)) 1479 return; 1480 1481 /* Go to the parent node. */ 1482 pnode = mte_parent(penode); 1483 pmt = mas_parent_type(mas, penode); 1484 pgaps = ma_gaps(pnode, pmt); 1485 offset = mte_parent_slot(penode); 1486 penode = mt_mk_node(pnode, pmt); 1487 goto ascend; 1488 } 1489 1490 /* 1491 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1492 * @mas: the maple state. 1493 */ 1494 static inline void mas_update_gap(struct ma_state *mas) 1495 { 1496 unsigned char pslot; 1497 unsigned long p_gap; 1498 unsigned long max_gap; 1499 1500 if (!mt_is_alloc(mas->tree)) 1501 return; 1502 1503 if (mte_is_root(mas->node)) 1504 return; 1505 1506 max_gap = mas_max_gap(mas); 1507 1508 pslot = mte_parent_slot(mas->node); 1509 p_gap = ma_gaps(mte_parent(mas->node), 1510 mas_parent_type(mas, mas->node))[pslot]; 1511 1512 if (p_gap != max_gap) 1513 mas_parent_gap(mas, pslot, max_gap); 1514 } 1515 1516 /* 1517 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1518 * @parent with the slot encoded. 1519 * @mas: the maple state (for the tree) 1520 * @parent: the maple encoded node containing the children. 1521 */ 1522 static inline void mas_adopt_children(struct ma_state *mas, 1523 struct maple_enode *parent) 1524 { 1525 enum maple_type type = mte_node_type(parent); 1526 struct maple_node *node = mte_to_node(parent); 1527 void __rcu **slots = ma_slots(node, type); 1528 unsigned long *pivots = ma_pivots(node, type); 1529 struct maple_enode *child; 1530 unsigned char offset; 1531 1532 offset = ma_data_end(node, type, pivots, mas->max); 1533 do { 1534 child = mas_slot_locked(mas, slots, offset); 1535 mas_set_parent(mas, child, parent, offset); 1536 } while (offset--); 1537 } 1538 1539 /* 1540 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1541 * node as dead. 1542 * @mas: the maple state with the new node 1543 * @old_enode: The old maple encoded node to replace. 
 * @new_height: if we are inserting a root node, update the height of the tree
 */
static inline void mas_put_in_tree(struct ma_state *mas,
		struct maple_enode *old_enode, char new_height)
	__must_hold(mas->tree->ma_lock)
{
	unsigned char offset;
	void __rcu **slots;

	if (mte_is_root(mas->node)) {
		mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
		rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
		mt_set_height(mas->tree, new_height);
	} else {

		offset = mte_parent_slot(mas->node);
		slots = ma_slots(mte_parent(mas->node),
				 mas_parent_type(mas, mas->node));
		rcu_assign_pointer(slots[offset], mas->node);
	}

	mte_set_node_dead(old_enode);
}

/*
 * mas_replace_node() - Replace a node by putting it in the tree, marking it
 * dead, and freeing it. Uses the parent encoding to locate the maple node in
 * the tree.
 * @mas: the ma_state with @mas->node pointing to the new node.
 * @old_enode: The old maple encoded node.
 * @new_height: The new height of the tree as a result of the operation
 */
static inline void mas_replace_node(struct ma_state *mas,
		struct maple_enode *old_enode, unsigned char new_height)
	__must_hold(mas->tree->ma_lock)
{
	mas_put_in_tree(mas, old_enode, new_height);
	mas_free(mas, old_enode);
}

/*
 * mas_find_child() - Find a child whose parent is @mas->node.
 * @mas: the maple state with the parent.
 * @child: the maple state to store the child.
 */
static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
	__must_hold(mas->tree->ma_lock)
{
	enum maple_type mt;
	unsigned char offset;
	unsigned char end;
	unsigned long *pivots;
	struct maple_enode *entry;
	struct maple_node *node;
	void __rcu **slots;

	mt = mte_node_type(mas->node);
	node = mas_mn(mas);
	slots = ma_slots(node, mt);
	pivots = ma_pivots(node, mt);
	end = ma_data_end(node, mt, pivots, mas->max);
	for (offset = mas->offset; offset <= end; offset++) {
		entry = mas_slot_locked(mas, slots, offset);
		if (mte_parent(entry) == node) {
			*child = *mas;
			mas->offset = offset + 1;
			child->offset = offset;
			mas_descend(child);
			child->offset = 0;
			return true;
		}
	}
	return false;
}

/*
 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the
 * old data or set b_node->b_end.
 * @b_node: the maple_big_node
 * @shift: the shift count
 */
static inline void mab_shift_right(struct maple_big_node *b_node,
				   unsigned char shift)
{
	unsigned long size = b_node->b_end * sizeof(unsigned long);

	memmove(b_node->pivot + shift, b_node->pivot, size);
	memmove(b_node->slot + shift, b_node->slot, size);
	if (b_node->type == maple_arange_64)
		memmove(b_node->gap + shift, b_node->gap, size);
}

/*
 * mab_middle_node() - Check if a middle node is needed (unlikely)
 * @b_node: the maple_big_node that contains the data.
 * @split: the potential split location
 * @slot_count: the size that can be stored in a single node being considered.
 *
 * Return: true if a middle node is required.
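 *
 * For example (illustrative): with 16 slot nodes, a big node holding 32
 * entries, or 31 entries where the candidate split lands on a NULL, cannot
 * be divided into two legal nodes, so a third (middle) node is used.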
1643 */ 1644 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1645 unsigned char slot_count) 1646 { 1647 unsigned char size = b_node->b_end; 1648 1649 if (size >= 2 * slot_count) 1650 return true; 1651 1652 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1653 return true; 1654 1655 return false; 1656 } 1657 1658 /* 1659 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1660 * @b_node: the maple_big_node with the data 1661 * @split: the suggested split location 1662 * @slot_count: the number of slots in the node being considered. 1663 * 1664 * Return: the split location. 1665 */ 1666 static inline int mab_no_null_split(struct maple_big_node *b_node, 1667 unsigned char split, unsigned char slot_count) 1668 { 1669 if (!b_node->slot[split]) { 1670 /* 1671 * If the split is less than the max slot && the right side will 1672 * still be sufficient, then increment the split on NULL. 1673 */ 1674 if ((split < slot_count - 1) && 1675 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1676 split++; 1677 else 1678 split--; 1679 } 1680 return split; 1681 } 1682 1683 /* 1684 * mab_calc_split() - Calculate the split location and if there needs to be two 1685 * splits. 1686 * @mas: The maple state 1687 * @bn: The maple_big_node with the data 1688 * @mid_split: The second split, if required. 0 otherwise. 1689 * 1690 * Return: The first split location. The middle split is set in @mid_split. 1691 */ 1692 static inline int mab_calc_split(struct ma_state *mas, 1693 struct maple_big_node *bn, unsigned char *mid_split) 1694 { 1695 unsigned char b_end = bn->b_end; 1696 int split = b_end / 2; /* Assume equal split. */ 1697 unsigned char slot_count = mt_slots[bn->type]; 1698 1699 /* 1700 * To support gap tracking, all NULL entries are kept together and a node cannot 1701 * end on a NULL entry, with the exception of the left-most leaf. The 1702 * limitation means that the split of a node must be checked for this condition 1703 * and be able to put more data in one direction or the other. 1704 * 1705 * Although extremely rare, it is possible to enter what is known as the 3-way 1706 * split scenario. The 3-way split comes about by means of a store of a range 1707 * that overwrites the end and beginning of two full nodes. The result is a set 1708 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can 1709 * also be located in different parent nodes which are also full. This can 1710 * carry upwards all the way to the root in the worst case. 1711 */ 1712 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1713 split = b_end / 3; 1714 *mid_split = split * 2; 1715 } else { 1716 *mid_split = 0; 1717 } 1718 1719 /* Avoid ending a node on a NULL entry */ 1720 split = mab_no_null_split(bn, split, slot_count); 1721 1722 if (unlikely(*mid_split)) 1723 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1724 1725 return split; 1726 } 1727 1728 /* 1729 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1730 * and set @b_node->b_end to the next free slot. 1731 * @mas: The maple state 1732 * @mas_start: The starting slot to copy 1733 * @mas_end: The end slot to copy (inclusively) 1734 * @b_node: The maple_big_node to place the data 1735 * @mab_start: The starting location in maple_big_node to store the data. 
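 *
 * A typical (illustrative) use from the write path: mas_mab_cp(mas, 0,
 * slot - 1, b_node, 0) copies everything to the left of the insertion point
 * into the big node and leaves b_node->b_end at the next free big node slot,
 * so the new entry can be appended right after it.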
1736 */ 1737 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1738 unsigned char mas_end, struct maple_big_node *b_node, 1739 unsigned char mab_start) 1740 { 1741 enum maple_type mt; 1742 struct maple_node *node; 1743 void __rcu **slots; 1744 unsigned long *pivots, *gaps; 1745 int i = mas_start, j = mab_start; 1746 unsigned char piv_end; 1747 1748 node = mas_mn(mas); 1749 mt = mte_node_type(mas->node); 1750 pivots = ma_pivots(node, mt); 1751 if (!i) { 1752 b_node->pivot[j] = pivots[i++]; 1753 if (unlikely(i > mas_end)) 1754 goto complete; 1755 j++; 1756 } 1757 1758 piv_end = min(mas_end, mt_pivots[mt]); 1759 for (; i < piv_end; i++, j++) { 1760 b_node->pivot[j] = pivots[i]; 1761 if (unlikely(!b_node->pivot[j])) 1762 goto complete; 1763 1764 if (unlikely(mas->max == b_node->pivot[j])) 1765 goto complete; 1766 } 1767 1768 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1769 1770 complete: 1771 b_node->b_end = ++j; 1772 j -= mab_start; 1773 slots = ma_slots(node, mt); 1774 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1775 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1776 gaps = ma_gaps(node, mt); 1777 memcpy(b_node->gap + mab_start, gaps + mas_start, 1778 sizeof(unsigned long) * j); 1779 } 1780 } 1781 1782 /* 1783 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1784 * @node: The maple node 1785 * @mt: The maple type 1786 * @end: The node end 1787 */ 1788 static inline void mas_leaf_set_meta(struct maple_node *node, 1789 enum maple_type mt, unsigned char end) 1790 { 1791 if (end < mt_slots[mt] - 1) 1792 ma_set_meta(node, mt, 0, end); 1793 } 1794 1795 /* 1796 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 1797 * @b_node: the maple_big_node that has the data 1798 * @mab_start: the start location in @b_node. 1799 * @mab_end: The end location in @b_node (inclusively) 1800 * @mas: The maple state with the maple encoded node. 1801 */ 1802 static inline void mab_mas_cp(struct maple_big_node *b_node, 1803 unsigned char mab_start, unsigned char mab_end, 1804 struct ma_state *mas, bool new_max) 1805 { 1806 int i, j = 0; 1807 enum maple_type mt = mte_node_type(mas->node); 1808 struct maple_node *node = mte_to_node(mas->node); 1809 void __rcu **slots = ma_slots(node, mt); 1810 unsigned long *pivots = ma_pivots(node, mt); 1811 unsigned long *gaps = NULL; 1812 unsigned char end; 1813 1814 if (mab_end - mab_start > mt_pivots[mt]) 1815 mab_end--; 1816 1817 if (!pivots[mt_pivots[mt] - 1]) 1818 slots[mt_pivots[mt]] = NULL; 1819 1820 i = mab_start; 1821 do { 1822 pivots[j++] = b_node->pivot[i++]; 1823 } while (i <= mab_end && likely(b_node->pivot[i])); 1824 1825 memcpy(slots, b_node->slot + mab_start, 1826 sizeof(void *) * (i - mab_start)); 1827 1828 if (new_max) 1829 mas->max = b_node->pivot[i - 1]; 1830 1831 end = j - 1; 1832 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 1833 unsigned long max_gap = 0; 1834 unsigned char offset = 0; 1835 1836 gaps = ma_gaps(node, mt); 1837 do { 1838 gaps[--j] = b_node->gap[--i]; 1839 if (gaps[j] > max_gap) { 1840 offset = j; 1841 max_gap = gaps[j]; 1842 } 1843 } while (j); 1844 1845 ma_set_meta(node, mt, offset, end); 1846 } else { 1847 mas_leaf_set_meta(node, mt, end); 1848 } 1849 } 1850 1851 /* 1852 * mas_store_b_node() - Store an @entry into the b_node while also copying the 1853 * data from a maple encoded node. 
1854 * @wr_mas: the maple write state 1855 * @b_node: the maple_big_node to fill with data 1856 * @offset_end: the offset to end copying 1857 * 1858 * Return: The actual end of the data stored in @b_node 1859 */ 1860 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 1861 struct maple_big_node *b_node, unsigned char offset_end) 1862 { 1863 unsigned char slot; 1864 unsigned char b_end; 1865 /* Possible underflow of piv will wrap back to 0 before use. */ 1866 unsigned long piv; 1867 struct ma_state *mas = wr_mas->mas; 1868 1869 b_node->type = wr_mas->type; 1870 b_end = 0; 1871 slot = mas->offset; 1872 if (slot) { 1873 /* Copy start data up to insert. */ 1874 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 1875 b_end = b_node->b_end; 1876 piv = b_node->pivot[b_end - 1]; 1877 } else 1878 piv = mas->min - 1; 1879 1880 if (piv + 1 < mas->index) { 1881 /* Handle range starting after old range */ 1882 b_node->slot[b_end] = wr_mas->content; 1883 if (!wr_mas->content) 1884 b_node->gap[b_end] = mas->index - 1 - piv; 1885 b_node->pivot[b_end++] = mas->index - 1; 1886 } 1887 1888 /* Store the new entry. */ 1889 mas->offset = b_end; 1890 b_node->slot[b_end] = wr_mas->entry; 1891 b_node->pivot[b_end] = mas->last; 1892 1893 /* Appended. */ 1894 if (mas->last >= mas->max) 1895 goto b_end; 1896 1897 /* Handle new range ending before old range ends */ 1898 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 1899 if (piv > mas->last) { 1900 if (offset_end != slot) 1901 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 1902 offset_end); 1903 1904 b_node->slot[++b_end] = wr_mas->content; 1905 if (!wr_mas->content) 1906 b_node->gap[b_end] = piv - mas->last + 1; 1907 b_node->pivot[b_end] = piv; 1908 } 1909 1910 slot = offset_end + 1; 1911 if (slot > mas->end) 1912 goto b_end; 1913 1914 /* Copy end data to the end of the node. */ 1915 mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end); 1916 b_node->b_end--; 1917 return; 1918 1919 b_end: 1920 b_node->b_end = b_end; 1921 } 1922 1923 /* 1924 * mas_prev_sibling() - Find the previous node with the same parent. 1925 * @mas: the maple state 1926 * 1927 * Return: True if there is a previous sibling, false otherwise. 1928 */ 1929 static inline bool mas_prev_sibling(struct ma_state *mas) 1930 { 1931 unsigned int p_slot = mte_parent_slot(mas->node); 1932 1933 /* For root node, p_slot is set to 0 by mte_parent_slot(). */ 1934 if (!p_slot) 1935 return false; 1936 1937 mas_ascend(mas); 1938 mas->offset = p_slot - 1; 1939 mas_descend(mas); 1940 return true; 1941 } 1942 1943 /* 1944 * mas_next_sibling() - Find the next node with the same parent. 1945 * @mas: the maple state 1946 * 1947 * Return: true if there is a next sibling, false otherwise. 1948 */ 1949 static inline bool mas_next_sibling(struct ma_state *mas) 1950 { 1951 MA_STATE(parent, mas->tree, mas->index, mas->last); 1952 1953 if (mte_is_root(mas->node)) 1954 return false; 1955 1956 parent = *mas; 1957 mas_ascend(&parent); 1958 parent.offset = mte_parent_slot(mas->node) + 1; 1959 if (parent.offset > mas_data_end(&parent)) 1960 return false; 1961 1962 *mas = parent; 1963 mas_descend(mas); 1964 return true; 1965 } 1966 1967 /* 1968 * mas_node_or_none() - Set the enode and state. 1969 * @mas: the maple state 1970 * @enode: The encoded maple node. 1971 * 1972 * Set the node to the enode and the status. 
1973 */ 1974 static inline void mas_node_or_none(struct ma_state *mas, 1975 struct maple_enode *enode) 1976 { 1977 if (enode) { 1978 mas->node = enode; 1979 mas->status = ma_active; 1980 } else { 1981 mas->node = NULL; 1982 mas->status = ma_none; 1983 } 1984 } 1985 1986 /* 1987 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 1988 * If @mas->index cannot be found within the containing 1989 * node, we traverse to the last entry in the node. 1990 * @wr_mas: The maple write state 1991 * 1992 * Uses mas_slot_locked() and does not need to worry about dead nodes. 1993 */ 1994 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 1995 { 1996 struct ma_state *mas = wr_mas->mas; 1997 unsigned char count, offset; 1998 1999 if (unlikely(ma_is_dense(wr_mas->type))) { 2000 wr_mas->r_max = wr_mas->r_min = mas->index; 2001 mas->offset = mas->index = mas->min; 2002 return; 2003 } 2004 2005 wr_mas->node = mas_mn(wr_mas->mas); 2006 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2007 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, 2008 wr_mas->pivots, mas->max); 2009 offset = mas->offset; 2010 2011 while (offset < count && mas->index > wr_mas->pivots[offset]) 2012 offset++; 2013 2014 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; 2015 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2016 wr_mas->offset_end = mas->offset = offset; 2017 } 2018 2019 /* 2020 * mast_rebalance_next() - Rebalance against the next node 2021 * @mast: The maple subtree state 2022 */ 2023 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2024 { 2025 unsigned char b_end = mast->bn->b_end; 2026 2027 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2028 mast->bn, b_end); 2029 mast->orig_r->last = mast->orig_r->max; 2030 } 2031 2032 /* 2033 * mast_rebalance_prev() - Rebalance against the previous node 2034 * @mast: The maple subtree state 2035 */ 2036 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2037 { 2038 unsigned char end = mas_data_end(mast->orig_l) + 1; 2039 unsigned char b_end = mast->bn->b_end; 2040 2041 mab_shift_right(mast->bn, end); 2042 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2043 mast->l->min = mast->orig_l->min; 2044 mast->orig_l->index = mast->orig_l->min; 2045 mast->bn->b_end = end + b_end; 2046 mast->l->offset += end; 2047 } 2048 2049 /* 2050 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2051 * the node to the right. Checking the nodes to the right then the left at each 2052 * level upwards until root is reached. 2053 * Data is copied into the @mast->bn. 2054 * @mast: The maple_subtree_state. 
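 *
 * For example, if a sibling exists to the right of @mast->orig_r at some
 * level, its contents are appended to @mast->bn via mast_rebalance_next();
 * otherwise, if a sibling exists to the left of @mast->orig_l, the big node
 * is shifted right and that sibling's data is prepended via
 * mast_rebalance_prev().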
2055 */ 2056 static inline 2057 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2058 { 2059 struct ma_state r_tmp = *mast->orig_r; 2060 struct ma_state l_tmp = *mast->orig_l; 2061 unsigned char depth = 0; 2062 2063 do { 2064 mas_ascend(mast->orig_r); 2065 mas_ascend(mast->orig_l); 2066 depth++; 2067 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2068 mast->orig_r->offset++; 2069 do { 2070 mas_descend(mast->orig_r); 2071 mast->orig_r->offset = 0; 2072 } while (--depth); 2073 2074 mast_rebalance_next(mast); 2075 *mast->orig_l = l_tmp; 2076 return true; 2077 } else if (mast->orig_l->offset != 0) { 2078 mast->orig_l->offset--; 2079 do { 2080 mas_descend(mast->orig_l); 2081 mast->orig_l->offset = 2082 mas_data_end(mast->orig_l); 2083 } while (--depth); 2084 2085 mast_rebalance_prev(mast); 2086 *mast->orig_r = r_tmp; 2087 return true; 2088 } 2089 } while (!mte_is_root(mast->orig_r->node)); 2090 2091 *mast->orig_r = r_tmp; 2092 *mast->orig_l = l_tmp; 2093 return false; 2094 } 2095 2096 /* 2097 * mast_ascend() - Ascend the original left and right maple states. 2098 * @mast: the maple subtree state. 2099 * 2100 * Ascend the original left and right sides. Set the offsets to point to the 2101 * data already in the new tree (@mast->l and @mast->r). 2102 */ 2103 static inline void mast_ascend(struct maple_subtree_state *mast) 2104 { 2105 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2106 mas_ascend(mast->orig_l); 2107 mas_ascend(mast->orig_r); 2108 2109 mast->orig_r->offset = 0; 2110 mast->orig_r->index = mast->r->max; 2111 /* last should be larger than or equal to index */ 2112 if (mast->orig_r->last < mast->orig_r->index) 2113 mast->orig_r->last = mast->orig_r->index; 2114 2115 wr_mas.type = mte_node_type(mast->orig_r->node); 2116 mas_wr_node_walk(&wr_mas); 2117 /* Set up the left side of things */ 2118 mast->orig_l->offset = 0; 2119 mast->orig_l->index = mast->l->min; 2120 wr_mas.mas = mast->orig_l; 2121 wr_mas.type = mte_node_type(mast->orig_l->node); 2122 mas_wr_node_walk(&wr_mas); 2123 2124 mast->bn->type = wr_mas.type; 2125 } 2126 2127 /* 2128 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2129 * @mas: the maple state with the allocations. 2130 * @b_node: the maple_big_node with the type encoding. 2131 * 2132 * Use the node type from the maple_big_node to allocate a new node from the 2133 * ma_state. This function exists mainly for code readability. 2134 * 2135 * Return: A new maple encoded node 2136 */ 2137 static inline struct maple_enode 2138 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2139 { 2140 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2141 } 2142 2143 /* 2144 * mas_mab_to_node() - Set up right and middle nodes 2145 * 2146 * @mas: the maple state that contains the allocations. 2147 * @b_node: the node which contains the data. 2148 * @left: The pointer which will have the left node 2149 * @right: The pointer which may have the right node 2150 * @middle: the pointer which may have the middle node (rare) 2151 * @mid_split: the split location for the middle node 2152 * 2153 * Return: the split of left. 
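 *
 * Only *@left is always allocated; when @b_node->b_end does not fit in a
 * single node the split is calculated with mab_calc_split() and *@right is
 * allocated, and a non-zero *@mid_split additionally allocates *@middle.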
2154 */ 2155 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2156 struct maple_big_node *b_node, struct maple_enode **left, 2157 struct maple_enode **right, struct maple_enode **middle, 2158 unsigned char *mid_split) 2159 { 2160 unsigned char split = 0; 2161 unsigned char slot_count = mt_slots[b_node->type]; 2162 2163 *left = mas_new_ma_node(mas, b_node); 2164 *right = NULL; 2165 *middle = NULL; 2166 *mid_split = 0; 2167 2168 if (b_node->b_end < slot_count) { 2169 split = b_node->b_end; 2170 } else { 2171 split = mab_calc_split(mas, b_node, mid_split); 2172 *right = mas_new_ma_node(mas, b_node); 2173 } 2174 2175 if (*mid_split) 2176 *middle = mas_new_ma_node(mas, b_node); 2177 2178 return split; 2179 2180 } 2181 2182 /* 2183 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2184 * pointer. 2185 * @b_node: the big node to add the entry 2186 * @mas: the maple state to get the pivot (mas->max) 2187 * @entry: the entry to add, if NULL nothing happens. 2188 */ 2189 static inline void mab_set_b_end(struct maple_big_node *b_node, 2190 struct ma_state *mas, 2191 void *entry) 2192 { 2193 if (!entry) 2194 return; 2195 2196 b_node->slot[b_node->b_end] = entry; 2197 if (mt_is_alloc(mas->tree)) 2198 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2199 b_node->pivot[b_node->b_end++] = mas->max; 2200 } 2201 2202 /* 2203 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent 2204 * of @mas->node to either @left or @right, depending on @slot and @split 2205 * 2206 * @mas: the maple state with the node that needs a parent 2207 * @left: possible parent 1 2208 * @right: possible parent 2 2209 * @slot: the slot the mas->node was placed 2210 * @split: the split location between @left and @right 2211 */ 2212 static inline void mas_set_split_parent(struct ma_state *mas, 2213 struct maple_enode *left, 2214 struct maple_enode *right, 2215 unsigned char *slot, unsigned char split) 2216 { 2217 if (mas_is_none(mas)) 2218 return; 2219 2220 if ((*slot) <= split) 2221 mas_set_parent(mas, mas->node, left, *slot); 2222 else if (right) 2223 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2224 2225 (*slot)++; 2226 } 2227 2228 /* 2229 * mte_mid_split_check() - Check if the next node passes the mid-split 2230 * @l: Pointer to left encoded maple node. 2231 * @m: Pointer to middle encoded maple node. 2232 * @r: Pointer to right encoded maple node. 2233 * @slot: The offset 2234 * @split: The split location. 2235 * @mid_split: The middle split. 2236 */ 2237 static inline void mte_mid_split_check(struct maple_enode **l, 2238 struct maple_enode **r, 2239 struct maple_enode *right, 2240 unsigned char slot, 2241 unsigned char *split, 2242 unsigned char mid_split) 2243 { 2244 if (*r == right) 2245 return; 2246 2247 if (slot < mid_split) 2248 return; 2249 2250 *l = *r; 2251 *r = right; 2252 *split = mid_split; 2253 } 2254 2255 /* 2256 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2257 * is taken from @mast->l. 2258 * @mast: the maple subtree state 2259 * @left: the left node 2260 * @right: the right node 2261 * @split: the split location. 
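 * @middle: the middle node (may be NULL)
 * @mid_split: the split location between @middle and @right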
2262 */ 2263 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2264 struct maple_enode *left, 2265 struct maple_enode *middle, 2266 struct maple_enode *right, 2267 unsigned char split, 2268 unsigned char mid_split) 2269 { 2270 unsigned char slot; 2271 struct maple_enode *l = left; 2272 struct maple_enode *r = right; 2273 2274 if (mas_is_none(mast->l)) 2275 return; 2276 2277 if (middle) 2278 r = middle; 2279 2280 slot = mast->l->offset; 2281 2282 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2283 mas_set_split_parent(mast->l, l, r, &slot, split); 2284 2285 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2286 mas_set_split_parent(mast->m, l, r, &slot, split); 2287 2288 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2289 mas_set_split_parent(mast->r, l, r, &slot, split); 2290 } 2291 2292 /* 2293 * mas_topiary_node() - Dispose of a single node 2294 * @mas: The maple state for pushing nodes 2295 * @in_rcu: If the tree is in rcu mode 2296 * 2297 * The node will either be RCU freed or pushed back on the maple state. 2298 */ 2299 static inline void mas_topiary_node(struct ma_state *mas, 2300 struct ma_state *tmp_mas, bool in_rcu) 2301 { 2302 struct maple_node *tmp; 2303 struct maple_enode *enode; 2304 2305 if (mas_is_none(tmp_mas)) 2306 return; 2307 2308 enode = tmp_mas->node; 2309 tmp = mte_to_node(enode); 2310 mte_set_node_dead(enode); 2311 ma_free_rcu(tmp); 2312 } 2313 2314 /* 2315 * mas_topiary_replace() - Replace the data with new data, then repair the 2316 * parent links within the new tree. Iterate over the dead sub-tree and collect 2317 * the dead subtrees and topiary the nodes that are no longer of use. 2318 * 2319 * The new tree will have up to three children with the correct parent. Keep 2320 * track of the new entries as they need to be followed to find the next level 2321 * of new entries. 2322 * 2323 * The old tree will have up to three children with the old parent. Keep track 2324 * of the old entries as they may have more nodes below replaced. Nodes within 2325 * [index, last] are dead subtrees, others need to be freed and followed. 
2326 * 2327 * @mas: The maple state pointing at the new data 2328 * @old_enode: The maple encoded node being replaced 2329 * @new_height: The new height of the tree as a result of the operation 2330 * 2331 */ 2332 static inline void mas_topiary_replace(struct ma_state *mas, 2333 struct maple_enode *old_enode, unsigned char new_height) 2334 { 2335 struct ma_state tmp[3], tmp_next[3]; 2336 MA_TOPIARY(subtrees, mas->tree); 2337 bool in_rcu; 2338 int i, n; 2339 2340 /* Place data in tree & then mark node as old */ 2341 mas_put_in_tree(mas, old_enode, new_height); 2342 2343 /* Update the parent pointers in the tree */ 2344 tmp[0] = *mas; 2345 tmp[0].offset = 0; 2346 tmp[1].status = ma_none; 2347 tmp[2].status = ma_none; 2348 while (!mte_is_leaf(tmp[0].node)) { 2349 n = 0; 2350 for (i = 0; i < 3; i++) { 2351 if (mas_is_none(&tmp[i])) 2352 continue; 2353 2354 while (n < 3) { 2355 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2356 break; 2357 n++; 2358 } 2359 2360 mas_adopt_children(&tmp[i], tmp[i].node); 2361 } 2362 2363 if (MAS_WARN_ON(mas, n == 0)) 2364 break; 2365 2366 while (n < 3) 2367 tmp_next[n++].status = ma_none; 2368 2369 for (i = 0; i < 3; i++) 2370 tmp[i] = tmp_next[i]; 2371 } 2372 2373 /* Collect the old nodes that need to be discarded */ 2374 if (mte_is_leaf(old_enode)) 2375 return mas_free(mas, old_enode); 2376 2377 tmp[0] = *mas; 2378 tmp[0].offset = 0; 2379 tmp[0].node = old_enode; 2380 tmp[1].status = ma_none; 2381 tmp[2].status = ma_none; 2382 in_rcu = mt_in_rcu(mas->tree); 2383 do { 2384 n = 0; 2385 for (i = 0; i < 3; i++) { 2386 if (mas_is_none(&tmp[i])) 2387 continue; 2388 2389 while (n < 3) { 2390 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2391 break; 2392 2393 if ((tmp_next[n].min >= tmp_next->index) && 2394 (tmp_next[n].max <= tmp_next->last)) { 2395 mat_add(&subtrees, tmp_next[n].node); 2396 tmp_next[n].status = ma_none; 2397 } else { 2398 n++; 2399 } 2400 } 2401 } 2402 2403 if (MAS_WARN_ON(mas, n == 0)) 2404 break; 2405 2406 while (n < 3) 2407 tmp_next[n++].status = ma_none; 2408 2409 for (i = 0; i < 3; i++) { 2410 mas_topiary_node(mas, &tmp[i], in_rcu); 2411 tmp[i] = tmp_next[i]; 2412 } 2413 } while (!mte_is_leaf(tmp[0].node)); 2414 2415 for (i = 0; i < 3; i++) 2416 mas_topiary_node(mas, &tmp[i], in_rcu); 2417 2418 mas_mat_destroy(mas, &subtrees); 2419 } 2420 2421 /* 2422 * mas_wmb_replace() - Write memory barrier and replace 2423 * @mas: The maple state 2424 * @old_enode: The old maple encoded node that is being replaced. 2425 * @new_height: The new height of the tree as a result of the operation 2426 * 2427 * Updates gap as necessary. 2428 */ 2429 static inline void mas_wmb_replace(struct ma_state *mas, 2430 struct maple_enode *old_enode, unsigned char new_height) 2431 { 2432 /* Insert the new data in the tree */ 2433 mas_topiary_replace(mas, old_enode, new_height); 2434 2435 if (mte_is_leaf(mas->node)) 2436 return; 2437 2438 mas_update_gap(mas); 2439 } 2440 2441 /* 2442 * mast_cp_to_nodes() - Copy data out to nodes. 2443 * @mast: The maple subtree state 2444 * @left: The left encoded maple node 2445 * @middle: The middle encoded maple node 2446 * @right: The right encoded maple node 2447 * @split: The location to split between left and (middle ? middle : right) 2448 * @mid_split: The location to split between middle and right. 
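 *
 * For example, slots 0 to @split of @mast->bn are copied to @left; when a
 * middle node exists, slots @split + 1 to @mid_split go to @middle and the
 * remainder up to b_end goes to @right.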
2449 */ 2450 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2451 struct maple_enode *left, struct maple_enode *middle, 2452 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2453 { 2454 bool new_lmax = true; 2455 2456 mas_node_or_none(mast->l, left); 2457 mas_node_or_none(mast->m, middle); 2458 mas_node_or_none(mast->r, right); 2459 2460 mast->l->min = mast->orig_l->min; 2461 if (split == mast->bn->b_end) { 2462 mast->l->max = mast->orig_r->max; 2463 new_lmax = false; 2464 } 2465 2466 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2467 2468 if (middle) { 2469 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2470 mast->m->min = mast->bn->pivot[split] + 1; 2471 split = mid_split; 2472 } 2473 2474 mast->r->max = mast->orig_r->max; 2475 if (right) { 2476 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2477 mast->r->min = mast->bn->pivot[split] + 1; 2478 } 2479 } 2480 2481 /* 2482 * mast_combine_cp_left - Copy in the original left side of the tree into the 2483 * combined data set in the maple subtree state big node. 2484 * @mast: The maple subtree state 2485 */ 2486 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2487 { 2488 unsigned char l_slot = mast->orig_l->offset; 2489 2490 if (!l_slot) 2491 return; 2492 2493 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2494 } 2495 2496 /* 2497 * mast_combine_cp_right: Copy in the original right side of the tree into the 2498 * combined data set in the maple subtree state big node. 2499 * @mast: The maple subtree state 2500 */ 2501 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2502 { 2503 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2504 return; 2505 2506 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2507 mt_slot_count(mast->orig_r->node), mast->bn, 2508 mast->bn->b_end); 2509 mast->orig_r->last = mast->orig_r->max; 2510 } 2511 2512 /* 2513 * mast_sufficient: Check if the maple subtree state has enough data in the big 2514 * node to create at least one sufficient node 2515 * @mast: the maple subtree state 2516 */ 2517 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2518 { 2519 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2520 return true; 2521 2522 return false; 2523 } 2524 2525 /* 2526 * mast_overflow: Check if there is too much data in the subtree state for a 2527 * single node. 
2528 * @mast: The maple subtree state 2529 */ 2530 static inline bool mast_overflow(struct maple_subtree_state *mast) 2531 { 2532 if (mast->bn->b_end > mt_slot_count(mast->orig_l->node)) 2533 return true; 2534 2535 return false; 2536 } 2537 2538 static inline void *mtree_range_walk(struct ma_state *mas) 2539 { 2540 unsigned long *pivots; 2541 unsigned char offset; 2542 struct maple_node *node; 2543 struct maple_enode *next, *last; 2544 enum maple_type type; 2545 void __rcu **slots; 2546 unsigned char end; 2547 unsigned long max, min; 2548 unsigned long prev_max, prev_min; 2549 2550 next = mas->node; 2551 min = mas->min; 2552 max = mas->max; 2553 do { 2554 last = next; 2555 node = mte_to_node(next); 2556 type = mte_node_type(next); 2557 pivots = ma_pivots(node, type); 2558 end = ma_data_end(node, type, pivots, max); 2559 prev_min = min; 2560 prev_max = max; 2561 if (pivots[0] >= mas->index) { 2562 offset = 0; 2563 max = pivots[0]; 2564 goto next; 2565 } 2566 2567 offset = 1; 2568 while (offset < end) { 2569 if (pivots[offset] >= mas->index) { 2570 max = pivots[offset]; 2571 break; 2572 } 2573 offset++; 2574 } 2575 2576 min = pivots[offset - 1] + 1; 2577 next: 2578 slots = ma_slots(node, type); 2579 next = mt_slot(mas->tree, slots, offset); 2580 if (unlikely(ma_dead_node(node))) 2581 goto dead_node; 2582 } while (!ma_is_leaf(type)); 2583 2584 mas->end = end; 2585 mas->offset = offset; 2586 mas->index = min; 2587 mas->last = max; 2588 mas->min = prev_min; 2589 mas->max = prev_max; 2590 mas->node = last; 2591 return (void *)next; 2592 2593 dead_node: 2594 mas_reset(mas); 2595 return NULL; 2596 } 2597 2598 /* 2599 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2600 * @mas: The starting maple state 2601 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2602 * @count: The estimated count of iterations needed. 2603 * 2604 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2605 * is hit. First @b_node is split into two entries which are inserted into the 2606 * next iteration of the loop. @b_node is returned populated with the final 2607 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2608 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2609 * to account of what has been copied into the new sub-tree. The update of 2610 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2611 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2612 * the new sub-tree in case the sub-tree becomes the full tree. 2613 */ 2614 static void mas_spanning_rebalance(struct ma_state *mas, 2615 struct maple_subtree_state *mast, unsigned char count) 2616 { 2617 unsigned char split, mid_split; 2618 unsigned char slot = 0; 2619 unsigned char new_height = 0; /* used if node is a new root */ 2620 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2621 struct maple_enode *old_enode; 2622 2623 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2624 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2625 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2626 2627 /* 2628 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2629 * Rebalancing is done by use of the ``struct maple_topiary``. 2630 */ 2631 mast->l = &l_mas; 2632 mast->m = &m_mas; 2633 mast->r = &r_mas; 2634 l_mas.status = r_mas.status = m_mas.status = ma_none; 2635 2636 /* Check if this is not root and has sufficient data. 
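 * If this subtree does not cover the whole tree (min != 0 or max != ULONG_MAX)
 * and the big node holds too few entries, borrow data from a neighbouring
 * node first (see mast_spanning_rebalance()).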
*/ 2637 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2638 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2639 mast_spanning_rebalance(mast); 2640 2641 /* 2642 * Each level of the tree is examined and balanced, pushing data to the left or 2643 * right, or rebalancing against left or right nodes is employed to avoid 2644 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2645 * the tree is created, there may be a mix of new and old nodes. The old nodes 2646 * will have the incorrect parent pointers and currently be in two trees: the 2647 * original tree and the partially new tree. To remedy the parent pointers in 2648 * the old tree, the new data is swapped into the active tree and a walk down 2649 * the tree is performed and the parent pointers are updated. 2650 * See mas_topiary_replace() for more information. 2651 */ 2652 while (count--) { 2653 mast->bn->b_end--; 2654 mast->bn->type = mte_node_type(mast->orig_l->node); 2655 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2656 &mid_split); 2657 mast_set_split_parents(mast, left, middle, right, split, 2658 mid_split); 2659 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2660 new_height++; 2661 2662 /* 2663 * Copy data from next level in the tree to mast->bn from next 2664 * iteration 2665 */ 2666 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2667 mast->bn->type = mte_node_type(left); 2668 2669 /* Root already stored in l->node. */ 2670 if (mas_is_root_limits(mast->l)) 2671 goto new_root; 2672 2673 mast_ascend(mast); 2674 mast_combine_cp_left(mast); 2675 l_mas.offset = mast->bn->b_end; 2676 mab_set_b_end(mast->bn, &l_mas, left); 2677 mab_set_b_end(mast->bn, &m_mas, middle); 2678 mab_set_b_end(mast->bn, &r_mas, right); 2679 2680 /* Copy anything necessary out of the right node. */ 2681 mast_combine_cp_right(mast); 2682 mast->orig_l->last = mast->orig_l->max; 2683 2684 if (mast_sufficient(mast)) { 2685 if (mast_overflow(mast)) 2686 continue; 2687 2688 if (mast->orig_l->node == mast->orig_r->node) { 2689 /* 2690 * The data in b_node should be stored in one 2691 * node and in the tree 2692 */ 2693 slot = mast->l->offset; 2694 break; 2695 } 2696 2697 continue; 2698 } 2699 2700 /* May be a new root stored in mast->bn */ 2701 if (mas_is_root_limits(mast->orig_l)) 2702 break; 2703 2704 mast_spanning_rebalance(mast); 2705 2706 /* rebalancing from other nodes may require another loop. */ 2707 if (!count) 2708 count++; 2709 } 2710 2711 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2712 mte_node_type(mast->orig_l->node)); 2713 2714 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2715 new_height++; 2716 mas_set_parent(mas, left, l_mas.node, slot); 2717 if (middle) 2718 mas_set_parent(mas, middle, l_mas.node, ++slot); 2719 2720 if (right) 2721 mas_set_parent(mas, right, l_mas.node, ++slot); 2722 2723 if (mas_is_root_limits(mast->l)) { 2724 new_root: 2725 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2726 while (!mte_is_root(mast->orig_l->node)) 2727 mast_ascend(mast); 2728 } else { 2729 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2730 } 2731 2732 old_enode = mast->orig_l->node; 2733 mas->depth = l_mas.depth; 2734 mas->node = l_mas.node; 2735 mas->min = l_mas.min; 2736 mas->max = l_mas.max; 2737 mas->offset = l_mas.offset; 2738 mas_wmb_replace(mas, old_enode, new_height); 2739 mtree_range_walk(mas); 2740 return; 2741 } 2742 2743 /* 2744 * mas_rebalance() - Rebalance a given node. 
2745 * @mas: The maple state 2746 * @b_node: The big maple node. 2747 * 2748 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2749 * Continue upwards until tree is sufficient. 2750 */ 2751 static inline void mas_rebalance(struct ma_state *mas, 2752 struct maple_big_node *b_node) 2753 { 2754 char empty_count = mas_mt_height(mas); 2755 struct maple_subtree_state mast; 2756 unsigned char shift, b_end = ++b_node->b_end; 2757 2758 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2759 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2760 2761 trace_ma_op(TP_FCT, mas); 2762 2763 /* 2764 * Rebalancing occurs if a node is insufficient. Data is rebalanced 2765 * against the node to the right if it exists, otherwise the node to the 2766 * left of this node is rebalanced against this node. If rebalancing 2767 * causes just one node to be produced instead of two, then the parent 2768 * is also examined and rebalanced if it is insufficient. Every level 2769 * tries to combine the data in the same way. If one node contains the 2770 * entire range of the tree, then that node is used as a new root node. 2771 */ 2772 2773 mast.orig_l = &l_mas; 2774 mast.orig_r = &r_mas; 2775 mast.bn = b_node; 2776 mast.bn->type = mte_node_type(mas->node); 2777 2778 l_mas = r_mas = *mas; 2779 2780 if (mas_next_sibling(&r_mas)) { 2781 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 2782 r_mas.last = r_mas.index = r_mas.max; 2783 } else { 2784 mas_prev_sibling(&l_mas); 2785 shift = mas_data_end(&l_mas) + 1; 2786 mab_shift_right(b_node, shift); 2787 mas->offset += shift; 2788 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 2789 b_node->b_end = shift + b_end; 2790 l_mas.index = l_mas.last = l_mas.min; 2791 } 2792 2793 return mas_spanning_rebalance(mas, &mast, empty_count); 2794 } 2795 2796 /* 2797 * mas_split_final_node() - Split the final node in a subtree operation. 2798 * @mast: the maple subtree state 2799 * @mas: The maple state 2800 */ 2801 static inline void mas_split_final_node(struct maple_subtree_state *mast, 2802 struct ma_state *mas) 2803 { 2804 struct maple_enode *ancestor; 2805 2806 if (mte_is_root(mas->node)) { 2807 if (mt_is_alloc(mas->tree)) 2808 mast->bn->type = maple_arange_64; 2809 else 2810 mast->bn->type = maple_range_64; 2811 } 2812 /* 2813 * Only a single node is used here, could be root. 2814 * The Big_node data should just fit in a single node. 2815 */ 2816 ancestor = mas_new_ma_node(mas, mast->bn); 2817 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 2818 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 2819 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 2820 2821 mast->l->node = ancestor; 2822 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 2823 mas->offset = mast->bn->b_end - 1; 2824 } 2825 2826 /* 2827 * mast_fill_bnode() - Copy data into the big node in the subtree state 2828 * @mast: The maple subtree state 2829 * @mas: the maple state 2830 * @skip: The number of entries to skip for new nodes insertion. 
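 *
 * @skip is 1 for a regular split, where only the parent slot of the node
 * being split is replaced, and 2 from mas_push_data(), where the slot of
 * the sibling that took part in the push is consumed as well.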
2831 */ 2832 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 2833 struct ma_state *mas, 2834 unsigned char skip) 2835 { 2836 bool cp = true; 2837 unsigned char split; 2838 2839 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2840 2841 if (mte_is_root(mas->node)) { 2842 cp = false; 2843 } else { 2844 mas_ascend(mas); 2845 mas->offset = mte_parent_slot(mas->node); 2846 } 2847 2848 if (cp && mast->l->offset) 2849 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 2850 2851 split = mast->bn->b_end; 2852 mab_set_b_end(mast->bn, mast->l, mast->l->node); 2853 mast->r->offset = mast->bn->b_end; 2854 mab_set_b_end(mast->bn, mast->r, mast->r->node); 2855 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 2856 cp = false; 2857 2858 if (cp) 2859 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 2860 mast->bn, mast->bn->b_end); 2861 2862 mast->bn->b_end--; 2863 mast->bn->type = mte_node_type(mas->node); 2864 } 2865 2866 /* 2867 * mast_split_data() - Split the data in the subtree state big node into regular 2868 * nodes. 2869 * @mast: The maple subtree state 2870 * @mas: The maple state 2871 * @split: The location to split the big node 2872 */ 2873 static inline void mast_split_data(struct maple_subtree_state *mast, 2874 struct ma_state *mas, unsigned char split) 2875 { 2876 unsigned char p_slot; 2877 2878 mab_mas_cp(mast->bn, 0, split, mast->l, true); 2879 mte_set_pivot(mast->r->node, 0, mast->r->max); 2880 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 2881 mast->l->offset = mte_parent_slot(mas->node); 2882 mast->l->max = mast->bn->pivot[split]; 2883 mast->r->min = mast->l->max + 1; 2884 if (mte_is_leaf(mas->node)) 2885 return; 2886 2887 p_slot = mast->orig_l->offset; 2888 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 2889 &p_slot, split); 2890 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 2891 &p_slot, split); 2892 } 2893 2894 /* 2895 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 2896 * data to the right or left node if there is room. 2897 * @mas: The maple state 2898 * @mast: The maple subtree state 2899 * @left: Push left or not. 2900 * 2901 * Keeping the height of the tree low means faster lookups. 2902 * 2903 * Return: True if pushed, false otherwise. 
2904 */ 2905 static inline bool mas_push_data(struct ma_state *mas, 2906 struct maple_subtree_state *mast, bool left) 2907 { 2908 unsigned char slot_total = mast->bn->b_end; 2909 unsigned char end, space, split; 2910 2911 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 2912 tmp_mas = *mas; 2913 tmp_mas.depth = mast->l->depth; 2914 2915 if (left && !mas_prev_sibling(&tmp_mas)) 2916 return false; 2917 else if (!left && !mas_next_sibling(&tmp_mas)) 2918 return false; 2919 2920 end = mas_data_end(&tmp_mas); 2921 slot_total += end; 2922 space = 2 * mt_slot_count(mas->node) - 2; 2923 /* -2 instead of -1 to ensure there isn't a triple split */ 2924 if (ma_is_leaf(mast->bn->type)) 2925 space--; 2926 2927 if (mas->max == ULONG_MAX) 2928 space--; 2929 2930 if (slot_total >= space) 2931 return false; 2932 2933 /* Get the data; Fill mast->bn */ 2934 mast->bn->b_end++; 2935 if (left) { 2936 mab_shift_right(mast->bn, end + 1); 2937 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 2938 mast->bn->b_end = slot_total + 1; 2939 } else { 2940 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 2941 } 2942 2943 /* Configure mast for splitting of mast->bn */ 2944 split = mt_slots[mast->bn->type] - 2; 2945 if (left) { 2946 /* Switch mas to prev node */ 2947 *mas = tmp_mas; 2948 /* Start using mast->l for the left side. */ 2949 tmp_mas.node = mast->l->node; 2950 *mast->l = tmp_mas; 2951 } else { 2952 tmp_mas.node = mast->r->node; 2953 *mast->r = tmp_mas; 2954 split = slot_total - split; 2955 } 2956 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 2957 /* Update parent slot for split calculation. */ 2958 if (left) 2959 mast->orig_l->offset += end + 1; 2960 2961 mast_split_data(mast, mas, split); 2962 mast_fill_bnode(mast, mas, 2); 2963 mas_split_final_node(mast, mas); 2964 return true; 2965 } 2966 2967 /* 2968 * mas_split() - Split data that is too big for one node into two. 2969 * @mas: The maple state 2970 * @b_node: The maple big node 2971 */ 2972 static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) 2973 { 2974 struct maple_subtree_state mast; 2975 int height = 0; 2976 unsigned int orig_height = mas_mt_height(mas); 2977 unsigned char mid_split, split = 0; 2978 struct maple_enode *old; 2979 2980 /* 2981 * Splitting is handled differently from any other B-tree; the Maple 2982 * Tree splits upwards. Splitting up means that the split operation 2983 * occurs when the walk of the tree hits the leaves and not on the way 2984 * down. The reason for splitting up is that it is impossible to know 2985 * how much space will be needed until the leaf is (or leaves are) 2986 * reached. Since overwriting data is allowed and a range could 2987 * overwrite more than one range or result in changing one entry into 3 2988 * entries, it is impossible to know if a split is required until the 2989 * data is examined. 2990 * 2991 * Splitting is a balancing act between keeping allocations to a minimum 2992 * and avoiding a 'jitter' event where a tree is expanded to make room 2993 * for an entry followed by a contraction when the entry is removed. To 2994 * accomplish the balance, there are empty slots remaining in both left 2995 * and right nodes after a split. 
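 *
 * For example (illustrative), a store that lands in the middle of an
 * existing range in a full leaf replaces one entry with three (the head of
 * the old range, the new entry, and the tail of the old range), so the
 * need to split is only discovered once the leaf is reached.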
2996 */ 2997 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2998 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2999 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3000 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3001 3002 trace_ma_op(TP_FCT, mas); 3003 3004 mast.l = &l_mas; 3005 mast.r = &r_mas; 3006 mast.orig_l = &prev_l_mas; 3007 mast.orig_r = &prev_r_mas; 3008 mast.bn = b_node; 3009 3010 while (height++ <= orig_height) { 3011 if (mt_slots[b_node->type] > b_node->b_end) { 3012 mas_split_final_node(&mast, mas); 3013 break; 3014 } 3015 3016 l_mas = r_mas = *mas; 3017 l_mas.node = mas_new_ma_node(mas, b_node); 3018 r_mas.node = mas_new_ma_node(mas, b_node); 3019 /* 3020 * Another way that 'jitter' is avoided is to terminate a split up early if the 3021 * left or right node has space to spare. This is referred to as "pushing left" 3022 * or "pushing right" and is similar to the B* tree, except the nodes left or 3023 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3024 * is a significant savings. 3025 */ 3026 /* Try to push left. */ 3027 if (mas_push_data(mas, &mast, true)) { 3028 height++; 3029 break; 3030 } 3031 /* Try to push right. */ 3032 if (mas_push_data(mas, &mast, false)) { 3033 height++; 3034 break; 3035 } 3036 3037 split = mab_calc_split(mas, b_node, &mid_split); 3038 mast_split_data(&mast, mas, split); 3039 /* 3040 * Usually correct, mab_mas_cp in the above call overwrites 3041 * r->max. 3042 */ 3043 mast.r->max = mas->max; 3044 mast_fill_bnode(&mast, mas, 1); 3045 prev_l_mas = *mast.l; 3046 prev_r_mas = *mast.r; 3047 } 3048 3049 /* Set the original node as dead */ 3050 old = mas->node; 3051 mas->node = l_mas.node; 3052 mas_wmb_replace(mas, old, height); 3053 mtree_range_walk(mas); 3054 return; 3055 } 3056 3057 /* 3058 * mas_commit_b_node() - Commit the big node into the tree. 
3059 * @wr_mas: The maple write state 3060 * @b_node: The maple big node 3061 */ 3062 static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, 3063 struct maple_big_node *b_node) 3064 { 3065 enum store_type type = wr_mas->mas->store_type; 3066 3067 WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store); 3068 3069 if (type == wr_rebalance) 3070 return mas_rebalance(wr_mas->mas, b_node); 3071 3072 return mas_split(wr_mas->mas, b_node); 3073 } 3074 3075 /* 3076 * mas_root_expand() - Expand a root to a node 3077 * @mas: The maple state 3078 * @entry: The entry to store into the tree 3079 */ 3080 static inline void mas_root_expand(struct ma_state *mas, void *entry) 3081 { 3082 void *contents = mas_root_locked(mas); 3083 enum maple_type type = maple_leaf_64; 3084 struct maple_node *node; 3085 void __rcu **slots; 3086 unsigned long *pivots; 3087 int slot = 0; 3088 3089 node = mas_pop_node(mas); 3090 pivots = ma_pivots(node, type); 3091 slots = ma_slots(node, type); 3092 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3093 mas->node = mt_mk_node(node, type); 3094 mas->status = ma_active; 3095 3096 if (mas->index) { 3097 if (contents) { 3098 rcu_assign_pointer(slots[slot], contents); 3099 if (likely(mas->index > 1)) 3100 slot++; 3101 } 3102 pivots[slot++] = mas->index - 1; 3103 } 3104 3105 rcu_assign_pointer(slots[slot], entry); 3106 mas->offset = slot; 3107 pivots[slot] = mas->last; 3108 if (mas->last != ULONG_MAX) 3109 pivots[++slot] = ULONG_MAX; 3110 3111 mt_set_height(mas->tree, 1); 3112 ma_set_meta(node, maple_leaf_64, 0, slot); 3113 /* swap the new root into the tree */ 3114 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3115 return; 3116 } 3117 3118 /* 3119 * mas_store_root() - Storing value into root. 3120 * @mas: The maple state 3121 * @entry: The entry to store. 3122 * 3123 * There is no root node now and we are storing a value into the root - this 3124 * function either assigns the pointer or expands into a node. 3125 */ 3126 static inline void mas_store_root(struct ma_state *mas, void *entry) 3127 { 3128 if (!entry) { 3129 if (!mas->index) 3130 rcu_assign_pointer(mas->tree->ma_root, NULL); 3131 } else if (likely((mas->last != 0) || (mas->index != 0))) 3132 mas_root_expand(mas, entry); 3133 else if (((unsigned long) (entry) & 3) == 2) 3134 mas_root_expand(mas, entry); 3135 else { 3136 rcu_assign_pointer(mas->tree->ma_root, entry); 3137 mas->status = ma_start; 3138 } 3139 } 3140 3141 /* 3142 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3143 * spans the node. 3144 * @wr_mas: The maple write state 3145 * 3146 * Spanning writes are writes that start in one node and end in another OR if 3147 * the write of a %NULL will cause the node to end with a %NULL. 3148 * 3149 * Return: True if this is a spanning write, false otherwise. 3150 */ 3151 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3152 { 3153 unsigned long max = wr_mas->r_max; 3154 unsigned long last = wr_mas->mas->last; 3155 enum maple_type type = wr_mas->type; 3156 void *entry = wr_mas->entry; 3157 3158 /* Contained in this pivot, fast path */ 3159 if (last < max) 3160 return false; 3161 3162 if (ma_is_leaf(type)) { 3163 max = wr_mas->mas->max; 3164 if (last < max) 3165 return false; 3166 } 3167 3168 if (last == max) { 3169 /* 3170 * The last entry of leaf node cannot be NULL unless it is the 3171 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 
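 * For example (illustrative), storing a NULL whose last index lands exactly
 * on this node's maximum, while more of the tree lies to the right, is
 * treated as spanning so the NULL can be merged with the following range.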
3172 */ 3173 if (entry || last == ULONG_MAX) 3174 return false; 3175 } 3176 3177 trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry); 3178 return true; 3179 } 3180 3181 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3182 { 3183 wr_mas->type = mte_node_type(wr_mas->mas->node); 3184 mas_wr_node_walk(wr_mas); 3185 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3186 } 3187 3188 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3189 { 3190 wr_mas->mas->max = wr_mas->r_max; 3191 wr_mas->mas->min = wr_mas->r_min; 3192 wr_mas->mas->node = wr_mas->content; 3193 wr_mas->mas->offset = 0; 3194 wr_mas->mas->depth++; 3195 } 3196 /* 3197 * mas_wr_walk() - Walk the tree for a write. 3198 * @wr_mas: The maple write state 3199 * 3200 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3201 * 3202 * Return: True if it's contained in a node, false on spanning write. 3203 */ 3204 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3205 { 3206 struct ma_state *mas = wr_mas->mas; 3207 3208 while (true) { 3209 mas_wr_walk_descend(wr_mas); 3210 if (unlikely(mas_is_span_wr(wr_mas))) 3211 return false; 3212 3213 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3214 mas->offset); 3215 if (ma_is_leaf(wr_mas->type)) 3216 return true; 3217 3218 if (mas->end < mt_slots[wr_mas->type] - 1) 3219 wr_mas->vacant_height = mas->depth + 1; 3220 3221 if (ma_is_root(mas_mn(mas))) { 3222 /* root needs more than 2 entries to be sufficient + 1 */ 3223 if (mas->end > 2) 3224 wr_mas->sufficient_height = 1; 3225 } else if (mas->end > mt_min_slots[wr_mas->type] + 1) 3226 wr_mas->sufficient_height = mas->depth + 1; 3227 3228 mas_wr_walk_traverse(wr_mas); 3229 } 3230 3231 return true; 3232 } 3233 3234 static void mas_wr_walk_index(struct ma_wr_state *wr_mas) 3235 { 3236 struct ma_state *mas = wr_mas->mas; 3237 3238 while (true) { 3239 mas_wr_walk_descend(wr_mas); 3240 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3241 mas->offset); 3242 if (ma_is_leaf(wr_mas->type)) 3243 return; 3244 mas_wr_walk_traverse(wr_mas); 3245 } 3246 } 3247 /* 3248 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 
3249 * @l_wr_mas: The left maple write state 3250 * @r_wr_mas: The right maple write state 3251 */ 3252 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3253 struct ma_wr_state *r_wr_mas) 3254 { 3255 struct ma_state *r_mas = r_wr_mas->mas; 3256 struct ma_state *l_mas = l_wr_mas->mas; 3257 unsigned char l_slot; 3258 3259 l_slot = l_mas->offset; 3260 if (!l_wr_mas->content) 3261 l_mas->index = l_wr_mas->r_min; 3262 3263 if ((l_mas->index == l_wr_mas->r_min) && 3264 (l_slot && 3265 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3266 if (l_slot > 1) 3267 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3268 else 3269 l_mas->index = l_mas->min; 3270 3271 l_mas->offset = l_slot - 1; 3272 } 3273 3274 if (!r_wr_mas->content) { 3275 if (r_mas->last < r_wr_mas->r_max) 3276 r_mas->last = r_wr_mas->r_max; 3277 r_mas->offset++; 3278 } else if ((r_mas->last == r_wr_mas->r_max) && 3279 (r_mas->last < r_mas->max) && 3280 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3281 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3282 r_wr_mas->type, r_mas->offset + 1); 3283 r_mas->offset++; 3284 } 3285 } 3286 3287 static inline void *mas_state_walk(struct ma_state *mas) 3288 { 3289 void *entry; 3290 3291 entry = mas_start(mas); 3292 if (mas_is_none(mas)) 3293 return NULL; 3294 3295 if (mas_is_ptr(mas)) 3296 return entry; 3297 3298 return mtree_range_walk(mas); 3299 } 3300 3301 /* 3302 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3303 * to date. 3304 * 3305 * @mas: The maple state. 3306 * 3307 * Note: Leaves mas in undesirable state. 3308 * Return: The entry for @mas->index or %NULL on dead node. 3309 */ 3310 static inline void *mtree_lookup_walk(struct ma_state *mas) 3311 { 3312 unsigned long *pivots; 3313 unsigned char offset; 3314 struct maple_node *node; 3315 struct maple_enode *next; 3316 enum maple_type type; 3317 void __rcu **slots; 3318 unsigned char end; 3319 3320 next = mas->node; 3321 do { 3322 node = mte_to_node(next); 3323 type = mte_node_type(next); 3324 pivots = ma_pivots(node, type); 3325 end = mt_pivots[type]; 3326 offset = 0; 3327 do { 3328 if (pivots[offset] >= mas->index) 3329 break; 3330 } while (++offset < end); 3331 3332 slots = ma_slots(node, type); 3333 next = mt_slot(mas->tree, slots, offset); 3334 if (unlikely(ma_dead_node(node))) 3335 goto dead_node; 3336 } while (!ma_is_leaf(type)); 3337 3338 return (void *)next; 3339 3340 dead_node: 3341 mas_reset(mas); 3342 return NULL; 3343 } 3344 3345 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 3346 /* 3347 * mas_new_root() - Create a new root node that only contains the entry passed 3348 * in. 3349 * @mas: The maple state 3350 * @entry: The entry to store. 
3351 * 3352 * Only valid when the index == 0 and the last == ULONG_MAX 3353 */ 3354 static inline void mas_new_root(struct ma_state *mas, void *entry) 3355 { 3356 struct maple_enode *root = mas_root_locked(mas); 3357 enum maple_type type = maple_leaf_64; 3358 struct maple_node *node; 3359 void __rcu **slots; 3360 unsigned long *pivots; 3361 3362 WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); 3363 3364 if (!entry) { 3365 mt_set_height(mas->tree, 0); 3366 rcu_assign_pointer(mas->tree->ma_root, entry); 3367 mas->status = ma_start; 3368 goto done; 3369 } 3370 3371 node = mas_pop_node(mas); 3372 pivots = ma_pivots(node, type); 3373 slots = ma_slots(node, type); 3374 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3375 mas->node = mt_mk_node(node, type); 3376 mas->status = ma_active; 3377 rcu_assign_pointer(slots[0], entry); 3378 pivots[0] = mas->last; 3379 mt_set_height(mas->tree, 1); 3380 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3381 3382 done: 3383 if (xa_is_node(root)) 3384 mte_destroy_walk(root, mas->tree); 3385 3386 return; 3387 } 3388 /* 3389 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3390 * and new nodes where necessary, then place the sub-tree in the actual tree. 3391 * Note that mas is expected to point to the node which caused the store to 3392 * span. 3393 * @wr_mas: The maple write state 3394 */ 3395 static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3396 { 3397 struct maple_subtree_state mast; 3398 struct maple_big_node b_node; 3399 struct ma_state *mas; 3400 unsigned char height; 3401 3402 /* Left and Right side of spanning store */ 3403 MA_STATE(l_mas, NULL, 0, 0); 3404 MA_STATE(r_mas, NULL, 0, 0); 3405 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3406 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3407 3408 /* 3409 * A store operation that spans multiple nodes is called a spanning 3410 * store and is handled early in the store call stack by the function 3411 * mas_is_span_wr(). When a spanning store is identified, the maple 3412 * state is duplicated. The first maple state walks the left tree path 3413 * to ``index``, the duplicate walks the right tree path to ``last``. 3414 * The data in the two nodes are combined into a single node, two nodes, 3415 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3416 * written to the last entry of a node is considered a spanning store as 3417 * a rebalance is required for the operation to complete and an overflow 3418 * of data may happen. 3419 */ 3420 mas = wr_mas->mas; 3421 trace_ma_op(TP_FCT, mas); 3422 3423 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3424 return mas_new_root(mas, wr_mas->entry); 3425 /* 3426 * Node rebalancing may occur due to this store, so there may be three new 3427 * entries per level plus a new root. 3428 */ 3429 height = mas_mt_height(mas); 3430 3431 /* 3432 * Set up right side. Need to get to the next offset after the spanning 3433 * store to ensure it's not NULL and to combine both the next node and 3434 * the node with the start together. 3435 */ 3436 r_mas = *mas; 3437 /* Avoid overflow, walk to next slot in the tree. */ 3438 if (r_mas.last + 1) 3439 r_mas.last++; 3440 3441 r_mas.index = r_mas.last; 3442 mas_wr_walk_index(&r_wr_mas); 3443 r_mas.last = r_mas.index = mas->last; 3444 3445 /* Set up left side. 
*/ 3446 l_mas = *mas; 3447 mas_wr_walk_index(&l_wr_mas); 3448 3449 if (!wr_mas->entry) { 3450 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3451 mas->offset = l_mas.offset; 3452 mas->index = l_mas.index; 3453 mas->last = l_mas.last = r_mas.last; 3454 } 3455 3456 /* expanding NULLs may make this cover the entire range */ 3457 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3458 mas_set_range(mas, 0, ULONG_MAX); 3459 return mas_new_root(mas, wr_mas->entry); 3460 } 3461 3462 memset(&b_node, 0, sizeof(struct maple_big_node)); 3463 /* Copy l_mas and store the value in b_node. */ 3464 mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); 3465 /* Copy r_mas into b_node if there is anything to copy. */ 3466 if (r_mas.max > r_mas.last) 3467 mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, 3468 &b_node, b_node.b_end + 1); 3469 else 3470 b_node.b_end++; 3471 3472 /* Stop spanning searches by searching for just index. */ 3473 l_mas.index = l_mas.last = mas->index; 3474 3475 mast.bn = &b_node; 3476 mast.orig_l = &l_mas; 3477 mast.orig_r = &r_mas; 3478 /* Combine l_mas and r_mas and split them up evenly again. */ 3479 return mas_spanning_rebalance(mas, &mast, height + 1); 3480 } 3481 3482 /* 3483 * mas_wr_node_store() - Attempt to store the value in a node 3484 * @wr_mas: The maple write state 3485 * 3486 * Attempts to reuse the node, but may allocate. 3487 */ 3488 static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, 3489 unsigned char new_end) 3490 { 3491 struct ma_state *mas = wr_mas->mas; 3492 void __rcu **dst_slots; 3493 unsigned long *dst_pivots; 3494 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3495 struct maple_node reuse, *newnode; 3496 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3497 bool in_rcu = mt_in_rcu(mas->tree); 3498 unsigned char height = mas_mt_height(mas); 3499 3500 if (mas->last == wr_mas->end_piv) 3501 offset_end++; /* don't copy this offset */ 3502 3503 /* set up node. */ 3504 if (in_rcu) { 3505 newnode = mas_pop_node(mas); 3506 } else { 3507 memset(&reuse, 0, sizeof(struct maple_node)); 3508 newnode = &reuse; 3509 } 3510 3511 newnode->parent = mas_mn(mas)->parent; 3512 dst_pivots = ma_pivots(newnode, wr_mas->type); 3513 dst_slots = ma_slots(newnode, wr_mas->type); 3514 /* Copy from start to insert point */ 3515 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3516 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3517 3518 /* Handle insert of new range starting after old range */ 3519 if (wr_mas->r_min < mas->index) { 3520 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3521 dst_pivots[mas->offset++] = mas->index - 1; 3522 } 3523 3524 /* Store the new entry and range end. */ 3525 if (mas->offset < node_pivots) 3526 dst_pivots[mas->offset] = mas->last; 3527 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3528 3529 /* 3530 * this range wrote to the end of the node or it overwrote the rest of 3531 * the data 3532 */ 3533 if (offset_end > mas->end) 3534 goto done; 3535 3536 dst_offset = mas->offset + 1; 3537 /* Copy to the end of node if necessary. 
*/ 3538 copy_size = mas->end - offset_end + 1; 3539 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3540 sizeof(void *) * copy_size); 3541 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3542 sizeof(unsigned long) * (copy_size - 1)); 3543 3544 if (new_end < node_pivots) 3545 dst_pivots[new_end] = mas->max; 3546 3547 done: 3548 mas_leaf_set_meta(newnode, maple_leaf_64, new_end); 3549 if (in_rcu) { 3550 struct maple_enode *old_enode = mas->node; 3551 3552 mas->node = mt_mk_node(newnode, wr_mas->type); 3553 mas_replace_node(mas, old_enode, height); 3554 } else { 3555 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3556 } 3557 trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); 3558 mas_update_gap(mas); 3559 mas->end = new_end; 3560 return; 3561 } 3562 3563 /* 3564 * mas_wr_slot_store: Attempt to store a value in a slot. 3565 * @wr_mas: the maple write state 3566 */ 3567 static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) 3568 { 3569 struct ma_state *mas = wr_mas->mas; 3570 unsigned char offset = mas->offset; 3571 void __rcu **slots = wr_mas->slots; 3572 bool gap = false; 3573 3574 gap |= !mt_slot_locked(mas->tree, slots, offset); 3575 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 3576 3577 if (wr_mas->offset_end - offset == 1) { 3578 if (mas->index == wr_mas->r_min) { 3579 /* Overwriting the range and a part of the next one */ 3580 rcu_assign_pointer(slots[offset], wr_mas->entry); 3581 wr_mas->pivots[offset] = mas->last; 3582 } else { 3583 /* Overwriting a part of the range and the next one */ 3584 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3585 wr_mas->pivots[offset] = mas->index - 1; 3586 mas->offset++; /* Keep mas accurate. */ 3587 } 3588 } else { 3589 WARN_ON_ONCE(mt_in_rcu(mas->tree)); 3590 /* 3591 * Expand the range, only partially overwriting the previous and 3592 * next ranges 3593 */ 3594 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 3595 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3596 wr_mas->pivots[offset] = mas->index - 1; 3597 wr_mas->pivots[offset + 1] = mas->last; 3598 mas->offset++; /* Keep mas accurate. */ 3599 } 3600 3601 trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); 3602 /* 3603 * Only update gap when the new entry is empty or there is an empty 3604 * entry in the original two ranges. 
3605 */ 3606 if (!wr_mas->entry || gap) 3607 mas_update_gap(mas); 3608 3609 return; 3610 } 3611 3612 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 3613 { 3614 struct ma_state *mas = wr_mas->mas; 3615 3616 if (!wr_mas->slots[wr_mas->offset_end]) { 3617 /* If this one is null, the next and prev are not */ 3618 mas->last = wr_mas->end_piv; 3619 } else { 3620 /* Check next slot(s) if we are overwriting the end */ 3621 if ((mas->last == wr_mas->end_piv) && 3622 (mas->end != wr_mas->offset_end) && 3623 !wr_mas->slots[wr_mas->offset_end + 1]) { 3624 wr_mas->offset_end++; 3625 if (wr_mas->offset_end == mas->end) 3626 mas->last = mas->max; 3627 else 3628 mas->last = wr_mas->pivots[wr_mas->offset_end]; 3629 wr_mas->end_piv = mas->last; 3630 } 3631 } 3632 3633 if (!wr_mas->content) { 3634 /* If this one is null, the next and prev are not */ 3635 mas->index = wr_mas->r_min; 3636 } else { 3637 /* Check prev slot if we are overwriting the start */ 3638 if (mas->index == wr_mas->r_min && mas->offset && 3639 !wr_mas->slots[mas->offset - 1]) { 3640 mas->offset--; 3641 wr_mas->r_min = mas->index = 3642 mas_safe_min(mas, wr_mas->pivots, mas->offset); 3643 wr_mas->r_max = wr_mas->pivots[mas->offset]; 3644 } 3645 } 3646 } 3647 3648 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 3649 { 3650 while ((wr_mas->offset_end < wr_mas->mas->end) && 3651 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 3652 wr_mas->offset_end++; 3653 3654 if (wr_mas->offset_end < wr_mas->mas->end) 3655 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 3656 else 3657 wr_mas->end_piv = wr_mas->mas->max; 3658 } 3659 3660 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 3661 { 3662 struct ma_state *mas = wr_mas->mas; 3663 unsigned char new_end = mas->end + 2; 3664 3665 new_end -= wr_mas->offset_end - mas->offset; 3666 if (wr_mas->r_min == mas->index) 3667 new_end--; 3668 3669 if (wr_mas->end_piv == mas->last) 3670 new_end--; 3671 3672 return new_end; 3673 } 3674 3675 /* 3676 * mas_wr_append: Attempt to append 3677 * @wr_mas: the maple write state 3678 * @new_end: The end of the node after the modification 3679 * 3680 * This is currently unsafe in rcu mode since the end of the node may be cached 3681 * by readers while the node contents may be updated which could result in 3682 * inaccurate information. 3683 */ 3684 static inline void mas_wr_append(struct ma_wr_state *wr_mas, 3685 unsigned char new_end) 3686 { 3687 struct ma_state *mas = wr_mas->mas; 3688 void __rcu **slots; 3689 unsigned char end = mas->end; 3690 3691 if (new_end < mt_pivots[wr_mas->type]) { 3692 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 3693 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 3694 } 3695 3696 slots = wr_mas->slots; 3697 if (new_end == end + 1) { 3698 if (mas->last == wr_mas->r_max) { 3699 /* Append to end of range */ 3700 rcu_assign_pointer(slots[new_end], wr_mas->entry); 3701 wr_mas->pivots[end] = mas->index - 1; 3702 mas->offset = new_end; 3703 } else { 3704 /* Append to start of range */ 3705 rcu_assign_pointer(slots[new_end], wr_mas->content); 3706 wr_mas->pivots[end] = mas->last; 3707 rcu_assign_pointer(slots[end], wr_mas->entry); 3708 } 3709 } else { 3710 /* Append to the range without touching any boundaries. 
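 * The old entry keeps the head of its range, the new entry is written after
 * it, and the old contents are written once more to carry the tail of the
 * range.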
*/ 3711 rcu_assign_pointer(slots[new_end], wr_mas->content); 3712 wr_mas->pivots[end + 1] = mas->last; 3713 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 3714 wr_mas->pivots[end] = mas->index - 1; 3715 mas->offset = end + 1; 3716 } 3717 3718 if (!wr_mas->content || !wr_mas->entry) 3719 mas_update_gap(mas); 3720 3721 mas->end = new_end; 3722 trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry); 3723 return; 3724 } 3725 3726 /* 3727 * mas_wr_bnode() - Slow path for a modification. 3728 * @wr_mas: The write maple state 3729 * 3730 * This is where split, rebalance end up. 3731 */ 3732 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 3733 { 3734 struct maple_big_node b_node; 3735 3736 trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry); 3737 memset(&b_node, 0, sizeof(struct maple_big_node)); 3738 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 3739 mas_commit_b_node(wr_mas, &b_node); 3740 } 3741 3742 /* 3743 * mas_wr_store_entry() - Internal call to store a value 3744 * @wr_mas: The maple write state 3745 */ 3746 static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas) 3747 { 3748 struct ma_state *mas = wr_mas->mas; 3749 unsigned char new_end = mas_wr_new_end(wr_mas); 3750 3751 switch (mas->store_type) { 3752 case wr_exact_fit: 3753 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 3754 if (!!wr_mas->entry ^ !!wr_mas->content) 3755 mas_update_gap(mas); 3756 break; 3757 case wr_append: 3758 mas_wr_append(wr_mas, new_end); 3759 break; 3760 case wr_slot_store: 3761 mas_wr_slot_store(wr_mas); 3762 break; 3763 case wr_node_store: 3764 mas_wr_node_store(wr_mas, new_end); 3765 break; 3766 case wr_spanning_store: 3767 mas_wr_spanning_store(wr_mas); 3768 break; 3769 case wr_split_store: 3770 case wr_rebalance: 3771 mas_wr_bnode(wr_mas); 3772 break; 3773 case wr_new_root: 3774 mas_new_root(mas, wr_mas->entry); 3775 break; 3776 case wr_store_root: 3777 mas_store_root(mas, wr_mas->entry); 3778 break; 3779 case wr_invalid: 3780 MT_BUG_ON(mas->tree, 1); 3781 } 3782 3783 return; 3784 } 3785 3786 static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas) 3787 { 3788 struct ma_state *mas = wr_mas->mas; 3789 3790 if (!mas_is_active(mas)) { 3791 if (mas_is_start(mas)) 3792 goto set_content; 3793 3794 if (unlikely(mas_is_paused(mas))) 3795 goto reset; 3796 3797 if (unlikely(mas_is_none(mas))) 3798 goto reset; 3799 3800 if (unlikely(mas_is_overflow(mas))) 3801 goto reset; 3802 3803 if (unlikely(mas_is_underflow(mas))) 3804 goto reset; 3805 } 3806 3807 /* 3808 * A less strict version of mas_is_span_wr() where we allow spanning 3809 * writes within this node. This is to stop partial walks in 3810 * mas_prealloc() from being reset. 3811 */ 3812 if (mas->last > mas->max) 3813 goto reset; 3814 3815 if (wr_mas->entry) 3816 goto set_content; 3817 3818 if (mte_is_leaf(mas->node) && mas->last == mas->max) 3819 goto reset; 3820 3821 goto set_content; 3822 3823 reset: 3824 mas_reset(mas); 3825 set_content: 3826 wr_mas->content = mas_start(mas); 3827 } 3828 3829 /** 3830 * mas_prealloc_calc() - Calculate number of nodes needed for a 3831 * given store oepration 3832 * @wr_mas: The maple write state 3833 * @entry: The entry to store into the tree 3834 * 3835 * Return: Number of nodes required for preallocation. 
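 *
 * Worked example (illustrative): for a wr_split_store on a tree of height 4
 * where the deepest node with a free slot gives a vacant_height of 2, delta
 * is 2 and delta * 2 + 1 = 5 nodes are requested through mas->node_request.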
3836 */ 3837 static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry) 3838 { 3839 struct ma_state *mas = wr_mas->mas; 3840 unsigned char height = mas_mt_height(mas); 3841 int ret = height * 3 + 1; 3842 unsigned char delta = height - wr_mas->vacant_height; 3843 3844 switch (mas->store_type) { 3845 case wr_exact_fit: 3846 case wr_append: 3847 case wr_slot_store: 3848 ret = 0; 3849 break; 3850 case wr_spanning_store: 3851 if (wr_mas->sufficient_height < wr_mas->vacant_height) 3852 ret = (height - wr_mas->sufficient_height) * 3 + 1; 3853 else 3854 ret = delta * 3 + 1; 3855 break; 3856 case wr_split_store: 3857 ret = delta * 2 + 1; 3858 break; 3859 case wr_rebalance: 3860 if (wr_mas->sufficient_height < wr_mas->vacant_height) 3861 ret = (height - wr_mas->sufficient_height) * 2 + 1; 3862 else 3863 ret = delta * 2 + 1; 3864 break; 3865 case wr_node_store: 3866 ret = mt_in_rcu(mas->tree) ? 1 : 0; 3867 break; 3868 case wr_new_root: 3869 ret = 1; 3870 break; 3871 case wr_store_root: 3872 if (likely((mas->last != 0) || (mas->index != 0))) 3873 ret = 1; 3874 else if (((unsigned long) (entry) & 3) == 2) 3875 ret = 1; 3876 else 3877 ret = 0; 3878 break; 3879 case wr_invalid: 3880 WARN_ON_ONCE(1); 3881 } 3882 3883 mas->node_request = ret; 3884 } 3885 3886 /* 3887 * mas_wr_store_type() - Determine the store type for a given 3888 * store operation. 3889 * @wr_mas: The maple write state 3890 * 3891 * Return: the type of store needed for the operation 3892 */ 3893 static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) 3894 { 3895 struct ma_state *mas = wr_mas->mas; 3896 unsigned char new_end; 3897 3898 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 3899 return wr_store_root; 3900 3901 if (unlikely(!mas_wr_walk(wr_mas))) 3902 return wr_spanning_store; 3903 3904 /* At this point, we are at the leaf node that needs to be altered. */ 3905 mas_wr_end_piv(wr_mas); 3906 if (!wr_mas->entry) 3907 mas_wr_extend_null(wr_mas); 3908 3909 if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) 3910 return wr_exact_fit; 3911 3912 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3913 return wr_new_root; 3914 3915 new_end = mas_wr_new_end(wr_mas); 3916 /* Potential spanning rebalance collapsing a node */ 3917 if (new_end < mt_min_slots[wr_mas->type]) { 3918 if (!mte_is_root(mas->node)) 3919 return wr_rebalance; 3920 return wr_node_store; 3921 } 3922 3923 if (new_end >= mt_slots[wr_mas->type]) 3924 return wr_split_store; 3925 3926 if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) 3927 return wr_append; 3928 3929 if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || 3930 (wr_mas->offset_end - mas->offset == 1))) 3931 return wr_slot_store; 3932 3933 return wr_node_store; 3934 } 3935 3936 /** 3937 * mas_wr_preallocate() - Preallocate enough nodes for a store operation 3938 * @wr_mas: The maple write state 3939 * @entry: The entry that will be stored 3940 * 3941 */ 3942 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) 3943 { 3944 struct ma_state *mas = wr_mas->mas; 3945 3946 mas_wr_prealloc_setup(wr_mas); 3947 mas->store_type = mas_wr_store_type(wr_mas); 3948 mas_prealloc_calc(wr_mas, entry); 3949 if (!mas->node_request) 3950 return; 3951 3952 mas_alloc_nodes(mas, GFP_NOWAIT); 3953 } 3954 3955 /** 3956 * mas_insert() - Internal call to insert a value 3957 * @mas: The maple state 3958 * @entry: The entry to store 3959 * 3960 * Return: %NULL or the contents that already exists at the requested index 3961 * otherwise. 
The maple state needs to be checked for error conditions. 3962 */ 3963 static inline void *mas_insert(struct ma_state *mas, void *entry) 3964 { 3965 MA_WR_STATE(wr_mas, mas, entry); 3966 3967 /* 3968 * Inserting a new range inserts either 0, 1, or 2 pivots within the 3969 * tree. If the insert fits exactly into an existing gap with a value 3970 * of NULL, then the slot only needs to be written with the new value. 3971 * If the range being inserted is adjacent to another range, then only a 3972 * single pivot needs to be inserted (as well as writing the entry). If 3973 * the new range is within a gap but does not touch any other ranges, 3974 * then two pivots need to be inserted: the start - 1, and the end. As 3975 * usual, the entry must be written. Most operations require a new node 3976 * to be allocated and replace an existing node to ensure RCU safety, 3977 * when in RCU mode. The exception to requiring a newly allocated node 3978 * is when inserting at the end of a node (appending). When done 3979 * carefully, appending can reuse the node in place. 3980 */ 3981 wr_mas.content = mas_start(mas); 3982 if (wr_mas.content) 3983 goto exists; 3984 3985 mas_wr_preallocate(&wr_mas, entry); 3986 if (mas_is_err(mas)) 3987 return NULL; 3988 3989 /* spanning writes always overwrite something */ 3990 if (mas->store_type == wr_spanning_store) 3991 goto exists; 3992 3993 /* At this point, we are at the leaf node that needs to be altered. */ 3994 if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) { 3995 wr_mas.offset_end = mas->offset; 3996 wr_mas.end_piv = wr_mas.r_max; 3997 3998 if (wr_mas.content || (mas->last > wr_mas.r_max)) 3999 goto exists; 4000 } 4001 4002 mas_wr_store_entry(&wr_mas); 4003 return wr_mas.content; 4004 4005 exists: 4006 mas_set_err(mas, -EEXIST); 4007 return wr_mas.content; 4008 4009 } 4010 4011 /** 4012 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry 4013 * @mas: The maple state. 4014 * @startp: Pointer to ID. 4015 * @range_lo: Lower bound of range to search. 4016 * @range_hi: Upper bound of range to search. 4017 * @entry: The entry to store. 4018 * @next: Pointer to next ID to allocate. 4019 * @gfp: The GFP_FLAGS to use for allocations. 4020 * 4021 * Return: 0 if the allocation succeeded without wrapping, 1 if the 4022 * allocation succeeded after wrapping, or -EBUSY if there are no 4023 * free entries. 
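 *
 * Example (illustrative sketch only; "cache", "item", "next_id" and the
 * 1-1023 ID space are hypothetical, and the tree is assumed to use its
 * internal lock):
 *
 *    MA_STATE(mas, &cache->mt, 0, 0);
 *    unsigned long id, next = cache->next_id;
 *    int ret;
 *
 *    mtree_lock(&cache->mt);
 *    ret = mas_alloc_cyclic(&mas, &id, item, 1, 1023, &next, GFP_KERNEL);
 *    mtree_unlock(&cache->mt);
 *    if (ret >= 0)        /* 0: no wrap, 1: wrapped around */
 *        cache->next_id = next;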
4024 */ 4025 int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, 4026 void *entry, unsigned long range_lo, unsigned long range_hi, 4027 unsigned long *next, gfp_t gfp) 4028 { 4029 unsigned long min = range_lo; 4030 int ret = 0; 4031 4032 range_lo = max(min, *next); 4033 ret = mas_empty_area(mas, range_lo, range_hi, 1); 4034 if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) { 4035 mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED; 4036 ret = 1; 4037 } 4038 if (ret < 0 && range_lo > min) { 4039 mas_reset(mas); 4040 ret = mas_empty_area(mas, min, range_hi, 1); 4041 if (ret == 0) 4042 ret = 1; 4043 } 4044 if (ret < 0) 4045 return ret; 4046 4047 do { 4048 mas_insert(mas, entry); 4049 } while (mas_nomem(mas, gfp)); 4050 if (mas_is_err(mas)) 4051 return xa_err(mas->node); 4052 4053 *startp = mas->index; 4054 *next = *startp + 1; 4055 if (*next == 0) 4056 mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; 4057 4058 mas_destroy(mas); 4059 return ret; 4060 } 4061 EXPORT_SYMBOL(mas_alloc_cyclic); 4062 4063 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4064 { 4065 retry: 4066 mas_set(mas, index); 4067 mas_state_walk(mas); 4068 if (mas_is_start(mas)) 4069 goto retry; 4070 } 4071 4072 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, 4073 struct maple_node *node, const unsigned long index) 4074 { 4075 if (unlikely(ma_dead_node(node))) { 4076 mas_rewalk(mas, index); 4077 return true; 4078 } 4079 return false; 4080 } 4081 4082 /* 4083 * mas_prev_node() - Find the prev non-null entry at the same level in the 4084 * tree. The prev value will be mas->node[mas->offset] or the status will be 4085 * ma_none. 4086 * @mas: The maple state 4087 * @min: The lower limit to search 4088 * 4089 * The prev node value will be mas->node[mas->offset] or the status will be 4090 * ma_none. 4091 * Return: 1 if the node is dead, 0 otherwise. 4092 */ 4093 static int mas_prev_node(struct ma_state *mas, unsigned long min) 4094 { 4095 enum maple_type mt; 4096 int offset, level; 4097 void __rcu **slots; 4098 struct maple_node *node; 4099 unsigned long *pivots; 4100 unsigned long max; 4101 4102 node = mas_mn(mas); 4103 if (!mas->min) 4104 goto no_entry; 4105 4106 max = mas->min - 1; 4107 if (max < min) 4108 goto no_entry; 4109 4110 level = 0; 4111 do { 4112 if (ma_is_root(node)) 4113 goto no_entry; 4114 4115 /* Walk up. 
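 * Keep ascending until an ancestor has a slot to the left of the current
 * one (offset != 0); if the root is reached first, there is no previous
 * node at this level.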
*/ 4116 if (unlikely(mas_ascend(mas))) 4117 return 1; 4118 offset = mas->offset; 4119 level++; 4120 node = mas_mn(mas); 4121 } while (!offset); 4122 4123 offset--; 4124 mt = mte_node_type(mas->node); 4125 while (level > 1) { 4126 level--; 4127 slots = ma_slots(node, mt); 4128 mas->node = mas_slot(mas, slots, offset); 4129 if (unlikely(ma_dead_node(node))) 4130 return 1; 4131 4132 mt = mte_node_type(mas->node); 4133 node = mas_mn(mas); 4134 pivots = ma_pivots(node, mt); 4135 offset = ma_data_end(node, mt, pivots, max); 4136 if (unlikely(ma_dead_node(node))) 4137 return 1; 4138 } 4139 4140 slots = ma_slots(node, mt); 4141 mas->node = mas_slot(mas, slots, offset); 4142 pivots = ma_pivots(node, mt); 4143 if (unlikely(ma_dead_node(node))) 4144 return 1; 4145 4146 if (likely(offset)) 4147 mas->min = pivots[offset - 1] + 1; 4148 mas->max = max; 4149 mas->offset = mas_data_end(mas); 4150 if (unlikely(mte_dead_node(mas->node))) 4151 return 1; 4152 4153 mas->end = mas->offset; 4154 return 0; 4155 4156 no_entry: 4157 if (unlikely(ma_dead_node(node))) 4158 return 1; 4159 4160 mas->status = ma_underflow; 4161 return 0; 4162 } 4163 4164 /* 4165 * mas_prev_slot() - Get the entry in the previous slot 4166 * 4167 * @mas: The maple state 4168 * @min: The minimum starting range 4169 * @empty: Can be empty 4170 * 4171 * Return: The entry in the previous slot which is possibly NULL 4172 */ 4173 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4174 { 4175 void *entry; 4176 void __rcu **slots; 4177 unsigned long pivot; 4178 enum maple_type type; 4179 unsigned long *pivots; 4180 struct maple_node *node; 4181 unsigned long save_point = mas->index; 4182 4183 retry: 4184 node = mas_mn(mas); 4185 type = mte_node_type(mas->node); 4186 pivots = ma_pivots(node, type); 4187 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4188 goto retry; 4189 4190 if (mas->min <= min) { 4191 pivot = mas_safe_min(mas, pivots, mas->offset); 4192 4193 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4194 goto retry; 4195 4196 if (pivot <= min) 4197 goto underflow; 4198 } 4199 4200 again: 4201 if (likely(mas->offset)) { 4202 mas->offset--; 4203 mas->last = mas->index - 1; 4204 mas->index = mas_safe_min(mas, pivots, mas->offset); 4205 } else { 4206 if (mas->index <= min) 4207 goto underflow; 4208 4209 if (mas_prev_node(mas, min)) { 4210 mas_rewalk(mas, save_point); 4211 goto retry; 4212 } 4213 4214 if (WARN_ON_ONCE(mas_is_underflow(mas))) 4215 return NULL; 4216 4217 mas->last = mas->max; 4218 node = mas_mn(mas); 4219 type = mte_node_type(mas->node); 4220 pivots = ma_pivots(node, type); 4221 mas->index = pivots[mas->offset - 1] + 1; 4222 } 4223 4224 slots = ma_slots(node, type); 4225 entry = mas_slot(mas, slots, mas->offset); 4226 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4227 goto retry; 4228 4229 if (likely(entry)) 4230 return entry; 4231 4232 if (!empty) { 4233 if (mas->index <= min) 4234 goto underflow; 4235 4236 goto again; 4237 } 4238 4239 return entry; 4240 4241 underflow: 4242 mas->status = ma_underflow; 4243 return NULL; 4244 } 4245 4246 /* 4247 * mas_next_node() - Get the next node at the same level in the tree. 4248 * @mas: The maple state 4249 * @node: The maple node 4250 * @max: The maximum pivot value to check. 4251 * 4252 * The next value will be mas->node[mas->offset] or the status will have 4253 * overflowed. 4254 * Return: 1 on dead node, 0 otherwise. 
4255 */ 4256 static int mas_next_node(struct ma_state *mas, struct maple_node *node, 4257 unsigned long max) 4258 { 4259 unsigned long min; 4260 unsigned long *pivots; 4261 struct maple_enode *enode; 4262 struct maple_node *tmp; 4263 int level = 0; 4264 unsigned char node_end; 4265 enum maple_type mt; 4266 void __rcu **slots; 4267 4268 if (mas->max >= max) 4269 goto overflow; 4270 4271 min = mas->max + 1; 4272 level = 0; 4273 do { 4274 if (ma_is_root(node)) 4275 goto overflow; 4276 4277 /* Walk up. */ 4278 if (unlikely(mas_ascend(mas))) 4279 return 1; 4280 4281 level++; 4282 node = mas_mn(mas); 4283 mt = mte_node_type(mas->node); 4284 pivots = ma_pivots(node, mt); 4285 node_end = ma_data_end(node, mt, pivots, mas->max); 4286 if (unlikely(ma_dead_node(node))) 4287 return 1; 4288 4289 } while (unlikely(mas->offset == node_end)); 4290 4291 slots = ma_slots(node, mt); 4292 mas->offset++; 4293 enode = mas_slot(mas, slots, mas->offset); 4294 if (unlikely(ma_dead_node(node))) 4295 return 1; 4296 4297 if (level > 1) 4298 mas->offset = 0; 4299 4300 while (unlikely(level > 1)) { 4301 level--; 4302 mas->node = enode; 4303 node = mas_mn(mas); 4304 mt = mte_node_type(mas->node); 4305 slots = ma_slots(node, mt); 4306 enode = mas_slot(mas, slots, 0); 4307 if (unlikely(ma_dead_node(node))) 4308 return 1; 4309 } 4310 4311 if (!mas->offset) 4312 pivots = ma_pivots(node, mt); 4313 4314 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4315 tmp = mte_to_node(enode); 4316 mt = mte_node_type(enode); 4317 pivots = ma_pivots(tmp, mt); 4318 mas->end = ma_data_end(tmp, mt, pivots, mas->max); 4319 if (unlikely(ma_dead_node(node))) 4320 return 1; 4321 4322 mas->node = enode; 4323 mas->min = min; 4324 return 0; 4325 4326 overflow: 4327 if (unlikely(ma_dead_node(node))) 4328 return 1; 4329 4330 mas->status = ma_overflow; 4331 return 0; 4332 } 4333 4334 /* 4335 * mas_next_slot() - Get the entry in the next slot 4336 * 4337 * @mas: The maple state 4338 * @max: The maximum starting range 4339 * @empty: Can be empty 4340 * 4341 * Return: The entry in the next slot which is possibly NULL 4342 */ 4343 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4344 { 4345 void __rcu **slots; 4346 unsigned long *pivots; 4347 unsigned long pivot; 4348 enum maple_type type; 4349 struct maple_node *node; 4350 unsigned long save_point = mas->last; 4351 void *entry; 4352 4353 retry: 4354 node = mas_mn(mas); 4355 type = mte_node_type(mas->node); 4356 pivots = ma_pivots(node, type); 4357 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4358 goto retry; 4359 4360 if (mas->max >= max) { 4361 if (likely(mas->offset < mas->end)) 4362 pivot = pivots[mas->offset]; 4363 else 4364 pivot = mas->max; 4365 4366 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4367 goto retry; 4368 4369 if (pivot >= max) { /* Was at the limit, next will extend beyond */ 4370 mas->status = ma_overflow; 4371 return NULL; 4372 } 4373 } 4374 4375 if (likely(mas->offset < mas->end)) { 4376 mas->index = pivots[mas->offset] + 1; 4377 again: 4378 mas->offset++; 4379 if (likely(mas->offset < mas->end)) 4380 mas->last = pivots[mas->offset]; 4381 else 4382 mas->last = mas->max; 4383 } else { 4384 if (mas->last >= max) { 4385 mas->status = ma_overflow; 4386 return NULL; 4387 } 4388 4389 if (mas_next_node(mas, node, max)) { 4390 mas_rewalk(mas, save_point); 4391 goto retry; 4392 } 4393 4394 if (WARN_ON_ONCE(mas_is_overflow(mas))) 4395 return NULL; 4396 4397 mas->offset = 0; 4398 mas->index = mas->min; 4399 node = mas_mn(mas); 4400 
type = mte_node_type(mas->node); 4401 pivots = ma_pivots(node, type); 4402 mas->last = pivots[0]; 4403 } 4404 4405 slots = ma_slots(node, type); 4406 entry = mt_slot(mas->tree, slots, mas->offset); 4407 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4408 goto retry; 4409 4410 if (entry) 4411 return entry; 4412 4413 4414 if (!empty) { 4415 if (mas->last >= max) { 4416 mas->status = ma_overflow; 4417 return NULL; 4418 } 4419 4420 mas->index = mas->last + 1; 4421 goto again; 4422 } 4423 4424 return entry; 4425 } 4426 4427 /* 4428 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4429 * highest gap address of a given size in a given node and descend. 4430 * @mas: The maple state 4431 * @size: The needed size. 4432 * 4433 * Return: True if found in a leaf, false otherwise. 4434 * 4435 */ 4436 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4437 unsigned long *gap_min, unsigned long *gap_max) 4438 { 4439 enum maple_type type = mte_node_type(mas->node); 4440 struct maple_node *node = mas_mn(mas); 4441 unsigned long *pivots, *gaps; 4442 void __rcu **slots; 4443 unsigned long gap = 0; 4444 unsigned long max, min; 4445 unsigned char offset; 4446 4447 if (unlikely(mas_is_err(mas))) 4448 return true; 4449 4450 if (ma_is_dense(type)) { 4451 /* dense nodes. */ 4452 mas->offset = (unsigned char)(mas->index - mas->min); 4453 return true; 4454 } 4455 4456 pivots = ma_pivots(node, type); 4457 slots = ma_slots(node, type); 4458 gaps = ma_gaps(node, type); 4459 offset = mas->offset; 4460 min = mas_safe_min(mas, pivots, offset); 4461 /* Skip out of bounds. */ 4462 while (mas->last < min) 4463 min = mas_safe_min(mas, pivots, --offset); 4464 4465 max = mas_safe_pivot(mas, pivots, offset, type); 4466 while (mas->index <= max) { 4467 gap = 0; 4468 if (gaps) 4469 gap = gaps[offset]; 4470 else if (!mas_slot(mas, slots, offset)) 4471 gap = max - min + 1; 4472 4473 if (gap) { 4474 if ((size <= gap) && (size <= mas->last - min + 1)) 4475 break; 4476 4477 if (!gaps) { 4478 /* Skip the next slot, it cannot be a gap. */ 4479 if (offset < 2) 4480 goto ascend; 4481 4482 offset -= 2; 4483 max = pivots[offset]; 4484 min = mas_safe_min(mas, pivots, offset); 4485 continue; 4486 } 4487 } 4488 4489 if (!offset) 4490 goto ascend; 4491 4492 offset--; 4493 max = min - 1; 4494 min = mas_safe_min(mas, pivots, offset); 4495 } 4496 4497 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4498 goto no_space; 4499 4500 if (unlikely(ma_is_leaf(type))) { 4501 mas->offset = offset; 4502 *gap_min = min; 4503 *gap_max = min + gap - 1; 4504 return true; 4505 } 4506 4507 /* descend, only happens under lock. 
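 * Returning false makes the caller's loop invoke mas_rev_awalk() again on
 * the child, continuing the search one level further down.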
*/ 4508 mas->node = mas_slot(mas, slots, offset); 4509 mas->min = min; 4510 mas->max = max; 4511 mas->offset = mas_data_end(mas); 4512 return false; 4513 4514 ascend: 4515 if (!mte_is_root(mas->node)) 4516 return false; 4517 4518 no_space: 4519 mas_set_err(mas, -EBUSY); 4520 return false; 4521 } 4522 4523 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4524 { 4525 enum maple_type type = mte_node_type(mas->node); 4526 unsigned long pivot, min, gap = 0; 4527 unsigned char offset, data_end; 4528 unsigned long *gaps, *pivots; 4529 void __rcu **slots; 4530 struct maple_node *node; 4531 bool found = false; 4532 4533 if (ma_is_dense(type)) { 4534 mas->offset = (unsigned char)(mas->index - mas->min); 4535 return true; 4536 } 4537 4538 node = mas_mn(mas); 4539 pivots = ma_pivots(node, type); 4540 slots = ma_slots(node, type); 4541 gaps = ma_gaps(node, type); 4542 offset = mas->offset; 4543 min = mas_safe_min(mas, pivots, offset); 4544 data_end = ma_data_end(node, type, pivots, mas->max); 4545 for (; offset <= data_end; offset++) { 4546 pivot = mas_safe_pivot(mas, pivots, offset, type); 4547 4548 /* Not within lower bounds */ 4549 if (mas->index > pivot) 4550 goto next_slot; 4551 4552 if (gaps) 4553 gap = gaps[offset]; 4554 else if (!mas_slot(mas, slots, offset)) 4555 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4556 else 4557 goto next_slot; 4558 4559 if (gap >= size) { 4560 if (ma_is_leaf(type)) { 4561 found = true; 4562 break; 4563 } 4564 4565 mas->node = mas_slot(mas, slots, offset); 4566 mas->min = min; 4567 mas->max = pivot; 4568 offset = 0; 4569 break; 4570 } 4571 next_slot: 4572 min = pivot + 1; 4573 if (mas->last <= pivot) { 4574 mas_set_err(mas, -EBUSY); 4575 return true; 4576 } 4577 } 4578 4579 mas->offset = offset; 4580 return found; 4581 } 4582 4583 /** 4584 * mas_walk() - Search for @mas->index in the tree. 4585 * @mas: The maple state. 4586 * 4587 * mas->index and mas->last will be set to the range if there is a value. If 4588 * mas->status is ma_none, reset to ma_start 4589 * 4590 * Return: the entry at the location or %NULL. 4591 */ 4592 void *mas_walk(struct ma_state *mas) 4593 { 4594 void *entry; 4595 4596 if (!mas_is_active(mas) && !mas_is_start(mas)) 4597 mas->status = ma_start; 4598 retry: 4599 entry = mas_state_walk(mas); 4600 if (mas_is_start(mas)) { 4601 goto retry; 4602 } else if (mas_is_none(mas)) { 4603 mas->index = 0; 4604 mas->last = ULONG_MAX; 4605 } else if (mas_is_ptr(mas)) { 4606 if (!mas->index) { 4607 mas->last = 0; 4608 return entry; 4609 } 4610 4611 mas->index = 1; 4612 mas->last = ULONG_MAX; 4613 mas->status = ma_none; 4614 return NULL; 4615 } 4616 4617 return entry; 4618 } 4619 EXPORT_SYMBOL_GPL(mas_walk); 4620 4621 static inline bool mas_rewind_node(struct ma_state *mas) 4622 { 4623 unsigned char slot; 4624 4625 do { 4626 if (mte_is_root(mas->node)) { 4627 slot = mas->offset; 4628 if (!slot) 4629 return false; 4630 } else { 4631 mas_ascend(mas); 4632 slot = mas->offset; 4633 } 4634 } while (!slot); 4635 4636 mas->offset = --slot; 4637 return true; 4638 } 4639 4640 /* 4641 * mas_skip_node() - Internal function. Skip over a node. 4642 * @mas: The maple state. 4643 * 4644 * Return: true if there is another node, false otherwise. 
4645 */ 4646 static inline bool mas_skip_node(struct ma_state *mas) 4647 { 4648 if (mas_is_err(mas)) 4649 return false; 4650 4651 do { 4652 if (mte_is_root(mas->node)) { 4653 if (mas->offset >= mas_data_end(mas)) { 4654 mas_set_err(mas, -EBUSY); 4655 return false; 4656 } 4657 } else { 4658 mas_ascend(mas); 4659 } 4660 } while (mas->offset >= mas_data_end(mas)); 4661 4662 mas->offset++; 4663 return true; 4664 } 4665 4666 /* 4667 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4668 * @size 4669 * @mas: The maple state 4670 * @size: The size of the gap required 4671 * 4672 * Search between @mas->index and @mas->last for a gap of @size. 4673 */ 4674 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4675 { 4676 struct maple_enode *last = NULL; 4677 4678 /* 4679 * There are 4 options: 4680 * go to child (descend) 4681 * go back to parent (ascend) 4682 * no gap found. (return, error == -EBUSY) 4683 * found the gap. (return) 4684 */ 4685 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4686 if (last == mas->node) 4687 mas_skip_node(mas); 4688 else 4689 last = mas->node; 4690 } 4691 } 4692 4693 /* 4694 * mas_sparse_area() - Internal function. Return upper or lower limit when 4695 * searching for a gap in an empty tree. 4696 * @mas: The maple state 4697 * @min: the minimum range 4698 * @max: The maximum range 4699 * @size: The size of the gap 4700 * @fwd: Searching forward or back 4701 */ 4702 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 4703 unsigned long max, unsigned long size, bool fwd) 4704 { 4705 if (!unlikely(mas_is_none(mas)) && min == 0) { 4706 min++; 4707 /* 4708 * At this time, min is increased, we need to recheck whether 4709 * the size is satisfied. 4710 */ 4711 if (min > max || max - min + 1 < size) 4712 return -EBUSY; 4713 } 4714 /* mas_is_ptr */ 4715 4716 if (fwd) { 4717 mas->index = min; 4718 mas->last = min + size - 1; 4719 } else { 4720 mas->last = max; 4721 mas->index = max - size + 1; 4722 } 4723 return 0; 4724 } 4725 4726 /* 4727 * mas_empty_area() - Get the lowest address within the range that is 4728 * sufficient for the size requested. 
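 * For example (illustrative sketch only; "mt" and "start" are
 * hypothetical, and the caller is assumed to hold the appropriate lock),
 * finding room for a 16-slot range anywhere in the index space:
 *
 *    MA_STATE(mas, &mt, 0, 0);
 *
 *    if (mas_empty_area(&mas, 0, ULONG_MAX, 16) == 0)
 *        /* mas.index .. mas.last now spans the free range found */
 *        start = mas.index;
 *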
4729 * @mas: The maple state 4730 * @min: The lowest value of the range 4731 * @max: The highest value of the range 4732 * @size: The size needed 4733 */ 4734 int mas_empty_area(struct ma_state *mas, unsigned long min, 4735 unsigned long max, unsigned long size) 4736 { 4737 unsigned char offset; 4738 unsigned long *pivots; 4739 enum maple_type mt; 4740 struct maple_node *node; 4741 4742 if (min > max) 4743 return -EINVAL; 4744 4745 if (size == 0 || max - min < size - 1) 4746 return -EINVAL; 4747 4748 if (mas_is_start(mas)) 4749 mas_start(mas); 4750 else if (mas->offset >= 2) 4751 mas->offset -= 2; 4752 else if (!mas_skip_node(mas)) 4753 return -EBUSY; 4754 4755 /* Empty set */ 4756 if (mas_is_none(mas) || mas_is_ptr(mas)) 4757 return mas_sparse_area(mas, min, max, size, true); 4758 4759 /* The start of the window can only be within these values */ 4760 mas->index = min; 4761 mas->last = max; 4762 mas_awalk(mas, size); 4763 4764 if (unlikely(mas_is_err(mas))) 4765 return xa_err(mas->node); 4766 4767 offset = mas->offset; 4768 node = mas_mn(mas); 4769 mt = mte_node_type(mas->node); 4770 pivots = ma_pivots(node, mt); 4771 min = mas_safe_min(mas, pivots, offset); 4772 if (mas->index < min) 4773 mas->index = min; 4774 mas->last = mas->index + size - 1; 4775 mas->end = ma_data_end(node, mt, pivots, mas->max); 4776 return 0; 4777 } 4778 EXPORT_SYMBOL_GPL(mas_empty_area); 4779 4780 /* 4781 * mas_empty_area_rev() - Get the highest address within the range that is 4782 * sufficient for the size requested. 4783 * @mas: The maple state 4784 * @min: The lowest value of the range 4785 * @max: The highest value of the range 4786 * @size: The size needed 4787 */ 4788 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 4789 unsigned long max, unsigned long size) 4790 { 4791 struct maple_enode *last = mas->node; 4792 4793 if (min > max) 4794 return -EINVAL; 4795 4796 if (size == 0 || max - min < size - 1) 4797 return -EINVAL; 4798 4799 if (mas_is_start(mas)) 4800 mas_start(mas); 4801 else if ((mas->offset < 2) && (!mas_rewind_node(mas))) 4802 return -EBUSY; 4803 4804 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 4805 return mas_sparse_area(mas, min, max, size, false); 4806 else if (mas->offset >= 2) 4807 mas->offset -= 2; 4808 else 4809 mas->offset = mas_data_end(mas); 4810 4811 4812 /* The start of the window can only be within these values. */ 4813 mas->index = min; 4814 mas->last = max; 4815 4816 while (!mas_rev_awalk(mas, size, &min, &max)) { 4817 if (last == mas->node) { 4818 if (!mas_rewind_node(mas)) 4819 return -EBUSY; 4820 } else { 4821 last = mas->node; 4822 } 4823 } 4824 4825 if (mas_is_err(mas)) 4826 return xa_err(mas->node); 4827 4828 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 4829 return -EBUSY; 4830 4831 /* Trim the upper limit to the max. */ 4832 if (max < mas->last) 4833 mas->last = max; 4834 4835 mas->index = mas->last - size + 1; 4836 mas->end = mas_data_end(mas); 4837 return 0; 4838 } 4839 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 4840 4841 /* 4842 * mte_dead_leaves() - Mark all leaves of a node as dead. 4843 * @enode: the encoded node 4844 * @mt: the maple tree 4845 * @slots: Pointer to the slot array 4846 * 4847 * Must hold the write lock. 4848 * 4849 * Return: The number of leaves marked as dead. 
4850 */ 4851 static inline 4852 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 4853 void __rcu **slots) 4854 { 4855 struct maple_node *node; 4856 enum maple_type type; 4857 void *entry; 4858 int offset; 4859 4860 for (offset = 0; offset < mt_slot_count(enode); offset++) { 4861 entry = mt_slot(mt, slots, offset); 4862 type = mte_node_type(entry); 4863 node = mte_to_node(entry); 4864 /* Use both node and type to catch LE & BE metadata */ 4865 if (!node || !type) 4866 break; 4867 4868 mte_set_node_dead(entry); 4869 node->type = type; 4870 rcu_assign_pointer(slots[offset], node); 4871 } 4872 4873 return offset; 4874 } 4875 4876 /** 4877 * mte_dead_walk() - Walk down a dead tree to just before the leaves 4878 * @enode: The maple encoded node 4879 * @offset: The starting offset 4880 * 4881 * Note: This can only be used from the RCU callback context. 4882 */ 4883 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 4884 { 4885 struct maple_node *node, *next; 4886 void __rcu **slots = NULL; 4887 4888 next = mte_to_node(*enode); 4889 do { 4890 *enode = ma_enode_ptr(next); 4891 node = mte_to_node(*enode); 4892 slots = ma_slots(node, node->type); 4893 next = rcu_dereference_protected(slots[offset], 4894 lock_is_held(&rcu_callback_map)); 4895 offset = 0; 4896 } while (!ma_is_leaf(next->type)); 4897 4898 return slots; 4899 } 4900 4901 /** 4902 * mt_free_walk() - Walk & free a tree in the RCU callback context 4903 * @head: The RCU head that's within the node. 4904 * 4905 * Note: This can only be used from the RCU callback context. 4906 */ 4907 static void mt_free_walk(struct rcu_head *head) 4908 { 4909 void __rcu **slots; 4910 struct maple_node *node, *start; 4911 struct maple_enode *enode; 4912 unsigned char offset; 4913 enum maple_type type; 4914 4915 node = container_of(head, struct maple_node, rcu); 4916 4917 if (ma_is_leaf(node->type)) 4918 goto free_leaf; 4919 4920 start = node; 4921 enode = mt_mk_node(node, node->type); 4922 slots = mte_dead_walk(&enode, 0); 4923 node = mte_to_node(enode); 4924 do { 4925 mt_free_bulk(node->slot_len, slots); 4926 offset = node->parent_slot + 1; 4927 enode = node->piv_parent; 4928 if (mte_to_node(enode) == node) 4929 goto free_leaf; 4930 4931 type = mte_node_type(enode); 4932 slots = ma_slots(mte_to_node(enode), type); 4933 if ((offset < mt_slots[type]) && 4934 rcu_dereference_protected(slots[offset], 4935 lock_is_held(&rcu_callback_map))) 4936 slots = mte_dead_walk(&enode, offset); 4937 node = mte_to_node(enode); 4938 } while ((node != start) || (node->slot_len < offset)); 4939 4940 slots = ma_slots(node, node->type); 4941 mt_free_bulk(node->slot_len, slots); 4942 4943 free_leaf: 4944 kfree(node); 4945 } 4946 4947 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 4948 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 4949 { 4950 struct maple_node *node; 4951 struct maple_enode *next = *enode; 4952 void __rcu **slots = NULL; 4953 enum maple_type type; 4954 unsigned char next_offset = 0; 4955 4956 do { 4957 *enode = next; 4958 node = mte_to_node(*enode); 4959 type = mte_node_type(*enode); 4960 slots = ma_slots(node, type); 4961 next = mt_slot_locked(mt, slots, next_offset); 4962 if ((mte_dead_node(next))) 4963 next = mt_slot_locked(mt, slots, ++next_offset); 4964 4965 mte_set_node_dead(*enode); 4966 node->type = type; 4967 node->piv_parent = prev; 4968 node->parent_slot = offset; 4969 offset = next_offset; 4970 next_offset = 0; 4971 prev = *enode; 4972 } 
while (!mte_is_leaf(next)); 4973 4974 return slots; 4975 } 4976 4977 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 4978 bool free) 4979 { 4980 void __rcu **slots; 4981 struct maple_node *node = mte_to_node(enode); 4982 struct maple_enode *start; 4983 4984 if (mte_is_leaf(enode)) { 4985 mte_set_node_dead(enode); 4986 node->type = mte_node_type(enode); 4987 goto free_leaf; 4988 } 4989 4990 start = enode; 4991 slots = mte_destroy_descend(&enode, mt, start, 0); 4992 node = mte_to_node(enode); // Updated in the above call. 4993 do { 4994 enum maple_type type; 4995 unsigned char offset; 4996 struct maple_enode *parent, *tmp; 4997 4998 node->slot_len = mte_dead_leaves(enode, mt, slots); 4999 if (free) 5000 mt_free_bulk(node->slot_len, slots); 5001 offset = node->parent_slot + 1; 5002 enode = node->piv_parent; 5003 if (mte_to_node(enode) == node) 5004 goto free_leaf; 5005 5006 type = mte_node_type(enode); 5007 slots = ma_slots(mte_to_node(enode), type); 5008 if (offset >= mt_slots[type]) 5009 goto next; 5010 5011 tmp = mt_slot_locked(mt, slots, offset); 5012 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5013 parent = enode; 5014 enode = tmp; 5015 slots = mte_destroy_descend(&enode, mt, parent, offset); 5016 } 5017 next: 5018 node = mte_to_node(enode); 5019 } while (start != enode); 5020 5021 node = mte_to_node(enode); 5022 node->slot_len = mte_dead_leaves(enode, mt, slots); 5023 if (free) 5024 mt_free_bulk(node->slot_len, slots); 5025 5026 free_leaf: 5027 if (free) 5028 kfree(node); 5029 else 5030 mt_clear_meta(mt, node, node->type); 5031 } 5032 5033 /* 5034 * mte_destroy_walk() - Free a tree or sub-tree. 5035 * @enode: the encoded maple node (maple_enode) to start 5036 * @mt: the tree to free - needed for node types. 5037 * 5038 * Must hold the write lock. 5039 */ 5040 static inline void mte_destroy_walk(struct maple_enode *enode, 5041 struct maple_tree *mt) 5042 { 5043 struct maple_node *node = mte_to_node(enode); 5044 5045 if (mt_in_rcu(mt)) { 5046 mt_destroy_walk(enode, mt, false); 5047 call_rcu(&node->rcu, mt_free_walk); 5048 } else { 5049 mt_destroy_walk(enode, mt, true); 5050 } 5051 } 5052 /* Interface */ 5053 5054 /** 5055 * mas_store() - Store an @entry. 5056 * @mas: The maple state. 5057 * @entry: The entry to store. 5058 * 5059 * The @mas->index and @mas->last is used to set the range for the @entry. 5060 * 5061 * Return: the first entry between mas->index and mas->last or %NULL. 5062 */ 5063 void *mas_store(struct ma_state *mas, void *entry) 5064 { 5065 MA_WR_STATE(wr_mas, mas, entry); 5066 5067 trace_ma_write(TP_FCT, mas, 0, entry); 5068 #ifdef CONFIG_DEBUG_MAPLE_TREE 5069 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5070 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, 5071 entry); 5072 5073 if (mas->index > mas->last) { 5074 mas_set_err(mas, -EINVAL); 5075 return NULL; 5076 } 5077 5078 #endif 5079 5080 /* 5081 * Storing is the same operation as insert with the added caveat that it 5082 * can overwrite entries. Although this seems simple enough, one may 5083 * want to examine what happens if a single store operation was to 5084 * overwrite multiple entries within a self-balancing B-Tree. 
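 * As an added illustration: with existing entries at 0-5, 6-10 and 11-20,
 * a single store over 3-15 deletes the middle entry outright, truncates
 * the neighbours to 0-2 and 16-20, and may leave the leaf under-filled
 * enough to require a rebalance - work that adding or removing a single
 * key in a classic B-Tree never causes.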
5085 */ 5086 mas_wr_prealloc_setup(&wr_mas); 5087 mas->store_type = mas_wr_store_type(&wr_mas); 5088 if (mas->mas_flags & MA_STATE_PREALLOC) { 5089 mas_wr_store_entry(&wr_mas); 5090 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5091 return wr_mas.content; 5092 } 5093 5094 mas_prealloc_calc(&wr_mas, entry); 5095 if (!mas->node_request) 5096 goto store; 5097 5098 mas_alloc_nodes(mas, GFP_NOWAIT); 5099 if (mas_is_err(mas)) 5100 return NULL; 5101 5102 store: 5103 mas_wr_store_entry(&wr_mas); 5104 mas_destroy(mas); 5105 return wr_mas.content; 5106 } 5107 EXPORT_SYMBOL_GPL(mas_store); 5108 5109 /** 5110 * mas_store_gfp() - Store a value into the tree. 5111 * @mas: The maple state 5112 * @entry: The entry to store 5113 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5114 * 5115 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5116 * be allocated. 5117 */ 5118 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5119 { 5120 unsigned long index = mas->index; 5121 unsigned long last = mas->last; 5122 MA_WR_STATE(wr_mas, mas, entry); 5123 int ret = 0; 5124 5125 retry: 5126 mas_wr_preallocate(&wr_mas, entry); 5127 if (unlikely(mas_nomem(mas, gfp))) { 5128 if (!entry) 5129 __mas_set_range(mas, index, last); 5130 goto retry; 5131 } 5132 5133 if (mas_is_err(mas)) { 5134 ret = xa_err(mas->node); 5135 goto out; 5136 } 5137 5138 mas_wr_store_entry(&wr_mas); 5139 out: 5140 mas_destroy(mas); 5141 return ret; 5142 } 5143 EXPORT_SYMBOL_GPL(mas_store_gfp); 5144 5145 /** 5146 * mas_store_prealloc() - Store a value into the tree using memory 5147 * preallocated in the maple state. 5148 * @mas: The maple state 5149 * @entry: The entry to store. 5150 */ 5151 void mas_store_prealloc(struct ma_state *mas, void *entry) 5152 { 5153 MA_WR_STATE(wr_mas, mas, entry); 5154 5155 if (mas->store_type == wr_store_root) { 5156 mas_wr_prealloc_setup(&wr_mas); 5157 goto store; 5158 } 5159 5160 mas_wr_walk_descend(&wr_mas); 5161 if (mas->store_type != wr_spanning_store) { 5162 /* set wr_mas->content to current slot */ 5163 wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset); 5164 mas_wr_end_piv(&wr_mas); 5165 } 5166 5167 store: 5168 trace_ma_write(TP_FCT, mas, 0, entry); 5169 mas_wr_store_entry(&wr_mas); 5170 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5171 mas_destroy(mas); 5172 } 5173 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5174 5175 /** 5176 * mas_preallocate() - Preallocate enough nodes for a store operation 5177 * @mas: The maple state 5178 * @entry: The entry that will be stored 5179 * @gfp: The GFP_FLAGS to use for allocations. 5180 * 5181 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5182 */ 5183 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5184 { 5185 MA_WR_STATE(wr_mas, mas, entry); 5186 5187 mas_wr_prealloc_setup(&wr_mas); 5188 mas->store_type = mas_wr_store_type(&wr_mas); 5189 mas_prealloc_calc(&wr_mas, entry); 5190 if (!mas->node_request) 5191 goto set_flag; 5192 5193 mas->mas_flags &= ~MA_STATE_PREALLOC; 5194 mas_alloc_nodes(mas, gfp); 5195 if (mas_is_err(mas)) { 5196 int ret = xa_err(mas->node); 5197 5198 mas->node_request = 0; 5199 mas_destroy(mas); 5200 mas_reset(mas); 5201 return ret; 5202 } 5203 5204 set_flag: 5205 mas->mas_flags |= MA_STATE_PREALLOC; 5206 return 0; 5207 } 5208 EXPORT_SYMBOL_GPL(mas_preallocate); 5209 5210 /* 5211 * mas_destroy() - destroy a maple state. 
5212 * @mas: The maple state 5213 * 5214 * Upon completion, check the left-most node and rebalance against the node to 5215 * the right if necessary. Frees any allocated nodes associated with this maple 5216 * state. 5217 */ 5218 void mas_destroy(struct ma_state *mas) 5219 { 5220 mas->mas_flags &= ~MA_STATE_PREALLOC; 5221 mas_empty_nodes(mas); 5222 } 5223 EXPORT_SYMBOL_GPL(mas_destroy); 5224 5225 static void mas_may_activate(struct ma_state *mas) 5226 { 5227 if (!mas->node) { 5228 mas->status = ma_start; 5229 } else if (mas->index > mas->max || mas->index < mas->min) { 5230 mas->status = ma_start; 5231 } else { 5232 mas->status = ma_active; 5233 } 5234 } 5235 5236 static bool mas_next_setup(struct ma_state *mas, unsigned long max, 5237 void **entry) 5238 { 5239 bool was_none = mas_is_none(mas); 5240 5241 if (unlikely(mas->last >= max)) { 5242 mas->status = ma_overflow; 5243 return true; 5244 } 5245 5246 switch (mas->status) { 5247 case ma_active: 5248 return false; 5249 case ma_none: 5250 fallthrough; 5251 case ma_pause: 5252 mas->status = ma_start; 5253 fallthrough; 5254 case ma_start: 5255 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5256 break; 5257 case ma_overflow: 5258 /* Overflowed before, but the max changed */ 5259 mas_may_activate(mas); 5260 break; 5261 case ma_underflow: 5262 /* The user expects the mas to be one before where it is */ 5263 mas_may_activate(mas); 5264 *entry = mas_walk(mas); 5265 if (*entry) 5266 return true; 5267 break; 5268 case ma_root: 5269 break; 5270 case ma_error: 5271 return true; 5272 } 5273 5274 if (likely(mas_is_active(mas))) /* Fast path */ 5275 return false; 5276 5277 if (mas_is_ptr(mas)) { 5278 *entry = NULL; 5279 if (was_none && mas->index == 0) { 5280 mas->index = mas->last = 0; 5281 return true; 5282 } 5283 mas->index = 1; 5284 mas->last = ULONG_MAX; 5285 mas->status = ma_none; 5286 return true; 5287 } 5288 5289 if (mas_is_none(mas)) 5290 return true; 5291 5292 return false; 5293 } 5294 5295 /** 5296 * mas_next() - Get the next entry. 5297 * @mas: The maple state 5298 * @max: The maximum index to check. 5299 * 5300 * Returns the next entry after @mas->index. 5301 * Must hold rcu_read_lock or the write lock. 5302 * Can return the zero entry. 5303 * 5304 * Return: The next entry or %NULL 5305 */ 5306 void *mas_next(struct ma_state *mas, unsigned long max) 5307 { 5308 void *entry = NULL; 5309 5310 if (mas_next_setup(mas, max, &entry)) 5311 return entry; 5312 5313 /* Retries on dead nodes handled by mas_next_slot */ 5314 return mas_next_slot(mas, max, false); 5315 } 5316 EXPORT_SYMBOL_GPL(mas_next); 5317 5318 /** 5319 * mas_next_range() - Advance the maple state to the next range 5320 * @mas: The maple state 5321 * @max: The maximum index to check. 5322 * 5323 * Sets @mas->index and @mas->last to the range. 5324 * Must hold rcu_read_lock or the write lock. 5325 * Can return the zero entry. 
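 *
 * Typical forward iteration (illustrative sketch only; "mt" is a
 * hypothetical tree) uses mas_find()/mas_for_each() or mas_next() under
 * rcu_read_lock():
 *
 *    MA_STATE(mas, &mt, 0, 0);
 *    void *entry;
 *
 *    rcu_read_lock();
 *    mas_for_each(&mas, entry, ULONG_MAX)
 *        pr_debug("%lu-%lu -> %p\n", mas.index, mas.last, entry);
 *    rcu_read_unlock();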
5326 * 5327 * Return: The next entry or %NULL 5328 */ 5329 void *mas_next_range(struct ma_state *mas, unsigned long max) 5330 { 5331 void *entry = NULL; 5332 5333 if (mas_next_setup(mas, max, &entry)) 5334 return entry; 5335 5336 /* Retries on dead nodes handled by mas_next_slot */ 5337 return mas_next_slot(mas, max, true); 5338 } 5339 EXPORT_SYMBOL_GPL(mas_next_range); 5340 5341 /** 5342 * mt_next() - get the next value in the maple tree 5343 * @mt: The maple tree 5344 * @index: The start index 5345 * @max: The maximum index to check 5346 * 5347 * Takes RCU read lock internally to protect the search, which does not 5348 * protect the returned pointer after dropping RCU read lock. 5349 * See also: Documentation/core-api/maple_tree.rst 5350 * 5351 * Return: The entry higher than @index or %NULL if nothing is found. 5352 */ 5353 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5354 { 5355 void *entry = NULL; 5356 MA_STATE(mas, mt, index, index); 5357 5358 rcu_read_lock(); 5359 entry = mas_next(&mas, max); 5360 rcu_read_unlock(); 5361 return entry; 5362 } 5363 EXPORT_SYMBOL_GPL(mt_next); 5364 5365 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) 5366 { 5367 if (unlikely(mas->index <= min)) { 5368 mas->status = ma_underflow; 5369 return true; 5370 } 5371 5372 switch (mas->status) { 5373 case ma_active: 5374 return false; 5375 case ma_start: 5376 break; 5377 case ma_none: 5378 fallthrough; 5379 case ma_pause: 5380 mas->status = ma_start; 5381 break; 5382 case ma_underflow: 5383 /* underflowed before but the min changed */ 5384 mas_may_activate(mas); 5385 break; 5386 case ma_overflow: 5387 /* User expects mas to be one after where it is */ 5388 mas_may_activate(mas); 5389 *entry = mas_walk(mas); 5390 if (*entry) 5391 return true; 5392 break; 5393 case ma_root: 5394 break; 5395 case ma_error: 5396 return true; 5397 } 5398 5399 if (mas_is_start(mas)) 5400 mas_walk(mas); 5401 5402 if (unlikely(mas_is_ptr(mas))) { 5403 if (!mas->index) { 5404 mas->status = ma_none; 5405 return true; 5406 } 5407 mas->index = mas->last = 0; 5408 *entry = mas_root(mas); 5409 return true; 5410 } 5411 5412 if (mas_is_none(mas)) { 5413 if (mas->index) { 5414 /* Walked to out-of-range pointer? */ 5415 mas->index = mas->last = 0; 5416 mas->status = ma_root; 5417 *entry = mas_root(mas); 5418 return true; 5419 } 5420 return true; 5421 } 5422 5423 return false; 5424 } 5425 5426 /** 5427 * mas_prev() - Get the previous entry 5428 * @mas: The maple state 5429 * @min: The minimum value to check. 5430 * 5431 * Must hold rcu_read_lock or the write lock. 5432 * Will reset mas to ma_start if the status is ma_none. Will stop on not 5433 * searchable nodes. 5434 * 5435 * Return: the previous value or %NULL. 5436 */ 5437 void *mas_prev(struct ma_state *mas, unsigned long min) 5438 { 5439 void *entry = NULL; 5440 5441 if (mas_prev_setup(mas, min, &entry)) 5442 return entry; 5443 5444 return mas_prev_slot(mas, min, false); 5445 } 5446 EXPORT_SYMBOL_GPL(mas_prev); 5447 5448 /** 5449 * mas_prev_range() - Advance to the previous range 5450 * @mas: The maple state 5451 * @min: The minimum value to check. 5452 * 5453 * Sets @mas->index and @mas->last to the range. 5454 * Must hold rcu_read_lock or the write lock. 5455 * Will reset mas to ma_start if the node is ma_none. Will stop on not 5456 * searchable nodes. 5457 * 5458 * Return: the previous value or %NULL. 
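 *
 * Example of a reverse walk (illustrative sketch only; "mt" is a
 * hypothetical tree):
 *
 *    MA_STATE(mas, &mt, ULONG_MAX, ULONG_MAX);
 *    void *entry;
 *
 *    rcu_read_lock();
 *    while ((entry = mas_prev(&mas, 0)) != NULL)
 *        pr_debug("%lu-%lu -> %p\n", mas.index, mas.last, entry);
 *    rcu_read_unlock();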
5459 */ 5460 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5461 { 5462 void *entry = NULL; 5463 5464 if (mas_prev_setup(mas, min, &entry)) 5465 return entry; 5466 5467 return mas_prev_slot(mas, min, true); 5468 } 5469 EXPORT_SYMBOL_GPL(mas_prev_range); 5470 5471 /** 5472 * mt_prev() - get the previous value in the maple tree 5473 * @mt: The maple tree 5474 * @index: The start index 5475 * @min: The minimum index to check 5476 * 5477 * Takes RCU read lock internally to protect the search, which does not 5478 * protect the returned pointer after dropping RCU read lock. 5479 * See also: Documentation/core-api/maple_tree.rst 5480 * 5481 * Return: The entry before @index or %NULL if nothing is found. 5482 */ 5483 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5484 { 5485 void *entry = NULL; 5486 MA_STATE(mas, mt, index, index); 5487 5488 rcu_read_lock(); 5489 entry = mas_prev(&mas, min); 5490 rcu_read_unlock(); 5491 return entry; 5492 } 5493 EXPORT_SYMBOL_GPL(mt_prev); 5494 5495 /** 5496 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5497 * @mas: The maple state to pause 5498 * 5499 * Some users need to pause a walk and drop the lock they're holding in 5500 * order to yield to a higher priority thread or carry out an operation 5501 * on an entry. Those users should call this function before they drop 5502 * the lock. It resets the @mas to be suitable for the next iteration 5503 * of the loop after the user has reacquired the lock. If most entries 5504 * found during a walk require you to call mas_pause(), the mt_for_each() 5505 * iterator may be more appropriate. 5506 * 5507 */ 5508 void mas_pause(struct ma_state *mas) 5509 { 5510 mas->status = ma_pause; 5511 mas->node = NULL; 5512 } 5513 EXPORT_SYMBOL_GPL(mas_pause); 5514 5515 /** 5516 * mas_find_setup() - Internal function to set up mas_find*(). 5517 * @mas: The maple state 5518 * @max: The maximum index 5519 * @entry: Pointer to the entry 5520 * 5521 * Returns: True if entry is the answer, false otherwise. 
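 *
 * These states exist to let callers stop and later resume a search, e.g.
 * to drop the lock (illustrative sketch only; assumes an enclosing
 * rcu_read_lock() section, a maple state "mas", an "entry" pointer and a
 * hypothetical process() helper):
 *
 *    mas_for_each(&mas, entry, ULONG_MAX) {
 *        if (need_resched()) {
 *            mas_pause(&mas);
 *            rcu_read_unlock();
 *            cond_resched();
 *            rcu_read_lock();
 *        }
 *        process(entry);
 *    }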
5522 */ 5523 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) 5524 { 5525 switch (mas->status) { 5526 case ma_active: 5527 if (mas->last < max) 5528 return false; 5529 return true; 5530 case ma_start: 5531 break; 5532 case ma_pause: 5533 if (unlikely(mas->last >= max)) 5534 return true; 5535 5536 mas->index = ++mas->last; 5537 mas->status = ma_start; 5538 break; 5539 case ma_none: 5540 if (unlikely(mas->last >= max)) 5541 return true; 5542 5543 mas->index = mas->last; 5544 mas->status = ma_start; 5545 break; 5546 case ma_underflow: 5547 /* mas is pointing at entry before unable to go lower */ 5548 if (unlikely(mas->index >= max)) { 5549 mas->status = ma_overflow; 5550 return true; 5551 } 5552 5553 mas_may_activate(mas); 5554 *entry = mas_walk(mas); 5555 if (*entry) 5556 return true; 5557 break; 5558 case ma_overflow: 5559 if (unlikely(mas->last >= max)) 5560 return true; 5561 5562 mas_may_activate(mas); 5563 *entry = mas_walk(mas); 5564 if (*entry) 5565 return true; 5566 break; 5567 case ma_root: 5568 break; 5569 case ma_error: 5570 return true; 5571 } 5572 5573 if (mas_is_start(mas)) { 5574 /* First run or continue */ 5575 if (mas->index > max) 5576 return true; 5577 5578 *entry = mas_walk(mas); 5579 if (*entry) 5580 return true; 5581 5582 } 5583 5584 if (unlikely(mas_is_ptr(mas))) 5585 goto ptr_out_of_range; 5586 5587 if (unlikely(mas_is_none(mas))) 5588 return true; 5589 5590 if (mas->index == max) 5591 return true; 5592 5593 return false; 5594 5595 ptr_out_of_range: 5596 mas->status = ma_none; 5597 mas->index = 1; 5598 mas->last = ULONG_MAX; 5599 return true; 5600 } 5601 5602 /** 5603 * mas_find() - On the first call, find the entry at or after mas->index up to 5604 * %max. Otherwise, find the entry after mas->index. 5605 * @mas: The maple state 5606 * @max: The maximum value to check. 5607 * 5608 * Must hold rcu_read_lock or the write lock. 5609 * If an entry exists, last and index are updated accordingly. 5610 * May set @mas->status to ma_overflow. 5611 * 5612 * Return: The entry or %NULL. 5613 */ 5614 void *mas_find(struct ma_state *mas, unsigned long max) 5615 { 5616 void *entry = NULL; 5617 5618 if (mas_find_setup(mas, max, &entry)) 5619 return entry; 5620 5621 /* Retries on dead nodes handled by mas_next_slot */ 5622 entry = mas_next_slot(mas, max, false); 5623 /* Ignore overflow */ 5624 mas->status = ma_active; 5625 return entry; 5626 } 5627 EXPORT_SYMBOL_GPL(mas_find); 5628 5629 /** 5630 * mas_find_range() - On the first call, find the entry at or after 5631 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 5632 * @mas: The maple state 5633 * @max: The maximum value to check. 5634 * 5635 * Must hold rcu_read_lock or the write lock. 5636 * If an entry exists, last and index are updated accordingly. 5637 * May set @mas->status to ma_overflow. 5638 * 5639 * Return: The entry or %NULL. 5640 */ 5641 void *mas_find_range(struct ma_state *mas, unsigned long max) 5642 { 5643 void *entry = NULL; 5644 5645 if (mas_find_setup(mas, max, &entry)) 5646 return entry; 5647 5648 /* Retries on dead nodes handled by mas_next_slot */ 5649 return mas_next_slot(mas, max, true); 5650 } 5651 EXPORT_SYMBOL_GPL(mas_find_range); 5652 5653 /** 5654 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 5655 * @mas: The maple state 5656 * @min: The minimum index 5657 * @entry: Pointer to the entry 5658 * 5659 * Returns: True if entry is the answer, false otherwise. 
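 *
 * For example (illustrative sketch only; "mt" and "index" are
 * hypothetical), looking up the last entry at or below a given index:
 *
 *    MA_STATE(mas, &mt, index, index);
 *    void *entry;
 *
 *    rcu_read_lock();
 *    entry = mas_find_rev(&mas, 0);
 *    rcu_read_unlock();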
5660 */ 5661 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 5662 void **entry) 5663 { 5664 5665 switch (mas->status) { 5666 case ma_active: 5667 goto active; 5668 case ma_start: 5669 break; 5670 case ma_pause: 5671 if (unlikely(mas->index <= min)) { 5672 mas->status = ma_underflow; 5673 return true; 5674 } 5675 mas->last = --mas->index; 5676 mas->status = ma_start; 5677 break; 5678 case ma_none: 5679 if (mas->index <= min) 5680 goto none; 5681 5682 mas->last = mas->index; 5683 mas->status = ma_start; 5684 break; 5685 case ma_overflow: /* user expects the mas to be one after where it is */ 5686 if (unlikely(mas->index <= min)) { 5687 mas->status = ma_underflow; 5688 return true; 5689 } 5690 5691 mas->status = ma_active; 5692 break; 5693 case ma_underflow: /* user expects the mas to be one before where it is */ 5694 if (unlikely(mas->index <= min)) 5695 return true; 5696 5697 mas->status = ma_active; 5698 break; 5699 case ma_root: 5700 break; 5701 case ma_error: 5702 return true; 5703 } 5704 5705 if (mas_is_start(mas)) { 5706 /* First run or continue */ 5707 if (mas->index < min) 5708 return true; 5709 5710 *entry = mas_walk(mas); 5711 if (*entry) 5712 return true; 5713 } 5714 5715 if (unlikely(mas_is_ptr(mas))) 5716 goto none; 5717 5718 if (unlikely(mas_is_none(mas))) { 5719 /* 5720 * Walked to the location, and there was nothing so the previous 5721 * location is 0. 5722 */ 5723 mas->last = mas->index = 0; 5724 mas->status = ma_root; 5725 *entry = mas_root(mas); 5726 return true; 5727 } 5728 5729 active: 5730 if (mas->index < min) 5731 return true; 5732 5733 return false; 5734 5735 none: 5736 mas->status = ma_none; 5737 return true; 5738 } 5739 5740 /** 5741 * mas_find_rev: On the first call, find the first non-null entry at or below 5742 * mas->index down to %min. Otherwise find the first non-null entry below 5743 * mas->index down to %min. 5744 * @mas: The maple state 5745 * @min: The minimum value to check. 5746 * 5747 * Must hold rcu_read_lock or the write lock. 5748 * If an entry exists, last and index are updated accordingly. 5749 * May set @mas->status to ma_underflow. 5750 * 5751 * Return: The entry or %NULL. 5752 */ 5753 void *mas_find_rev(struct ma_state *mas, unsigned long min) 5754 { 5755 void *entry = NULL; 5756 5757 if (mas_find_rev_setup(mas, min, &entry)) 5758 return entry; 5759 5760 /* Retries on dead nodes handled by mas_prev_slot */ 5761 return mas_prev_slot(mas, min, false); 5762 5763 } 5764 EXPORT_SYMBOL_GPL(mas_find_rev); 5765 5766 /** 5767 * mas_find_range_rev: On the first call, find the first non-null entry at or 5768 * below mas->index down to %min. Otherwise advance to the previous slot after 5769 * mas->index down to %min. 5770 * @mas: The maple state 5771 * @min: The minimum value to check. 5772 * 5773 * Must hold rcu_read_lock or the write lock. 5774 * If an entry exists, last and index are updated accordingly. 5775 * May set @mas->status to ma_underflow. 5776 * 5777 * Return: The entry or %NULL. 5778 */ 5779 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 5780 { 5781 void *entry = NULL; 5782 5783 if (mas_find_rev_setup(mas, min, &entry)) 5784 return entry; 5785 5786 /* Retries on dead nodes handled by mas_prev_slot */ 5787 return mas_prev_slot(mas, min, true); 5788 } 5789 EXPORT_SYMBOL_GPL(mas_find_range_rev); 5790 5791 /** 5792 * mas_erase() - Find the range in which index resides and erase the entire 5793 * range. 5794 * @mas: The maple state 5795 * 5796 * Must hold the write lock. 
5797 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 5798 * erases that range. 5799 * 5800 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 5801 */ 5802 void *mas_erase(struct ma_state *mas) 5803 { 5804 void *entry; 5805 unsigned long index = mas->index; 5806 MA_WR_STATE(wr_mas, mas, NULL); 5807 5808 if (!mas_is_active(mas) || !mas_is_start(mas)) 5809 mas->status = ma_start; 5810 5811 write_retry: 5812 entry = mas_state_walk(mas); 5813 if (!entry) 5814 return NULL; 5815 5816 /* Must reset to ensure spanning writes of last slot are detected */ 5817 mas_reset(mas); 5818 mas_wr_preallocate(&wr_mas, NULL); 5819 if (mas_nomem(mas, GFP_KERNEL)) { 5820 /* in case the range of entry changed when unlocked */ 5821 mas->index = mas->last = index; 5822 goto write_retry; 5823 } 5824 5825 if (mas_is_err(mas)) 5826 goto out; 5827 5828 mas_wr_store_entry(&wr_mas); 5829 out: 5830 mas_destroy(mas); 5831 return entry; 5832 } 5833 EXPORT_SYMBOL_GPL(mas_erase); 5834 5835 /** 5836 * mas_nomem() - Check if there was an error allocating and do the allocation 5837 * if necessary If there are allocations, then free them. 5838 * @mas: The maple state 5839 * @gfp: The GFP_FLAGS to use for allocations 5840 * Return: true on allocation, false otherwise. 5841 */ 5842 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 5843 __must_hold(mas->tree->ma_lock) 5844 { 5845 if (likely(mas->node != MA_ERROR(-ENOMEM))) 5846 return false; 5847 5848 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 5849 mtree_unlock(mas->tree); 5850 mas_alloc_nodes(mas, gfp); 5851 mtree_lock(mas->tree); 5852 } else { 5853 mas_alloc_nodes(mas, gfp); 5854 } 5855 5856 if (!mas->sheaf && !mas->alloc) 5857 return false; 5858 5859 mas->status = ma_start; 5860 return true; 5861 } 5862 5863 void __init maple_tree_init(void) 5864 { 5865 struct kmem_cache_args args = { 5866 .align = sizeof(struct maple_node), 5867 .sheaf_capacity = 32, 5868 }; 5869 5870 maple_node_cache = kmem_cache_create("maple_node", 5871 sizeof(struct maple_node), &args, 5872 SLAB_PANIC); 5873 } 5874 5875 /** 5876 * mtree_load() - Load a value stored in a maple tree 5877 * @mt: The maple tree 5878 * @index: The index to load 5879 * 5880 * Return: the entry or %NULL 5881 */ 5882 void *mtree_load(struct maple_tree *mt, unsigned long index) 5883 { 5884 MA_STATE(mas, mt, index, index); 5885 void *entry; 5886 5887 trace_ma_read(TP_FCT, &mas); 5888 rcu_read_lock(); 5889 retry: 5890 entry = mas_start(&mas); 5891 if (unlikely(mas_is_none(&mas))) 5892 goto unlock; 5893 5894 if (unlikely(mas_is_ptr(&mas))) { 5895 if (index) 5896 entry = NULL; 5897 5898 goto unlock; 5899 } 5900 5901 entry = mtree_lookup_walk(&mas); 5902 if (!entry && unlikely(mas_is_start(&mas))) 5903 goto retry; 5904 unlock: 5905 rcu_read_unlock(); 5906 if (xa_is_zero(entry)) 5907 return NULL; 5908 5909 return entry; 5910 } 5911 EXPORT_SYMBOL(mtree_load); 5912 5913 /** 5914 * mtree_store_range() - Store an entry at a given range. 5915 * @mt: The maple tree 5916 * @index: The start of the range 5917 * @last: The end of the range 5918 * @entry: The entry to store 5919 * @gfp: The GFP_FLAGS to use for allocations 5920 * 5921 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5922 * be allocated. 
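 *
 * Example (illustrative sketch only; "ptr" and "other" are hypothetical
 * pointers):
 *
 *    DEFINE_MTREE(mt);
 *    int ret;
 *
 *    /* Associate indices 10-19 with ptr, then overwrite 15-30. */
 *    ret = mtree_store_range(&mt, 10, 19, ptr, GFP_KERNEL);
 *    if (!ret)
 *        ret = mtree_store_range(&mt, 15, 30, other, GFP_KERNEL);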
5923 */ 5924 int mtree_store_range(struct maple_tree *mt, unsigned long index, 5925 unsigned long last, void *entry, gfp_t gfp) 5926 { 5927 MA_STATE(mas, mt, index, last); 5928 int ret = 0; 5929 5930 trace_ma_write(TP_FCT, &mas, 0, entry); 5931 if (WARN_ON_ONCE(xa_is_advanced(entry))) 5932 return -EINVAL; 5933 5934 if (index > last) 5935 return -EINVAL; 5936 5937 mtree_lock(mt); 5938 ret = mas_store_gfp(&mas, entry, gfp); 5939 mtree_unlock(mt); 5940 5941 return ret; 5942 } 5943 EXPORT_SYMBOL(mtree_store_range); 5944 5945 /** 5946 * mtree_store() - Store an entry at a given index. 5947 * @mt: The maple tree 5948 * @index: The index to store the value 5949 * @entry: The entry to store 5950 * @gfp: The GFP_FLAGS to use for allocations 5951 * 5952 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5953 * be allocated. 5954 */ 5955 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 5956 gfp_t gfp) 5957 { 5958 return mtree_store_range(mt, index, index, entry, gfp); 5959 } 5960 EXPORT_SYMBOL(mtree_store); 5961 5962 /** 5963 * mtree_insert_range() - Insert an entry at a given range if there is no value. 5964 * @mt: The maple tree 5965 * @first: The start of the range 5966 * @last: The end of the range 5967 * @entry: The entry to store 5968 * @gfp: The GFP_FLAGS to use for allocations. 5969 * 5970 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 5971 * request, -ENOMEM if memory could not be allocated. 5972 */ 5973 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 5974 unsigned long last, void *entry, gfp_t gfp) 5975 { 5976 MA_STATE(ms, mt, first, last); 5977 int ret = 0; 5978 5979 if (WARN_ON_ONCE(xa_is_advanced(entry))) 5980 return -EINVAL; 5981 5982 if (first > last) 5983 return -EINVAL; 5984 5985 mtree_lock(mt); 5986 retry: 5987 mas_insert(&ms, entry); 5988 if (mas_nomem(&ms, gfp)) 5989 goto retry; 5990 5991 mtree_unlock(mt); 5992 if (mas_is_err(&ms)) 5993 ret = xa_err(ms.node); 5994 5995 mas_destroy(&ms); 5996 return ret; 5997 } 5998 EXPORT_SYMBOL(mtree_insert_range); 5999 6000 /** 6001 * mtree_insert() - Insert an entry at a given index if there is no value. 6002 * @mt: The maple tree 6003 * @index : The index to store the value 6004 * @entry: The entry to store 6005 * @gfp: The GFP_FLAGS to use for allocations. 6006 * 6007 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6008 * request, -ENOMEM if memory could not be allocated. 6009 */ 6010 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6011 gfp_t gfp) 6012 { 6013 return mtree_insert_range(mt, index, index, entry, gfp); 6014 } 6015 EXPORT_SYMBOL(mtree_insert); 6016 6017 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6018 void *entry, unsigned long size, unsigned long min, 6019 unsigned long max, gfp_t gfp) 6020 { 6021 int ret = 0; 6022 6023 MA_STATE(mas, mt, 0, 0); 6024 if (!mt_is_alloc(mt)) 6025 return -EINVAL; 6026 6027 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6028 return -EINVAL; 6029 6030 mtree_lock(mt); 6031 retry: 6032 ret = mas_empty_area(&mas, min, max, size); 6033 if (ret) 6034 goto unlock; 6035 6036 mas_insert(&mas, entry); 6037 /* 6038 * mas_nomem() may release the lock, causing the allocated area 6039 * to be unavailable, so try to allocate a free area again. 
6040 */ 6041 if (mas_nomem(&mas, gfp)) 6042 goto retry; 6043 6044 if (mas_is_err(&mas)) 6045 ret = xa_err(mas.node); 6046 else 6047 *startp = mas.index; 6048 6049 unlock: 6050 mtree_unlock(mt); 6051 mas_destroy(&mas); 6052 return ret; 6053 } 6054 EXPORT_SYMBOL(mtree_alloc_range); 6055 6056 /** 6057 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree. 6058 * @mt: The maple tree. 6059 * @startp: Pointer to ID. 6060 * @range_lo: Lower bound of range to search. 6061 * @range_hi: Upper bound of range to search. 6062 * @entry: The entry to store. 6063 * @next: Pointer to next ID to allocate. 6064 * @gfp: The GFP_FLAGS to use for allocations. 6065 * 6066 * Finds an empty entry in @mt after @next, stores the new index into 6067 * the @id pointer, stores the entry at that index, then updates @next. 6068 * 6069 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag. 6070 * 6071 * Context: Any context. Takes and releases the mt.lock. May sleep if 6072 * the @gfp flags permit. 6073 * 6074 * Return: 0 if the allocation succeeded without wrapping, 1 if the 6075 * allocation succeeded after wrapping, -ENOMEM if memory could not be 6076 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no 6077 * free entries. 6078 */ 6079 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, 6080 void *entry, unsigned long range_lo, unsigned long range_hi, 6081 unsigned long *next, gfp_t gfp) 6082 { 6083 int ret; 6084 6085 MA_STATE(mas, mt, 0, 0); 6086 6087 if (!mt_is_alloc(mt)) 6088 return -EINVAL; 6089 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6090 return -EINVAL; 6091 mtree_lock(mt); 6092 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, 6093 next, gfp); 6094 mtree_unlock(mt); 6095 return ret; 6096 } 6097 EXPORT_SYMBOL(mtree_alloc_cyclic); 6098 6099 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6100 void *entry, unsigned long size, unsigned long min, 6101 unsigned long max, gfp_t gfp) 6102 { 6103 int ret = 0; 6104 6105 MA_STATE(mas, mt, 0, 0); 6106 if (!mt_is_alloc(mt)) 6107 return -EINVAL; 6108 6109 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6110 return -EINVAL; 6111 6112 mtree_lock(mt); 6113 retry: 6114 ret = mas_empty_area_rev(&mas, min, max, size); 6115 if (ret) 6116 goto unlock; 6117 6118 mas_insert(&mas, entry); 6119 /* 6120 * mas_nomem() may release the lock, causing the allocated area 6121 * to be unavailable, so try to allocate a free area again. 6122 */ 6123 if (mas_nomem(&mas, gfp)) 6124 goto retry; 6125 6126 if (mas_is_err(&mas)) 6127 ret = xa_err(mas.node); 6128 else 6129 *startp = mas.index; 6130 6131 unlock: 6132 mtree_unlock(mt); 6133 mas_destroy(&mas); 6134 return ret; 6135 } 6136 EXPORT_SYMBOL(mtree_alloc_rrange); 6137 6138 /** 6139 * mtree_erase() - Find an index and erase the entire range. 6140 * @mt: The maple tree 6141 * @index: The index to erase 6142 * 6143 * Erasing is the same as a walk to an entry then a store of a NULL to that 6144 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6145 * 6146 * Return: The entry stored at the @index or %NULL 6147 */ 6148 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6149 { 6150 void *entry = NULL; 6151 6152 MA_STATE(mas, mt, index, index); 6153 trace_ma_op(TP_FCT, &mas); 6154 6155 mtree_lock(mt); 6156 entry = mas_erase(&mas); 6157 mtree_unlock(mt); 6158 6159 return entry; 6160 } 6161 EXPORT_SYMBOL(mtree_erase); 6162 6163 /* 6164 * mas_dup_free() - Free an incomplete duplication of a tree. 
6165 * @mas: The maple state of a incomplete tree. 6166 * 6167 * The parameter @mas->node passed in indicates that the allocation failed on 6168 * this node. This function frees all nodes starting from @mas->node in the 6169 * reverse order of mas_dup_build(). There is no need to hold the source tree 6170 * lock at this time. 6171 */ 6172 static void mas_dup_free(struct ma_state *mas) 6173 { 6174 struct maple_node *node; 6175 enum maple_type type; 6176 void __rcu **slots; 6177 unsigned char count, i; 6178 6179 /* Maybe the first node allocation failed. */ 6180 if (mas_is_none(mas)) 6181 return; 6182 6183 while (!mte_is_root(mas->node)) { 6184 mas_ascend(mas); 6185 if (mas->offset) { 6186 mas->offset--; 6187 do { 6188 mas_descend(mas); 6189 mas->offset = mas_data_end(mas); 6190 } while (!mte_is_leaf(mas->node)); 6191 6192 mas_ascend(mas); 6193 } 6194 6195 node = mte_to_node(mas->node); 6196 type = mte_node_type(mas->node); 6197 slots = ma_slots(node, type); 6198 count = mas_data_end(mas) + 1; 6199 for (i = 0; i < count; i++) 6200 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; 6201 mt_free_bulk(count, slots); 6202 } 6203 6204 node = mte_to_node(mas->node); 6205 kfree(node); 6206 } 6207 6208 /* 6209 * mas_copy_node() - Copy a maple node and replace the parent. 6210 * @mas: The maple state of source tree. 6211 * @new_mas: The maple state of new tree. 6212 * @parent: The parent of the new node. 6213 * 6214 * Copy @mas->node to @new_mas->node, set @parent to be the parent of 6215 * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM. 6216 */ 6217 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, 6218 struct maple_pnode *parent) 6219 { 6220 struct maple_node *node = mte_to_node(mas->node); 6221 struct maple_node *new_node = mte_to_node(new_mas->node); 6222 unsigned long val; 6223 6224 /* Copy the node completely. */ 6225 memcpy(new_node, node, sizeof(struct maple_node)); 6226 /* Update the parent node pointer. */ 6227 val = (unsigned long)node->parent & MAPLE_NODE_MASK; 6228 new_node->parent = ma_parent_ptr(val | (unsigned long)parent); 6229 } 6230 6231 /* 6232 * mas_dup_alloc() - Allocate child nodes for a maple node. 6233 * @mas: The maple state of source tree. 6234 * @new_mas: The maple state of new tree. 6235 * @gfp: The GFP_FLAGS to use for allocations. 6236 * 6237 * This function allocates child nodes for @new_mas->node during the duplication 6238 * process. If memory allocation fails, @mas is set to -ENOMEM. 6239 */ 6240 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, 6241 gfp_t gfp) 6242 { 6243 struct maple_node *node = mte_to_node(mas->node); 6244 struct maple_node *new_node = mte_to_node(new_mas->node); 6245 enum maple_type type; 6246 unsigned char count, i; 6247 void __rcu **slots; 6248 void __rcu **new_slots; 6249 unsigned long val; 6250 6251 /* Allocate memory for child nodes. */ 6252 type = mte_node_type(mas->node); 6253 new_slots = ma_slots(new_node, type); 6254 count = mas->node_request = mas_data_end(mas) + 1; 6255 mas_alloc_nodes(mas, gfp); 6256 if (unlikely(mas_is_err(mas))) 6257 return; 6258 6259 slots = ma_slots(node, type); 6260 for (i = 0; i < count; i++) { 6261 val = (unsigned long)mt_slot_locked(mas->tree, slots, i); 6262 val &= MAPLE_NODE_MASK; 6263 new_slots[i] = ma_mnode_ptr((unsigned long)mas_pop_node(mas) | 6264 val); 6265 } 6266 } 6267 6268 /* 6269 * mas_dup_build() - Build a new maple tree from a source tree 6270 * @mas: The maple state of source tree, need to be in MAS_START state. 
6271 * @new_mas: The maple state of new tree, need to be in MAS_START state. 6272 * @gfp: The GFP_FLAGS to use for allocations. 6273 * 6274 * This function builds a new tree in DFS preorder. If the memory allocation 6275 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the 6276 * last node. mas_dup_free() will free the incomplete duplication of a tree. 6277 * 6278 * Note that the attributes of the two trees need to be exactly the same, and the 6279 * new tree needs to be empty, otherwise -EINVAL will be set in @mas. 6280 */ 6281 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, 6282 gfp_t gfp) 6283 { 6284 struct maple_node *node; 6285 struct maple_pnode *parent = NULL; 6286 struct maple_enode *root; 6287 enum maple_type type; 6288 6289 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || 6290 unlikely(!mtree_empty(new_mas->tree))) { 6291 mas_set_err(mas, -EINVAL); 6292 return; 6293 } 6294 6295 root = mas_start(mas); 6296 if (mas_is_ptr(mas) || mas_is_none(mas)) 6297 goto set_new_tree; 6298 6299 node = mt_alloc_one(gfp); 6300 if (!node) { 6301 new_mas->status = ma_none; 6302 mas_set_err(mas, -ENOMEM); 6303 return; 6304 } 6305 6306 type = mte_node_type(mas->node); 6307 root = mt_mk_node(node, type); 6308 new_mas->node = root; 6309 new_mas->min = 0; 6310 new_mas->max = ULONG_MAX; 6311 root = mte_mk_root(root); 6312 while (1) { 6313 mas_copy_node(mas, new_mas, parent); 6314 if (!mte_is_leaf(mas->node)) { 6315 /* Only allocate child nodes for non-leaf nodes. */ 6316 mas_dup_alloc(mas, new_mas, gfp); 6317 if (unlikely(mas_is_err(mas))) 6318 goto empty_mas; 6319 } else { 6320 /* 6321 * This is the last leaf node and duplication is 6322 * completed. 6323 */ 6324 if (mas->max == ULONG_MAX) 6325 goto done; 6326 6327 /* This is not the last leaf node and needs to go up. */ 6328 do { 6329 mas_ascend(mas); 6330 mas_ascend(new_mas); 6331 } while (mas->offset == mas_data_end(mas)); 6332 6333 /* Move to the next subtree. */ 6334 mas->offset++; 6335 new_mas->offset++; 6336 } 6337 6338 mas_descend(mas); 6339 parent = ma_parent_ptr(mte_to_node(new_mas->node)); 6340 mas_descend(new_mas); 6341 mas->offset = 0; 6342 new_mas->offset = 0; 6343 } 6344 done: 6345 /* Specially handle the parent of the root node. */ 6346 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); 6347 set_new_tree: 6348 /* Make them the same height */ 6349 new_mas->tree->ma_flags = mas->tree->ma_flags; 6350 rcu_assign_pointer(new_mas->tree->ma_root, root); 6351 empty_mas: 6352 mas_empty_nodes(mas); 6353 } 6354 6355 /** 6356 * __mt_dup(): Duplicate an entire maple tree 6357 * @mt: The source maple tree 6358 * @new: The new maple tree 6359 * @gfp: The GFP_FLAGS to use for allocations 6360 * 6361 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6362 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6363 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6364 * source node except for all the addresses stored in it. It will be faster than 6365 * traversing all elements in the source tree and inserting them one by one into 6366 * the new tree. 6367 * The user needs to ensure that the attributes of the source tree and the new 6368 * tree are the same, and the new tree needs to be an empty tree, otherwise 6369 * -EINVAL will be returned. 6370 * Note that the user needs to manually lock the source tree and the new tree. 
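 *
 * A minimal caller-side sketch (hedged: make_private_copy() is a hypothetical
 * helper, and locking of both trees as well as the post-copy fix-up of the
 * duplicated entries remain the caller's responsibility):
 *
 *	ret = __mt_dup(mt, new, GFP_KERNEL);
 *	if (!ret) {
 *		MA_STATE(mas, new, 0, 0);
 *		void *entry;
 *
 *		mas_for_each(&mas, entry, ULONG_MAX)
 *			mas_store(&mas, make_private_copy(entry));
 *	}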
* 6372 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if 6373 * the attributes of the two trees are different or the new tree is not an empty 6374 * tree. 6375 */ 6376 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6377 { 6378 int ret = 0; 6379 MA_STATE(mas, mt, 0, 0); 6380 MA_STATE(new_mas, new, 0, 0); 6381 6382 mas_dup_build(&mas, &new_mas, gfp); 6383 if (unlikely(mas_is_err(&mas))) { 6384 ret = xa_err(mas.node); 6385 if (ret == -ENOMEM) 6386 mas_dup_free(&new_mas); 6387 } 6388 6389 return ret; 6390 } 6391 EXPORT_SYMBOL(__mt_dup); 6392 6393 /** 6394 * mtree_dup(): Duplicate an entire maple tree 6395 * @mt: The source maple tree 6396 * @new: The new maple tree 6397 * @gfp: The GFP_FLAGS to use for allocations 6398 * 6399 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6400 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6401 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6402 * source node except for all the addresses stored in it. It will be faster than 6403 * traversing all elements in the source tree and inserting them one by one into 6404 * the new tree. 6405 * The user needs to ensure that the attributes of the source tree and the new 6406 * tree are the same, and the new tree needs to be an empty tree, otherwise 6407 * -EINVAL will be returned. 6408 * 6409 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if 6410 * the attributes of the two trees are different or the new tree is not an empty 6411 * tree. 6412 */ 6413 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6414 { 6415 int ret = 0; 6416 MA_STATE(mas, mt, 0, 0); 6417 MA_STATE(new_mas, new, 0, 0); 6418 6419 mas_lock(&new_mas); 6420 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); 6421 mas_dup_build(&mas, &new_mas, gfp); 6422 mas_unlock(&mas); 6423 if (unlikely(mas_is_err(&mas))) { 6424 ret = xa_err(mas.node); 6425 if (ret == -ENOMEM) 6426 mas_dup_free(&new_mas); 6427 } 6428 6429 mas_unlock(&new_mas); 6430 return ret; 6431 } 6432 EXPORT_SYMBOL(mtree_dup); 6433 6434 /** 6435 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6436 * @mt: The maple tree 6437 * 6438 * Note: Does not handle locking. 6439 */ 6440 void __mt_destroy(struct maple_tree *mt) 6441 { 6442 void *root = mt_root_locked(mt); 6443 6444 rcu_assign_pointer(mt->ma_root, NULL); 6445 if (xa_is_node(root)) 6446 mte_destroy_walk(root, mt); 6447 6448 mt->ma_flags = mt_attr(mt); 6449 } 6450 EXPORT_SYMBOL_GPL(__mt_destroy); 6451 6452 /** 6453 * mtree_destroy() - Destroy a maple tree 6454 * @mt: The maple tree 6455 * 6456 * Frees all resources used by the tree. Handles locking. 6457 */ 6458 void mtree_destroy(struct maple_tree *mt) 6459 { 6460 mtree_lock(mt); 6461 __mt_destroy(mt); 6462 mtree_unlock(mt); 6463 } 6464 EXPORT_SYMBOL(mtree_destroy); 6465 6466 /** 6467 * mt_find() - Search from the start up until an entry is found. 6468 * @mt: The maple tree 6469 * @index: Pointer which contains the start location of the search 6470 * @max: The maximum value of the search range 6471 * 6472 * Takes the RCU read lock internally to protect the search; this does not 6473 * protect the returned pointer once the RCU read lock is dropped. 6474 * See also: Documentation/core-api/maple_tree.rst 6475 * 6476 * If an entry is found, @index is updated to point to the next 6477 * possible entry, independent of whether the found entry occupies a 6478 * single index or a range of indices.
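 *
 * A minimal iteration sketch (hedged: process() is a hypothetical handler;
 * the same loop is normally written with the mt_for_each() helper):
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	while ((entry = mt_find(mt, &index, ULONG_MAX)) != NULL)
 *		process(entry);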
6479 * 6480 * Return: The entry at or after the @index or %NULL 6481 */ 6482 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6483 { 6484 MA_STATE(mas, mt, *index, *index); 6485 void *entry; 6486 #ifdef CONFIG_DEBUG_MAPLE_TREE 6487 unsigned long copy = *index; 6488 #endif 6489 6490 trace_ma_read(TP_FCT, &mas); 6491 6492 if ((*index) > max) 6493 return NULL; 6494 6495 rcu_read_lock(); 6496 retry: 6497 entry = mas_state_walk(&mas); 6498 if (mas_is_start(&mas)) 6499 goto retry; 6500 6501 if (unlikely(xa_is_zero(entry))) 6502 entry = NULL; 6503 6504 if (entry) 6505 goto unlock; 6506 6507 while (mas_is_active(&mas) && (mas.last < max)) { 6508 entry = mas_next_slot(&mas, max, false); 6509 if (likely(entry && !xa_is_zero(entry))) 6510 break; 6511 } 6512 6513 if (unlikely(xa_is_zero(entry))) 6514 entry = NULL; 6515 unlock: 6516 rcu_read_unlock(); 6517 if (likely(entry)) { 6518 *index = mas.last + 1; 6519 #ifdef CONFIG_DEBUG_MAPLE_TREE 6520 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6521 pr_err("index not increased! %lx <= %lx\n", 6522 *index, copy); 6523 #endif 6524 } 6525 6526 return entry; 6527 } 6528 EXPORT_SYMBOL(mt_find); 6529 6530 /** 6531 * mt_find_after() - Search from the start up until an entry is found. 6532 * @mt: The maple tree 6533 * @index: Pointer which contains the start location of the search 6534 * @max: The maximum value to check 6535 * 6536 * Same as mt_find() except that it checks @index for 0 before 6537 * searching. If @index == 0, the search is aborted. This covers a wrap 6538 * around of @index to 0 in an iterator loop. 6539 * 6540 * Return: The entry at or after the @index or %NULL 6541 */ 6542 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6543 unsigned long max) 6544 { 6545 if (!(*index)) 6546 return NULL; 6547 6548 return mt_find(mt, index, max); 6549 } 6550 EXPORT_SYMBOL(mt_find_after); 6551 6552 #ifdef CONFIG_DEBUG_MAPLE_TREE 6553 atomic_t maple_tree_tests_run; 6554 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6555 atomic_t maple_tree_tests_passed; 6556 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6557 6558 #ifndef __KERNEL__ 6559 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6560 void mt_set_non_kernel(unsigned int val) 6561 { 6562 kmem_cache_set_non_kernel(maple_node_cache, val); 6563 } 6564 6565 extern void kmem_cache_set_callback(struct kmem_cache *cachep, 6566 void (*callback)(void *)); 6567 void mt_set_callback(void (*callback)(void *)) 6568 { 6569 kmem_cache_set_callback(maple_node_cache, callback); 6570 } 6571 6572 extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); 6573 void mt_set_private(void *private) 6574 { 6575 kmem_cache_set_private(maple_node_cache, private); 6576 } 6577 6578 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6579 unsigned long mt_get_alloc_size(void) 6580 { 6581 return kmem_cache_get_alloc(maple_node_cache); 6582 } 6583 6584 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6585 void mt_zero_nr_tallocated(void) 6586 { 6587 kmem_cache_zero_nr_tallocated(maple_node_cache); 6588 } 6589 6590 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6591 unsigned int mt_nr_tallocated(void) 6592 { 6593 return kmem_cache_nr_tallocated(maple_node_cache); 6594 } 6595 6596 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6597 unsigned int mt_nr_allocated(void) 6598 { 6599 return kmem_cache_nr_allocated(maple_node_cache); 6600 } 6601 6602 void mt_cache_shrink(void) 6603 { 6604 } 6605 
#else 6606 /* 6607 * mt_cache_shrink() - For testing, don't use this. 6608 * 6609 * Certain testcases can trigger an OOM when combined with other memory 6610 * debugging configuration options. This function is used to reduce the 6611 * possibility of an out of memory even due to kmem_cache objects remaining 6612 * around for longer than usual. 6613 */ 6614 void mt_cache_shrink(void) 6615 { 6616 kmem_cache_shrink(maple_node_cache); 6617 6618 } 6619 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6620 6621 #endif /* not defined __KERNEL__ */ 6622 /* 6623 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6624 * @mas: The maple state 6625 * @offset: The offset into the slot array to fetch. 6626 * 6627 * Return: The entry stored at @offset. 6628 */ 6629 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6630 unsigned char offset) 6631 { 6632 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6633 offset); 6634 } 6635 6636 /* Depth first search, post-order */ 6637 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6638 { 6639 6640 struct maple_enode *p, *mn = mas->node; 6641 unsigned long p_min, p_max; 6642 6643 mas_next_node(mas, mas_mn(mas), max); 6644 if (!mas_is_overflow(mas)) 6645 return; 6646 6647 if (mte_is_root(mn)) 6648 return; 6649 6650 mas->node = mn; 6651 mas_ascend(mas); 6652 do { 6653 p = mas->node; 6654 p_min = mas->min; 6655 p_max = mas->max; 6656 mas_prev_node(mas, 0); 6657 } while (!mas_is_underflow(mas)); 6658 6659 mas->node = p; 6660 mas->max = p_max; 6661 mas->min = p_min; 6662 } 6663 6664 /* Tree validations */ 6665 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6666 unsigned long min, unsigned long max, unsigned int depth, 6667 enum mt_dump_format format); 6668 static void mt_dump_range(unsigned long min, unsigned long max, 6669 unsigned int depth, enum mt_dump_format format) 6670 { 6671 static const char spaces[] = " "; 6672 6673 switch(format) { 6674 case mt_dump_hex: 6675 if (min == max) 6676 pr_info("%.*s%lx: ", depth * 2, spaces, min); 6677 else 6678 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 6679 break; 6680 case mt_dump_dec: 6681 if (min == max) 6682 pr_info("%.*s%lu: ", depth * 2, spaces, min); 6683 else 6684 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 6685 } 6686 } 6687 6688 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 6689 unsigned int depth, enum mt_dump_format format) 6690 { 6691 mt_dump_range(min, max, depth, format); 6692 6693 if (xa_is_value(entry)) 6694 pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), 6695 xa_to_value(entry), entry); 6696 else if (xa_is_zero(entry)) 6697 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 6698 else if (mt_is_reserved(entry)) 6699 pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); 6700 else 6701 pr_cont(PTR_FMT "\n", entry); 6702 } 6703 6704 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 6705 unsigned long min, unsigned long max, unsigned int depth, 6706 enum mt_dump_format format) 6707 { 6708 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 6709 bool leaf = mte_is_leaf(entry); 6710 unsigned long first = min; 6711 int i; 6712 6713 pr_cont(" contents: "); 6714 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 6715 switch(format) { 6716 case mt_dump_hex: 6717 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 6718 break; 6719 case mt_dump_dec: 6720 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 6721 } 6722 } 6723 pr_cont(PTR_FMT 
"\n", node->slot[i]); 6724 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 6725 unsigned long last = max; 6726 6727 if (i < (MAPLE_RANGE64_SLOTS - 1)) 6728 last = node->pivot[i]; 6729 else if (!node->slot[i] && max != mt_node_max(entry)) 6730 break; 6731 if (last == 0 && i > 0) 6732 break; 6733 if (leaf) 6734 mt_dump_entry(mt_slot(mt, node->slot, i), 6735 first, last, depth + 1, format); 6736 else if (node->slot[i]) 6737 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6738 first, last, depth + 1, format); 6739 6740 if (last == max) 6741 break; 6742 if (last > max) { 6743 switch(format) { 6744 case mt_dump_hex: 6745 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 6746 node, last, max, i); 6747 break; 6748 case mt_dump_dec: 6749 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 6750 node, last, max, i); 6751 } 6752 } 6753 first = last + 1; 6754 } 6755 } 6756 6757 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 6758 unsigned long min, unsigned long max, unsigned int depth, 6759 enum mt_dump_format format) 6760 { 6761 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 6762 unsigned long first = min; 6763 int i; 6764 6765 pr_cont(" contents: "); 6766 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6767 switch (format) { 6768 case mt_dump_hex: 6769 pr_cont("%lx ", node->gap[i]); 6770 break; 6771 case mt_dump_dec: 6772 pr_cont("%lu ", node->gap[i]); 6773 } 6774 } 6775 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 6776 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 6777 switch (format) { 6778 case mt_dump_hex: 6779 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 6780 break; 6781 case mt_dump_dec: 6782 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 6783 } 6784 } 6785 pr_cont(PTR_FMT "\n", node->slot[i]); 6786 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6787 unsigned long last = max; 6788 6789 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 6790 last = node->pivot[i]; 6791 else if (!node->slot[i]) 6792 break; 6793 if (last == 0 && i > 0) 6794 break; 6795 if (node->slot[i]) 6796 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6797 first, last, depth + 1, format); 6798 6799 if (last == max) 6800 break; 6801 if (last > max) { 6802 switch(format) { 6803 case mt_dump_hex: 6804 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 6805 node, last, max, i); 6806 break; 6807 case mt_dump_dec: 6808 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 6809 node, last, max, i); 6810 } 6811 } 6812 first = last + 1; 6813 } 6814 } 6815 6816 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6817 unsigned long min, unsigned long max, unsigned int depth, 6818 enum mt_dump_format format) 6819 { 6820 struct maple_node *node = mte_to_node(entry); 6821 unsigned int type = mte_node_type(entry); 6822 unsigned int i; 6823 6824 mt_dump_range(min, max, depth, format); 6825 6826 pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, 6827 depth, type, node ? 
node->parent : NULL); 6828 switch (type) { 6829 case maple_dense: 6830 pr_cont("\n"); 6831 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 6832 if (min + i > max) 6833 pr_cont("OUT OF RANGE: "); 6834 mt_dump_entry(mt_slot(mt, node->slot, i), 6835 min + i, min + i, depth, format); 6836 } 6837 break; 6838 case maple_leaf_64: 6839 case maple_range_64: 6840 mt_dump_range64(mt, entry, min, max, depth, format); 6841 break; 6842 case maple_arange_64: 6843 mt_dump_arange64(mt, entry, min, max, depth, format); 6844 break; 6845 6846 default: 6847 pr_cont(" UNKNOWN TYPE\n"); 6848 } 6849 } 6850 6851 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 6852 { 6853 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 6854 6855 pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", 6856 mt, mt->ma_flags, mt_height(mt), entry); 6857 if (xa_is_node(entry)) 6858 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 6859 else if (entry) 6860 mt_dump_entry(entry, 0, 0, 0, format); 6861 else 6862 pr_info("(empty)\n"); 6863 } 6864 EXPORT_SYMBOL_GPL(mt_dump); 6865 6866 /* 6867 * Calculate the maximum gap in a node and check if that's what is reported in 6868 * the parent (unless root). 6869 */ 6870 static void mas_validate_gaps(struct ma_state *mas) 6871 { 6872 struct maple_enode *mte = mas->node; 6873 struct maple_node *p_mn, *node = mte_to_node(mte); 6874 enum maple_type mt = mte_node_type(mas->node); 6875 unsigned long gap = 0, max_gap = 0; 6876 unsigned long p_end, p_start = mas->min; 6877 unsigned char p_slot, offset; 6878 unsigned long *gaps = NULL; 6879 unsigned long *pivots = ma_pivots(node, mt); 6880 unsigned int i; 6881 6882 if (ma_is_dense(mt)) { 6883 for (i = 0; i < mt_slot_count(mte); i++) { 6884 if (mas_get_slot(mas, i)) { 6885 if (gap > max_gap) 6886 max_gap = gap; 6887 gap = 0; 6888 continue; 6889 } 6890 gap++; 6891 } 6892 goto counted; 6893 } 6894 6895 gaps = ma_gaps(node, mt); 6896 for (i = 0; i < mt_slot_count(mte); i++) { 6897 p_end = mas_safe_pivot(mas, pivots, i, mt); 6898 6899 if (!gaps) { 6900 if (!mas_get_slot(mas, i)) 6901 gap = p_end - p_start + 1; 6902 } else { 6903 void *entry = mas_get_slot(mas, i); 6904 6905 gap = gaps[i]; 6906 MT_BUG_ON(mas->tree, !entry); 6907 6908 if (gap > p_end - p_start + 1) { 6909 pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", 6910 mas_mn(mas), i, gap, p_end, p_start, 6911 p_end - p_start + 1); 6912 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 6913 } 6914 } 6915 6916 if (gap > max_gap) 6917 max_gap = gap; 6918 6919 p_start = p_end + 1; 6920 if (p_end >= mas->max) 6921 break; 6922 } 6923 6924 counted: 6925 if (mt == maple_arange_64) { 6926 MT_BUG_ON(mas->tree, !gaps); 6927 offset = ma_meta_gap(node); 6928 if (offset > i) { 6929 pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); 6930 MT_BUG_ON(mas->tree, 1); 6931 } 6932 6933 if (gaps[offset] != max_gap) { 6934 pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", 6935 node, offset, max_gap); 6936 MT_BUG_ON(mas->tree, 1); 6937 } 6938 6939 for (i++ ; i < mt_slot_count(mte); i++) { 6940 if (gaps[i] != 0) { 6941 pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", 6942 node, i); 6943 MT_BUG_ON(mas->tree, 1); 6944 } 6945 } 6946 } 6947 6948 if (mte_is_root(mte)) 6949 return; 6950 6951 p_slot = mte_parent_slot(mas->node); 6952 p_mn = mte_parent(mte); 6953 MT_BUG_ON(mas->tree, max_gap > mas->max); 6954 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 6955 pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); 6956 
mt_dump(mas->tree, mt_dump_hex); 6957 MT_BUG_ON(mas->tree, 1); 6958 } 6959 } 6960 6961 static void mas_validate_parent_slot(struct ma_state *mas) 6962 { 6963 struct maple_node *parent; 6964 struct maple_enode *node; 6965 enum maple_type p_type; 6966 unsigned char p_slot; 6967 void __rcu **slots; 6968 int i; 6969 6970 if (mte_is_root(mas->node)) 6971 return; 6972 6973 p_slot = mte_parent_slot(mas->node); 6974 p_type = mas_parent_type(mas, mas->node); 6975 parent = mte_parent(mas->node); 6976 slots = ma_slots(parent, p_type); 6977 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 6978 6979 /* Check prev/next parent slot for duplicate node entry */ 6980 6981 for (i = 0; i < mt_slots[p_type]; i++) { 6982 node = mas_slot(mas, slots, i); 6983 if (i == p_slot) { 6984 if (node != mas->node) 6985 pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", 6986 parent, i, mas_mn(mas)); 6987 MT_BUG_ON(mas->tree, node != mas->node); 6988 } else if (node == mas->node) { 6989 pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", 6990 mas_mn(mas), parent, i, p_slot); 6991 MT_BUG_ON(mas->tree, node == mas->node); 6992 } 6993 } 6994 } 6995 6996 static void mas_validate_child_slot(struct ma_state *mas) 6997 { 6998 enum maple_type type = mte_node_type(mas->node); 6999 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7000 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 7001 struct maple_enode *child; 7002 unsigned char i; 7003 7004 if (mte_is_leaf(mas->node)) 7005 return; 7006 7007 for (i = 0; i < mt_slots[type]; i++) { 7008 child = mas_slot(mas, slots, i); 7009 7010 if (!child) { 7011 pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", 7012 mas_mn(mas), i); 7013 MT_BUG_ON(mas->tree, 1); 7014 } 7015 7016 if (mte_parent_slot(child) != i) { 7017 pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", 7018 mas_mn(mas), i, mte_to_node(child), 7019 mte_parent_slot(child)); 7020 MT_BUG_ON(mas->tree, 1); 7021 } 7022 7023 if (mte_parent(child) != mte_to_node(mas->node)) { 7024 pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", 7025 mte_to_node(child), mte_parent(child), 7026 mte_to_node(mas->node)); 7027 MT_BUG_ON(mas->tree, 1); 7028 } 7029 7030 if (i < mt_pivots[type] && pivots[i] == mas->max) 7031 break; 7032 } 7033 } 7034 7035 /* 7036 * Validate all pivots are within mas->min and mas->max, check metadata ends 7037 * where the maximum ends and ensure there is no slots or pivots set outside of 7038 * the end of the data. 
7039 */ 7040 static void mas_validate_limits(struct ma_state *mas) 7041 { 7042 int i; 7043 unsigned long prev_piv = 0; 7044 enum maple_type type = mte_node_type(mas->node); 7045 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7046 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7047 7048 for (i = 0; i < mt_slots[type]; i++) { 7049 unsigned long piv; 7050 7051 piv = mas_safe_pivot(mas, pivots, i, type); 7052 7053 if (!piv && (i != 0)) { 7054 pr_err("Missing node limit pivot at " PTR_FMT "[%u]", 7055 mas_mn(mas), i); 7056 MAS_WARN_ON(mas, 1); 7057 } 7058 7059 if (prev_piv > piv) { 7060 pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", 7061 mas_mn(mas), i, piv, prev_piv); 7062 MAS_WARN_ON(mas, piv < prev_piv); 7063 } 7064 7065 if (piv < mas->min) { 7066 pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, 7067 piv, mas->min); 7068 MAS_WARN_ON(mas, piv < mas->min); 7069 } 7070 if (piv > mas->max) { 7071 pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, 7072 piv, mas->max); 7073 MAS_WARN_ON(mas, piv > mas->max); 7074 } 7075 prev_piv = piv; 7076 if (piv == mas->max) 7077 break; 7078 } 7079 7080 if (mas_data_end(mas) != i) { 7081 pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", 7082 mas_mn(mas), mas_data_end(mas), i); 7083 MT_BUG_ON(mas->tree, 1); 7084 } 7085 7086 for (i += 1; i < mt_slots[type]; i++) { 7087 void *entry = mas_slot(mas, slots, i); 7088 7089 if (entry && (i != mt_slots[type] - 1)) { 7090 pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", 7091 mas_mn(mas), i, entry); 7092 MT_BUG_ON(mas->tree, entry != NULL); 7093 } 7094 7095 if (i < mt_pivots[type]) { 7096 unsigned long piv = pivots[i]; 7097 7098 if (!piv) 7099 continue; 7100 7101 pr_err(PTR_FMT "[%u] should not have piv %lu\n", 7102 mas_mn(mas), i, piv); 7103 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7104 } 7105 } 7106 } 7107 7108 static void mt_validate_nulls(struct maple_tree *mt) 7109 { 7110 void *entry, *last = (void *)1; 7111 unsigned char offset = 0; 7112 void __rcu **slots; 7113 MA_STATE(mas, mt, 0, 0); 7114 7115 mas_start(&mas); 7116 if (mas_is_none(&mas) || (mas_is_ptr(&mas))) 7117 return; 7118 7119 while (!mte_is_leaf(mas.node)) 7120 mas_descend(&mas); 7121 7122 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7123 do { 7124 entry = mas_slot(&mas, slots, offset); 7125 if (!last && !entry) { 7126 pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", 7127 mas_mn(&mas), offset); 7128 } 7129 MT_BUG_ON(mt, !last && !entry); 7130 last = entry; 7131 if (offset == mas_data_end(&mas)) { 7132 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7133 if (mas_is_overflow(&mas)) 7134 return; 7135 offset = 0; 7136 slots = ma_slots(mte_to_node(mas.node), 7137 mte_node_type(mas.node)); 7138 } else { 7139 offset++; 7140 } 7141 7142 } while (!mas_is_overflow(&mas)); 7143 } 7144 7145 /* 7146 * validate a maple tree by checking: 7147 * 1. The limits (pivots are within mas->min to mas->max) 7148 * 2. 
The gap is correctly set in the parents 7149 */ 7150 void mt_validate(struct maple_tree *mt) 7151 __must_hold(mas->tree->ma_lock) 7152 { 7153 unsigned char end; 7154 7155 MA_STATE(mas, mt, 0, 0); 7156 mas_start(&mas); 7157 if (!mas_is_active(&mas)) 7158 return; 7159 7160 while (!mte_is_leaf(mas.node)) 7161 mas_descend(&mas); 7162 7163 while (!mas_is_overflow(&mas)) { 7164 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7165 end = mas_data_end(&mas); 7166 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7167 (!mte_is_root(mas.node)))) { 7168 pr_err("Invalid size %u of " PTR_FMT "\n", 7169 end, mas_mn(&mas)); 7170 } 7171 7172 mas_validate_parent_slot(&mas); 7173 mas_validate_limits(&mas); 7174 mas_validate_child_slot(&mas); 7175 if (mt_is_alloc(mt)) 7176 mas_validate_gaps(&mas); 7177 mas_dfs_postorder(&mas, ULONG_MAX); 7178 } 7179 mt_validate_nulls(mt); 7180 } 7181 EXPORT_SYMBOL_GPL(mt_validate); 7182 7183 void mas_dump(const struct ma_state *mas) 7184 { 7185 pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", 7186 mas->tree, mas->node); 7187 switch (mas->status) { 7188 case ma_active: 7189 pr_err("(ma_active)"); 7190 break; 7191 case ma_none: 7192 pr_err("(ma_none)"); 7193 break; 7194 case ma_root: 7195 pr_err("(ma_root)"); 7196 break; 7197 case ma_start: 7198 pr_err("(ma_start) "); 7199 break; 7200 case ma_pause: 7201 pr_err("(ma_pause) "); 7202 break; 7203 case ma_overflow: 7204 pr_err("(ma_overflow) "); 7205 break; 7206 case ma_underflow: 7207 pr_err("(ma_underflow) "); 7208 break; 7209 case ma_error: 7210 pr_err("(ma_error) "); 7211 break; 7212 } 7213 7214 pr_err("Store Type: "); 7215 switch (mas->store_type) { 7216 case wr_invalid: 7217 pr_err("invalid store type\n"); 7218 break; 7219 case wr_new_root: 7220 pr_err("new_root\n"); 7221 break; 7222 case wr_store_root: 7223 pr_err("store_root\n"); 7224 break; 7225 case wr_exact_fit: 7226 pr_err("exact_fit\n"); 7227 break; 7228 case wr_split_store: 7229 pr_err("split_store\n"); 7230 break; 7231 case wr_slot_store: 7232 pr_err("slot_store\n"); 7233 break; 7234 case wr_append: 7235 pr_err("append\n"); 7236 break; 7237 case wr_node_store: 7238 pr_err("node_store\n"); 7239 break; 7240 case wr_spanning_store: 7241 pr_err("spanning_store\n"); 7242 break; 7243 case wr_rebalance: 7244 pr_err("rebalance\n"); 7245 break; 7246 } 7247 7248 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, 7249 mas->index, mas->last); 7250 pr_err(" min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n", 7251 mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth, 7252 mas->mas_flags); 7253 if (mas->index > mas->last) 7254 pr_err("Check index & last\n"); 7255 } 7256 EXPORT_SYMBOL_GPL(mas_dump); 7257 7258 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7259 { 7260 pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", 7261 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7262 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7263 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, 7264 wr_mas->end_piv); 7265 } 7266 EXPORT_SYMBOL_GPL(mas_wr_dump); 7267 7268 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7269
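/*
 * A minimal debugging sketch (hedged: hypothetical caller, only meaningful
 * when CONFIG_DEBUG_MAPLE_TREE is enabled): dump the tree and run the
 * validator while holding the tree lock.
 *
 *	mtree_lock(mt);
 *	mt_dump(mt, mt_dump_hex);
 *	mt_validate(mt);
 *	mtree_unlock(mt);
 */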