// SPDX-License-Identifier: GPL-2.0+
/*
 * Maple Tree implementation
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 * Copyright (c) 2023 ByteDance
 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
 */

/*
 * DOC: Interesting implementation details of the Maple Tree
 *
 * Each node type has a number of slots for entries and a number of slots for
 * pivots.  In the case of dense nodes, the pivots are implied by the position
 * and are simply the slot index + the minimum of the node.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges.  A pivot value may be repeated
 * within a subtree, with an entry attached to the value, whereas keys are
 * unique to a specific position of a B-tree.  Pivot values are inclusive of
 * the slot with the same index.
 *
 * The following illustrates the layout of a range64 node's slots and pivots.
 *
 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
 *           │   │   │   │     │    │    │    │    └─ Implied maximum
 *           │   │   │   │     │    │    │    └─ Pivot 14
 *           │   │   │   │     │    │    └─ Pivot 13
 *           │   │   │   │     │    └─ Pivot 12
 *           │   │   │   │     └─ Pivot 11
 *           │   │   │   └─ Pivot 2
 *           │   │   └─ Pivot 1
 *           │   └─ Pivot 0
 *           └─ Implied minimum
 *
 * Slot contents:
 *  Internal (non-leaf) nodes contain pointers to other nodes.
 *  Leaf nodes contain entries.
 *
 * The location of interest is often referred to as an offset.  All offsets
 * have a slot, but the last offset has an implied pivot from the node above
 * (or ULONG_MAX for the root node).
 *
 * Ranges complicate certain write activities.  When modifying any of the
 * B-tree variants, it is known that one entry will either be added or
 * deleted.  When modifying the Maple Tree, one store operation may overwrite
 * the entire data set, one half of the tree, or the middle half of the tree.
 *
 */


#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

/*
 * Kernel pointer hashing renders much of the maple tree dump useless as tagged
 * pointers get hashed to arbitrary values.
 *
 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
 * permissible to bypass this.  Otherwise remain cautious and retain the
 * hashing.
 *
 * Userland doesn't know about %px so also use %p there.
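 *
 * As a small illustration (not a call site from this file), a debug dump
 * helper would print a node pointer with the selected format like so:
 *
 *	pr_info("maple node " PTR_FMT "\n", node);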
75 */ 76 #if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE) 77 #define PTR_FMT "%px" 78 #else 79 #define PTR_FMT "%p" 80 #endif 81 82 #define MA_ROOT_PARENT 1 83 84 /* 85 * Maple state flags 86 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation 87 */ 88 #define MA_STATE_PREALLOC 1 89 90 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) 91 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) 92 #define ma_mnode_ptr(x) ((struct maple_node *)(x)) 93 #define ma_enode_ptr(x) ((struct maple_enode *)(x)) 94 static struct kmem_cache *maple_node_cache; 95 96 #ifdef CONFIG_DEBUG_MAPLE_TREE 97 static const unsigned long mt_max[] = { 98 [maple_dense] = MAPLE_NODE_SLOTS, 99 [maple_leaf_64] = ULONG_MAX, 100 [maple_range_64] = ULONG_MAX, 101 [maple_arange_64] = ULONG_MAX, 102 }; 103 #define mt_node_max(x) mt_max[mte_node_type(x)] 104 #endif 105 106 static const unsigned char mt_slots[] = { 107 [maple_dense] = MAPLE_NODE_SLOTS, 108 [maple_leaf_64] = MAPLE_RANGE64_SLOTS, 109 [maple_range_64] = MAPLE_RANGE64_SLOTS, 110 [maple_arange_64] = MAPLE_ARANGE64_SLOTS, 111 }; 112 #define mt_slot_count(x) mt_slots[mte_node_type(x)] 113 114 static const unsigned char mt_pivots[] = { 115 [maple_dense] = 0, 116 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1, 117 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1, 118 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1, 119 }; 120 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)] 121 122 static const unsigned char mt_min_slots[] = { 123 [maple_dense] = MAPLE_NODE_SLOTS / 2, 124 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 125 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 126 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1, 127 }; 128 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)] 129 130 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2) 131 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) 132 133 struct maple_big_node { 134 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; 135 union { 136 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; 137 struct { 138 unsigned long padding[MAPLE_BIG_NODE_GAPS]; 139 unsigned long gap[MAPLE_BIG_NODE_GAPS]; 140 }; 141 }; 142 unsigned char b_end; 143 enum maple_type type; 144 }; 145 146 /* 147 * The maple_subtree_state is used to build a tree to replace a segment of an 148 * existing tree in a more atomic way. Any walkers of the older tree will hit a 149 * dead node and restart on updates. 
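 *
 * A rough sketch of the reader-side pattern this relies on (mas_start()
 * below, for example, restarts from the root when it observes a dead
 * node):
 *
 *	retry:
 *	node = rcu_dereference(...);
 *	if (ma_dead_node(node))
 *		goto retry;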
150 */ 151 struct maple_subtree_state { 152 struct ma_state *orig_l; /* Original left side of subtree */ 153 struct ma_state *orig_r; /* Original right side of subtree */ 154 struct ma_state *l; /* New left side of subtree */ 155 struct ma_state *m; /* New middle of subtree (rare) */ 156 struct ma_state *r; /* New right side of subtree */ 157 struct ma_topiary *free; /* nodes to be freed */ 158 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 159 struct maple_big_node *bn; 160 }; 161 162 #ifdef CONFIG_KASAN_STACK 163 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 164 #define noinline_for_kasan noinline_for_stack 165 #else 166 #define noinline_for_kasan inline 167 #endif 168 169 /* Functions */ 170 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 171 { 172 return kmem_cache_alloc(maple_node_cache, gfp); 173 } 174 175 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 176 { 177 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 178 } 179 180 static void mt_return_sheaf(struct slab_sheaf *sheaf) 181 { 182 kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf); 183 } 184 185 static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count) 186 { 187 return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count); 188 } 189 190 static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf, 191 unsigned int size) 192 { 193 return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size); 194 } 195 196 /* 197 * ma_free_rcu() - Use rcu callback to free a maple node 198 * @node: The node to free 199 * 200 * The maple tree uses the parent pointer to indicate this node is no longer in 201 * use and will be freed. 202 */ 203 static void ma_free_rcu(struct maple_node *node) 204 { 205 WARN_ON(node->parent != ma_parent_ptr(node)); 206 kfree_rcu(node, rcu); 207 } 208 209 static void mt_set_height(struct maple_tree *mt, unsigned char height) 210 { 211 unsigned int new_flags = mt->ma_flags; 212 213 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 214 MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX); 215 new_flags |= height << MT_FLAGS_HEIGHT_OFFSET; 216 mt->ma_flags = new_flags; 217 } 218 219 static unsigned int mas_mt_height(struct ma_state *mas) 220 { 221 return mt_height(mas->tree); 222 } 223 224 static inline unsigned int mt_attr(struct maple_tree *mt) 225 { 226 return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK; 227 } 228 229 static __always_inline enum maple_type mte_node_type( 230 const struct maple_enode *entry) 231 { 232 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 233 MAPLE_NODE_TYPE_MASK; 234 } 235 236 static __always_inline bool ma_is_dense(const enum maple_type type) 237 { 238 return type < maple_leaf_64; 239 } 240 241 static __always_inline bool ma_is_leaf(const enum maple_type type) 242 { 243 return type < maple_range_64; 244 } 245 246 static __always_inline bool mte_is_leaf(const struct maple_enode *entry) 247 { 248 return ma_is_leaf(mte_node_type(entry)); 249 } 250 251 /* 252 * We also reserve values with the bottom two bits set to '10' which are 253 * below 4096 254 */ 255 static __always_inline bool mt_is_reserved(const void *entry) 256 { 257 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 258 xa_is_internal(entry); 259 } 260 261 static __always_inline void mas_set_err(struct ma_state *mas, long err) 262 { 263 mas->node = MA_ERROR(err); 264 mas->status = ma_error; 265 } 266 267 static __always_inline bool mas_is_ptr(const struct ma_state *mas) 268 { 269 return mas->status == ma_root; 270 } 271 272 static 
__always_inline bool mas_is_start(const struct ma_state *mas) 273 { 274 return mas->status == ma_start; 275 } 276 277 static __always_inline bool mas_is_none(const struct ma_state *mas) 278 { 279 return mas->status == ma_none; 280 } 281 282 static __always_inline bool mas_is_paused(const struct ma_state *mas) 283 { 284 return mas->status == ma_pause; 285 } 286 287 static __always_inline bool mas_is_overflow(struct ma_state *mas) 288 { 289 return mas->status == ma_overflow; 290 } 291 292 static inline bool mas_is_underflow(struct ma_state *mas) 293 { 294 return mas->status == ma_underflow; 295 } 296 297 static __always_inline struct maple_node *mte_to_node( 298 const struct maple_enode *entry) 299 { 300 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 301 } 302 303 /* 304 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 305 * @entry: The maple encoded node 306 * 307 * Return: a maple topiary pointer 308 */ 309 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 310 { 311 return (struct maple_topiary *) 312 ((unsigned long)entry & ~MAPLE_NODE_MASK); 313 } 314 315 /* 316 * mas_mn() - Get the maple state node. 317 * @mas: The maple state 318 * 319 * Return: the maple node (not encoded - bare pointer). 320 */ 321 static inline struct maple_node *mas_mn(const struct ma_state *mas) 322 { 323 return mte_to_node(mas->node); 324 } 325 326 /* 327 * mte_set_node_dead() - Set a maple encoded node as dead. 328 * @mn: The maple encoded node. 329 */ 330 static inline void mte_set_node_dead(struct maple_enode *mn) 331 { 332 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 333 smp_wmb(); /* Needed for RCU */ 334 } 335 336 /* Bit 1 indicates the root is a node */ 337 #define MAPLE_ROOT_NODE 0x02 338 /* maple_type stored bit 3-6 */ 339 #define MAPLE_ENODE_TYPE_SHIFT 0x03 340 /* Bit 2 means a NULL somewhere below */ 341 #define MAPLE_ENODE_NULL 0x04 342 343 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 344 enum maple_type type) 345 { 346 return (void *)((unsigned long)node | 347 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 348 } 349 350 static inline void *mte_mk_root(const struct maple_enode *node) 351 { 352 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 353 } 354 355 static inline void *mte_safe_root(const struct maple_enode *node) 356 { 357 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 358 } 359 360 static inline void __maybe_unused *mte_set_full(const struct maple_enode *node) 361 { 362 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 363 } 364 365 static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node) 366 { 367 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 368 } 369 370 static inline bool __maybe_unused mte_has_null(const struct maple_enode *node) 371 { 372 return (unsigned long)node & MAPLE_ENODE_NULL; 373 } 374 375 static __always_inline bool ma_is_root(struct maple_node *node) 376 { 377 return ((unsigned long)node->parent & MA_ROOT_PARENT); 378 } 379 380 static __always_inline bool mte_is_root(const struct maple_enode *node) 381 { 382 return ma_is_root(mte_to_node(node)); 383 } 384 385 static inline bool mas_is_root_limits(const struct ma_state *mas) 386 { 387 return !mas->min && mas->max == ULONG_MAX; 388 } 389 390 static __always_inline bool mt_is_alloc(struct maple_tree *mt) 391 { 392 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 393 } 394 395 /* 396 * The Parent Pointer 397 * Excluding root, the parent pointer is 
256B aligned like all other tree nodes.
 * When storing 32 or 64 bit values, the offset can fit into 5 bits.  The 16
 * bit values need an extra bit to store the offset.  This extra bit comes from
 * a reuse of the last bit in the node type.  This is possible by using bit 1 to
 * indicate if bit 2 is part of the type or the slot.
 *
 * Node types:
 *  0b??1 = Root
 *  0b?00 = 16 bit nodes
 *  0b010 = 32 bit nodes
 *  0b110 = 64 bit nodes
 *
 * Slot size and alignment
 *  0b??1 : Root
 *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
 *  0b010 : 32 bit values, type in 0-2, slot in 3-7
 *  0b110 : 64 bit values, type in 0-2, slot in 3-7
 */

#define MAPLE_PARENT_ROOT		0x01

#define MAPLE_PARENT_SLOT_SHIFT		0x03
#define MAPLE_PARENT_SLOT_MASK		0xF8

#define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
#define MAPLE_PARENT_16B_SLOT_MASK	0xFC

#define MAPLE_PARENT_RANGE64		0x06
#define MAPLE_PARENT_RANGE32		0x02
#define MAPLE_PARENT_NOT_RANGE16	0x02

/*
 * mte_parent_shift() - Get the parent shift for the slot storage.
 * @parent: The parent pointer cast as an unsigned long
 * Return: The shift into that pointer to the start of the slot
 */
static inline unsigned long mte_parent_shift(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_SHIFT;

	return MAPLE_PARENT_16B_SLOT_SHIFT;
}

/*
 * mte_parent_slot_mask() - Get the slot mask for the parent.
 * @parent: The parent pointer cast as an unsigned long.
 * Return: The slot mask for that parent.
 */
static inline unsigned long mte_parent_slot_mask(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_MASK;

	return MAPLE_PARENT_16B_SLOT_MASK;
}

/*
 * mas_parent_type() - Return the maple_type of the parent from the stored
 * parent type.
 * @mas: The maple state
 * @enode: The maple_enode to extract the parent's enum
 * Return: The node->parent maple_type
 */
static inline
enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
{
	unsigned long p_type;

	p_type = (unsigned long)mte_to_node(enode)->parent;
	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
		return 0;

	p_type &= MAPLE_NODE_MASK;
	p_type &= ~mte_parent_slot_mask(p_type);
	switch (p_type) {
	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
		if (mt_is_alloc(mas->tree))
			return maple_arange_64;
		return maple_range_64;
	}

	return 0;
}

/*
 * mas_set_parent() - Set the parent node and encode the slot
 * @mas: The maple state
 * @enode: The encoded maple node.
 * @parent: The encoded maple node that is the parent of @enode.
 * @slot: The slot that @enode resides in @parent.
 *
 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
 * parent type.
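 *
 * As a worked example (the address is made up for illustration): linking a
 * child into slot 4 of a maple_range_64 parent whose node sits at
 * 0xffff888001234100 stores
 *
 *	(4 << MAPLE_PARENT_SLOT_SHIFT) | MAPLE_PARENT_RANGE64 = 0x26
 *
 * in the low bits, giving a parent pointer of 0xffff888001234126.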
493 */ 494 static inline 495 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 496 const struct maple_enode *parent, unsigned char slot) 497 { 498 unsigned long val = (unsigned long)parent; 499 unsigned long shift; 500 unsigned long type; 501 enum maple_type p_type = mte_node_type(parent); 502 503 MAS_BUG_ON(mas, p_type == maple_dense); 504 MAS_BUG_ON(mas, p_type == maple_leaf_64); 505 506 switch (p_type) { 507 case maple_range_64: 508 case maple_arange_64: 509 shift = MAPLE_PARENT_SLOT_SHIFT; 510 type = MAPLE_PARENT_RANGE64; 511 break; 512 default: 513 case maple_dense: 514 case maple_leaf_64: 515 shift = type = 0; 516 break; 517 } 518 519 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 520 val |= (slot << shift) | type; 521 mte_to_node(enode)->parent = ma_parent_ptr(val); 522 } 523 524 /* 525 * mte_parent_slot() - get the parent slot of @enode. 526 * @enode: The encoded maple node. 527 * 528 * Return: The slot in the parent node where @enode resides. 529 */ 530 static __always_inline 531 unsigned int mte_parent_slot(const struct maple_enode *enode) 532 { 533 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 534 535 if (unlikely(val & MA_ROOT_PARENT)) 536 return 0; 537 538 /* 539 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 540 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 541 */ 542 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 543 } 544 545 /* 546 * mte_parent() - Get the parent of @node. 547 * @enode: The encoded maple node. 548 * 549 * Return: The parent maple node. 550 */ 551 static __always_inline 552 struct maple_node *mte_parent(const struct maple_enode *enode) 553 { 554 return (void *)((unsigned long) 555 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 556 } 557 558 /* 559 * ma_dead_node() - check if the @enode is dead. 560 * @enode: The encoded maple node 561 * 562 * Return: true if dead, false otherwise. 563 */ 564 static __always_inline bool ma_dead_node(const struct maple_node *node) 565 { 566 struct maple_node *parent; 567 568 /* Do not reorder reads from the node prior to the parent check */ 569 smp_rmb(); 570 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 571 return (parent == node); 572 } 573 574 /* 575 * mte_dead_node() - check if the @enode is dead. 576 * @enode: The encoded maple node 577 * 578 * Return: true if dead, false otherwise. 579 */ 580 static __always_inline bool mte_dead_node(const struct maple_enode *enode) 581 { 582 struct maple_node *node; 583 584 node = mte_to_node(enode); 585 return ma_dead_node(node); 586 } 587 588 /* 589 * ma_pivots() - Get a pointer to the maple node pivots. 590 * @node: the maple node 591 * @type: the node type 592 * 593 * In the event of a dead node, this array may be %NULL 594 * 595 * Return: A pointer to the maple node pivots 596 */ 597 static inline unsigned long *ma_pivots(struct maple_node *node, 598 enum maple_type type) 599 { 600 switch (type) { 601 case maple_arange_64: 602 return node->ma64.pivot; 603 case maple_range_64: 604 case maple_leaf_64: 605 return node->mr64.pivot; 606 case maple_dense: 607 return NULL; 608 } 609 return NULL; 610 } 611 612 /* 613 * ma_gaps() - Get a pointer to the maple node gaps. 
614 * @node: the maple node 615 * @type: the node type 616 * 617 * Return: A pointer to the maple node gaps 618 */ 619 static inline unsigned long *ma_gaps(struct maple_node *node, 620 enum maple_type type) 621 { 622 switch (type) { 623 case maple_arange_64: 624 return node->ma64.gap; 625 case maple_range_64: 626 case maple_leaf_64: 627 case maple_dense: 628 return NULL; 629 } 630 return NULL; 631 } 632 633 /* 634 * mas_safe_pivot() - get the pivot at @piv or mas->max. 635 * @mas: The maple state 636 * @pivots: The pointer to the maple node pivots 637 * @piv: The pivot to fetch 638 * @type: The maple node type 639 * 640 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 641 * otherwise. 642 */ 643 static __always_inline unsigned long 644 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 645 unsigned char piv, enum maple_type type) 646 { 647 if (piv >= mt_pivots[type]) 648 return mas->max; 649 650 return pivots[piv]; 651 } 652 653 /* 654 * mas_safe_min() - Return the minimum for a given offset. 655 * @mas: The maple state 656 * @pivots: The pointer to the maple node pivots 657 * @offset: The offset into the pivot array 658 * 659 * Return: The minimum range value that is contained in @offset. 660 */ 661 static inline unsigned long 662 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 663 { 664 if (likely(offset)) 665 return pivots[offset - 1] + 1; 666 667 return mas->min; 668 } 669 670 /* 671 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 672 * @mn: The encoded maple node 673 * @piv: The pivot offset 674 * @val: The value of the pivot 675 */ 676 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 677 unsigned long val) 678 { 679 struct maple_node *node = mte_to_node(mn); 680 enum maple_type type = mte_node_type(mn); 681 682 BUG_ON(piv >= mt_pivots[type]); 683 switch (type) { 684 case maple_range_64: 685 case maple_leaf_64: 686 node->mr64.pivot[piv] = val; 687 break; 688 case maple_arange_64: 689 node->ma64.pivot[piv] = val; 690 break; 691 case maple_dense: 692 break; 693 } 694 695 } 696 697 /* 698 * ma_slots() - Get a pointer to the maple node slots. 699 * @mn: The maple node 700 * @mt: The maple node type 701 * 702 * Return: A pointer to the maple node slots 703 */ 704 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 705 { 706 switch (mt) { 707 case maple_arange_64: 708 return mn->ma64.slot; 709 case maple_range_64: 710 case maple_leaf_64: 711 return mn->mr64.slot; 712 case maple_dense: 713 return mn->slot; 714 } 715 716 return NULL; 717 } 718 719 static inline bool mt_write_locked(const struct maple_tree *mt) 720 { 721 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 722 lockdep_is_held(&mt->ma_lock); 723 } 724 725 static __always_inline bool mt_locked(const struct maple_tree *mt) 726 { 727 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 728 lockdep_is_held(&mt->ma_lock); 729 } 730 731 static __always_inline void *mt_slot(const struct maple_tree *mt, 732 void __rcu **slots, unsigned char offset) 733 { 734 return rcu_dereference_check(slots[offset], mt_locked(mt)); 735 } 736 737 static __always_inline void *mt_slot_locked(struct maple_tree *mt, 738 void __rcu **slots, unsigned char offset) 739 { 740 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 741 } 742 /* 743 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 
744 * @mas: The maple state 745 * @slots: The pointer to the slots 746 * @offset: The offset into the slots array to fetch 747 * 748 * Return: The entry stored in @slots at the @offset. 749 */ 750 static __always_inline void *mas_slot_locked(struct ma_state *mas, 751 void __rcu **slots, unsigned char offset) 752 { 753 return mt_slot_locked(mas->tree, slots, offset); 754 } 755 756 /* 757 * mas_slot() - Get the slot value when not holding the maple tree lock. 758 * @mas: The maple state 759 * @slots: The pointer to the slots 760 * @offset: The offset into the slots array to fetch 761 * 762 * Return: The entry stored in @slots at the @offset 763 */ 764 static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 765 unsigned char offset) 766 { 767 return mt_slot(mas->tree, slots, offset); 768 } 769 770 /* 771 * mas_root() - Get the maple tree root. 772 * @mas: The maple state. 773 * 774 * Return: The pointer to the root of the tree 775 */ 776 static __always_inline void *mas_root(struct ma_state *mas) 777 { 778 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 779 } 780 781 static inline void *mt_root_locked(struct maple_tree *mt) 782 { 783 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 784 } 785 786 /* 787 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 788 * @mas: The maple state. 789 * 790 * Return: The pointer to the root of the tree 791 */ 792 static inline void *mas_root_locked(struct ma_state *mas) 793 { 794 return mt_root_locked(mas->tree); 795 } 796 797 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 798 enum maple_type mt) 799 { 800 switch (mt) { 801 case maple_arange_64: 802 return &mn->ma64.meta; 803 default: 804 return &mn->mr64.meta; 805 } 806 } 807 808 /* 809 * ma_set_meta() - Set the metadata information of a node. 810 * @mn: The maple node 811 * @mt: The maple node type 812 * @offset: The offset of the highest sub-gap in this node. 813 * @end: The end of the data in this node. 
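 *
 * A brief illustration (values are hypothetical): an arange_64 node whose
 * last occupied slot is 6 and whose largest gap sits at offset 3 is
 * finalized with ma_set_meta(node, maple_arange_64, 3, 6), since @end is
 * the zero-based index of the last slot holding data.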
814 */ 815 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 816 unsigned char offset, unsigned char end) 817 { 818 struct maple_metadata *meta = ma_meta(mn, mt); 819 820 meta->gap = offset; 821 meta->end = end; 822 } 823 824 /* 825 * mt_clear_meta() - clear the metadata information of a node, if it exists 826 * @mt: The maple tree 827 * @mn: The maple node 828 * @type: The maple node type 829 */ 830 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 831 enum maple_type type) 832 { 833 struct maple_metadata *meta; 834 unsigned long *pivots; 835 void __rcu **slots; 836 void *next; 837 838 switch (type) { 839 case maple_range_64: 840 pivots = mn->mr64.pivot; 841 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 842 slots = mn->mr64.slot; 843 next = mt_slot_locked(mt, slots, 844 MAPLE_RANGE64_SLOTS - 1); 845 if (unlikely((mte_to_node(next) && 846 mte_node_type(next)))) 847 return; /* no metadata, could be node */ 848 } 849 fallthrough; 850 case maple_arange_64: 851 meta = ma_meta(mn, type); 852 break; 853 default: 854 return; 855 } 856 857 meta->gap = 0; 858 meta->end = 0; 859 } 860 861 /* 862 * ma_meta_end() - Get the data end of a node from the metadata 863 * @mn: The maple node 864 * @mt: The maple node type 865 */ 866 static inline unsigned char ma_meta_end(struct maple_node *mn, 867 enum maple_type mt) 868 { 869 struct maple_metadata *meta = ma_meta(mn, mt); 870 871 return meta->end; 872 } 873 874 /* 875 * ma_meta_gap() - Get the largest gap location of a node from the metadata 876 * @mn: The maple node 877 */ 878 static inline unsigned char ma_meta_gap(struct maple_node *mn) 879 { 880 return mn->ma64.meta.gap; 881 } 882 883 /* 884 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 885 * @mn: The maple node 886 * @mt: The maple node type 887 * @offset: The location of the largest gap. 888 */ 889 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 890 unsigned char offset) 891 { 892 893 struct maple_metadata *meta = ma_meta(mn, mt); 894 895 meta->gap = offset; 896 } 897 898 /* 899 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 900 * @mat: the ma_topiary, a linked list of dead nodes. 901 * @dead_enode: the node to be marked as dead and added to the tail of the list 902 * 903 * Add the @dead_enode to the linked list in @mat. 904 */ 905 static inline void mat_add(struct ma_topiary *mat, 906 struct maple_enode *dead_enode) 907 { 908 mte_set_node_dead(dead_enode); 909 mte_to_mat(dead_enode)->next = NULL; 910 if (!mat->tail) { 911 mat->tail = mat->head = dead_enode; 912 return; 913 } 914 915 mte_to_mat(mat->tail)->next = dead_enode; 916 mat->tail = dead_enode; 917 } 918 919 static void mt_free_walk(struct rcu_head *head); 920 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 921 bool free); 922 /* 923 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 924 * @mas: the maple state 925 * @mat: the ma_topiary linked list of dead nodes to free. 926 * 927 * Destroy walk a dead list. 
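 *
 * A sketch of the usual write-side sequence (zero initialization is
 * sufficient for mat_add(), which only touches the head and tail
 * pointers):
 *
 *	struct ma_topiary mat = {};
 *
 *	mat_add(&mat, old_enode);
 *	... replace the affected part of the tree ...
 *	mas_mat_destroy(mas, &mat);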
928 */ 929 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 930 { 931 struct maple_enode *next; 932 struct maple_node *node; 933 bool in_rcu = mt_in_rcu(mas->tree); 934 935 while (mat->head) { 936 next = mte_to_mat(mat->head)->next; 937 node = mte_to_node(mat->head); 938 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 939 if (in_rcu) 940 call_rcu(&node->rcu, mt_free_walk); 941 mat->head = next; 942 } 943 } 944 /* 945 * mas_descend() - Descend into the slot stored in the ma_state. 946 * @mas: the maple state. 947 * 948 * Note: Not RCU safe, only use in write side or debug code. 949 */ 950 static inline void mas_descend(struct ma_state *mas) 951 { 952 enum maple_type type; 953 unsigned long *pivots; 954 struct maple_node *node; 955 void __rcu **slots; 956 957 node = mas_mn(mas); 958 type = mte_node_type(mas->node); 959 pivots = ma_pivots(node, type); 960 slots = ma_slots(node, type); 961 962 if (mas->offset) 963 mas->min = pivots[mas->offset - 1] + 1; 964 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 965 mas->node = mas_slot(mas, slots, mas->offset); 966 } 967 968 /* 969 * mas_ascend() - Walk up a level of the tree. 970 * @mas: The maple state 971 * 972 * Sets the @mas->max and @mas->min for the parent node of mas->node. This 973 * may cause several levels of walking up to find the correct min and max. 974 * May find a dead node which will cause a premature return. 975 * Return: 1 on dead node, 0 otherwise 976 */ 977 static int mas_ascend(struct ma_state *mas) 978 { 979 struct maple_enode *p_enode; /* parent enode. */ 980 struct maple_enode *a_enode; /* ancestor enode. */ 981 struct maple_node *a_node; /* ancestor node. */ 982 struct maple_node *p_node; /* parent node. */ 983 unsigned char a_slot; 984 enum maple_type a_type; 985 unsigned long min, max; 986 unsigned long *pivots; 987 bool set_max = false, set_min = false; 988 989 a_node = mas_mn(mas); 990 if (ma_is_root(a_node)) { 991 mas->offset = 0; 992 return 0; 993 } 994 995 p_node = mte_parent(mas->node); 996 if (unlikely(a_node == p_node)) 997 return 1; 998 999 a_type = mas_parent_type(mas, mas->node); 1000 mas->offset = mte_parent_slot(mas->node); 1001 a_enode = mt_mk_node(p_node, a_type); 1002 1003 /* Check to make sure all parent information is still accurate */ 1004 if (p_node != mte_parent(mas->node)) 1005 return 1; 1006 1007 mas->node = a_enode; 1008 1009 if (mte_is_root(a_enode)) { 1010 mas->max = ULONG_MAX; 1011 mas->min = 0; 1012 return 0; 1013 } 1014 1015 min = 0; 1016 max = ULONG_MAX; 1017 1018 /* 1019 * !mas->offset implies that parent node min == mas->min. 1020 * mas->offset > 0 implies that we need to walk up to find the 1021 * implied pivot min. 
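 *
 * For example, if this node occupies slot 0 of its parent, the parent's
 * minimum equals mas->min and no walk is needed; if it occupies slot 3,
 * the parent's minimum must be recovered from an ancestor pivot on the
 * way up.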
1022 */ 1023 if (!mas->offset) { 1024 min = mas->min; 1025 set_min = true; 1026 } 1027 1028 if (mas->max == ULONG_MAX) 1029 set_max = true; 1030 1031 do { 1032 p_enode = a_enode; 1033 a_type = mas_parent_type(mas, p_enode); 1034 a_node = mte_parent(p_enode); 1035 a_slot = mte_parent_slot(p_enode); 1036 a_enode = mt_mk_node(a_node, a_type); 1037 pivots = ma_pivots(a_node, a_type); 1038 1039 if (unlikely(ma_dead_node(a_node))) 1040 return 1; 1041 1042 if (!set_min && a_slot) { 1043 set_min = true; 1044 min = pivots[a_slot - 1] + 1; 1045 } 1046 1047 if (!set_max && a_slot < mt_pivots[a_type]) { 1048 set_max = true; 1049 max = pivots[a_slot]; 1050 } 1051 1052 if (unlikely(ma_dead_node(a_node))) 1053 return 1; 1054 1055 if (unlikely(ma_is_root(a_node))) 1056 break; 1057 1058 } while (!set_min || !set_max); 1059 1060 mas->max = max; 1061 mas->min = min; 1062 return 0; 1063 } 1064 1065 /* 1066 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1067 * @mas: The maple state 1068 * 1069 * Return: A pointer to a maple node. 1070 */ 1071 static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas) 1072 { 1073 struct maple_node *ret; 1074 1075 if (mas->alloc) { 1076 ret = mas->alloc; 1077 mas->alloc = NULL; 1078 goto out; 1079 } 1080 1081 if (WARN_ON_ONCE(!mas->sheaf)) 1082 return NULL; 1083 1084 ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf); 1085 1086 out: 1087 memset(ret, 0, sizeof(*ret)); 1088 return ret; 1089 } 1090 1091 /* 1092 * mas_alloc_nodes() - Allocate nodes into a maple state 1093 * @mas: The maple state 1094 * @gfp: The GFP Flags 1095 */ 1096 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1097 { 1098 if (!mas->node_request) 1099 return; 1100 1101 if (mas->node_request == 1) { 1102 if (mas->sheaf) 1103 goto use_sheaf; 1104 1105 if (mas->alloc) 1106 return; 1107 1108 mas->alloc = mt_alloc_one(gfp); 1109 if (!mas->alloc) 1110 goto error; 1111 1112 mas->node_request = 0; 1113 return; 1114 } 1115 1116 use_sheaf: 1117 if (unlikely(mas->alloc)) { 1118 kfree(mas->alloc); 1119 mas->alloc = NULL; 1120 } 1121 1122 if (mas->sheaf) { 1123 unsigned long refill; 1124 1125 refill = mas->node_request; 1126 if (kmem_cache_sheaf_size(mas->sheaf) >= refill) { 1127 mas->node_request = 0; 1128 return; 1129 } 1130 1131 if (mt_refill_sheaf(gfp, &mas->sheaf, refill)) 1132 goto error; 1133 1134 mas->node_request = 0; 1135 return; 1136 } 1137 1138 mas->sheaf = mt_get_sheaf(gfp, mas->node_request); 1139 if (likely(mas->sheaf)) { 1140 mas->node_request = 0; 1141 return; 1142 } 1143 1144 error: 1145 mas_set_err(mas, -ENOMEM); 1146 } 1147 1148 static inline void mas_empty_nodes(struct ma_state *mas) 1149 { 1150 mas->node_request = 0; 1151 if (mas->sheaf) { 1152 mt_return_sheaf(mas->sheaf); 1153 mas->sheaf = NULL; 1154 } 1155 1156 if (mas->alloc) { 1157 kfree(mas->alloc); 1158 mas->alloc = NULL; 1159 } 1160 } 1161 1162 /* 1163 * mas_free() - Free an encoded maple node 1164 * @mas: The maple state 1165 * @used: The encoded maple node to free. 1166 * 1167 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1168 * otherwise. 1169 */ 1170 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1171 { 1172 ma_free_rcu(mte_to_node(used)); 1173 } 1174 1175 /* 1176 * mas_start() - Sets up maple state for operations. 1177 * @mas: The maple state. 1178 * 1179 * If mas->status == ma_start, then set the min, max and depth to 1180 * defaults. 
1181 * 1182 * Return: 1183 * - If mas->node is an error or not mas_start, return NULL. 1184 * - If it's an empty tree: NULL & mas->status == ma_none 1185 * - If it's a single entry: The entry & mas->status == ma_root 1186 * - If it's a tree: NULL & mas->status == ma_active 1187 */ 1188 static inline struct maple_enode *mas_start(struct ma_state *mas) 1189 { 1190 if (likely(mas_is_start(mas))) { 1191 struct maple_enode *root; 1192 1193 mas->min = 0; 1194 mas->max = ULONG_MAX; 1195 1196 retry: 1197 mas->depth = 0; 1198 root = mas_root(mas); 1199 /* Tree with nodes */ 1200 if (likely(xa_is_node(root))) { 1201 mas->depth = 0; 1202 mas->status = ma_active; 1203 mas->node = mte_safe_root(root); 1204 mas->offset = 0; 1205 if (mte_dead_node(mas->node)) 1206 goto retry; 1207 1208 return NULL; 1209 } 1210 1211 mas->node = NULL; 1212 /* empty tree */ 1213 if (unlikely(!root)) { 1214 mas->status = ma_none; 1215 mas->offset = MAPLE_NODE_SLOTS; 1216 return NULL; 1217 } 1218 1219 /* Single entry tree */ 1220 mas->status = ma_root; 1221 mas->offset = MAPLE_NODE_SLOTS; 1222 1223 /* Single entry tree. */ 1224 if (mas->index > 0) 1225 return NULL; 1226 1227 return root; 1228 } 1229 1230 return NULL; 1231 } 1232 1233 /* 1234 * ma_data_end() - Find the end of the data in a node. 1235 * @node: The maple node 1236 * @type: The maple node type 1237 * @pivots: The array of pivots in the node 1238 * @max: The maximum value in the node 1239 * 1240 * Uses metadata to find the end of the data when possible. 1241 * Return: The zero indexed last slot with data (may be null). 1242 */ 1243 static __always_inline unsigned char ma_data_end(struct maple_node *node, 1244 enum maple_type type, unsigned long *pivots, unsigned long max) 1245 { 1246 unsigned char offset; 1247 1248 if (!pivots) 1249 return 0; 1250 1251 if (type == maple_arange_64) 1252 return ma_meta_end(node, type); 1253 1254 offset = mt_pivots[type] - 1; 1255 if (likely(!pivots[offset])) 1256 return ma_meta_end(node, type); 1257 1258 if (likely(pivots[offset] == max)) 1259 return offset; 1260 1261 return mt_pivots[type]; 1262 } 1263 1264 /* 1265 * mas_data_end() - Find the end of the data (slot). 1266 * @mas: the maple state 1267 * 1268 * This method is optimized to check the metadata of a node if the node type 1269 * supports data end metadata. 1270 * 1271 * Return: The zero indexed last slot with data (may be null). 1272 */ 1273 static inline unsigned char mas_data_end(struct ma_state *mas) 1274 { 1275 enum maple_type type; 1276 struct maple_node *node; 1277 unsigned char offset; 1278 unsigned long *pivots; 1279 1280 type = mte_node_type(mas->node); 1281 node = mas_mn(mas); 1282 if (type == maple_arange_64) 1283 return ma_meta_end(node, type); 1284 1285 pivots = ma_pivots(node, type); 1286 if (unlikely(ma_dead_node(node))) 1287 return 0; 1288 1289 offset = mt_pivots[type] - 1; 1290 if (likely(!pivots[offset])) 1291 return ma_meta_end(node, type); 1292 1293 if (likely(pivots[offset] == mas->max)) 1294 return offset; 1295 1296 return mt_pivots[type]; 1297 } 1298 1299 /* 1300 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1301 * @mas: the maple state 1302 * 1303 * Return: The maximum gap in the leaf. 
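 *
 * A small worked example (values invented for illustration): with
 * mas->min == 0, pivots {10, 20, 35, ...} and slots {A, NULL, B, ...},
 * the NULL in slot 1 covers indices 11-20, so the loop computes
 * gap = pivots[1] - pivots[0] = 10 for that range.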
1304 */ 1305 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1306 { 1307 enum maple_type mt; 1308 unsigned long pstart, gap, max_gap; 1309 struct maple_node *mn; 1310 unsigned long *pivots; 1311 void __rcu **slots; 1312 unsigned char i; 1313 unsigned char max_piv; 1314 1315 mt = mte_node_type(mas->node); 1316 mn = mas_mn(mas); 1317 slots = ma_slots(mn, mt); 1318 max_gap = 0; 1319 if (unlikely(ma_is_dense(mt))) { 1320 gap = 0; 1321 for (i = 0; i < mt_slots[mt]; i++) { 1322 if (slots[i]) { 1323 if (gap > max_gap) 1324 max_gap = gap; 1325 gap = 0; 1326 } else { 1327 gap++; 1328 } 1329 } 1330 if (gap > max_gap) 1331 max_gap = gap; 1332 return max_gap; 1333 } 1334 1335 /* 1336 * Check the first implied pivot optimizes the loop below and slot 1 may 1337 * be skipped if there is a gap in slot 0. 1338 */ 1339 pivots = ma_pivots(mn, mt); 1340 if (likely(!slots[0])) { 1341 max_gap = pivots[0] - mas->min + 1; 1342 i = 2; 1343 } else { 1344 i = 1; 1345 } 1346 1347 /* reduce max_piv as the special case is checked before the loop */ 1348 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1349 /* 1350 * Check end implied pivot which can only be a gap on the right most 1351 * node. 1352 */ 1353 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1354 gap = ULONG_MAX - pivots[max_piv]; 1355 if (gap > max_gap) 1356 max_gap = gap; 1357 1358 if (max_gap > pivots[max_piv] - mas->min) 1359 return max_gap; 1360 } 1361 1362 for (; i <= max_piv; i++) { 1363 /* data == no gap. */ 1364 if (likely(slots[i])) 1365 continue; 1366 1367 pstart = pivots[i - 1]; 1368 gap = pivots[i] - pstart; 1369 if (gap > max_gap) 1370 max_gap = gap; 1371 1372 /* There cannot be two gaps in a row. */ 1373 i++; 1374 } 1375 return max_gap; 1376 } 1377 1378 /* 1379 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1380 * @node: The maple node 1381 * @gaps: The pointer to the gaps 1382 * @mt: The maple node type 1383 * @off: Pointer to store the offset location of the gap. 1384 * 1385 * Uses the metadata data end to scan backwards across set gaps. 1386 * 1387 * Return: The maximum gap value 1388 */ 1389 static inline unsigned long 1390 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1391 unsigned char *off) 1392 { 1393 unsigned char offset, i; 1394 unsigned long max_gap = 0; 1395 1396 i = offset = ma_meta_end(node, mt); 1397 do { 1398 if (gaps[i] > max_gap) { 1399 max_gap = gaps[i]; 1400 offset = i; 1401 } 1402 } while (i--); 1403 1404 *off = offset; 1405 return max_gap; 1406 } 1407 1408 /* 1409 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1410 * @mas: The maple state. 1411 * 1412 * Return: The gap value. 1413 */ 1414 static inline unsigned long mas_max_gap(struct ma_state *mas) 1415 { 1416 unsigned long *gaps; 1417 unsigned char offset; 1418 enum maple_type mt; 1419 struct maple_node *node; 1420 1421 mt = mte_node_type(mas->node); 1422 if (ma_is_leaf(mt)) 1423 return mas_leaf_max_gap(mas); 1424 1425 node = mas_mn(mas); 1426 MAS_BUG_ON(mas, mt != maple_arange_64); 1427 offset = ma_meta_gap(node); 1428 gaps = ma_gaps(node, mt); 1429 return gaps[offset]; 1430 } 1431 1432 /* 1433 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1434 * @mas: The maple state 1435 * @offset: The gap offset in the parent to set 1436 * @new: The new gap value. 1437 * 1438 * Set the parent gap then continue to set the gap upwards, using the metadata 1439 * of the parent to see if it is necessary to check the node above. 
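 *
 * For example (hypothetical values): if the parent's gaps are {5, 9, 3}
 * and its metadata points at offset 1, then writing new == 12 at offset 2
 * simply moves the metadata to offset 2, while writing new == 2 at
 * offset 1 forces a rescan via ma_max_gap() to locate the new largest gap.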
1440 */ 1441 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1442 unsigned long new) 1443 { 1444 unsigned long meta_gap = 0; 1445 struct maple_node *pnode; 1446 struct maple_enode *penode; 1447 unsigned long *pgaps; 1448 unsigned char meta_offset; 1449 enum maple_type pmt; 1450 1451 pnode = mte_parent(mas->node); 1452 pmt = mas_parent_type(mas, mas->node); 1453 penode = mt_mk_node(pnode, pmt); 1454 pgaps = ma_gaps(pnode, pmt); 1455 1456 ascend: 1457 MAS_BUG_ON(mas, pmt != maple_arange_64); 1458 meta_offset = ma_meta_gap(pnode); 1459 meta_gap = pgaps[meta_offset]; 1460 1461 pgaps[offset] = new; 1462 1463 if (meta_gap == new) 1464 return; 1465 1466 if (offset != meta_offset) { 1467 if (meta_gap > new) 1468 return; 1469 1470 ma_set_meta_gap(pnode, pmt, offset); 1471 } else if (new < meta_gap) { 1472 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1473 ma_set_meta_gap(pnode, pmt, meta_offset); 1474 } 1475 1476 if (ma_is_root(pnode)) 1477 return; 1478 1479 /* Go to the parent node. */ 1480 pnode = mte_parent(penode); 1481 pmt = mas_parent_type(mas, penode); 1482 pgaps = ma_gaps(pnode, pmt); 1483 offset = mte_parent_slot(penode); 1484 penode = mt_mk_node(pnode, pmt); 1485 goto ascend; 1486 } 1487 1488 /* 1489 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1490 * @mas: the maple state. 1491 */ 1492 static inline void mas_update_gap(struct ma_state *mas) 1493 { 1494 unsigned char pslot; 1495 unsigned long p_gap; 1496 unsigned long max_gap; 1497 1498 if (!mt_is_alloc(mas->tree)) 1499 return; 1500 1501 if (mte_is_root(mas->node)) 1502 return; 1503 1504 max_gap = mas_max_gap(mas); 1505 1506 pslot = mte_parent_slot(mas->node); 1507 p_gap = ma_gaps(mte_parent(mas->node), 1508 mas_parent_type(mas, mas->node))[pslot]; 1509 1510 if (p_gap != max_gap) 1511 mas_parent_gap(mas, pslot, max_gap); 1512 } 1513 1514 /* 1515 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1516 * @parent with the slot encoded. 1517 * @mas: the maple state (for the tree) 1518 * @parent: the maple encoded node containing the children. 1519 */ 1520 static inline void mas_adopt_children(struct ma_state *mas, 1521 struct maple_enode *parent) 1522 { 1523 enum maple_type type = mte_node_type(parent); 1524 struct maple_node *node = mte_to_node(parent); 1525 void __rcu **slots = ma_slots(node, type); 1526 unsigned long *pivots = ma_pivots(node, type); 1527 struct maple_enode *child; 1528 unsigned char offset; 1529 1530 offset = ma_data_end(node, type, pivots, mas->max); 1531 do { 1532 child = mas_slot_locked(mas, slots, offset); 1533 mas_set_parent(mas, child, parent, offset); 1534 } while (offset--); 1535 } 1536 1537 /* 1538 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1539 * node as dead. 1540 * @mas: the maple state with the new node 1541 * @old_enode: The old maple encoded node to replace. 
1542 * @new_height: if we are inserting a root node, update the height of the tree 1543 */ 1544 static inline void mas_put_in_tree(struct ma_state *mas, 1545 struct maple_enode *old_enode, char new_height) 1546 __must_hold(mas->tree->ma_lock) 1547 { 1548 unsigned char offset; 1549 void __rcu **slots; 1550 1551 if (mte_is_root(mas->node)) { 1552 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1553 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1554 mt_set_height(mas->tree, new_height); 1555 } else { 1556 1557 offset = mte_parent_slot(mas->node); 1558 slots = ma_slots(mte_parent(mas->node), 1559 mas_parent_type(mas, mas->node)); 1560 rcu_assign_pointer(slots[offset], mas->node); 1561 } 1562 1563 mte_set_node_dead(old_enode); 1564 } 1565 1566 /* 1567 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1568 * dead, and freeing it. 1569 * the parent encoding to locate the maple node in the tree. 1570 * @mas: the ma_state with @mas->node pointing to the new node. 1571 * @old_enode: The old maple encoded node. 1572 * @new_height: The new height of the tree as a result of the operation 1573 */ 1574 static inline void mas_replace_node(struct ma_state *mas, 1575 struct maple_enode *old_enode, unsigned char new_height) 1576 __must_hold(mas->tree->ma_lock) 1577 { 1578 mas_put_in_tree(mas, old_enode, new_height); 1579 mas_free(mas, old_enode); 1580 } 1581 1582 /* 1583 * mas_find_child() - Find a child who has the parent @mas->node. 1584 * @mas: the maple state with the parent. 1585 * @child: the maple state to store the child. 1586 */ 1587 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) 1588 __must_hold(mas->tree->ma_lock) 1589 { 1590 enum maple_type mt; 1591 unsigned char offset; 1592 unsigned char end; 1593 unsigned long *pivots; 1594 struct maple_enode *entry; 1595 struct maple_node *node; 1596 void __rcu **slots; 1597 1598 mt = mte_node_type(mas->node); 1599 node = mas_mn(mas); 1600 slots = ma_slots(node, mt); 1601 pivots = ma_pivots(node, mt); 1602 end = ma_data_end(node, mt, pivots, mas->max); 1603 for (offset = mas->offset; offset <= end; offset++) { 1604 entry = mas_slot_locked(mas, slots, offset); 1605 if (mte_parent(entry) == node) { 1606 *child = *mas; 1607 mas->offset = offset + 1; 1608 child->offset = offset; 1609 mas_descend(child); 1610 child->offset = 0; 1611 return true; 1612 } 1613 } 1614 return false; 1615 } 1616 1617 /* 1618 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1619 * old data or set b_node->b_end. 1620 * @b_node: the maple_big_node 1621 * @shift: the shift count 1622 */ 1623 static inline void mab_shift_right(struct maple_big_node *b_node, 1624 unsigned char shift) 1625 { 1626 unsigned long size = b_node->b_end * sizeof(unsigned long); 1627 1628 memmove(b_node->pivot + shift, b_node->pivot, size); 1629 memmove(b_node->slot + shift, b_node->slot, size); 1630 if (b_node->type == maple_arange_64) 1631 memmove(b_node->gap + shift, b_node->gap, size); 1632 } 1633 1634 /* 1635 * mab_middle_node() - Check if a middle node is needed (unlikely) 1636 * @b_node: the maple_big_node that contains the data. 1637 * @split: the potential split location 1638 * @slot_count: the size that can be stored in a single node being considered. 1639 * 1640 * Return: true if a middle node is required. 
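 *
 * For instance, on a configuration where MAPLE_RANGE64_SLOTS is 16 (the
 * 16-slot layout illustrated at the top of this file), a middle node is
 * needed once b_end reaches 32, or already at 31 when the slot at the
 * proposed split is NULL, as the checks below spell out.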
1641 */ 1642 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1643 unsigned char slot_count) 1644 { 1645 unsigned char size = b_node->b_end; 1646 1647 if (size >= 2 * slot_count) 1648 return true; 1649 1650 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1651 return true; 1652 1653 return false; 1654 } 1655 1656 /* 1657 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1658 * @b_node: the maple_big_node with the data 1659 * @split: the suggested split location 1660 * @slot_count: the number of slots in the node being considered. 1661 * 1662 * Return: the split location. 1663 */ 1664 static inline int mab_no_null_split(struct maple_big_node *b_node, 1665 unsigned char split, unsigned char slot_count) 1666 { 1667 if (!b_node->slot[split]) { 1668 /* 1669 * If the split is less than the max slot && the right side will 1670 * still be sufficient, then increment the split on NULL. 1671 */ 1672 if ((split < slot_count - 1) && 1673 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1674 split++; 1675 else 1676 split--; 1677 } 1678 return split; 1679 } 1680 1681 /* 1682 * mab_calc_split() - Calculate the split location and if there needs to be two 1683 * splits. 1684 * @mas: The maple state 1685 * @bn: The maple_big_node with the data 1686 * @mid_split: The second split, if required. 0 otherwise. 1687 * 1688 * Return: The first split location. The middle split is set in @mid_split. 1689 */ 1690 static inline int mab_calc_split(struct ma_state *mas, 1691 struct maple_big_node *bn, unsigned char *mid_split) 1692 { 1693 unsigned char b_end = bn->b_end; 1694 int split = b_end / 2; /* Assume equal split. */ 1695 unsigned char slot_count = mt_slots[bn->type]; 1696 1697 /* 1698 * To support gap tracking, all NULL entries are kept together and a node cannot 1699 * end on a NULL entry, with the exception of the left-most leaf. The 1700 * limitation means that the split of a node must be checked for this condition 1701 * and be able to put more data in one direction or the other. 1702 * 1703 * Although extremely rare, it is possible to enter what is known as the 3-way 1704 * split scenario. The 3-way split comes about by means of a store of a range 1705 * that overwrites the end and beginning of two full nodes. The result is a set 1706 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can 1707 * also be located in different parent nodes which are also full. This can 1708 * carry upwards all the way to the root in the worst case. 1709 */ 1710 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1711 split = b_end / 3; 1712 *mid_split = split * 2; 1713 } else { 1714 *mid_split = 0; 1715 } 1716 1717 /* Avoid ending a node on a NULL entry */ 1718 split = mab_no_null_split(bn, split, slot_count); 1719 1720 if (unlikely(*mid_split)) 1721 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1722 1723 return split; 1724 } 1725 1726 /* 1727 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1728 * and set @b_node->b_end to the next free slot. 1729 * @mas: The maple state 1730 * @mas_start: The starting slot to copy 1731 * @mas_end: The end slot to copy (inclusively) 1732 * @b_node: The maple_big_node to place the data 1733 * @mab_start: The starting location in maple_big_node to store the data. 
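 *
 * Callers usually copy from the start of a node; a sketch mirroring
 * mast_rebalance_prev() below:
 *
 *	mas_mab_cp(mas, 0, mas_data_end(mas), b_node, 0);
 *
 * copies slot 0 through the last occupied slot inclusively.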
1734 */ 1735 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1736 unsigned char mas_end, struct maple_big_node *b_node, 1737 unsigned char mab_start) 1738 { 1739 enum maple_type mt; 1740 struct maple_node *node; 1741 void __rcu **slots; 1742 unsigned long *pivots, *gaps; 1743 int i = mas_start, j = mab_start; 1744 unsigned char piv_end; 1745 1746 node = mas_mn(mas); 1747 mt = mte_node_type(mas->node); 1748 pivots = ma_pivots(node, mt); 1749 if (!i) { 1750 b_node->pivot[j] = pivots[i++]; 1751 if (unlikely(i > mas_end)) 1752 goto complete; 1753 j++; 1754 } 1755 1756 piv_end = min(mas_end, mt_pivots[mt]); 1757 for (; i < piv_end; i++, j++) { 1758 b_node->pivot[j] = pivots[i]; 1759 if (unlikely(!b_node->pivot[j])) 1760 goto complete; 1761 1762 if (unlikely(mas->max == b_node->pivot[j])) 1763 goto complete; 1764 } 1765 1766 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1767 1768 complete: 1769 b_node->b_end = ++j; 1770 j -= mab_start; 1771 slots = ma_slots(node, mt); 1772 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1773 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1774 gaps = ma_gaps(node, mt); 1775 memcpy(b_node->gap + mab_start, gaps + mas_start, 1776 sizeof(unsigned long) * j); 1777 } 1778 } 1779 1780 /* 1781 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1782 * @node: The maple node 1783 * @mt: The maple type 1784 * @end: The node end 1785 */ 1786 static inline void mas_leaf_set_meta(struct maple_node *node, 1787 enum maple_type mt, unsigned char end) 1788 { 1789 if (end < mt_slots[mt] - 1) 1790 ma_set_meta(node, mt, 0, end); 1791 } 1792 1793 /* 1794 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 1795 * @b_node: the maple_big_node that has the data 1796 * @mab_start: the start location in @b_node. 1797 * @mab_end: The end location in @b_node (inclusively) 1798 * @mas: The maple state with the maple encoded node. 1799 */ 1800 static inline void mab_mas_cp(struct maple_big_node *b_node, 1801 unsigned char mab_start, unsigned char mab_end, 1802 struct ma_state *mas, bool new_max) 1803 { 1804 int i, j = 0; 1805 enum maple_type mt = mte_node_type(mas->node); 1806 struct maple_node *node = mte_to_node(mas->node); 1807 void __rcu **slots = ma_slots(node, mt); 1808 unsigned long *pivots = ma_pivots(node, mt); 1809 unsigned long *gaps = NULL; 1810 unsigned char end; 1811 1812 if (mab_end - mab_start > mt_pivots[mt]) 1813 mab_end--; 1814 1815 if (!pivots[mt_pivots[mt] - 1]) 1816 slots[mt_pivots[mt]] = NULL; 1817 1818 i = mab_start; 1819 do { 1820 pivots[j++] = b_node->pivot[i++]; 1821 } while (i <= mab_end && likely(b_node->pivot[i])); 1822 1823 memcpy(slots, b_node->slot + mab_start, 1824 sizeof(void *) * (i - mab_start)); 1825 1826 if (new_max) 1827 mas->max = b_node->pivot[i - 1]; 1828 1829 end = j - 1; 1830 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 1831 unsigned long max_gap = 0; 1832 unsigned char offset = 0; 1833 1834 gaps = ma_gaps(node, mt); 1835 do { 1836 gaps[--j] = b_node->gap[--i]; 1837 if (gaps[j] > max_gap) { 1838 offset = j; 1839 max_gap = gaps[j]; 1840 } 1841 } while (j); 1842 1843 ma_set_meta(node, mt, offset, end); 1844 } else { 1845 mas_leaf_set_meta(node, mt, end); 1846 } 1847 } 1848 1849 /* 1850 * mas_store_b_node() - Store an @entry into the b_node while also copying the 1851 * data from a maple encoded node. 
1852 * @wr_mas: the maple write state 1853 * @b_node: the maple_big_node to fill with data 1854 * @offset_end: the offset to end copying 1855 * 1856 * Return: The actual end of the data stored in @b_node 1857 */ 1858 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 1859 struct maple_big_node *b_node, unsigned char offset_end) 1860 { 1861 unsigned char slot; 1862 unsigned char b_end; 1863 /* Possible underflow of piv will wrap back to 0 before use. */ 1864 unsigned long piv; 1865 struct ma_state *mas = wr_mas->mas; 1866 1867 b_node->type = wr_mas->type; 1868 b_end = 0; 1869 slot = mas->offset; 1870 if (slot) { 1871 /* Copy start data up to insert. */ 1872 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 1873 b_end = b_node->b_end; 1874 piv = b_node->pivot[b_end - 1]; 1875 } else 1876 piv = mas->min - 1; 1877 1878 if (piv + 1 < mas->index) { 1879 /* Handle range starting after old range */ 1880 b_node->slot[b_end] = wr_mas->content; 1881 if (!wr_mas->content) 1882 b_node->gap[b_end] = mas->index - 1 - piv; 1883 b_node->pivot[b_end++] = mas->index - 1; 1884 } 1885 1886 /* Store the new entry. */ 1887 mas->offset = b_end; 1888 b_node->slot[b_end] = wr_mas->entry; 1889 b_node->pivot[b_end] = mas->last; 1890 1891 /* Appended. */ 1892 if (mas->last >= mas->max) 1893 goto b_end; 1894 1895 /* Handle new range ending before old range ends */ 1896 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 1897 if (piv > mas->last) { 1898 if (offset_end != slot) 1899 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 1900 offset_end); 1901 1902 b_node->slot[++b_end] = wr_mas->content; 1903 if (!wr_mas->content) 1904 b_node->gap[b_end] = piv - mas->last + 1; 1905 b_node->pivot[b_end] = piv; 1906 } 1907 1908 slot = offset_end + 1; 1909 if (slot > mas->end) 1910 goto b_end; 1911 1912 /* Copy end data to the end of the node. */ 1913 mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end); 1914 b_node->b_end--; 1915 return; 1916 1917 b_end: 1918 b_node->b_end = b_end; 1919 } 1920 1921 /* 1922 * mas_prev_sibling() - Find the previous node with the same parent. 1923 * @mas: the maple state 1924 * 1925 * Return: True if there is a previous sibling, false otherwise. 1926 */ 1927 static inline bool mas_prev_sibling(struct ma_state *mas) 1928 { 1929 unsigned int p_slot = mte_parent_slot(mas->node); 1930 1931 /* For root node, p_slot is set to 0 by mte_parent_slot(). */ 1932 if (!p_slot) 1933 return false; 1934 1935 mas_ascend(mas); 1936 mas->offset = p_slot - 1; 1937 mas_descend(mas); 1938 return true; 1939 } 1940 1941 /* 1942 * mas_next_sibling() - Find the next node with the same parent. 1943 * @mas: the maple state 1944 * 1945 * Return: true if there is a next sibling, false otherwise. 1946 */ 1947 static inline bool mas_next_sibling(struct ma_state *mas) 1948 { 1949 MA_STATE(parent, mas->tree, mas->index, mas->last); 1950 1951 if (mte_is_root(mas->node)) 1952 return false; 1953 1954 parent = *mas; 1955 mas_ascend(&parent); 1956 parent.offset = mte_parent_slot(mas->node) + 1; 1957 if (parent.offset > mas_data_end(&parent)) 1958 return false; 1959 1960 *mas = parent; 1961 mas_descend(mas); 1962 return true; 1963 } 1964 1965 /* 1966 * mas_node_or_none() - Set the enode and state. 1967 * @mas: the maple state 1968 * @enode: The encoded maple node. 1969 * 1970 * Set the node to the enode and the status. 
1971 */ 1972 static inline void mas_node_or_none(struct ma_state *mas, 1973 struct maple_enode *enode) 1974 { 1975 if (enode) { 1976 mas->node = enode; 1977 mas->status = ma_active; 1978 } else { 1979 mas->node = NULL; 1980 mas->status = ma_none; 1981 } 1982 } 1983 1984 /* 1985 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 1986 * If @mas->index cannot be found within the containing 1987 * node, we traverse to the last entry in the node. 1988 * @wr_mas: The maple write state 1989 * 1990 * Uses mas_slot_locked() and does not need to worry about dead nodes. 1991 */ 1992 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 1993 { 1994 struct ma_state *mas = wr_mas->mas; 1995 unsigned char count, offset; 1996 1997 if (unlikely(ma_is_dense(wr_mas->type))) { 1998 wr_mas->r_max = wr_mas->r_min = mas->index; 1999 mas->offset = mas->index = mas->min; 2000 return; 2001 } 2002 2003 wr_mas->node = mas_mn(wr_mas->mas); 2004 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2005 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, 2006 wr_mas->pivots, mas->max); 2007 offset = mas->offset; 2008 2009 while (offset < count && mas->index > wr_mas->pivots[offset]) 2010 offset++; 2011 2012 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; 2013 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2014 wr_mas->offset_end = mas->offset = offset; 2015 } 2016 2017 /* 2018 * mast_rebalance_next() - Rebalance against the next node 2019 * @mast: The maple subtree state 2020 */ 2021 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2022 { 2023 unsigned char b_end = mast->bn->b_end; 2024 2025 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2026 mast->bn, b_end); 2027 mast->orig_r->last = mast->orig_r->max; 2028 } 2029 2030 /* 2031 * mast_rebalance_prev() - Rebalance against the previous node 2032 * @mast: The maple subtree state 2033 */ 2034 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2035 { 2036 unsigned char end = mas_data_end(mast->orig_l) + 1; 2037 unsigned char b_end = mast->bn->b_end; 2038 2039 mab_shift_right(mast->bn, end); 2040 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2041 mast->l->min = mast->orig_l->min; 2042 mast->orig_l->index = mast->orig_l->min; 2043 mast->bn->b_end = end + b_end; 2044 mast->l->offset += end; 2045 } 2046 2047 /* 2048 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2049 * the node to the right. Checking the nodes to the right then the left at each 2050 * level upwards until root is reached. 2051 * Data is copied into the @mast->bn. 2052 * @mast: The maple_subtree_state. 
2053 */ 2054 static inline 2055 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2056 { 2057 struct ma_state r_tmp = *mast->orig_r; 2058 struct ma_state l_tmp = *mast->orig_l; 2059 unsigned char depth = 0; 2060 2061 do { 2062 mas_ascend(mast->orig_r); 2063 mas_ascend(mast->orig_l); 2064 depth++; 2065 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2066 mast->orig_r->offset++; 2067 do { 2068 mas_descend(mast->orig_r); 2069 mast->orig_r->offset = 0; 2070 } while (--depth); 2071 2072 mast_rebalance_next(mast); 2073 *mast->orig_l = l_tmp; 2074 return true; 2075 } else if (mast->orig_l->offset != 0) { 2076 mast->orig_l->offset--; 2077 do { 2078 mas_descend(mast->orig_l); 2079 mast->orig_l->offset = 2080 mas_data_end(mast->orig_l); 2081 } while (--depth); 2082 2083 mast_rebalance_prev(mast); 2084 *mast->orig_r = r_tmp; 2085 return true; 2086 } 2087 } while (!mte_is_root(mast->orig_r->node)); 2088 2089 *mast->orig_r = r_tmp; 2090 *mast->orig_l = l_tmp; 2091 return false; 2092 } 2093 2094 /* 2095 * mast_ascend() - Ascend the original left and right maple states. 2096 * @mast: the maple subtree state. 2097 * 2098 * Ascend the original left and right sides. Set the offsets to point to the 2099 * data already in the new tree (@mast->l and @mast->r). 2100 */ 2101 static inline void mast_ascend(struct maple_subtree_state *mast) 2102 { 2103 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2104 mas_ascend(mast->orig_l); 2105 mas_ascend(mast->orig_r); 2106 2107 mast->orig_r->offset = 0; 2108 mast->orig_r->index = mast->r->max; 2109 /* last should be larger than or equal to index */ 2110 if (mast->orig_r->last < mast->orig_r->index) 2111 mast->orig_r->last = mast->orig_r->index; 2112 2113 wr_mas.type = mte_node_type(mast->orig_r->node); 2114 mas_wr_node_walk(&wr_mas); 2115 /* Set up the left side of things */ 2116 mast->orig_l->offset = 0; 2117 mast->orig_l->index = mast->l->min; 2118 wr_mas.mas = mast->orig_l; 2119 wr_mas.type = mte_node_type(mast->orig_l->node); 2120 mas_wr_node_walk(&wr_mas); 2121 2122 mast->bn->type = wr_mas.type; 2123 } 2124 2125 /* 2126 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2127 * @mas: the maple state with the allocations. 2128 * @b_node: the maple_big_node with the type encoding. 2129 * 2130 * Use the node type from the maple_big_node to allocate a new node from the 2131 * ma_state. This function exists mainly for code readability. 2132 * 2133 * Return: A new maple encoded node 2134 */ 2135 static inline struct maple_enode 2136 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2137 { 2138 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2139 } 2140 2141 /* 2142 * mas_mab_to_node() - Set up right and middle nodes 2143 * 2144 * @mas: the maple state that contains the allocations. 2145 * @b_node: the node which contains the data. 2146 * @left: The pointer which will have the left node 2147 * @right: The pointer which may have the right node 2148 * @middle: the pointer which may have the middle node (rare) 2149 * @mid_split: the split location for the middle node 2150 * 2151 * Return: the split of left. 
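 *
 * @right is only allocated when the data in @b_node does not fit in a single
 * node, and @middle only when mab_calc_split() requests a three-way split
 * (@mid_split is non-zero).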
2152 */ 2153 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2154 struct maple_big_node *b_node, struct maple_enode **left, 2155 struct maple_enode **right, struct maple_enode **middle, 2156 unsigned char *mid_split) 2157 { 2158 unsigned char split = 0; 2159 unsigned char slot_count = mt_slots[b_node->type]; 2160 2161 *left = mas_new_ma_node(mas, b_node); 2162 *right = NULL; 2163 *middle = NULL; 2164 *mid_split = 0; 2165 2166 if (b_node->b_end < slot_count) { 2167 split = b_node->b_end; 2168 } else { 2169 split = mab_calc_split(mas, b_node, mid_split); 2170 *right = mas_new_ma_node(mas, b_node); 2171 } 2172 2173 if (*mid_split) 2174 *middle = mas_new_ma_node(mas, b_node); 2175 2176 return split; 2177 2178 } 2179 2180 /* 2181 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2182 * pointer. 2183 * @b_node: the big node to add the entry 2184 * @mas: the maple state to get the pivot (mas->max) 2185 * @entry: the entry to add, if NULL nothing happens. 2186 */ 2187 static inline void mab_set_b_end(struct maple_big_node *b_node, 2188 struct ma_state *mas, 2189 void *entry) 2190 { 2191 if (!entry) 2192 return; 2193 2194 b_node->slot[b_node->b_end] = entry; 2195 if (mt_is_alloc(mas->tree)) 2196 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2197 b_node->pivot[b_node->b_end++] = mas->max; 2198 } 2199 2200 /* 2201 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent 2202 * of @mas->node to either @left or @right, depending on @slot and @split 2203 * 2204 * @mas: the maple state with the node that needs a parent 2205 * @left: possible parent 1 2206 * @right: possible parent 2 2207 * @slot: the slot the mas->node was placed 2208 * @split: the split location between @left and @right 2209 */ 2210 static inline void mas_set_split_parent(struct ma_state *mas, 2211 struct maple_enode *left, 2212 struct maple_enode *right, 2213 unsigned char *slot, unsigned char split) 2214 { 2215 if (mas_is_none(mas)) 2216 return; 2217 2218 if ((*slot) <= split) 2219 mas_set_parent(mas, mas->node, left, *slot); 2220 else if (right) 2221 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2222 2223 (*slot)++; 2224 } 2225 2226 /* 2227 * mte_mid_split_check() - Check if the next node passes the mid-split 2228 * @l: Pointer to left encoded maple node. 2229 * @m: Pointer to middle encoded maple node. 2230 * @r: Pointer to right encoded maple node. 2231 * @slot: The offset 2232 * @split: The split location. 2233 * @mid_split: The middle split. 2234 */ 2235 static inline void mte_mid_split_check(struct maple_enode **l, 2236 struct maple_enode **r, 2237 struct maple_enode *right, 2238 unsigned char slot, 2239 unsigned char *split, 2240 unsigned char mid_split) 2241 { 2242 if (*r == right) 2243 return; 2244 2245 if (slot < mid_split) 2246 return; 2247 2248 *l = *r; 2249 *r = right; 2250 *split = mid_split; 2251 } 2252 2253 /* 2254 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2255 * is taken from @mast->l. 2256 * @mast: the maple subtree state 2257 * @left: the left node 2258 * @right: the right node 2259 * @split: the split location. 
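 * @middle: the middle node, if any
 * @mid_split: the split location between @middle and @right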
2260 */ 2261 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2262 struct maple_enode *left, 2263 struct maple_enode *middle, 2264 struct maple_enode *right, 2265 unsigned char split, 2266 unsigned char mid_split) 2267 { 2268 unsigned char slot; 2269 struct maple_enode *l = left; 2270 struct maple_enode *r = right; 2271 2272 if (mas_is_none(mast->l)) 2273 return; 2274 2275 if (middle) 2276 r = middle; 2277 2278 slot = mast->l->offset; 2279 2280 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2281 mas_set_split_parent(mast->l, l, r, &slot, split); 2282 2283 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2284 mas_set_split_parent(mast->m, l, r, &slot, split); 2285 2286 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2287 mas_set_split_parent(mast->r, l, r, &slot, split); 2288 } 2289 2290 /* 2291 * mas_topiary_node() - Dispose of a single node 2292 * @mas: The maple state for pushing nodes 2293 * @in_rcu: If the tree is in rcu mode 2294 * 2295 * The node will either be RCU freed or pushed back on the maple state. 2296 */ 2297 static inline void mas_topiary_node(struct ma_state *mas, 2298 struct ma_state *tmp_mas, bool in_rcu) 2299 { 2300 struct maple_node *tmp; 2301 struct maple_enode *enode; 2302 2303 if (mas_is_none(tmp_mas)) 2304 return; 2305 2306 enode = tmp_mas->node; 2307 tmp = mte_to_node(enode); 2308 mte_set_node_dead(enode); 2309 ma_free_rcu(tmp); 2310 } 2311 2312 /* 2313 * mas_topiary_replace() - Replace the data with new data, then repair the 2314 * parent links within the new tree. Iterate over the dead sub-tree and collect 2315 * the dead subtrees and topiary the nodes that are no longer of use. 2316 * 2317 * The new tree will have up to three children with the correct parent. Keep 2318 * track of the new entries as they need to be followed to find the next level 2319 * of new entries. 2320 * 2321 * The old tree will have up to three children with the old parent. Keep track 2322 * of the old entries as they may have more nodes below replaced. Nodes within 2323 * [index, last] are dead subtrees, others need to be freed and followed. 
2324 * 2325 * @mas: The maple state pointing at the new data 2326 * @old_enode: The maple encoded node being replaced 2327 * @new_height: The new height of the tree as a result of the operation 2328 * 2329 */ 2330 static inline void mas_topiary_replace(struct ma_state *mas, 2331 struct maple_enode *old_enode, unsigned char new_height) 2332 { 2333 struct ma_state tmp[3], tmp_next[3]; 2334 MA_TOPIARY(subtrees, mas->tree); 2335 bool in_rcu; 2336 int i, n; 2337 2338 /* Place data in tree & then mark node as old */ 2339 mas_put_in_tree(mas, old_enode, new_height); 2340 2341 /* Update the parent pointers in the tree */ 2342 tmp[0] = *mas; 2343 tmp[0].offset = 0; 2344 tmp[1].status = ma_none; 2345 tmp[2].status = ma_none; 2346 while (!mte_is_leaf(tmp[0].node)) { 2347 n = 0; 2348 for (i = 0; i < 3; i++) { 2349 if (mas_is_none(&tmp[i])) 2350 continue; 2351 2352 while (n < 3) { 2353 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2354 break; 2355 n++; 2356 } 2357 2358 mas_adopt_children(&tmp[i], tmp[i].node); 2359 } 2360 2361 if (MAS_WARN_ON(mas, n == 0)) 2362 break; 2363 2364 while (n < 3) 2365 tmp_next[n++].status = ma_none; 2366 2367 for (i = 0; i < 3; i++) 2368 tmp[i] = tmp_next[i]; 2369 } 2370 2371 /* Collect the old nodes that need to be discarded */ 2372 if (mte_is_leaf(old_enode)) 2373 return mas_free(mas, old_enode); 2374 2375 tmp[0] = *mas; 2376 tmp[0].offset = 0; 2377 tmp[0].node = old_enode; 2378 tmp[1].status = ma_none; 2379 tmp[2].status = ma_none; 2380 in_rcu = mt_in_rcu(mas->tree); 2381 do { 2382 n = 0; 2383 for (i = 0; i < 3; i++) { 2384 if (mas_is_none(&tmp[i])) 2385 continue; 2386 2387 while (n < 3) { 2388 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2389 break; 2390 2391 if ((tmp_next[n].min >= tmp_next->index) && 2392 (tmp_next[n].max <= tmp_next->last)) { 2393 mat_add(&subtrees, tmp_next[n].node); 2394 tmp_next[n].status = ma_none; 2395 } else { 2396 n++; 2397 } 2398 } 2399 } 2400 2401 if (MAS_WARN_ON(mas, n == 0)) 2402 break; 2403 2404 while (n < 3) 2405 tmp_next[n++].status = ma_none; 2406 2407 for (i = 0; i < 3; i++) { 2408 mas_topiary_node(mas, &tmp[i], in_rcu); 2409 tmp[i] = tmp_next[i]; 2410 } 2411 } while (!mte_is_leaf(tmp[0].node)); 2412 2413 for (i = 0; i < 3; i++) 2414 mas_topiary_node(mas, &tmp[i], in_rcu); 2415 2416 mas_mat_destroy(mas, &subtrees); 2417 } 2418 2419 /* 2420 * mas_wmb_replace() - Write memory barrier and replace 2421 * @mas: The maple state 2422 * @old_enode: The old maple encoded node that is being replaced. 2423 * @new_height: The new height of the tree as a result of the operation 2424 * 2425 * Updates gap as necessary. 2426 */ 2427 static inline void mas_wmb_replace(struct ma_state *mas, 2428 struct maple_enode *old_enode, unsigned char new_height) 2429 { 2430 /* Insert the new data in the tree */ 2431 mas_topiary_replace(mas, old_enode, new_height); 2432 2433 if (mte_is_leaf(mas->node)) 2434 return; 2435 2436 mas_update_gap(mas); 2437 } 2438 2439 /* 2440 * mast_cp_to_nodes() - Copy data out to nodes. 2441 * @mast: The maple subtree state 2442 * @left: The left encoded maple node 2443 * @middle: The middle encoded maple node 2444 * @right: The right encoded maple node 2445 * @split: The location to split between left and (middle ? middle : right) 2446 * @mid_split: The location to split between middle and right. 
2447 */ 2448 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2449 struct maple_enode *left, struct maple_enode *middle, 2450 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2451 { 2452 bool new_lmax = true; 2453 2454 mas_node_or_none(mast->l, left); 2455 mas_node_or_none(mast->m, middle); 2456 mas_node_or_none(mast->r, right); 2457 2458 mast->l->min = mast->orig_l->min; 2459 if (split == mast->bn->b_end) { 2460 mast->l->max = mast->orig_r->max; 2461 new_lmax = false; 2462 } 2463 2464 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2465 2466 if (middle) { 2467 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2468 mast->m->min = mast->bn->pivot[split] + 1; 2469 split = mid_split; 2470 } 2471 2472 mast->r->max = mast->orig_r->max; 2473 if (right) { 2474 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2475 mast->r->min = mast->bn->pivot[split] + 1; 2476 } 2477 } 2478 2479 /* 2480 * mast_combine_cp_left - Copy in the original left side of the tree into the 2481 * combined data set in the maple subtree state big node. 2482 * @mast: The maple subtree state 2483 */ 2484 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2485 { 2486 unsigned char l_slot = mast->orig_l->offset; 2487 2488 if (!l_slot) 2489 return; 2490 2491 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2492 } 2493 2494 /* 2495 * mast_combine_cp_right: Copy in the original right side of the tree into the 2496 * combined data set in the maple subtree state big node. 2497 * @mast: The maple subtree state 2498 */ 2499 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2500 { 2501 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2502 return; 2503 2504 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2505 mt_slot_count(mast->orig_r->node), mast->bn, 2506 mast->bn->b_end); 2507 mast->orig_r->last = mast->orig_r->max; 2508 } 2509 2510 /* 2511 * mast_sufficient: Check if the maple subtree state has enough data in the big 2512 * node to create at least one sufficient node 2513 * @mast: the maple subtree state 2514 */ 2515 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2516 { 2517 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2518 return true; 2519 2520 return false; 2521 } 2522 2523 /* 2524 * mast_overflow: Check if there is too much data in the subtree state for a 2525 * single node. 
2526 * @mast: The maple subtree state 2527 */ 2528 static inline bool mast_overflow(struct maple_subtree_state *mast) 2529 { 2530 if (mast->bn->b_end > mt_slot_count(mast->orig_l->node)) 2531 return true; 2532 2533 return false; 2534 } 2535 2536 static inline void *mtree_range_walk(struct ma_state *mas) 2537 { 2538 unsigned long *pivots; 2539 unsigned char offset; 2540 struct maple_node *node; 2541 struct maple_enode *next, *last; 2542 enum maple_type type; 2543 void __rcu **slots; 2544 unsigned char end; 2545 unsigned long max, min; 2546 unsigned long prev_max, prev_min; 2547 2548 next = mas->node; 2549 min = mas->min; 2550 max = mas->max; 2551 do { 2552 last = next; 2553 node = mte_to_node(next); 2554 type = mte_node_type(next); 2555 pivots = ma_pivots(node, type); 2556 end = ma_data_end(node, type, pivots, max); 2557 prev_min = min; 2558 prev_max = max; 2559 if (pivots[0] >= mas->index) { 2560 offset = 0; 2561 max = pivots[0]; 2562 goto next; 2563 } 2564 2565 offset = 1; 2566 while (offset < end) { 2567 if (pivots[offset] >= mas->index) { 2568 max = pivots[offset]; 2569 break; 2570 } 2571 offset++; 2572 } 2573 2574 min = pivots[offset - 1] + 1; 2575 next: 2576 slots = ma_slots(node, type); 2577 next = mt_slot(mas->tree, slots, offset); 2578 if (unlikely(ma_dead_node(node))) 2579 goto dead_node; 2580 } while (!ma_is_leaf(type)); 2581 2582 mas->end = end; 2583 mas->offset = offset; 2584 mas->index = min; 2585 mas->last = max; 2586 mas->min = prev_min; 2587 mas->max = prev_max; 2588 mas->node = last; 2589 return (void *)next; 2590 2591 dead_node: 2592 mas_reset(mas); 2593 return NULL; 2594 } 2595 2596 /* 2597 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2598 * @mas: The starting maple state 2599 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2600 * @count: The estimated count of iterations needed. 2601 * 2602 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2603 * is hit. First @b_node is split into two entries which are inserted into the 2604 * next iteration of the loop. @b_node is returned populated with the final 2605 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2606 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2607 * to account of what has been copied into the new sub-tree. The update of 2608 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2609 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2610 * the new sub-tree in case the sub-tree becomes the full tree. 2611 */ 2612 static void mas_spanning_rebalance(struct ma_state *mas, 2613 struct maple_subtree_state *mast, unsigned char count) 2614 { 2615 unsigned char split, mid_split; 2616 unsigned char slot = 0; 2617 unsigned char new_height = 0; /* used if node is a new root */ 2618 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2619 struct maple_enode *old_enode; 2620 2621 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2622 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2623 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2624 2625 /* 2626 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2627 * Rebalancing is done by use of the ``struct maple_topiary``. 2628 */ 2629 mast->l = &l_mas; 2630 mast->m = &m_mas; 2631 mast->r = &r_mas; 2632 l_mas.status = r_mas.status = m_mas.status = ma_none; 2633 2634 /* Check if this is not root and has sufficient data. 
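If the data is insufficient, pull entries in from a neighbouring subtree via mast_spanning_rebalance() before rebuilding this level.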
*/ 2635 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2636 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2637 mast_spanning_rebalance(mast); 2638 2639 /* 2640 * Each level of the tree is examined and balanced, pushing data to the left or 2641 * right, or rebalancing against left or right nodes is employed to avoid 2642 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2643 * the tree is created, there may be a mix of new and old nodes. The old nodes 2644 * will have the incorrect parent pointers and currently be in two trees: the 2645 * original tree and the partially new tree. To remedy the parent pointers in 2646 * the old tree, the new data is swapped into the active tree and a walk down 2647 * the tree is performed and the parent pointers are updated. 2648 * See mas_topiary_replace() for more information. 2649 */ 2650 while (count--) { 2651 mast->bn->b_end--; 2652 mast->bn->type = mte_node_type(mast->orig_l->node); 2653 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2654 &mid_split); 2655 mast_set_split_parents(mast, left, middle, right, split, 2656 mid_split); 2657 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2658 new_height++; 2659 2660 /* 2661 * Copy data from next level in the tree to mast->bn from next 2662 * iteration 2663 */ 2664 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2665 mast->bn->type = mte_node_type(left); 2666 2667 /* Root already stored in l->node. */ 2668 if (mas_is_root_limits(mast->l)) 2669 goto new_root; 2670 2671 mast_ascend(mast); 2672 mast_combine_cp_left(mast); 2673 l_mas.offset = mast->bn->b_end; 2674 mab_set_b_end(mast->bn, &l_mas, left); 2675 mab_set_b_end(mast->bn, &m_mas, middle); 2676 mab_set_b_end(mast->bn, &r_mas, right); 2677 2678 /* Copy anything necessary out of the right node. */ 2679 mast_combine_cp_right(mast); 2680 mast->orig_l->last = mast->orig_l->max; 2681 2682 if (mast_sufficient(mast)) { 2683 if (mast_overflow(mast)) 2684 continue; 2685 2686 if (mast->orig_l->node == mast->orig_r->node) { 2687 /* 2688 * The data in b_node should be stored in one 2689 * node and in the tree 2690 */ 2691 slot = mast->l->offset; 2692 break; 2693 } 2694 2695 continue; 2696 } 2697 2698 /* May be a new root stored in mast->bn */ 2699 if (mas_is_root_limits(mast->orig_l)) 2700 break; 2701 2702 mast_spanning_rebalance(mast); 2703 2704 /* rebalancing from other nodes may require another loop. */ 2705 if (!count) 2706 count++; 2707 } 2708 2709 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2710 mte_node_type(mast->orig_l->node)); 2711 2712 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2713 new_height++; 2714 mas_set_parent(mas, left, l_mas.node, slot); 2715 if (middle) 2716 mas_set_parent(mas, middle, l_mas.node, ++slot); 2717 2718 if (right) 2719 mas_set_parent(mas, right, l_mas.node, ++slot); 2720 2721 if (mas_is_root_limits(mast->l)) { 2722 new_root: 2723 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2724 while (!mte_is_root(mast->orig_l->node)) 2725 mast_ascend(mast); 2726 } else { 2727 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2728 } 2729 2730 old_enode = mast->orig_l->node; 2731 mas->depth = l_mas.depth; 2732 mas->node = l_mas.node; 2733 mas->min = l_mas.min; 2734 mas->max = l_mas.max; 2735 mas->offset = l_mas.offset; 2736 mas_wmb_replace(mas, old_enode, new_height); 2737 mtree_range_walk(mas); 2738 return; 2739 } 2740 2741 /* 2742 * mas_rebalance() - Rebalance a given node. 
2743 * @mas: The maple state 2744 * @b_node: The big maple node. 2745 * 2746 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2747 * Continue upwards until tree is sufficient. 2748 */ 2749 static inline void mas_rebalance(struct ma_state *mas, 2750 struct maple_big_node *b_node) 2751 { 2752 char empty_count = mas_mt_height(mas); 2753 struct maple_subtree_state mast; 2754 unsigned char shift, b_end = ++b_node->b_end; 2755 2756 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2757 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2758 2759 trace_ma_op(__func__, mas); 2760 2761 /* 2762 * Rebalancing occurs if a node is insufficient. Data is rebalanced 2763 * against the node to the right if it exists, otherwise the node to the 2764 * left of this node is rebalanced against this node. If rebalancing 2765 * causes just one node to be produced instead of two, then the parent 2766 * is also examined and rebalanced if it is insufficient. Every level 2767 * tries to combine the data in the same way. If one node contains the 2768 * entire range of the tree, then that node is used as a new root node. 2769 */ 2770 2771 mast.orig_l = &l_mas; 2772 mast.orig_r = &r_mas; 2773 mast.bn = b_node; 2774 mast.bn->type = mte_node_type(mas->node); 2775 2776 l_mas = r_mas = *mas; 2777 2778 if (mas_next_sibling(&r_mas)) { 2779 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 2780 r_mas.last = r_mas.index = r_mas.max; 2781 } else { 2782 mas_prev_sibling(&l_mas); 2783 shift = mas_data_end(&l_mas) + 1; 2784 mab_shift_right(b_node, shift); 2785 mas->offset += shift; 2786 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 2787 b_node->b_end = shift + b_end; 2788 l_mas.index = l_mas.last = l_mas.min; 2789 } 2790 2791 return mas_spanning_rebalance(mas, &mast, empty_count); 2792 } 2793 2794 /* 2795 * mas_split_final_node() - Split the final node in a subtree operation. 2796 * @mast: the maple subtree state 2797 * @mas: The maple state 2798 */ 2799 static inline void mas_split_final_node(struct maple_subtree_state *mast, 2800 struct ma_state *mas) 2801 { 2802 struct maple_enode *ancestor; 2803 2804 if (mte_is_root(mas->node)) { 2805 if (mt_is_alloc(mas->tree)) 2806 mast->bn->type = maple_arange_64; 2807 else 2808 mast->bn->type = maple_range_64; 2809 } 2810 /* 2811 * Only a single node is used here, could be root. 2812 * The Big_node data should just fit in a single node. 2813 */ 2814 ancestor = mas_new_ma_node(mas, mast->bn); 2815 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 2816 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 2817 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 2818 2819 mast->l->node = ancestor; 2820 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 2821 mas->offset = mast->bn->b_end - 1; 2822 } 2823 2824 /* 2825 * mast_fill_bnode() - Copy data into the big node in the subtree state 2826 * @mast: The maple subtree state 2827 * @mas: the maple state 2828 * @skip: The number of entries to skip for new nodes insertion. 
2829 */ 2830 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 2831 struct ma_state *mas, 2832 unsigned char skip) 2833 { 2834 bool cp = true; 2835 unsigned char split; 2836 2837 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2838 2839 if (mte_is_root(mas->node)) { 2840 cp = false; 2841 } else { 2842 mas_ascend(mas); 2843 mas->offset = mte_parent_slot(mas->node); 2844 } 2845 2846 if (cp && mast->l->offset) 2847 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 2848 2849 split = mast->bn->b_end; 2850 mab_set_b_end(mast->bn, mast->l, mast->l->node); 2851 mast->r->offset = mast->bn->b_end; 2852 mab_set_b_end(mast->bn, mast->r, mast->r->node); 2853 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 2854 cp = false; 2855 2856 if (cp) 2857 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 2858 mast->bn, mast->bn->b_end); 2859 2860 mast->bn->b_end--; 2861 mast->bn->type = mte_node_type(mas->node); 2862 } 2863 2864 /* 2865 * mast_split_data() - Split the data in the subtree state big node into regular 2866 * nodes. 2867 * @mast: The maple subtree state 2868 * @mas: The maple state 2869 * @split: The location to split the big node 2870 */ 2871 static inline void mast_split_data(struct maple_subtree_state *mast, 2872 struct ma_state *mas, unsigned char split) 2873 { 2874 unsigned char p_slot; 2875 2876 mab_mas_cp(mast->bn, 0, split, mast->l, true); 2877 mte_set_pivot(mast->r->node, 0, mast->r->max); 2878 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 2879 mast->l->offset = mte_parent_slot(mas->node); 2880 mast->l->max = mast->bn->pivot[split]; 2881 mast->r->min = mast->l->max + 1; 2882 if (mte_is_leaf(mas->node)) 2883 return; 2884 2885 p_slot = mast->orig_l->offset; 2886 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 2887 &p_slot, split); 2888 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 2889 &p_slot, split); 2890 } 2891 2892 /* 2893 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 2894 * data to the right or left node if there is room. 2895 * @mas: The maple state 2896 * @mast: The maple subtree state 2897 * @left: Push left or not. 2898 * 2899 * Keeping the height of the tree low means faster lookups. 2900 * 2901 * Return: True if pushed, false otherwise. 
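 *
 * When a push succeeds, the parent update is completed here (ending with
 * mas_split_final_node()), which is why mas_split() stops walking up the
 * tree after a successful push.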
2902 */ 2903 static inline bool mas_push_data(struct ma_state *mas, 2904 struct maple_subtree_state *mast, bool left) 2905 { 2906 unsigned char slot_total = mast->bn->b_end; 2907 unsigned char end, space, split; 2908 2909 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 2910 tmp_mas = *mas; 2911 tmp_mas.depth = mast->l->depth; 2912 2913 if (left && !mas_prev_sibling(&tmp_mas)) 2914 return false; 2915 else if (!left && !mas_next_sibling(&tmp_mas)) 2916 return false; 2917 2918 end = mas_data_end(&tmp_mas); 2919 slot_total += end; 2920 space = 2 * mt_slot_count(mas->node) - 2; 2921 /* -2 instead of -1 to ensure there isn't a triple split */ 2922 if (ma_is_leaf(mast->bn->type)) 2923 space--; 2924 2925 if (mas->max == ULONG_MAX) 2926 space--; 2927 2928 if (slot_total >= space) 2929 return false; 2930 2931 /* Get the data; Fill mast->bn */ 2932 mast->bn->b_end++; 2933 if (left) { 2934 mab_shift_right(mast->bn, end + 1); 2935 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 2936 mast->bn->b_end = slot_total + 1; 2937 } else { 2938 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 2939 } 2940 2941 /* Configure mast for splitting of mast->bn */ 2942 split = mt_slots[mast->bn->type] - 2; 2943 if (left) { 2944 /* Switch mas to prev node */ 2945 *mas = tmp_mas; 2946 /* Start using mast->l for the left side. */ 2947 tmp_mas.node = mast->l->node; 2948 *mast->l = tmp_mas; 2949 } else { 2950 tmp_mas.node = mast->r->node; 2951 *mast->r = tmp_mas; 2952 split = slot_total - split; 2953 } 2954 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 2955 /* Update parent slot for split calculation. */ 2956 if (left) 2957 mast->orig_l->offset += end + 1; 2958 2959 mast_split_data(mast, mas, split); 2960 mast_fill_bnode(mast, mas, 2); 2961 mas_split_final_node(mast, mas); 2962 return true; 2963 } 2964 2965 /* 2966 * mas_split() - Split data that is too big for one node into two. 2967 * @mas: The maple state 2968 * @b_node: The maple big node 2969 */ 2970 static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) 2971 { 2972 struct maple_subtree_state mast; 2973 int height = 0; 2974 unsigned int orig_height = mas_mt_height(mas); 2975 unsigned char mid_split, split = 0; 2976 struct maple_enode *old; 2977 2978 /* 2979 * Splitting is handled differently from any other B-tree; the Maple 2980 * Tree splits upwards. Splitting up means that the split operation 2981 * occurs when the walk of the tree hits the leaves and not on the way 2982 * down. The reason for splitting up is that it is impossible to know 2983 * how much space will be needed until the leaf is (or leaves are) 2984 * reached. Since overwriting data is allowed and a range could 2985 * overwrite more than one range or result in changing one entry into 3 2986 * entries, it is impossible to know if a split is required until the 2987 * data is examined. 2988 * 2989 * Splitting is a balancing act between keeping allocations to a minimum 2990 * and avoiding a 'jitter' event where a tree is expanded to make room 2991 * for an entry followed by a contraction when the entry is removed. To 2992 * accomplish the balance, there are empty slots remaining in both left 2993 * and right nodes after a split. 
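 *
 * For example, a store into a full leaf followed by an erase of the same
 * range could otherwise split a node and then immediately rebalance it back;
 * the spare slots let such back-to-back modifications be absorbed in place.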
2994 */ 2995 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2996 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2997 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 2998 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 2999 3000 trace_ma_op(__func__, mas); 3001 3002 mast.l = &l_mas; 3003 mast.r = &r_mas; 3004 mast.orig_l = &prev_l_mas; 3005 mast.orig_r = &prev_r_mas; 3006 mast.bn = b_node; 3007 3008 while (height++ <= orig_height) { 3009 if (mt_slots[b_node->type] > b_node->b_end) { 3010 mas_split_final_node(&mast, mas); 3011 break; 3012 } 3013 3014 l_mas = r_mas = *mas; 3015 l_mas.node = mas_new_ma_node(mas, b_node); 3016 r_mas.node = mas_new_ma_node(mas, b_node); 3017 /* 3018 * Another way that 'jitter' is avoided is to terminate a split up early if the 3019 * left or right node has space to spare. This is referred to as "pushing left" 3020 * or "pushing right" and is similar to the B* tree, except the nodes left or 3021 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3022 * is a significant savings. 3023 */ 3024 /* Try to push left. */ 3025 if (mas_push_data(mas, &mast, true)) { 3026 height++; 3027 break; 3028 } 3029 /* Try to push right. */ 3030 if (mas_push_data(mas, &mast, false)) { 3031 height++; 3032 break; 3033 } 3034 3035 split = mab_calc_split(mas, b_node, &mid_split); 3036 mast_split_data(&mast, mas, split); 3037 /* 3038 * Usually correct, mab_mas_cp in the above call overwrites 3039 * r->max. 3040 */ 3041 mast.r->max = mas->max; 3042 mast_fill_bnode(&mast, mas, 1); 3043 prev_l_mas = *mast.l; 3044 prev_r_mas = *mast.r; 3045 } 3046 3047 /* Set the original node as dead */ 3048 old = mas->node; 3049 mas->node = l_mas.node; 3050 mas_wmb_replace(mas, old, height); 3051 mtree_range_walk(mas); 3052 return; 3053 } 3054 3055 /* 3056 * mas_commit_b_node() - Commit the big node into the tree. 
3057 * @wr_mas: The maple write state 3058 * @b_node: The maple big node 3059 */ 3060 static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, 3061 struct maple_big_node *b_node) 3062 { 3063 enum store_type type = wr_mas->mas->store_type; 3064 3065 WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store); 3066 3067 if (type == wr_rebalance) 3068 return mas_rebalance(wr_mas->mas, b_node); 3069 3070 return mas_split(wr_mas->mas, b_node); 3071 } 3072 3073 /* 3074 * mas_root_expand() - Expand a root to a node 3075 * @mas: The maple state 3076 * @entry: The entry to store into the tree 3077 */ 3078 static inline void mas_root_expand(struct ma_state *mas, void *entry) 3079 { 3080 void *contents = mas_root_locked(mas); 3081 enum maple_type type = maple_leaf_64; 3082 struct maple_node *node; 3083 void __rcu **slots; 3084 unsigned long *pivots; 3085 int slot = 0; 3086 3087 node = mas_pop_node(mas); 3088 pivots = ma_pivots(node, type); 3089 slots = ma_slots(node, type); 3090 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3091 mas->node = mt_mk_node(node, type); 3092 mas->status = ma_active; 3093 3094 if (mas->index) { 3095 if (contents) { 3096 rcu_assign_pointer(slots[slot], contents); 3097 if (likely(mas->index > 1)) 3098 slot++; 3099 } 3100 pivots[slot++] = mas->index - 1; 3101 } 3102 3103 rcu_assign_pointer(slots[slot], entry); 3104 mas->offset = slot; 3105 pivots[slot] = mas->last; 3106 if (mas->last != ULONG_MAX) 3107 pivots[++slot] = ULONG_MAX; 3108 3109 mt_set_height(mas->tree, 1); 3110 ma_set_meta(node, maple_leaf_64, 0, slot); 3111 /* swap the new root into the tree */ 3112 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3113 return; 3114 } 3115 3116 /* 3117 * mas_store_root() - Storing value into root. 3118 * @mas: The maple state 3119 * @entry: The entry to store. 3120 * 3121 * There is no root node now and we are storing a value into the root - this 3122 * function either assigns the pointer or expands into a node. 3123 */ 3124 static inline void mas_store_root(struct ma_state *mas, void *entry) 3125 { 3126 if (!entry) { 3127 if (!mas->index) 3128 rcu_assign_pointer(mas->tree->ma_root, NULL); 3129 } else if (likely((mas->last != 0) || (mas->index != 0))) 3130 mas_root_expand(mas, entry); 3131 else if (((unsigned long) (entry) & 3) == 2) 3132 mas_root_expand(mas, entry); 3133 else { 3134 rcu_assign_pointer(mas->tree->ma_root, entry); 3135 mas->status = ma_start; 3136 } 3137 } 3138 3139 /* 3140 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3141 * spans the node. 3142 * @wr_mas: The maple write state 3143 * 3144 * Spanning writes are writes that start in one node and end in another OR if 3145 * the write of a %NULL will cause the node to end with a %NULL. 3146 * 3147 * Return: True if this is a spanning write, false otherwise. 3148 */ 3149 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3150 { 3151 unsigned long max = wr_mas->r_max; 3152 unsigned long last = wr_mas->mas->last; 3153 enum maple_type type = wr_mas->type; 3154 void *entry = wr_mas->entry; 3155 3156 /* Contained in this pivot, fast path */ 3157 if (last < max) 3158 return false; 3159 3160 if (ma_is_leaf(type)) { 3161 max = wr_mas->mas->max; 3162 if (last < max) 3163 return false; 3164 } 3165 3166 if (last == max) { 3167 /* 3168 * The last entry of leaf node cannot be NULL unless it is the 3169 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 
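 * For example, writing NULL over a range that ends exactly at a leaf's
 * maximum (when that maximum is not ULONG_MAX) would leave the node ending
 * in NULL, so it is treated as a spanning write even though the range itself
 * does not cross into another node.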
3170 */ 3171 if (entry || last == ULONG_MAX) 3172 return false; 3173 } 3174 3175 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3176 return true; 3177 } 3178 3179 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3180 { 3181 wr_mas->type = mte_node_type(wr_mas->mas->node); 3182 mas_wr_node_walk(wr_mas); 3183 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3184 } 3185 3186 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3187 { 3188 wr_mas->mas->max = wr_mas->r_max; 3189 wr_mas->mas->min = wr_mas->r_min; 3190 wr_mas->mas->node = wr_mas->content; 3191 wr_mas->mas->offset = 0; 3192 wr_mas->mas->depth++; 3193 } 3194 /* 3195 * mas_wr_walk() - Walk the tree for a write. 3196 * @wr_mas: The maple write state 3197 * 3198 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3199 * 3200 * Return: True if it's contained in a node, false on spanning write. 3201 */ 3202 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3203 { 3204 struct ma_state *mas = wr_mas->mas; 3205 3206 while (true) { 3207 mas_wr_walk_descend(wr_mas); 3208 if (unlikely(mas_is_span_wr(wr_mas))) 3209 return false; 3210 3211 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3212 mas->offset); 3213 if (ma_is_leaf(wr_mas->type)) 3214 return true; 3215 3216 if (mas->end < mt_slots[wr_mas->type] - 1) 3217 wr_mas->vacant_height = mas->depth + 1; 3218 3219 if (ma_is_root(mas_mn(mas))) { 3220 /* root needs more than 2 entries to be sufficient + 1 */ 3221 if (mas->end > 2) 3222 wr_mas->sufficient_height = 1; 3223 } else if (mas->end > mt_min_slots[wr_mas->type] + 1) 3224 wr_mas->sufficient_height = mas->depth + 1; 3225 3226 mas_wr_walk_traverse(wr_mas); 3227 } 3228 3229 return true; 3230 } 3231 3232 static void mas_wr_walk_index(struct ma_wr_state *wr_mas) 3233 { 3234 struct ma_state *mas = wr_mas->mas; 3235 3236 while (true) { 3237 mas_wr_walk_descend(wr_mas); 3238 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3239 mas->offset); 3240 if (ma_is_leaf(wr_mas->type)) 3241 return; 3242 mas_wr_walk_traverse(wr_mas); 3243 } 3244 } 3245 /* 3246 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 
 * @l_wr_mas: The left maple write state
 * @r_wr_mas: The right maple write state
 */
static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
					    struct ma_wr_state *r_wr_mas)
{
	struct ma_state *r_mas = r_wr_mas->mas;
	struct ma_state *l_mas = l_wr_mas->mas;
	unsigned char l_slot;

	l_slot = l_mas->offset;
	if (!l_wr_mas->content)
		l_mas->index = l_wr_mas->r_min;

	if ((l_mas->index == l_wr_mas->r_min) &&
	    (l_slot &&
	     !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
		if (l_slot > 1)
			l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
		else
			l_mas->index = l_mas->min;

		l_mas->offset = l_slot - 1;
	}

	if (!r_wr_mas->content) {
		if (r_mas->last < r_wr_mas->r_max)
			r_mas->last = r_wr_mas->r_max;
		r_mas->offset++;
	} else if ((r_mas->last == r_wr_mas->r_max) &&
		   (r_mas->last < r_mas->max) &&
		   !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
		r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
					     r_mas->offset + 1, r_wr_mas->type);
		r_mas->offset++;
	}
}

static inline void *mas_state_walk(struct ma_state *mas)
{
	void *entry;

	entry = mas_start(mas);
	if (mas_is_none(mas))
		return NULL;

	if (mas_is_ptr(mas))
		return entry;

	return mtree_range_walk(mas);
}

/*
 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
 * to date.
 *
 * @mas: The maple state.
 *
 * Note: Leaves @mas in an undesirable state.
 * Return: The entry for @mas->index or %NULL on dead node.
 */
static inline void *mtree_lookup_walk(struct ma_state *mas)
{
	unsigned long *pivots;
	unsigned char offset;
	struct maple_node *node;
	struct maple_enode *next;
	enum maple_type type;
	void __rcu **slots;
	unsigned char end;

	next = mas->node;
	do {
		node = mte_to_node(next);
		type = mte_node_type(next);
		pivots = ma_pivots(node, type);
		end = mt_pivots[type];
		offset = 0;
		do {
			if (pivots[offset] >= mas->index)
				break;
		} while (++offset < end);

		slots = ma_slots(node, type);
		next = mt_slot(mas->tree, slots, offset);
		if (unlikely(ma_dead_node(node)))
			goto dead_node;
	} while (!ma_is_leaf(type));

	return (void *)next;

dead_node:
	mas_reset(mas);
	return NULL;
}

static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
/*
 * mas_new_root() - Create a new root node that only contains the entry passed
 * in.
 * @mas: The maple state
 * @entry: The entry to store.
3349 * 3350 * Only valid when the index == 0 and the last == ULONG_MAX 3351 */ 3352 static inline void mas_new_root(struct ma_state *mas, void *entry) 3353 { 3354 struct maple_enode *root = mas_root_locked(mas); 3355 enum maple_type type = maple_leaf_64; 3356 struct maple_node *node; 3357 void __rcu **slots; 3358 unsigned long *pivots; 3359 3360 WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); 3361 3362 if (!entry) { 3363 mt_set_height(mas->tree, 0); 3364 rcu_assign_pointer(mas->tree->ma_root, entry); 3365 mas->status = ma_start; 3366 goto done; 3367 } 3368 3369 node = mas_pop_node(mas); 3370 pivots = ma_pivots(node, type); 3371 slots = ma_slots(node, type); 3372 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3373 mas->node = mt_mk_node(node, type); 3374 mas->status = ma_active; 3375 rcu_assign_pointer(slots[0], entry); 3376 pivots[0] = mas->last; 3377 mt_set_height(mas->tree, 1); 3378 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3379 3380 done: 3381 if (xa_is_node(root)) 3382 mte_destroy_walk(root, mas->tree); 3383 3384 return; 3385 } 3386 /* 3387 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3388 * and new nodes where necessary, then place the sub-tree in the actual tree. 3389 * Note that mas is expected to point to the node which caused the store to 3390 * span. 3391 * @wr_mas: The maple write state 3392 */ 3393 static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3394 { 3395 struct maple_subtree_state mast; 3396 struct maple_big_node b_node; 3397 struct ma_state *mas; 3398 unsigned char height; 3399 3400 /* Left and Right side of spanning store */ 3401 MA_STATE(l_mas, NULL, 0, 0); 3402 MA_STATE(r_mas, NULL, 0, 0); 3403 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3404 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3405 3406 /* 3407 * A store operation that spans multiple nodes is called a spanning 3408 * store and is handled early in the store call stack by the function 3409 * mas_is_span_wr(). When a spanning store is identified, the maple 3410 * state is duplicated. The first maple state walks the left tree path 3411 * to ``index``, the duplicate walks the right tree path to ``last``. 3412 * The data in the two nodes are combined into a single node, two nodes, 3413 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3414 * written to the last entry of a node is considered a spanning store as 3415 * a rebalance is required for the operation to complete and an overflow 3416 * of data may happen. 3417 */ 3418 mas = wr_mas->mas; 3419 trace_ma_op(__func__, mas); 3420 3421 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3422 return mas_new_root(mas, wr_mas->entry); 3423 /* 3424 * Node rebalancing may occur due to this store, so there may be three new 3425 * entries per level plus a new root. 3426 */ 3427 height = mas_mt_height(mas); 3428 3429 /* 3430 * Set up right side. Need to get to the next offset after the spanning 3431 * store to ensure it's not NULL and to combine both the next node and 3432 * the node with the start together. 3433 */ 3434 r_mas = *mas; 3435 /* Avoid overflow, walk to next slot in the tree. */ 3436 if (r_mas.last + 1) 3437 r_mas.last++; 3438 3439 r_mas.index = r_mas.last; 3440 mas_wr_walk_index(&r_wr_mas); 3441 r_mas.last = r_mas.index = mas->last; 3442 3443 /* Set up left side. 
*/ 3444 l_mas = *mas; 3445 mas_wr_walk_index(&l_wr_mas); 3446 3447 if (!wr_mas->entry) { 3448 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3449 mas->offset = l_mas.offset; 3450 mas->index = l_mas.index; 3451 mas->last = l_mas.last = r_mas.last; 3452 } 3453 3454 /* expanding NULLs may make this cover the entire range */ 3455 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3456 mas_set_range(mas, 0, ULONG_MAX); 3457 return mas_new_root(mas, wr_mas->entry); 3458 } 3459 3460 memset(&b_node, 0, sizeof(struct maple_big_node)); 3461 /* Copy l_mas and store the value in b_node. */ 3462 mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); 3463 /* Copy r_mas into b_node if there is anything to copy. */ 3464 if (r_mas.max > r_mas.last) 3465 mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, 3466 &b_node, b_node.b_end + 1); 3467 else 3468 b_node.b_end++; 3469 3470 /* Stop spanning searches by searching for just index. */ 3471 l_mas.index = l_mas.last = mas->index; 3472 3473 mast.bn = &b_node; 3474 mast.orig_l = &l_mas; 3475 mast.orig_r = &r_mas; 3476 /* Combine l_mas and r_mas and split them up evenly again. */ 3477 return mas_spanning_rebalance(mas, &mast, height + 1); 3478 } 3479 3480 /* 3481 * mas_wr_node_store() - Attempt to store the value in a node 3482 * @wr_mas: The maple write state 3483 * 3484 * Attempts to reuse the node, but may allocate. 3485 */ 3486 static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, 3487 unsigned char new_end) 3488 { 3489 struct ma_state *mas = wr_mas->mas; 3490 void __rcu **dst_slots; 3491 unsigned long *dst_pivots; 3492 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3493 struct maple_node reuse, *newnode; 3494 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3495 bool in_rcu = mt_in_rcu(mas->tree); 3496 unsigned char height = mas_mt_height(mas); 3497 3498 if (mas->last == wr_mas->end_piv) 3499 offset_end++; /* don't copy this offset */ 3500 3501 /* set up node. */ 3502 if (in_rcu) { 3503 newnode = mas_pop_node(mas); 3504 } else { 3505 memset(&reuse, 0, sizeof(struct maple_node)); 3506 newnode = &reuse; 3507 } 3508 3509 newnode->parent = mas_mn(mas)->parent; 3510 dst_pivots = ma_pivots(newnode, wr_mas->type); 3511 dst_slots = ma_slots(newnode, wr_mas->type); 3512 /* Copy from start to insert point */ 3513 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3514 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3515 3516 /* Handle insert of new range starting after old range */ 3517 if (wr_mas->r_min < mas->index) { 3518 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3519 dst_pivots[mas->offset++] = mas->index - 1; 3520 } 3521 3522 /* Store the new entry and range end. */ 3523 if (mas->offset < node_pivots) 3524 dst_pivots[mas->offset] = mas->last; 3525 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3526 3527 /* 3528 * this range wrote to the end of the node or it overwrote the rest of 3529 * the data 3530 */ 3531 if (offset_end > mas->end) 3532 goto done; 3533 3534 dst_offset = mas->offset + 1; 3535 /* Copy to the end of node if necessary. 
*/ 3536 copy_size = mas->end - offset_end + 1; 3537 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3538 sizeof(void *) * copy_size); 3539 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3540 sizeof(unsigned long) * (copy_size - 1)); 3541 3542 if (new_end < node_pivots) 3543 dst_pivots[new_end] = mas->max; 3544 3545 done: 3546 mas_leaf_set_meta(newnode, maple_leaf_64, new_end); 3547 if (in_rcu) { 3548 struct maple_enode *old_enode = mas->node; 3549 3550 mas->node = mt_mk_node(newnode, wr_mas->type); 3551 mas_replace_node(mas, old_enode, height); 3552 } else { 3553 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3554 } 3555 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3556 mas_update_gap(mas); 3557 mas->end = new_end; 3558 return; 3559 } 3560 3561 /* 3562 * mas_wr_slot_store: Attempt to store a value in a slot. 3563 * @wr_mas: the maple write state 3564 */ 3565 static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) 3566 { 3567 struct ma_state *mas = wr_mas->mas; 3568 unsigned char offset = mas->offset; 3569 void __rcu **slots = wr_mas->slots; 3570 bool gap = false; 3571 3572 gap |= !mt_slot_locked(mas->tree, slots, offset); 3573 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 3574 3575 if (wr_mas->offset_end - offset == 1) { 3576 if (mas->index == wr_mas->r_min) { 3577 /* Overwriting the range and a part of the next one */ 3578 rcu_assign_pointer(slots[offset], wr_mas->entry); 3579 wr_mas->pivots[offset] = mas->last; 3580 } else { 3581 /* Overwriting a part of the range and the next one */ 3582 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3583 wr_mas->pivots[offset] = mas->index - 1; 3584 mas->offset++; /* Keep mas accurate. */ 3585 } 3586 } else { 3587 WARN_ON_ONCE(mt_in_rcu(mas->tree)); 3588 /* 3589 * Expand the range, only partially overwriting the previous and 3590 * next ranges 3591 */ 3592 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 3593 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3594 wr_mas->pivots[offset] = mas->index - 1; 3595 wr_mas->pivots[offset + 1] = mas->last; 3596 mas->offset++; /* Keep mas accurate. */ 3597 } 3598 3599 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3600 /* 3601 * Only update gap when the new entry is empty or there is an empty 3602 * entry in the original two ranges. 
3603 */ 3604 if (!wr_mas->entry || gap) 3605 mas_update_gap(mas); 3606 3607 return; 3608 } 3609 3610 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 3611 { 3612 struct ma_state *mas = wr_mas->mas; 3613 3614 if (!wr_mas->slots[wr_mas->offset_end]) { 3615 /* If this one is null, the next and prev are not */ 3616 mas->last = wr_mas->end_piv; 3617 } else { 3618 /* Check next slot(s) if we are overwriting the end */ 3619 if ((mas->last == wr_mas->end_piv) && 3620 (mas->end != wr_mas->offset_end) && 3621 !wr_mas->slots[wr_mas->offset_end + 1]) { 3622 wr_mas->offset_end++; 3623 if (wr_mas->offset_end == mas->end) 3624 mas->last = mas->max; 3625 else 3626 mas->last = wr_mas->pivots[wr_mas->offset_end]; 3627 wr_mas->end_piv = mas->last; 3628 } 3629 } 3630 3631 if (!wr_mas->content) { 3632 /* If this one is null, the next and prev are not */ 3633 mas->index = wr_mas->r_min; 3634 } else { 3635 /* Check prev slot if we are overwriting the start */ 3636 if (mas->index == wr_mas->r_min && mas->offset && 3637 !wr_mas->slots[mas->offset - 1]) { 3638 mas->offset--; 3639 wr_mas->r_min = mas->index = 3640 mas_safe_min(mas, wr_mas->pivots, mas->offset); 3641 wr_mas->r_max = wr_mas->pivots[mas->offset]; 3642 } 3643 } 3644 } 3645 3646 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 3647 { 3648 while ((wr_mas->offset_end < wr_mas->mas->end) && 3649 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 3650 wr_mas->offset_end++; 3651 3652 if (wr_mas->offset_end < wr_mas->mas->end) 3653 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 3654 else 3655 wr_mas->end_piv = wr_mas->mas->max; 3656 } 3657 3658 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 3659 { 3660 struct ma_state *mas = wr_mas->mas; 3661 unsigned char new_end = mas->end + 2; 3662 3663 new_end -= wr_mas->offset_end - mas->offset; 3664 if (wr_mas->r_min == mas->index) 3665 new_end--; 3666 3667 if (wr_mas->end_piv == mas->last) 3668 new_end--; 3669 3670 return new_end; 3671 } 3672 3673 /* 3674 * mas_wr_append: Attempt to append 3675 * @wr_mas: the maple write state 3676 * @new_end: The end of the node after the modification 3677 * 3678 * This is currently unsafe in rcu mode since the end of the node may be cached 3679 * by readers while the node contents may be updated which could result in 3680 * inaccurate information. 3681 */ 3682 static inline void mas_wr_append(struct ma_wr_state *wr_mas, 3683 unsigned char new_end) 3684 { 3685 struct ma_state *mas = wr_mas->mas; 3686 void __rcu **slots; 3687 unsigned char end = mas->end; 3688 3689 if (new_end < mt_pivots[wr_mas->type]) { 3690 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 3691 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 3692 } 3693 3694 slots = wr_mas->slots; 3695 if (new_end == end + 1) { 3696 if (mas->last == wr_mas->r_max) { 3697 /* Append to end of range */ 3698 rcu_assign_pointer(slots[new_end], wr_mas->entry); 3699 wr_mas->pivots[end] = mas->index - 1; 3700 mas->offset = new_end; 3701 } else { 3702 /* Append to start of range */ 3703 rcu_assign_pointer(slots[new_end], wr_mas->content); 3704 wr_mas->pivots[end] = mas->last; 3705 rcu_assign_pointer(slots[end], wr_mas->entry); 3706 } 3707 } else { 3708 /* Append to the range without touching any boundaries. 
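For example, if the final slot holds [10, 50] => A and [20, 29] => B is stored, the end of the node becomes [10, 19] => A, [20, 29] => B, [30, 50] => A.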
*/ 3709 rcu_assign_pointer(slots[new_end], wr_mas->content); 3710 wr_mas->pivots[end + 1] = mas->last; 3711 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 3712 wr_mas->pivots[end] = mas->index - 1; 3713 mas->offset = end + 1; 3714 } 3715 3716 if (!wr_mas->content || !wr_mas->entry) 3717 mas_update_gap(mas); 3718 3719 mas->end = new_end; 3720 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 3721 return; 3722 } 3723 3724 /* 3725 * mas_wr_bnode() - Slow path for a modification. 3726 * @wr_mas: The write maple state 3727 * 3728 * This is where split, rebalance end up. 3729 */ 3730 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 3731 { 3732 struct maple_big_node b_node; 3733 3734 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 3735 memset(&b_node, 0, sizeof(struct maple_big_node)); 3736 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 3737 mas_commit_b_node(wr_mas, &b_node); 3738 } 3739 3740 /* 3741 * mas_wr_store_entry() - Internal call to store a value 3742 * @wr_mas: The maple write state 3743 */ 3744 static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas) 3745 { 3746 struct ma_state *mas = wr_mas->mas; 3747 unsigned char new_end = mas_wr_new_end(wr_mas); 3748 3749 switch (mas->store_type) { 3750 case wr_exact_fit: 3751 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 3752 if (!!wr_mas->entry ^ !!wr_mas->content) 3753 mas_update_gap(mas); 3754 break; 3755 case wr_append: 3756 mas_wr_append(wr_mas, new_end); 3757 break; 3758 case wr_slot_store: 3759 mas_wr_slot_store(wr_mas); 3760 break; 3761 case wr_node_store: 3762 mas_wr_node_store(wr_mas, new_end); 3763 break; 3764 case wr_spanning_store: 3765 mas_wr_spanning_store(wr_mas); 3766 break; 3767 case wr_split_store: 3768 case wr_rebalance: 3769 mas_wr_bnode(wr_mas); 3770 break; 3771 case wr_new_root: 3772 mas_new_root(mas, wr_mas->entry); 3773 break; 3774 case wr_store_root: 3775 mas_store_root(mas, wr_mas->entry); 3776 break; 3777 case wr_invalid: 3778 MT_BUG_ON(mas->tree, 1); 3779 } 3780 3781 return; 3782 } 3783 3784 static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas) 3785 { 3786 struct ma_state *mas = wr_mas->mas; 3787 3788 if (!mas_is_active(mas)) { 3789 if (mas_is_start(mas)) 3790 goto set_content; 3791 3792 if (unlikely(mas_is_paused(mas))) 3793 goto reset; 3794 3795 if (unlikely(mas_is_none(mas))) 3796 goto reset; 3797 3798 if (unlikely(mas_is_overflow(mas))) 3799 goto reset; 3800 3801 if (unlikely(mas_is_underflow(mas))) 3802 goto reset; 3803 } 3804 3805 /* 3806 * A less strict version of mas_is_span_wr() where we allow spanning 3807 * writes within this node. This is to stop partial walks in 3808 * mas_prealloc() from being reset. 3809 */ 3810 if (mas->last > mas->max) 3811 goto reset; 3812 3813 if (wr_mas->entry) 3814 goto set_content; 3815 3816 if (mte_is_leaf(mas->node) && mas->last == mas->max) 3817 goto reset; 3818 3819 goto set_content; 3820 3821 reset: 3822 mas_reset(mas); 3823 set_content: 3824 wr_mas->content = mas_start(mas); 3825 } 3826 3827 /** 3828 * mas_prealloc_calc() - Calculate number of nodes needed for a 3829 * given store oepration 3830 * @wr_mas: The maple write state 3831 * @entry: The entry to store into the tree 3832 * 3833 * Return: Number of nodes required for preallocation. 
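 * The count is stored in mas->node_request, which mas_wr_preallocate() checks
 * before allocating.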
3834 */ 3835 static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry) 3836 { 3837 struct ma_state *mas = wr_mas->mas; 3838 unsigned char height = mas_mt_height(mas); 3839 int ret = height * 3 + 1; 3840 unsigned char delta = height - wr_mas->vacant_height; 3841 3842 switch (mas->store_type) { 3843 case wr_exact_fit: 3844 case wr_append: 3845 case wr_slot_store: 3846 ret = 0; 3847 break; 3848 case wr_spanning_store: 3849 if (wr_mas->sufficient_height < wr_mas->vacant_height) 3850 ret = (height - wr_mas->sufficient_height) * 3 + 1; 3851 else 3852 ret = delta * 3 + 1; 3853 break; 3854 case wr_split_store: 3855 ret = delta * 2 + 1; 3856 break; 3857 case wr_rebalance: 3858 if (wr_mas->sufficient_height < wr_mas->vacant_height) 3859 ret = (height - wr_mas->sufficient_height) * 2 + 1; 3860 else 3861 ret = delta * 2 + 1; 3862 break; 3863 case wr_node_store: 3864 ret = mt_in_rcu(mas->tree) ? 1 : 0; 3865 break; 3866 case wr_new_root: 3867 ret = 1; 3868 break; 3869 case wr_store_root: 3870 if (likely((mas->last != 0) || (mas->index != 0))) 3871 ret = 1; 3872 else if (((unsigned long) (entry) & 3) == 2) 3873 ret = 1; 3874 else 3875 ret = 0; 3876 break; 3877 case wr_invalid: 3878 WARN_ON_ONCE(1); 3879 } 3880 3881 mas->node_request = ret; 3882 } 3883 3884 /* 3885 * mas_wr_store_type() - Determine the store type for a given 3886 * store operation. 3887 * @wr_mas: The maple write state 3888 * 3889 * Return: the type of store needed for the operation 3890 */ 3891 static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) 3892 { 3893 struct ma_state *mas = wr_mas->mas; 3894 unsigned char new_end; 3895 3896 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 3897 return wr_store_root; 3898 3899 if (unlikely(!mas_wr_walk(wr_mas))) 3900 return wr_spanning_store; 3901 3902 /* At this point, we are at the leaf node that needs to be altered. */ 3903 mas_wr_end_piv(wr_mas); 3904 if (!wr_mas->entry) 3905 mas_wr_extend_null(wr_mas); 3906 3907 if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) 3908 return wr_exact_fit; 3909 3910 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3911 return wr_new_root; 3912 3913 new_end = mas_wr_new_end(wr_mas); 3914 /* Potential spanning rebalance collapsing a node */ 3915 if (new_end < mt_min_slots[wr_mas->type]) { 3916 if (!mte_is_root(mas->node)) 3917 return wr_rebalance; 3918 return wr_node_store; 3919 } 3920 3921 if (new_end >= mt_slots[wr_mas->type]) 3922 return wr_split_store; 3923 3924 if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) 3925 return wr_append; 3926 3927 if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || 3928 (wr_mas->offset_end - mas->offset == 1))) 3929 return wr_slot_store; 3930 3931 return wr_node_store; 3932 } 3933 3934 /** 3935 * mas_wr_preallocate() - Preallocate enough nodes for a store operation 3936 * @wr_mas: The maple write state 3937 * @entry: The entry that will be stored 3938 * 3939 */ 3940 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) 3941 { 3942 struct ma_state *mas = wr_mas->mas; 3943 3944 mas_wr_prealloc_setup(wr_mas); 3945 mas->store_type = mas_wr_store_type(wr_mas); 3946 mas_prealloc_calc(wr_mas, entry); 3947 if (!mas->node_request) 3948 return; 3949 3950 mas_alloc_nodes(mas, GFP_NOWAIT); 3951 } 3952 3953 /** 3954 * mas_insert() - Internal call to insert a value 3955 * @mas: The maple state 3956 * @entry: The entry to store 3957 * 3958 * Return: %NULL or the contents that already exists at the requested index 3959 * otherwise. 
The maple state needs to be checked for error conditions. 3960 */ 3961 static inline void *mas_insert(struct ma_state *mas, void *entry) 3962 { 3963 MA_WR_STATE(wr_mas, mas, entry); 3964 3965 /* 3966 * Inserting a new range inserts either 0, 1, or 2 pivots within the 3967 * tree. If the insert fits exactly into an existing gap with a value 3968 * of NULL, then the slot only needs to be written with the new value. 3969 * If the range being inserted is adjacent to another range, then only a 3970 * single pivot needs to be inserted (as well as writing the entry). If 3971 * the new range is within a gap but does not touch any other ranges, 3972 * then two pivots need to be inserted: the start - 1, and the end. As 3973 * usual, the entry must be written. Most operations require a new node 3974 * to be allocated and replace an existing node to ensure RCU safety, 3975 * when in RCU mode. The exception to requiring a newly allocated node 3976 * is when inserting at the end of a node (appending). When done 3977 * carefully, appending can reuse the node in place. 3978 */ 3979 wr_mas.content = mas_start(mas); 3980 if (wr_mas.content) 3981 goto exists; 3982 3983 mas_wr_preallocate(&wr_mas, entry); 3984 if (mas_is_err(mas)) 3985 return NULL; 3986 3987 /* spanning writes always overwrite something */ 3988 if (mas->store_type == wr_spanning_store) 3989 goto exists; 3990 3991 /* At this point, we are at the leaf node that needs to be altered. */ 3992 if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) { 3993 wr_mas.offset_end = mas->offset; 3994 wr_mas.end_piv = wr_mas.r_max; 3995 3996 if (wr_mas.content || (mas->last > wr_mas.r_max)) 3997 goto exists; 3998 } 3999 4000 mas_wr_store_entry(&wr_mas); 4001 return wr_mas.content; 4002 4003 exists: 4004 mas_set_err(mas, -EEXIST); 4005 return wr_mas.content; 4006 4007 } 4008 4009 /** 4010 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry 4011 * @mas: The maple state. 4012 * @startp: Pointer to ID. 4013 * @range_lo: Lower bound of range to search. 4014 * @range_hi: Upper bound of range to search. 4015 * @entry: The entry to store. 4016 * @next: Pointer to next ID to allocate. 4017 * @gfp: The GFP_FLAGS to use for allocations. 4018 * 4019 * Return: 0 if the allocation succeeded without wrapping, 1 if the 4020 * allocation succeeded after wrapping, or -EBUSY if there are no 4021 * free entries. 
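 *
 * An illustrative sketch (placeholder names; the caller holds the tree lock
 * and the tree was created with MT_FLAGS_ALLOC_RANGE):
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	unsigned long id, next = 0;	/* @next normally persists across calls */
 *	int ret;
 *
 *	ret = mas_alloc_cyclic(&mas, &id, item, 1, 1023, &next, GFP_KERNEL);
 *	if (ret >= 0)
 *		pr_debug("allocated id %lu\n", id);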
4022 */ 4023 int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, 4024 void *entry, unsigned long range_lo, unsigned long range_hi, 4025 unsigned long *next, gfp_t gfp) 4026 { 4027 unsigned long min = range_lo; 4028 int ret = 0; 4029 4030 range_lo = max(min, *next); 4031 ret = mas_empty_area(mas, range_lo, range_hi, 1); 4032 if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) { 4033 mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED; 4034 ret = 1; 4035 } 4036 if (ret < 0 && range_lo > min) { 4037 mas_reset(mas); 4038 ret = mas_empty_area(mas, min, range_hi, 1); 4039 if (ret == 0) 4040 ret = 1; 4041 } 4042 if (ret < 0) 4043 return ret; 4044 4045 do { 4046 mas_insert(mas, entry); 4047 } while (mas_nomem(mas, gfp)); 4048 if (mas_is_err(mas)) 4049 return xa_err(mas->node); 4050 4051 *startp = mas->index; 4052 *next = *startp + 1; 4053 if (*next == 0) 4054 mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; 4055 4056 mas_destroy(mas); 4057 return ret; 4058 } 4059 EXPORT_SYMBOL(mas_alloc_cyclic); 4060 4061 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4062 { 4063 retry: 4064 mas_set(mas, index); 4065 mas_state_walk(mas); 4066 if (mas_is_start(mas)) 4067 goto retry; 4068 } 4069 4070 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, 4071 struct maple_node *node, const unsigned long index) 4072 { 4073 if (unlikely(ma_dead_node(node))) { 4074 mas_rewalk(mas, index); 4075 return true; 4076 } 4077 return false; 4078 } 4079 4080 /* 4081 * mas_prev_node() - Find the prev non-null entry at the same level in the 4082 * tree. The prev value will be mas->node[mas->offset] or the status will be 4083 * ma_none. 4084 * @mas: The maple state 4085 * @min: The lower limit to search 4086 * 4087 * The prev node value will be mas->node[mas->offset] or the status will be 4088 * ma_none. 4089 * Return: 1 if the node is dead, 0 otherwise. 4090 */ 4091 static int mas_prev_node(struct ma_state *mas, unsigned long min) 4092 { 4093 enum maple_type mt; 4094 int offset, level; 4095 void __rcu **slots; 4096 struct maple_node *node; 4097 unsigned long *pivots; 4098 unsigned long max; 4099 4100 node = mas_mn(mas); 4101 if (!mas->min) 4102 goto no_entry; 4103 4104 max = mas->min - 1; 4105 if (max < min) 4106 goto no_entry; 4107 4108 level = 0; 4109 do { 4110 if (ma_is_root(node)) 4111 goto no_entry; 4112 4113 /* Walk up. 
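		 * Keep ascending while the offset in the parent is zero: a zero
		 * offset means there is no previous sibling at this level, so the
		 * previous entry can only be found higher up, or not at all if the
		 * root is reached first.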
*/ 4114 if (unlikely(mas_ascend(mas))) 4115 return 1; 4116 offset = mas->offset; 4117 level++; 4118 node = mas_mn(mas); 4119 } while (!offset); 4120 4121 offset--; 4122 mt = mte_node_type(mas->node); 4123 while (level > 1) { 4124 level--; 4125 slots = ma_slots(node, mt); 4126 mas->node = mas_slot(mas, slots, offset); 4127 if (unlikely(ma_dead_node(node))) 4128 return 1; 4129 4130 mt = mte_node_type(mas->node); 4131 node = mas_mn(mas); 4132 pivots = ma_pivots(node, mt); 4133 offset = ma_data_end(node, mt, pivots, max); 4134 if (unlikely(ma_dead_node(node))) 4135 return 1; 4136 } 4137 4138 slots = ma_slots(node, mt); 4139 mas->node = mas_slot(mas, slots, offset); 4140 pivots = ma_pivots(node, mt); 4141 if (unlikely(ma_dead_node(node))) 4142 return 1; 4143 4144 if (likely(offset)) 4145 mas->min = pivots[offset - 1] + 1; 4146 mas->max = max; 4147 mas->offset = mas_data_end(mas); 4148 if (unlikely(mte_dead_node(mas->node))) 4149 return 1; 4150 4151 mas->end = mas->offset; 4152 return 0; 4153 4154 no_entry: 4155 if (unlikely(ma_dead_node(node))) 4156 return 1; 4157 4158 mas->status = ma_underflow; 4159 return 0; 4160 } 4161 4162 /* 4163 * mas_prev_slot() - Get the entry in the previous slot 4164 * 4165 * @mas: The maple state 4166 * @min: The minimum starting range 4167 * @empty: Can be empty 4168 * 4169 * Return: The entry in the previous slot which is possibly NULL 4170 */ 4171 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4172 { 4173 void *entry; 4174 void __rcu **slots; 4175 unsigned long pivot; 4176 enum maple_type type; 4177 unsigned long *pivots; 4178 struct maple_node *node; 4179 unsigned long save_point = mas->index; 4180 4181 retry: 4182 node = mas_mn(mas); 4183 type = mte_node_type(mas->node); 4184 pivots = ma_pivots(node, type); 4185 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4186 goto retry; 4187 4188 if (mas->min <= min) { 4189 pivot = mas_safe_min(mas, pivots, mas->offset); 4190 4191 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4192 goto retry; 4193 4194 if (pivot <= min) 4195 goto underflow; 4196 } 4197 4198 again: 4199 if (likely(mas->offset)) { 4200 mas->offset--; 4201 mas->last = mas->index - 1; 4202 mas->index = mas_safe_min(mas, pivots, mas->offset); 4203 } else { 4204 if (mas->index <= min) 4205 goto underflow; 4206 4207 if (mas_prev_node(mas, min)) { 4208 mas_rewalk(mas, save_point); 4209 goto retry; 4210 } 4211 4212 if (WARN_ON_ONCE(mas_is_underflow(mas))) 4213 return NULL; 4214 4215 mas->last = mas->max; 4216 node = mas_mn(mas); 4217 type = mte_node_type(mas->node); 4218 pivots = ma_pivots(node, type); 4219 mas->index = pivots[mas->offset - 1] + 1; 4220 } 4221 4222 slots = ma_slots(node, type); 4223 entry = mas_slot(mas, slots, mas->offset); 4224 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4225 goto retry; 4226 4227 if (likely(entry)) 4228 return entry; 4229 4230 if (!empty) { 4231 if (mas->index <= min) 4232 goto underflow; 4233 4234 goto again; 4235 } 4236 4237 return entry; 4238 4239 underflow: 4240 mas->status = ma_underflow; 4241 return NULL; 4242 } 4243 4244 /* 4245 * mas_next_node() - Get the next node at the same level in the tree. 4246 * @mas: The maple state 4247 * @node: The maple node 4248 * @max: The maximum pivot value to check. 4249 * 4250 * The next value will be mas->node[mas->offset] or the status will have 4251 * overflowed. 4252 * Return: 1 on dead node, 0 otherwise. 
4253 */ 4254 static int mas_next_node(struct ma_state *mas, struct maple_node *node, 4255 unsigned long max) 4256 { 4257 unsigned long min; 4258 unsigned long *pivots; 4259 struct maple_enode *enode; 4260 struct maple_node *tmp; 4261 int level = 0; 4262 unsigned char node_end; 4263 enum maple_type mt; 4264 void __rcu **slots; 4265 4266 if (mas->max >= max) 4267 goto overflow; 4268 4269 min = mas->max + 1; 4270 level = 0; 4271 do { 4272 if (ma_is_root(node)) 4273 goto overflow; 4274 4275 /* Walk up. */ 4276 if (unlikely(mas_ascend(mas))) 4277 return 1; 4278 4279 level++; 4280 node = mas_mn(mas); 4281 mt = mte_node_type(mas->node); 4282 pivots = ma_pivots(node, mt); 4283 node_end = ma_data_end(node, mt, pivots, mas->max); 4284 if (unlikely(ma_dead_node(node))) 4285 return 1; 4286 4287 } while (unlikely(mas->offset == node_end)); 4288 4289 slots = ma_slots(node, mt); 4290 mas->offset++; 4291 enode = mas_slot(mas, slots, mas->offset); 4292 if (unlikely(ma_dead_node(node))) 4293 return 1; 4294 4295 if (level > 1) 4296 mas->offset = 0; 4297 4298 while (unlikely(level > 1)) { 4299 level--; 4300 mas->node = enode; 4301 node = mas_mn(mas); 4302 mt = mte_node_type(mas->node); 4303 slots = ma_slots(node, mt); 4304 enode = mas_slot(mas, slots, 0); 4305 if (unlikely(ma_dead_node(node))) 4306 return 1; 4307 } 4308 4309 if (!mas->offset) 4310 pivots = ma_pivots(node, mt); 4311 4312 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4313 tmp = mte_to_node(enode); 4314 mt = mte_node_type(enode); 4315 pivots = ma_pivots(tmp, mt); 4316 mas->end = ma_data_end(tmp, mt, pivots, mas->max); 4317 if (unlikely(ma_dead_node(node))) 4318 return 1; 4319 4320 mas->node = enode; 4321 mas->min = min; 4322 return 0; 4323 4324 overflow: 4325 if (unlikely(ma_dead_node(node))) 4326 return 1; 4327 4328 mas->status = ma_overflow; 4329 return 0; 4330 } 4331 4332 /* 4333 * mas_next_slot() - Get the entry in the next slot 4334 * 4335 * @mas: The maple state 4336 * @max: The maximum starting range 4337 * @empty: Can be empty 4338 * 4339 * Return: The entry in the next slot which is possibly NULL 4340 */ 4341 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4342 { 4343 void __rcu **slots; 4344 unsigned long *pivots; 4345 unsigned long pivot; 4346 enum maple_type type; 4347 struct maple_node *node; 4348 unsigned long save_point = mas->last; 4349 void *entry; 4350 4351 retry: 4352 node = mas_mn(mas); 4353 type = mte_node_type(mas->node); 4354 pivots = ma_pivots(node, type); 4355 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4356 goto retry; 4357 4358 if (mas->max >= max) { 4359 if (likely(mas->offset < mas->end)) 4360 pivot = pivots[mas->offset]; 4361 else 4362 pivot = mas->max; 4363 4364 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4365 goto retry; 4366 4367 if (pivot >= max) { /* Was at the limit, next will extend beyond */ 4368 mas->status = ma_overflow; 4369 return NULL; 4370 } 4371 } 4372 4373 if (likely(mas->offset < mas->end)) { 4374 mas->index = pivots[mas->offset] + 1; 4375 again: 4376 mas->offset++; 4377 if (likely(mas->offset < mas->end)) 4378 mas->last = pivots[mas->offset]; 4379 else 4380 mas->last = mas->max; 4381 } else { 4382 if (mas->last >= max) { 4383 mas->status = ma_overflow; 4384 return NULL; 4385 } 4386 4387 if (mas_next_node(mas, node, max)) { 4388 mas_rewalk(mas, save_point); 4389 goto retry; 4390 } 4391 4392 if (WARN_ON_ONCE(mas_is_overflow(mas))) 4393 return NULL; 4394 4395 mas->offset = 0; 4396 mas->index = mas->min; 4397 node = mas_mn(mas); 4398 
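		/* Moved to a new node; reload its type and pivots for the new range. */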
type = mte_node_type(mas->node); 4399 pivots = ma_pivots(node, type); 4400 mas->last = pivots[0]; 4401 } 4402 4403 slots = ma_slots(node, type); 4404 entry = mt_slot(mas->tree, slots, mas->offset); 4405 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4406 goto retry; 4407 4408 if (entry) 4409 return entry; 4410 4411 4412 if (!empty) { 4413 if (mas->last >= max) { 4414 mas->status = ma_overflow; 4415 return NULL; 4416 } 4417 4418 mas->index = mas->last + 1; 4419 goto again; 4420 } 4421 4422 return entry; 4423 } 4424 4425 /* 4426 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4427 * highest gap address of a given size in a given node and descend. 4428 * @mas: The maple state 4429 * @size: The needed size. 4430 * 4431 * Return: True if found in a leaf, false otherwise. 4432 * 4433 */ 4434 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4435 unsigned long *gap_min, unsigned long *gap_max) 4436 { 4437 enum maple_type type = mte_node_type(mas->node); 4438 struct maple_node *node = mas_mn(mas); 4439 unsigned long *pivots, *gaps; 4440 void __rcu **slots; 4441 unsigned long gap = 0; 4442 unsigned long max, min; 4443 unsigned char offset; 4444 4445 if (unlikely(mas_is_err(mas))) 4446 return true; 4447 4448 if (ma_is_dense(type)) { 4449 /* dense nodes. */ 4450 mas->offset = (unsigned char)(mas->index - mas->min); 4451 return true; 4452 } 4453 4454 pivots = ma_pivots(node, type); 4455 slots = ma_slots(node, type); 4456 gaps = ma_gaps(node, type); 4457 offset = mas->offset; 4458 min = mas_safe_min(mas, pivots, offset); 4459 /* Skip out of bounds. */ 4460 while (mas->last < min) 4461 min = mas_safe_min(mas, pivots, --offset); 4462 4463 max = mas_safe_pivot(mas, pivots, offset, type); 4464 while (mas->index <= max) { 4465 gap = 0; 4466 if (gaps) 4467 gap = gaps[offset]; 4468 else if (!mas_slot(mas, slots, offset)) 4469 gap = max - min + 1; 4470 4471 if (gap) { 4472 if ((size <= gap) && (size <= mas->last - min + 1)) 4473 break; 4474 4475 if (!gaps) { 4476 /* Skip the next slot, it cannot be a gap. */ 4477 if (offset < 2) 4478 goto ascend; 4479 4480 offset -= 2; 4481 max = pivots[offset]; 4482 min = mas_safe_min(mas, pivots, offset); 4483 continue; 4484 } 4485 } 4486 4487 if (!offset) 4488 goto ascend; 4489 4490 offset--; 4491 max = min - 1; 4492 min = mas_safe_min(mas, pivots, offset); 4493 } 4494 4495 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4496 goto no_space; 4497 4498 if (unlikely(ma_is_leaf(type))) { 4499 mas->offset = offset; 4500 *gap_min = min; 4501 *gap_max = min + gap - 1; 4502 return true; 4503 } 4504 4505 /* descend, only happens under lock. 
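	 * The caller holds the tree lock, so the child slot cannot be freed or
	 * replaced while it is being followed.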
*/ 4506 mas->node = mas_slot(mas, slots, offset); 4507 mas->min = min; 4508 mas->max = max; 4509 mas->offset = mas_data_end(mas); 4510 return false; 4511 4512 ascend: 4513 if (!mte_is_root(mas->node)) 4514 return false; 4515 4516 no_space: 4517 mas_set_err(mas, -EBUSY); 4518 return false; 4519 } 4520 4521 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4522 { 4523 enum maple_type type = mte_node_type(mas->node); 4524 unsigned long pivot, min, gap = 0; 4525 unsigned char offset, data_end; 4526 unsigned long *gaps, *pivots; 4527 void __rcu **slots; 4528 struct maple_node *node; 4529 bool found = false; 4530 4531 if (ma_is_dense(type)) { 4532 mas->offset = (unsigned char)(mas->index - mas->min); 4533 return true; 4534 } 4535 4536 node = mas_mn(mas); 4537 pivots = ma_pivots(node, type); 4538 slots = ma_slots(node, type); 4539 gaps = ma_gaps(node, type); 4540 offset = mas->offset; 4541 min = mas_safe_min(mas, pivots, offset); 4542 data_end = ma_data_end(node, type, pivots, mas->max); 4543 for (; offset <= data_end; offset++) { 4544 pivot = mas_safe_pivot(mas, pivots, offset, type); 4545 4546 /* Not within lower bounds */ 4547 if (mas->index > pivot) 4548 goto next_slot; 4549 4550 if (gaps) 4551 gap = gaps[offset]; 4552 else if (!mas_slot(mas, slots, offset)) 4553 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4554 else 4555 goto next_slot; 4556 4557 if (gap >= size) { 4558 if (ma_is_leaf(type)) { 4559 found = true; 4560 break; 4561 } 4562 4563 mas->node = mas_slot(mas, slots, offset); 4564 mas->min = min; 4565 mas->max = pivot; 4566 offset = 0; 4567 break; 4568 } 4569 next_slot: 4570 min = pivot + 1; 4571 if (mas->last <= pivot) { 4572 mas_set_err(mas, -EBUSY); 4573 return true; 4574 } 4575 } 4576 4577 mas->offset = offset; 4578 return found; 4579 } 4580 4581 /** 4582 * mas_walk() - Search for @mas->index in the tree. 4583 * @mas: The maple state. 4584 * 4585 * mas->index and mas->last will be set to the range if there is a value. If 4586 * mas->status is ma_none, reset to ma_start 4587 * 4588 * Return: the entry at the location or %NULL. 4589 */ 4590 void *mas_walk(struct ma_state *mas) 4591 { 4592 void *entry; 4593 4594 if (!mas_is_active(mas) && !mas_is_start(mas)) 4595 mas->status = ma_start; 4596 retry: 4597 entry = mas_state_walk(mas); 4598 if (mas_is_start(mas)) { 4599 goto retry; 4600 } else if (mas_is_none(mas)) { 4601 mas->index = 0; 4602 mas->last = ULONG_MAX; 4603 } else if (mas_is_ptr(mas)) { 4604 if (!mas->index) { 4605 mas->last = 0; 4606 return entry; 4607 } 4608 4609 mas->index = 1; 4610 mas->last = ULONG_MAX; 4611 mas->status = ma_none; 4612 return NULL; 4613 } 4614 4615 return entry; 4616 } 4617 EXPORT_SYMBOL_GPL(mas_walk); 4618 4619 static inline bool mas_rewind_node(struct ma_state *mas) 4620 { 4621 unsigned char slot; 4622 4623 do { 4624 if (mte_is_root(mas->node)) { 4625 slot = mas->offset; 4626 if (!slot) 4627 return false; 4628 } else { 4629 mas_ascend(mas); 4630 slot = mas->offset; 4631 } 4632 } while (!slot); 4633 4634 mas->offset = --slot; 4635 return true; 4636 } 4637 4638 /* 4639 * mas_skip_node() - Internal function. Skip over a node. 4640 * @mas: The maple state. 4641 * 4642 * Return: true if there is another node, false otherwise. 
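 * On a false return the maple state holds an error (-EBUSY when the walk has
 * run past the root), so the gap search cannot continue.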
4643 */ 4644 static inline bool mas_skip_node(struct ma_state *mas) 4645 { 4646 if (mas_is_err(mas)) 4647 return false; 4648 4649 do { 4650 if (mte_is_root(mas->node)) { 4651 if (mas->offset >= mas_data_end(mas)) { 4652 mas_set_err(mas, -EBUSY); 4653 return false; 4654 } 4655 } else { 4656 mas_ascend(mas); 4657 } 4658 } while (mas->offset >= mas_data_end(mas)); 4659 4660 mas->offset++; 4661 return true; 4662 } 4663 4664 /* 4665 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4666 * @size 4667 * @mas: The maple state 4668 * @size: The size of the gap required 4669 * 4670 * Search between @mas->index and @mas->last for a gap of @size. 4671 */ 4672 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4673 { 4674 struct maple_enode *last = NULL; 4675 4676 /* 4677 * There are 4 options: 4678 * go to child (descend) 4679 * go back to parent (ascend) 4680 * no gap found. (return, error == -EBUSY) 4681 * found the gap. (return) 4682 */ 4683 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4684 if (last == mas->node) 4685 mas_skip_node(mas); 4686 else 4687 last = mas->node; 4688 } 4689 } 4690 4691 /* 4692 * mas_sparse_area() - Internal function. Return upper or lower limit when 4693 * searching for a gap in an empty tree. 4694 * @mas: The maple state 4695 * @min: the minimum range 4696 * @max: The maximum range 4697 * @size: The size of the gap 4698 * @fwd: Searching forward or back 4699 */ 4700 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 4701 unsigned long max, unsigned long size, bool fwd) 4702 { 4703 if (!unlikely(mas_is_none(mas)) && min == 0) { 4704 min++; 4705 /* 4706 * At this time, min is increased, we need to recheck whether 4707 * the size is satisfied. 4708 */ 4709 if (min > max || max - min + 1 < size) 4710 return -EBUSY; 4711 } 4712 /* mas_is_ptr */ 4713 4714 if (fwd) { 4715 mas->index = min; 4716 mas->last = min + size - 1; 4717 } else { 4718 mas->last = max; 4719 mas->index = max - size + 1; 4720 } 4721 return 0; 4722 } 4723 4724 /* 4725 * mas_empty_area() - Get the lowest address within the range that is 4726 * sufficient for the size requested. 
4727 * @mas: The maple state 4728 * @min: The lowest value of the range 4729 * @max: The highest value of the range 4730 * @size: The size needed 4731 */ 4732 int mas_empty_area(struct ma_state *mas, unsigned long min, 4733 unsigned long max, unsigned long size) 4734 { 4735 unsigned char offset; 4736 unsigned long *pivots; 4737 enum maple_type mt; 4738 struct maple_node *node; 4739 4740 if (min > max) 4741 return -EINVAL; 4742 4743 if (size == 0 || max - min < size - 1) 4744 return -EINVAL; 4745 4746 if (mas_is_start(mas)) 4747 mas_start(mas); 4748 else if (mas->offset >= 2) 4749 mas->offset -= 2; 4750 else if (!mas_skip_node(mas)) 4751 return -EBUSY; 4752 4753 /* Empty set */ 4754 if (mas_is_none(mas) || mas_is_ptr(mas)) 4755 return mas_sparse_area(mas, min, max, size, true); 4756 4757 /* The start of the window can only be within these values */ 4758 mas->index = min; 4759 mas->last = max; 4760 mas_awalk(mas, size); 4761 4762 if (unlikely(mas_is_err(mas))) 4763 return xa_err(mas->node); 4764 4765 offset = mas->offset; 4766 node = mas_mn(mas); 4767 mt = mte_node_type(mas->node); 4768 pivots = ma_pivots(node, mt); 4769 min = mas_safe_min(mas, pivots, offset); 4770 if (mas->index < min) 4771 mas->index = min; 4772 mas->last = mas->index + size - 1; 4773 mas->end = ma_data_end(node, mt, pivots, mas->max); 4774 return 0; 4775 } 4776 EXPORT_SYMBOL_GPL(mas_empty_area); 4777 4778 /* 4779 * mas_empty_area_rev() - Get the highest address within the range that is 4780 * sufficient for the size requested. 4781 * @mas: The maple state 4782 * @min: The lowest value of the range 4783 * @max: The highest value of the range 4784 * @size: The size needed 4785 */ 4786 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 4787 unsigned long max, unsigned long size) 4788 { 4789 struct maple_enode *last = mas->node; 4790 4791 if (min > max) 4792 return -EINVAL; 4793 4794 if (size == 0 || max - min < size - 1) 4795 return -EINVAL; 4796 4797 if (mas_is_start(mas)) 4798 mas_start(mas); 4799 else if ((mas->offset < 2) && (!mas_rewind_node(mas))) 4800 return -EBUSY; 4801 4802 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 4803 return mas_sparse_area(mas, min, max, size, false); 4804 else if (mas->offset >= 2) 4805 mas->offset -= 2; 4806 else 4807 mas->offset = mas_data_end(mas); 4808 4809 4810 /* The start of the window can only be within these values. */ 4811 mas->index = min; 4812 mas->last = max; 4813 4814 while (!mas_rev_awalk(mas, size, &min, &max)) { 4815 if (last == mas->node) { 4816 if (!mas_rewind_node(mas)) 4817 return -EBUSY; 4818 } else { 4819 last = mas->node; 4820 } 4821 } 4822 4823 if (mas_is_err(mas)) 4824 return xa_err(mas->node); 4825 4826 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 4827 return -EBUSY; 4828 4829 /* Trim the upper limit to the max. */ 4830 if (max < mas->last) 4831 mas->last = max; 4832 4833 mas->index = mas->last - size + 1; 4834 mas->end = mas_data_end(mas); 4835 return 0; 4836 } 4837 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 4838 4839 /* 4840 * mte_dead_leaves() - Mark all leaves of a node as dead. 4841 * @enode: the encoded node 4842 * @mt: the maple tree 4843 * @slots: Pointer to the slot array 4844 * 4845 * Must hold the write lock. 4846 * 4847 * Return: The number of leaves marked as dead. 
4848 */ 4849 static inline 4850 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 4851 void __rcu **slots) 4852 { 4853 struct maple_node *node; 4854 enum maple_type type; 4855 void *entry; 4856 int offset; 4857 4858 for (offset = 0; offset < mt_slot_count(enode); offset++) { 4859 entry = mt_slot(mt, slots, offset); 4860 type = mte_node_type(entry); 4861 node = mte_to_node(entry); 4862 /* Use both node and type to catch LE & BE metadata */ 4863 if (!node || !type) 4864 break; 4865 4866 mte_set_node_dead(entry); 4867 node->type = type; 4868 rcu_assign_pointer(slots[offset], node); 4869 } 4870 4871 return offset; 4872 } 4873 4874 /** 4875 * mte_dead_walk() - Walk down a dead tree to just before the leaves 4876 * @enode: The maple encoded node 4877 * @offset: The starting offset 4878 * 4879 * Note: This can only be used from the RCU callback context. 4880 */ 4881 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 4882 { 4883 struct maple_node *node, *next; 4884 void __rcu **slots = NULL; 4885 4886 next = mte_to_node(*enode); 4887 do { 4888 *enode = ma_enode_ptr(next); 4889 node = mte_to_node(*enode); 4890 slots = ma_slots(node, node->type); 4891 next = rcu_dereference_protected(slots[offset], 4892 lock_is_held(&rcu_callback_map)); 4893 offset = 0; 4894 } while (!ma_is_leaf(next->type)); 4895 4896 return slots; 4897 } 4898 4899 /** 4900 * mt_free_walk() - Walk & free a tree in the RCU callback context 4901 * @head: The RCU head that's within the node. 4902 * 4903 * Note: This can only be used from the RCU callback context. 4904 */ 4905 static void mt_free_walk(struct rcu_head *head) 4906 { 4907 void __rcu **slots; 4908 struct maple_node *node, *start; 4909 struct maple_enode *enode; 4910 unsigned char offset; 4911 enum maple_type type; 4912 4913 node = container_of(head, struct maple_node, rcu); 4914 4915 if (ma_is_leaf(node->type)) 4916 goto free_leaf; 4917 4918 start = node; 4919 enode = mt_mk_node(node, node->type); 4920 slots = mte_dead_walk(&enode, 0); 4921 node = mte_to_node(enode); 4922 do { 4923 mt_free_bulk(node->slot_len, slots); 4924 offset = node->parent_slot + 1; 4925 enode = node->piv_parent; 4926 if (mte_to_node(enode) == node) 4927 goto free_leaf; 4928 4929 type = mte_node_type(enode); 4930 slots = ma_slots(mte_to_node(enode), type); 4931 if ((offset < mt_slots[type]) && 4932 rcu_dereference_protected(slots[offset], 4933 lock_is_held(&rcu_callback_map))) 4934 slots = mte_dead_walk(&enode, offset); 4935 node = mte_to_node(enode); 4936 } while ((node != start) || (node->slot_len < offset)); 4937 4938 slots = ma_slots(node, node->type); 4939 mt_free_bulk(node->slot_len, slots); 4940 4941 free_leaf: 4942 kfree(node); 4943 } 4944 4945 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 4946 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 4947 { 4948 struct maple_node *node; 4949 struct maple_enode *next = *enode; 4950 void __rcu **slots = NULL; 4951 enum maple_type type; 4952 unsigned char next_offset = 0; 4953 4954 do { 4955 *enode = next; 4956 node = mte_to_node(*enode); 4957 type = mte_node_type(*enode); 4958 slots = ma_slots(node, type); 4959 next = mt_slot_locked(mt, slots, next_offset); 4960 if ((mte_dead_node(next))) 4961 next = mt_slot_locked(mt, slots, ++next_offset); 4962 4963 mte_set_node_dead(*enode); 4964 node->type = type; 4965 node->piv_parent = prev; 4966 node->parent_slot = offset; 4967 offset = next_offset; 4968 next_offset = 0; 4969 prev = *enode; 4970 } 
while (!mte_is_leaf(next)); 4971 4972 return slots; 4973 } 4974 4975 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 4976 bool free) 4977 { 4978 void __rcu **slots; 4979 struct maple_node *node = mte_to_node(enode); 4980 struct maple_enode *start; 4981 4982 if (mte_is_leaf(enode)) { 4983 mte_set_node_dead(enode); 4984 node->type = mte_node_type(enode); 4985 goto free_leaf; 4986 } 4987 4988 start = enode; 4989 slots = mte_destroy_descend(&enode, mt, start, 0); 4990 node = mte_to_node(enode); // Updated in the above call. 4991 do { 4992 enum maple_type type; 4993 unsigned char offset; 4994 struct maple_enode *parent, *tmp; 4995 4996 node->slot_len = mte_dead_leaves(enode, mt, slots); 4997 if (free) 4998 mt_free_bulk(node->slot_len, slots); 4999 offset = node->parent_slot + 1; 5000 enode = node->piv_parent; 5001 if (mte_to_node(enode) == node) 5002 goto free_leaf; 5003 5004 type = mte_node_type(enode); 5005 slots = ma_slots(mte_to_node(enode), type); 5006 if (offset >= mt_slots[type]) 5007 goto next; 5008 5009 tmp = mt_slot_locked(mt, slots, offset); 5010 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5011 parent = enode; 5012 enode = tmp; 5013 slots = mte_destroy_descend(&enode, mt, parent, offset); 5014 } 5015 next: 5016 node = mte_to_node(enode); 5017 } while (start != enode); 5018 5019 node = mte_to_node(enode); 5020 node->slot_len = mte_dead_leaves(enode, mt, slots); 5021 if (free) 5022 mt_free_bulk(node->slot_len, slots); 5023 5024 free_leaf: 5025 if (free) 5026 kfree(node); 5027 else 5028 mt_clear_meta(mt, node, node->type); 5029 } 5030 5031 /* 5032 * mte_destroy_walk() - Free a tree or sub-tree. 5033 * @enode: the encoded maple node (maple_enode) to start 5034 * @mt: the tree to free - needed for node types. 5035 * 5036 * Must hold the write lock. 5037 */ 5038 static inline void mte_destroy_walk(struct maple_enode *enode, 5039 struct maple_tree *mt) 5040 { 5041 struct maple_node *node = mte_to_node(enode); 5042 5043 if (mt_in_rcu(mt)) { 5044 mt_destroy_walk(enode, mt, false); 5045 call_rcu(&node->rcu, mt_free_walk); 5046 } else { 5047 mt_destroy_walk(enode, mt, true); 5048 } 5049 } 5050 /* Interface */ 5051 5052 /** 5053 * mas_store() - Store an @entry. 5054 * @mas: The maple state. 5055 * @entry: The entry to store. 5056 * 5057 * The @mas->index and @mas->last is used to set the range for the @entry. 5058 * 5059 * Return: the first entry between mas->index and mas->last or %NULL. 5060 */ 5061 void *mas_store(struct ma_state *mas, void *entry) 5062 { 5063 MA_WR_STATE(wr_mas, mas, entry); 5064 5065 trace_ma_write(__func__, mas, 0, entry); 5066 #ifdef CONFIG_DEBUG_MAPLE_TREE 5067 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5068 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, 5069 entry); 5070 5071 if (mas->index > mas->last) { 5072 mas_set_err(mas, -EINVAL); 5073 return NULL; 5074 } 5075 5076 #endif 5077 5078 /* 5079 * Storing is the same operation as insert with the added caveat that it 5080 * can overwrite entries. Although this seems simple enough, one may 5081 * want to examine what happens if a single store operation was to 5082 * overwrite multiple entries within a self-balancing B-Tree. 
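	 * A single ranged store may consume many existing entries and their
	 * pivots at once, which is why the store type is determined and the
	 * node allocation is sized below before anything is written.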
5083 */ 5084 mas_wr_prealloc_setup(&wr_mas); 5085 mas->store_type = mas_wr_store_type(&wr_mas); 5086 if (mas->mas_flags & MA_STATE_PREALLOC) { 5087 mas_wr_store_entry(&wr_mas); 5088 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5089 return wr_mas.content; 5090 } 5091 5092 mas_prealloc_calc(&wr_mas, entry); 5093 if (!mas->node_request) 5094 goto store; 5095 5096 mas_alloc_nodes(mas, GFP_NOWAIT); 5097 if (mas_is_err(mas)) 5098 return NULL; 5099 5100 store: 5101 mas_wr_store_entry(&wr_mas); 5102 mas_destroy(mas); 5103 return wr_mas.content; 5104 } 5105 EXPORT_SYMBOL_GPL(mas_store); 5106 5107 /** 5108 * mas_store_gfp() - Store a value into the tree. 5109 * @mas: The maple state 5110 * @entry: The entry to store 5111 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5112 * 5113 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5114 * be allocated. 5115 */ 5116 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5117 { 5118 unsigned long index = mas->index; 5119 unsigned long last = mas->last; 5120 MA_WR_STATE(wr_mas, mas, entry); 5121 int ret = 0; 5122 5123 retry: 5124 mas_wr_preallocate(&wr_mas, entry); 5125 if (unlikely(mas_nomem(mas, gfp))) { 5126 if (!entry) 5127 __mas_set_range(mas, index, last); 5128 goto retry; 5129 } 5130 5131 if (mas_is_err(mas)) { 5132 ret = xa_err(mas->node); 5133 goto out; 5134 } 5135 5136 mas_wr_store_entry(&wr_mas); 5137 out: 5138 mas_destroy(mas); 5139 return ret; 5140 } 5141 EXPORT_SYMBOL_GPL(mas_store_gfp); 5142 5143 /** 5144 * mas_store_prealloc() - Store a value into the tree using memory 5145 * preallocated in the maple state. 5146 * @mas: The maple state 5147 * @entry: The entry to store. 5148 */ 5149 void mas_store_prealloc(struct ma_state *mas, void *entry) 5150 { 5151 MA_WR_STATE(wr_mas, mas, entry); 5152 5153 if (mas->store_type == wr_store_root) { 5154 mas_wr_prealloc_setup(&wr_mas); 5155 goto store; 5156 } 5157 5158 mas_wr_walk_descend(&wr_mas); 5159 if (mas->store_type != wr_spanning_store) { 5160 /* set wr_mas->content to current slot */ 5161 wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset); 5162 mas_wr_end_piv(&wr_mas); 5163 } 5164 5165 store: 5166 trace_ma_write(__func__, mas, 0, entry); 5167 mas_wr_store_entry(&wr_mas); 5168 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5169 mas_destroy(mas); 5170 } 5171 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5172 5173 /** 5174 * mas_preallocate() - Preallocate enough nodes for a store operation 5175 * @mas: The maple state 5176 * @entry: The entry that will be stored 5177 * @gfp: The GFP_FLAGS to use for allocations. 5178 * 5179 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5180 */ 5181 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5182 { 5183 MA_WR_STATE(wr_mas, mas, entry); 5184 5185 mas_wr_prealloc_setup(&wr_mas); 5186 mas->store_type = mas_wr_store_type(&wr_mas); 5187 mas_prealloc_calc(&wr_mas, entry); 5188 if (!mas->node_request) 5189 goto set_flag; 5190 5191 mas->mas_flags &= ~MA_STATE_PREALLOC; 5192 mas_alloc_nodes(mas, gfp); 5193 if (mas_is_err(mas)) { 5194 int ret = xa_err(mas->node); 5195 5196 mas->node_request = 0; 5197 mas_destroy(mas); 5198 mas_reset(mas); 5199 return ret; 5200 } 5201 5202 set_flag: 5203 mas->mas_flags |= MA_STATE_PREALLOC; 5204 return 0; 5205 } 5206 EXPORT_SYMBOL_GPL(mas_preallocate); 5207 5208 /* 5209 * mas_destroy() - destroy a maple state. 
5210 * @mas: The maple state 5211 * 5212 * Upon completion, check the left-most node and rebalance against the node to 5213 * the right if necessary. Frees any allocated nodes associated with this maple 5214 * state. 5215 */ 5216 void mas_destroy(struct ma_state *mas) 5217 { 5218 mas->mas_flags &= ~MA_STATE_PREALLOC; 5219 mas_empty_nodes(mas); 5220 } 5221 EXPORT_SYMBOL_GPL(mas_destroy); 5222 5223 static void mas_may_activate(struct ma_state *mas) 5224 { 5225 if (!mas->node) { 5226 mas->status = ma_start; 5227 } else if (mas->index > mas->max || mas->index < mas->min) { 5228 mas->status = ma_start; 5229 } else { 5230 mas->status = ma_active; 5231 } 5232 } 5233 5234 static bool mas_next_setup(struct ma_state *mas, unsigned long max, 5235 void **entry) 5236 { 5237 bool was_none = mas_is_none(mas); 5238 5239 if (unlikely(mas->last >= max)) { 5240 mas->status = ma_overflow; 5241 return true; 5242 } 5243 5244 switch (mas->status) { 5245 case ma_active: 5246 return false; 5247 case ma_none: 5248 fallthrough; 5249 case ma_pause: 5250 mas->status = ma_start; 5251 fallthrough; 5252 case ma_start: 5253 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5254 break; 5255 case ma_overflow: 5256 /* Overflowed before, but the max changed */ 5257 mas_may_activate(mas); 5258 break; 5259 case ma_underflow: 5260 /* The user expects the mas to be one before where it is */ 5261 mas_may_activate(mas); 5262 *entry = mas_walk(mas); 5263 if (*entry) 5264 return true; 5265 break; 5266 case ma_root: 5267 break; 5268 case ma_error: 5269 return true; 5270 } 5271 5272 if (likely(mas_is_active(mas))) /* Fast path */ 5273 return false; 5274 5275 if (mas_is_ptr(mas)) { 5276 *entry = NULL; 5277 if (was_none && mas->index == 0) { 5278 mas->index = mas->last = 0; 5279 return true; 5280 } 5281 mas->index = 1; 5282 mas->last = ULONG_MAX; 5283 mas->status = ma_none; 5284 return true; 5285 } 5286 5287 if (mas_is_none(mas)) 5288 return true; 5289 5290 return false; 5291 } 5292 5293 /** 5294 * mas_next() - Get the next entry. 5295 * @mas: The maple state 5296 * @max: The maximum index to check. 5297 * 5298 * Returns the next entry after @mas->index. 5299 * Must hold rcu_read_lock or the write lock. 5300 * Can return the zero entry. 5301 * 5302 * Return: The next entry or %NULL 5303 */ 5304 void *mas_next(struct ma_state *mas, unsigned long max) 5305 { 5306 void *entry = NULL; 5307 5308 if (mas_next_setup(mas, max, &entry)) 5309 return entry; 5310 5311 /* Retries on dead nodes handled by mas_next_slot */ 5312 return mas_next_slot(mas, max, false); 5313 } 5314 EXPORT_SYMBOL_GPL(mas_next); 5315 5316 /** 5317 * mas_next_range() - Advance the maple state to the next range 5318 * @mas: The maple state 5319 * @max: The maximum index to check. 5320 * 5321 * Sets @mas->index and @mas->last to the range. 5322 * Must hold rcu_read_lock or the write lock. 5323 * Can return the zero entry. 
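 *
 * An illustrative sketch (placeholder names, limit handling elided) that
 * inspects the range immediately following an initial lookup:
 *
 *	MA_STATE(mas, &tree, index, index);
 *	void *next;
 *
 *	rcu_read_lock();
 *	mas_walk(&mas);
 *	next = mas_next_range(&mas, ULONG_MAX);
 *	pr_debug("[%lx, %lx] -> %p\n", mas.index, mas.last, next);
 *	rcu_read_unlock();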
5324 * 5325 * Return: The next entry or %NULL 5326 */ 5327 void *mas_next_range(struct ma_state *mas, unsigned long max) 5328 { 5329 void *entry = NULL; 5330 5331 if (mas_next_setup(mas, max, &entry)) 5332 return entry; 5333 5334 /* Retries on dead nodes handled by mas_next_slot */ 5335 return mas_next_slot(mas, max, true); 5336 } 5337 EXPORT_SYMBOL_GPL(mas_next_range); 5338 5339 /** 5340 * mt_next() - get the next value in the maple tree 5341 * @mt: The maple tree 5342 * @index: The start index 5343 * @max: The maximum index to check 5344 * 5345 * Takes RCU read lock internally to protect the search, which does not 5346 * protect the returned pointer after dropping RCU read lock. 5347 * See also: Documentation/core-api/maple_tree.rst 5348 * 5349 * Return: The entry higher than @index or %NULL if nothing is found. 5350 */ 5351 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5352 { 5353 void *entry = NULL; 5354 MA_STATE(mas, mt, index, index); 5355 5356 rcu_read_lock(); 5357 entry = mas_next(&mas, max); 5358 rcu_read_unlock(); 5359 return entry; 5360 } 5361 EXPORT_SYMBOL_GPL(mt_next); 5362 5363 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) 5364 { 5365 if (unlikely(mas->index <= min)) { 5366 mas->status = ma_underflow; 5367 return true; 5368 } 5369 5370 switch (mas->status) { 5371 case ma_active: 5372 return false; 5373 case ma_start: 5374 break; 5375 case ma_none: 5376 fallthrough; 5377 case ma_pause: 5378 mas->status = ma_start; 5379 break; 5380 case ma_underflow: 5381 /* underflowed before but the min changed */ 5382 mas_may_activate(mas); 5383 break; 5384 case ma_overflow: 5385 /* User expects mas to be one after where it is */ 5386 mas_may_activate(mas); 5387 *entry = mas_walk(mas); 5388 if (*entry) 5389 return true; 5390 break; 5391 case ma_root: 5392 break; 5393 case ma_error: 5394 return true; 5395 } 5396 5397 if (mas_is_start(mas)) 5398 mas_walk(mas); 5399 5400 if (unlikely(mas_is_ptr(mas))) { 5401 if (!mas->index) { 5402 mas->status = ma_none; 5403 return true; 5404 } 5405 mas->index = mas->last = 0; 5406 *entry = mas_root(mas); 5407 return true; 5408 } 5409 5410 if (mas_is_none(mas)) { 5411 if (mas->index) { 5412 /* Walked to out-of-range pointer? */ 5413 mas->index = mas->last = 0; 5414 mas->status = ma_root; 5415 *entry = mas_root(mas); 5416 return true; 5417 } 5418 return true; 5419 } 5420 5421 return false; 5422 } 5423 5424 /** 5425 * mas_prev() - Get the previous entry 5426 * @mas: The maple state 5427 * @min: The minimum value to check. 5428 * 5429 * Must hold rcu_read_lock or the write lock. 5430 * Will reset mas to ma_start if the status is ma_none. Will stop on not 5431 * searchable nodes. 5432 * 5433 * Return: the previous value or %NULL. 5434 */ 5435 void *mas_prev(struct ma_state *mas, unsigned long min) 5436 { 5437 void *entry = NULL; 5438 5439 if (mas_prev_setup(mas, min, &entry)) 5440 return entry; 5441 5442 return mas_prev_slot(mas, min, false); 5443 } 5444 EXPORT_SYMBOL_GPL(mas_prev); 5445 5446 /** 5447 * mas_prev_range() - Advance to the previous range 5448 * @mas: The maple state 5449 * @min: The minimum value to check. 5450 * 5451 * Sets @mas->index and @mas->last to the range. 5452 * Must hold rcu_read_lock or the write lock. 5453 * Will reset mas to ma_start if the node is ma_none. Will stop on not 5454 * searchable nodes. 5455 * 5456 * Return: the previous value or %NULL. 
5457 */ 5458 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5459 { 5460 void *entry = NULL; 5461 5462 if (mas_prev_setup(mas, min, &entry)) 5463 return entry; 5464 5465 return mas_prev_slot(mas, min, true); 5466 } 5467 EXPORT_SYMBOL_GPL(mas_prev_range); 5468 5469 /** 5470 * mt_prev() - get the previous value in the maple tree 5471 * @mt: The maple tree 5472 * @index: The start index 5473 * @min: The minimum index to check 5474 * 5475 * Takes RCU read lock internally to protect the search, which does not 5476 * protect the returned pointer after dropping RCU read lock. 5477 * See also: Documentation/core-api/maple_tree.rst 5478 * 5479 * Return: The entry before @index or %NULL if nothing is found. 5480 */ 5481 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5482 { 5483 void *entry = NULL; 5484 MA_STATE(mas, mt, index, index); 5485 5486 rcu_read_lock(); 5487 entry = mas_prev(&mas, min); 5488 rcu_read_unlock(); 5489 return entry; 5490 } 5491 EXPORT_SYMBOL_GPL(mt_prev); 5492 5493 /** 5494 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5495 * @mas: The maple state to pause 5496 * 5497 * Some users need to pause a walk and drop the lock they're holding in 5498 * order to yield to a higher priority thread or carry out an operation 5499 * on an entry. Those users should call this function before they drop 5500 * the lock. It resets the @mas to be suitable for the next iteration 5501 * of the loop after the user has reacquired the lock. If most entries 5502 * found during a walk require you to call mas_pause(), the mt_for_each() 5503 * iterator may be more appropriate. 5504 * 5505 */ 5506 void mas_pause(struct ma_state *mas) 5507 { 5508 mas->status = ma_pause; 5509 mas->node = NULL; 5510 } 5511 EXPORT_SYMBOL_GPL(mas_pause); 5512 5513 /** 5514 * mas_find_setup() - Internal function to set up mas_find*(). 5515 * @mas: The maple state 5516 * @max: The maximum index 5517 * @entry: Pointer to the entry 5518 * 5519 * Returns: True if entry is the answer, false otherwise. 
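 * When true is returned, *@entry already holds the value (possibly %NULL)
 * that the caller should return without stepping to another slot.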
5520 */ 5521 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) 5522 { 5523 switch (mas->status) { 5524 case ma_active: 5525 if (mas->last < max) 5526 return false; 5527 return true; 5528 case ma_start: 5529 break; 5530 case ma_pause: 5531 if (unlikely(mas->last >= max)) 5532 return true; 5533 5534 mas->index = ++mas->last; 5535 mas->status = ma_start; 5536 break; 5537 case ma_none: 5538 if (unlikely(mas->last >= max)) 5539 return true; 5540 5541 mas->index = mas->last; 5542 mas->status = ma_start; 5543 break; 5544 case ma_underflow: 5545 /* mas is pointing at entry before unable to go lower */ 5546 if (unlikely(mas->index >= max)) { 5547 mas->status = ma_overflow; 5548 return true; 5549 } 5550 5551 mas_may_activate(mas); 5552 *entry = mas_walk(mas); 5553 if (*entry) 5554 return true; 5555 break; 5556 case ma_overflow: 5557 if (unlikely(mas->last >= max)) 5558 return true; 5559 5560 mas_may_activate(mas); 5561 *entry = mas_walk(mas); 5562 if (*entry) 5563 return true; 5564 break; 5565 case ma_root: 5566 break; 5567 case ma_error: 5568 return true; 5569 } 5570 5571 if (mas_is_start(mas)) { 5572 /* First run or continue */ 5573 if (mas->index > max) 5574 return true; 5575 5576 *entry = mas_walk(mas); 5577 if (*entry) 5578 return true; 5579 5580 } 5581 5582 if (unlikely(mas_is_ptr(mas))) 5583 goto ptr_out_of_range; 5584 5585 if (unlikely(mas_is_none(mas))) 5586 return true; 5587 5588 if (mas->index == max) 5589 return true; 5590 5591 return false; 5592 5593 ptr_out_of_range: 5594 mas->status = ma_none; 5595 mas->index = 1; 5596 mas->last = ULONG_MAX; 5597 return true; 5598 } 5599 5600 /** 5601 * mas_find() - On the first call, find the entry at or after mas->index up to 5602 * %max. Otherwise, find the entry after mas->index. 5603 * @mas: The maple state 5604 * @max: The maximum value to check. 5605 * 5606 * Must hold rcu_read_lock or the write lock. 5607 * If an entry exists, last and index are updated accordingly. 5608 * May set @mas->status to ma_overflow. 5609 * 5610 * Return: The entry or %NULL. 5611 */ 5612 void *mas_find(struct ma_state *mas, unsigned long max) 5613 { 5614 void *entry = NULL; 5615 5616 if (mas_find_setup(mas, max, &entry)) 5617 return entry; 5618 5619 /* Retries on dead nodes handled by mas_next_slot */ 5620 entry = mas_next_slot(mas, max, false); 5621 /* Ignore overflow */ 5622 mas->status = ma_active; 5623 return entry; 5624 } 5625 EXPORT_SYMBOL_GPL(mas_find); 5626 5627 /** 5628 * mas_find_range() - On the first call, find the entry at or after 5629 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 5630 * @mas: The maple state 5631 * @max: The maximum value to check. 5632 * 5633 * Must hold rcu_read_lock or the write lock. 5634 * If an entry exists, last and index are updated accordingly. 5635 * May set @mas->status to ma_overflow. 5636 * 5637 * Return: The entry or %NULL. 5638 */ 5639 void *mas_find_range(struct ma_state *mas, unsigned long max) 5640 { 5641 void *entry = NULL; 5642 5643 if (mas_find_setup(mas, max, &entry)) 5644 return entry; 5645 5646 /* Retries on dead nodes handled by mas_next_slot */ 5647 return mas_next_slot(mas, max, true); 5648 } 5649 EXPORT_SYMBOL_GPL(mas_find_range); 5650 5651 /** 5652 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 5653 * @mas: The maple state 5654 * @min: The minimum index 5655 * @entry: Pointer to the entry 5656 * 5657 * Returns: True if entry is the answer, false otherwise. 
5658 */ 5659 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 5660 void **entry) 5661 { 5662 5663 switch (mas->status) { 5664 case ma_active: 5665 goto active; 5666 case ma_start: 5667 break; 5668 case ma_pause: 5669 if (unlikely(mas->index <= min)) { 5670 mas->status = ma_underflow; 5671 return true; 5672 } 5673 mas->last = --mas->index; 5674 mas->status = ma_start; 5675 break; 5676 case ma_none: 5677 if (mas->index <= min) 5678 goto none; 5679 5680 mas->last = mas->index; 5681 mas->status = ma_start; 5682 break; 5683 case ma_overflow: /* user expects the mas to be one after where it is */ 5684 if (unlikely(mas->index <= min)) { 5685 mas->status = ma_underflow; 5686 return true; 5687 } 5688 5689 mas->status = ma_active; 5690 break; 5691 case ma_underflow: /* user expects the mas to be one before where it is */ 5692 if (unlikely(mas->index <= min)) 5693 return true; 5694 5695 mas->status = ma_active; 5696 break; 5697 case ma_root: 5698 break; 5699 case ma_error: 5700 return true; 5701 } 5702 5703 if (mas_is_start(mas)) { 5704 /* First run or continue */ 5705 if (mas->index < min) 5706 return true; 5707 5708 *entry = mas_walk(mas); 5709 if (*entry) 5710 return true; 5711 } 5712 5713 if (unlikely(mas_is_ptr(mas))) 5714 goto none; 5715 5716 if (unlikely(mas_is_none(mas))) { 5717 /* 5718 * Walked to the location, and there was nothing so the previous 5719 * location is 0. 5720 */ 5721 mas->last = mas->index = 0; 5722 mas->status = ma_root; 5723 *entry = mas_root(mas); 5724 return true; 5725 } 5726 5727 active: 5728 if (mas->index < min) 5729 return true; 5730 5731 return false; 5732 5733 none: 5734 mas->status = ma_none; 5735 return true; 5736 } 5737 5738 /** 5739 * mas_find_rev: On the first call, find the first non-null entry at or below 5740 * mas->index down to %min. Otherwise find the first non-null entry below 5741 * mas->index down to %min. 5742 * @mas: The maple state 5743 * @min: The minimum value to check. 5744 * 5745 * Must hold rcu_read_lock or the write lock. 5746 * If an entry exists, last and index are updated accordingly. 5747 * May set @mas->status to ma_underflow. 5748 * 5749 * Return: The entry or %NULL. 5750 */ 5751 void *mas_find_rev(struct ma_state *mas, unsigned long min) 5752 { 5753 void *entry = NULL; 5754 5755 if (mas_find_rev_setup(mas, min, &entry)) 5756 return entry; 5757 5758 /* Retries on dead nodes handled by mas_prev_slot */ 5759 return mas_prev_slot(mas, min, false); 5760 5761 } 5762 EXPORT_SYMBOL_GPL(mas_find_rev); 5763 5764 /** 5765 * mas_find_range_rev: On the first call, find the first non-null entry at or 5766 * below mas->index down to %min. Otherwise advance to the previous slot after 5767 * mas->index down to %min. 5768 * @mas: The maple state 5769 * @min: The minimum value to check. 5770 * 5771 * Must hold rcu_read_lock or the write lock. 5772 * If an entry exists, last and index are updated accordingly. 5773 * May set @mas->status to ma_underflow. 5774 * 5775 * Return: The entry or %NULL. 5776 */ 5777 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 5778 { 5779 void *entry = NULL; 5780 5781 if (mas_find_rev_setup(mas, min, &entry)) 5782 return entry; 5783 5784 /* Retries on dead nodes handled by mas_prev_slot */ 5785 return mas_prev_slot(mas, min, true); 5786 } 5787 EXPORT_SYMBOL_GPL(mas_find_range_rev); 5788 5789 /** 5790 * mas_erase() - Find the range in which index resides and erase the entire 5791 * range. 5792 * @mas: The maple state 5793 * 5794 * Must hold the write lock. 
5795 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 5796 * erases that range. 5797 * 5798 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 5799 */ 5800 void *mas_erase(struct ma_state *mas) 5801 { 5802 void *entry; 5803 unsigned long index = mas->index; 5804 MA_WR_STATE(wr_mas, mas, NULL); 5805 5806 if (!mas_is_active(mas) || !mas_is_start(mas)) 5807 mas->status = ma_start; 5808 5809 write_retry: 5810 entry = mas_state_walk(mas); 5811 if (!entry) 5812 return NULL; 5813 5814 /* Must reset to ensure spanning writes of last slot are detected */ 5815 mas_reset(mas); 5816 mas_wr_preallocate(&wr_mas, NULL); 5817 if (mas_nomem(mas, GFP_KERNEL)) { 5818 /* in case the range of entry changed when unlocked */ 5819 mas->index = mas->last = index; 5820 goto write_retry; 5821 } 5822 5823 if (mas_is_err(mas)) 5824 goto out; 5825 5826 mas_wr_store_entry(&wr_mas); 5827 out: 5828 mas_destroy(mas); 5829 return entry; 5830 } 5831 EXPORT_SYMBOL_GPL(mas_erase); 5832 5833 /** 5834 * mas_nomem() - Check if there was an error allocating and do the allocation 5835 * if necessary If there are allocations, then free them. 5836 * @mas: The maple state 5837 * @gfp: The GFP_FLAGS to use for allocations 5838 * Return: true on allocation, false otherwise. 5839 */ 5840 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 5841 __must_hold(mas->tree->ma_lock) 5842 { 5843 if (likely(mas->node != MA_ERROR(-ENOMEM))) 5844 return false; 5845 5846 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 5847 mtree_unlock(mas->tree); 5848 mas_alloc_nodes(mas, gfp); 5849 mtree_lock(mas->tree); 5850 } else { 5851 mas_alloc_nodes(mas, gfp); 5852 } 5853 5854 if (!mas->sheaf && !mas->alloc) 5855 return false; 5856 5857 mas->status = ma_start; 5858 return true; 5859 } 5860 5861 void __init maple_tree_init(void) 5862 { 5863 struct kmem_cache_args args = { 5864 .align = sizeof(struct maple_node), 5865 .sheaf_capacity = 32, 5866 }; 5867 5868 maple_node_cache = kmem_cache_create("maple_node", 5869 sizeof(struct maple_node), &args, 5870 SLAB_PANIC); 5871 } 5872 5873 /** 5874 * mtree_load() - Load a value stored in a maple tree 5875 * @mt: The maple tree 5876 * @index: The index to load 5877 * 5878 * Return: the entry or %NULL 5879 */ 5880 void *mtree_load(struct maple_tree *mt, unsigned long index) 5881 { 5882 MA_STATE(mas, mt, index, index); 5883 void *entry; 5884 5885 trace_ma_read(__func__, &mas); 5886 rcu_read_lock(); 5887 retry: 5888 entry = mas_start(&mas); 5889 if (unlikely(mas_is_none(&mas))) 5890 goto unlock; 5891 5892 if (unlikely(mas_is_ptr(&mas))) { 5893 if (index) 5894 entry = NULL; 5895 5896 goto unlock; 5897 } 5898 5899 entry = mtree_lookup_walk(&mas); 5900 if (!entry && unlikely(mas_is_start(&mas))) 5901 goto retry; 5902 unlock: 5903 rcu_read_unlock(); 5904 if (xa_is_zero(entry)) 5905 return NULL; 5906 5907 return entry; 5908 } 5909 EXPORT_SYMBOL(mtree_load); 5910 5911 /** 5912 * mtree_store_range() - Store an entry at a given range. 5913 * @mt: The maple tree 5914 * @index: The start of the range 5915 * @last: The end of the range 5916 * @entry: The entry to store 5917 * @gfp: The GFP_FLAGS to use for allocations 5918 * 5919 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5920 * be allocated. 
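 *
 * An illustrative sketch (placeholder names; the function takes the tree
 * lock internally):
 *
 *	DEFINE_MTREE(tree);
 *
 *	if (mtree_store_range(&tree, 0x1000, 0x1fff, item, GFP_KERNEL))
 *		pr_err("store failed\n");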
5921 */ 5922 int mtree_store_range(struct maple_tree *mt, unsigned long index, 5923 unsigned long last, void *entry, gfp_t gfp) 5924 { 5925 MA_STATE(mas, mt, index, last); 5926 int ret = 0; 5927 5928 trace_ma_write(__func__, &mas, 0, entry); 5929 if (WARN_ON_ONCE(xa_is_advanced(entry))) 5930 return -EINVAL; 5931 5932 if (index > last) 5933 return -EINVAL; 5934 5935 mtree_lock(mt); 5936 ret = mas_store_gfp(&mas, entry, gfp); 5937 mtree_unlock(mt); 5938 5939 return ret; 5940 } 5941 EXPORT_SYMBOL(mtree_store_range); 5942 5943 /** 5944 * mtree_store() - Store an entry at a given index. 5945 * @mt: The maple tree 5946 * @index: The index to store the value 5947 * @entry: The entry to store 5948 * @gfp: The GFP_FLAGS to use for allocations 5949 * 5950 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5951 * be allocated. 5952 */ 5953 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 5954 gfp_t gfp) 5955 { 5956 return mtree_store_range(mt, index, index, entry, gfp); 5957 } 5958 EXPORT_SYMBOL(mtree_store); 5959 5960 /** 5961 * mtree_insert_range() - Insert an entry at a given range if there is no value. 5962 * @mt: The maple tree 5963 * @first: The start of the range 5964 * @last: The end of the range 5965 * @entry: The entry to store 5966 * @gfp: The GFP_FLAGS to use for allocations. 5967 * 5968 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 5969 * request, -ENOMEM if memory could not be allocated. 5970 */ 5971 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 5972 unsigned long last, void *entry, gfp_t gfp) 5973 { 5974 MA_STATE(ms, mt, first, last); 5975 int ret = 0; 5976 5977 if (WARN_ON_ONCE(xa_is_advanced(entry))) 5978 return -EINVAL; 5979 5980 if (first > last) 5981 return -EINVAL; 5982 5983 mtree_lock(mt); 5984 retry: 5985 mas_insert(&ms, entry); 5986 if (mas_nomem(&ms, gfp)) 5987 goto retry; 5988 5989 mtree_unlock(mt); 5990 if (mas_is_err(&ms)) 5991 ret = xa_err(ms.node); 5992 5993 mas_destroy(&ms); 5994 return ret; 5995 } 5996 EXPORT_SYMBOL(mtree_insert_range); 5997 5998 /** 5999 * mtree_insert() - Insert an entry at a given index if there is no value. 6000 * @mt: The maple tree 6001 * @index : The index to store the value 6002 * @entry: The entry to store 6003 * @gfp: The GFP_FLAGS to use for allocations. 6004 * 6005 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6006 * request, -ENOMEM if memory could not be allocated. 6007 */ 6008 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6009 gfp_t gfp) 6010 { 6011 return mtree_insert_range(mt, index, index, entry, gfp); 6012 } 6013 EXPORT_SYMBOL(mtree_insert); 6014 6015 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6016 void *entry, unsigned long size, unsigned long min, 6017 unsigned long max, gfp_t gfp) 6018 { 6019 int ret = 0; 6020 6021 MA_STATE(mas, mt, 0, 0); 6022 if (!mt_is_alloc(mt)) 6023 return -EINVAL; 6024 6025 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6026 return -EINVAL; 6027 6028 mtree_lock(mt); 6029 retry: 6030 ret = mas_empty_area(&mas, min, max, size); 6031 if (ret) 6032 goto unlock; 6033 6034 mas_insert(&mas, entry); 6035 /* 6036 * mas_nomem() may release the lock, causing the allocated area 6037 * to be unavailable, so try to allocate a free area again. 
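	 * The retry label therefore restarts from mas_empty_area() rather than
	 * repeating only the insert.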
6038 */ 6039 if (mas_nomem(&mas, gfp)) 6040 goto retry; 6041 6042 if (mas_is_err(&mas)) 6043 ret = xa_err(mas.node); 6044 else 6045 *startp = mas.index; 6046 6047 unlock: 6048 mtree_unlock(mt); 6049 mas_destroy(&mas); 6050 return ret; 6051 } 6052 EXPORT_SYMBOL(mtree_alloc_range); 6053 6054 /** 6055 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree. 6056 * @mt: The maple tree. 6057 * @startp: Pointer to ID. 6058 * @range_lo: Lower bound of range to search. 6059 * @range_hi: Upper bound of range to search. 6060 * @entry: The entry to store. 6061 * @next: Pointer to next ID to allocate. 6062 * @gfp: The GFP_FLAGS to use for allocations. 6063 * 6064 * Finds an empty entry in @mt after @next, stores the new index into 6065 * the @id pointer, stores the entry at that index, then updates @next. 6066 * 6067 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag. 6068 * 6069 * Context: Any context. Takes and releases the mt.lock. May sleep if 6070 * the @gfp flags permit. 6071 * 6072 * Return: 0 if the allocation succeeded without wrapping, 1 if the 6073 * allocation succeeded after wrapping, -ENOMEM if memory could not be 6074 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no 6075 * free entries. 6076 */ 6077 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, 6078 void *entry, unsigned long range_lo, unsigned long range_hi, 6079 unsigned long *next, gfp_t gfp) 6080 { 6081 int ret; 6082 6083 MA_STATE(mas, mt, 0, 0); 6084 6085 if (!mt_is_alloc(mt)) 6086 return -EINVAL; 6087 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6088 return -EINVAL; 6089 mtree_lock(mt); 6090 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, 6091 next, gfp); 6092 mtree_unlock(mt); 6093 return ret; 6094 } 6095 EXPORT_SYMBOL(mtree_alloc_cyclic); 6096 6097 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6098 void *entry, unsigned long size, unsigned long min, 6099 unsigned long max, gfp_t gfp) 6100 { 6101 int ret = 0; 6102 6103 MA_STATE(mas, mt, 0, 0); 6104 if (!mt_is_alloc(mt)) 6105 return -EINVAL; 6106 6107 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6108 return -EINVAL; 6109 6110 mtree_lock(mt); 6111 retry: 6112 ret = mas_empty_area_rev(&mas, min, max, size); 6113 if (ret) 6114 goto unlock; 6115 6116 mas_insert(&mas, entry); 6117 /* 6118 * mas_nomem() may release the lock, causing the allocated area 6119 * to be unavailable, so try to allocate a free area again. 6120 */ 6121 if (mas_nomem(&mas, gfp)) 6122 goto retry; 6123 6124 if (mas_is_err(&mas)) 6125 ret = xa_err(mas.node); 6126 else 6127 *startp = mas.index; 6128 6129 unlock: 6130 mtree_unlock(mt); 6131 mas_destroy(&mas); 6132 return ret; 6133 } 6134 EXPORT_SYMBOL(mtree_alloc_rrange); 6135 6136 /** 6137 * mtree_erase() - Find an index and erase the entire range. 6138 * @mt: The maple tree 6139 * @index: The index to erase 6140 * 6141 * Erasing is the same as a walk to an entry then a store of a NULL to that 6142 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6143 * 6144 * Return: The entry stored at the @index or %NULL 6145 */ 6146 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6147 { 6148 void *entry = NULL; 6149 6150 MA_STATE(mas, mt, index, index); 6151 trace_ma_op(__func__, &mas); 6152 6153 mtree_lock(mt); 6154 entry = mas_erase(&mas); 6155 mtree_unlock(mt); 6156 6157 return entry; 6158 } 6159 EXPORT_SYMBOL(mtree_erase); 6160 6161 /* 6162 * mas_dup_free() - Free an incomplete duplication of a tree. 
6163 * @mas: The maple state of a incomplete tree. 6164 * 6165 * The parameter @mas->node passed in indicates that the allocation failed on 6166 * this node. This function frees all nodes starting from @mas->node in the 6167 * reverse order of mas_dup_build(). There is no need to hold the source tree 6168 * lock at this time. 6169 */ 6170 static void mas_dup_free(struct ma_state *mas) 6171 { 6172 struct maple_node *node; 6173 enum maple_type type; 6174 void __rcu **slots; 6175 unsigned char count, i; 6176 6177 /* Maybe the first node allocation failed. */ 6178 if (mas_is_none(mas)) 6179 return; 6180 6181 while (!mte_is_root(mas->node)) { 6182 mas_ascend(mas); 6183 if (mas->offset) { 6184 mas->offset--; 6185 do { 6186 mas_descend(mas); 6187 mas->offset = mas_data_end(mas); 6188 } while (!mte_is_leaf(mas->node)); 6189 6190 mas_ascend(mas); 6191 } 6192 6193 node = mte_to_node(mas->node); 6194 type = mte_node_type(mas->node); 6195 slots = ma_slots(node, type); 6196 count = mas_data_end(mas) + 1; 6197 for (i = 0; i < count; i++) 6198 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; 6199 mt_free_bulk(count, slots); 6200 } 6201 6202 node = mte_to_node(mas->node); 6203 kfree(node); 6204 } 6205 6206 /* 6207 * mas_copy_node() - Copy a maple node and replace the parent. 6208 * @mas: The maple state of source tree. 6209 * @new_mas: The maple state of new tree. 6210 * @parent: The parent of the new node. 6211 * 6212 * Copy @mas->node to @new_mas->node, set @parent to be the parent of 6213 * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM. 6214 */ 6215 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, 6216 struct maple_pnode *parent) 6217 { 6218 struct maple_node *node = mte_to_node(mas->node); 6219 struct maple_node *new_node = mte_to_node(new_mas->node); 6220 unsigned long val; 6221 6222 /* Copy the node completely. */ 6223 memcpy(new_node, node, sizeof(struct maple_node)); 6224 /* Update the parent node pointer. */ 6225 val = (unsigned long)node->parent & MAPLE_NODE_MASK; 6226 new_node->parent = ma_parent_ptr(val | (unsigned long)parent); 6227 } 6228 6229 /* 6230 * mas_dup_alloc() - Allocate child nodes for a maple node. 6231 * @mas: The maple state of source tree. 6232 * @new_mas: The maple state of new tree. 6233 * @gfp: The GFP_FLAGS to use for allocations. 6234 * 6235 * This function allocates child nodes for @new_mas->node during the duplication 6236 * process. If memory allocation fails, @mas is set to -ENOMEM. 6237 */ 6238 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, 6239 gfp_t gfp) 6240 { 6241 struct maple_node *node = mte_to_node(mas->node); 6242 struct maple_node *new_node = mte_to_node(new_mas->node); 6243 enum maple_type type; 6244 unsigned char count, i; 6245 void __rcu **slots; 6246 void __rcu **new_slots; 6247 unsigned long val; 6248 6249 /* Allocate memory for child nodes. */ 6250 type = mte_node_type(mas->node); 6251 new_slots = ma_slots(new_node, type); 6252 count = mas->node_request = mas_data_end(mas) + 1; 6253 mas_alloc_nodes(mas, gfp); 6254 if (unlikely(mas_is_err(mas))) 6255 return; 6256 6257 slots = ma_slots(node, type); 6258 for (i = 0; i < count; i++) { 6259 val = (unsigned long)mt_slot_locked(mas->tree, slots, i); 6260 val &= MAPLE_NODE_MASK; 6261 new_slots[i] = ma_mnode_ptr((unsigned long)mas_pop_node(mas) | 6262 val); 6263 } 6264 } 6265 6266 /* 6267 * mas_dup_build() - Build a new maple tree from a source tree 6268 * @mas: The maple state of source tree, need to be in MAS_START state. 
6269 * @new_mas: The maple state of new tree, need to be in MAS_START state. 6270 * @gfp: The GFP_FLAGS to use for allocations. 6271 * 6272 * This function builds a new tree in DFS preorder. If the memory allocation 6273 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the 6274 * last node. mas_dup_free() will free the incomplete duplication of a tree. 6275 * 6276 * Note that the attributes of the two trees need to be exactly the same, and the 6277 * new tree needs to be empty, otherwise -EINVAL will be set in @mas. 6278 */ 6279 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, 6280 gfp_t gfp) 6281 { 6282 struct maple_node *node; 6283 struct maple_pnode *parent = NULL; 6284 struct maple_enode *root; 6285 enum maple_type type; 6286 6287 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || 6288 unlikely(!mtree_empty(new_mas->tree))) { 6289 mas_set_err(mas, -EINVAL); 6290 return; 6291 } 6292 6293 root = mas_start(mas); 6294 if (mas_is_ptr(mas) || mas_is_none(mas)) 6295 goto set_new_tree; 6296 6297 node = mt_alloc_one(gfp); 6298 if (!node) { 6299 new_mas->status = ma_none; 6300 mas_set_err(mas, -ENOMEM); 6301 return; 6302 } 6303 6304 type = mte_node_type(mas->node); 6305 root = mt_mk_node(node, type); 6306 new_mas->node = root; 6307 new_mas->min = 0; 6308 new_mas->max = ULONG_MAX; 6309 root = mte_mk_root(root); 6310 while (1) { 6311 mas_copy_node(mas, new_mas, parent); 6312 if (!mte_is_leaf(mas->node)) { 6313 /* Only allocate child nodes for non-leaf nodes. */ 6314 mas_dup_alloc(mas, new_mas, gfp); 6315 if (unlikely(mas_is_err(mas))) 6316 goto empty_mas; 6317 } else { 6318 /* 6319 * This is the last leaf node and duplication is 6320 * completed. 6321 */ 6322 if (mas->max == ULONG_MAX) 6323 goto done; 6324 6325 /* This is not the last leaf node and needs to go up. */ 6326 do { 6327 mas_ascend(mas); 6328 mas_ascend(new_mas); 6329 } while (mas->offset == mas_data_end(mas)); 6330 6331 /* Move to the next subtree. */ 6332 mas->offset++; 6333 new_mas->offset++; 6334 } 6335 6336 mas_descend(mas); 6337 parent = ma_parent_ptr(mte_to_node(new_mas->node)); 6338 mas_descend(new_mas); 6339 mas->offset = 0; 6340 new_mas->offset = 0; 6341 } 6342 done: 6343 /* Specially handle the parent of the root node. */ 6344 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); 6345 set_new_tree: 6346 /* Make them the same height */ 6347 new_mas->tree->ma_flags = mas->tree->ma_flags; 6348 rcu_assign_pointer(new_mas->tree->ma_root, root); 6349 empty_mas: 6350 mas_empty_nodes(mas); 6351 } 6352 6353 /** 6354 * __mt_dup(): Duplicate an entire maple tree 6355 * @mt: The source maple tree 6356 * @new: The new maple tree 6357 * @gfp: The GFP_FLAGS to use for allocations 6358 * 6359 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6360 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6361 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6362 * source node except for all the addresses stored in it. It will be faster than 6363 * traversing all elements in the source tree and inserting them one by one into 6364 * the new tree. 6365 * The user needs to ensure that the attributes of the source tree and the new 6366 * tree are the same, and the new tree needs to be an empty tree, otherwise 6367 * -EINVAL will be returned. 6368 * Note that the user needs to manually lock the source tree and the new tree. 
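*
* Because only the node addresses change, the entries stored in the new tree
* are the same pointers as those in @mt. A caller that needs private copies
* is therefore expected to walk the new tree after a successful
*
*	ret = __mt_dup(old, new, GFP_KERNEL);
*
* and overwrite each duplicated entry; duplicating the VMA tree on fork works
* this way. The tree names above are illustrative only.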
6369 * 6370 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6371 * the attributes of the two trees are different or the new tree is not an empty 6372 * tree. 6373 */ 6374 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6375 { 6376 int ret = 0; 6377 MA_STATE(mas, mt, 0, 0); 6378 MA_STATE(new_mas, new, 0, 0); 6379 6380 mas_dup_build(&mas, &new_mas, gfp); 6381 if (unlikely(mas_is_err(&mas))) { 6382 ret = xa_err(mas.node); 6383 if (ret == -ENOMEM) 6384 mas_dup_free(&new_mas); 6385 } 6386 6387 return ret; 6388 } 6389 EXPORT_SYMBOL(__mt_dup); 6390 6391 /** 6392 * mtree_dup(): Duplicate an entire maple tree 6393 * @mt: The source maple tree 6394 * @new: The new maple tree 6395 * @gfp: The GFP_FLAGS to use for allocations 6396 * 6397 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6398 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6399 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6400 * source node except for all the addresses stored in it. It will be faster than 6401 * traversing all elements in the source tree and inserting them one by one into 6402 * the new tree. 6403 * The user needs to ensure that the attributes of the source tree and the new 6404 * tree are the same, and the new tree needs to be an empty tree, otherwise 6405 * -EINVAL will be returned. 6406 * 6407 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6408 * the attributes of the two trees are different or the new tree is not an empty 6409 * tree. 6410 */ 6411 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6412 { 6413 int ret = 0; 6414 MA_STATE(mas, mt, 0, 0); 6415 MA_STATE(new_mas, new, 0, 0); 6416 6417 mas_lock(&new_mas); 6418 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); 6419 mas_dup_build(&mas, &new_mas, gfp); 6420 mas_unlock(&mas); 6421 if (unlikely(mas_is_err(&mas))) { 6422 ret = xa_err(mas.node); 6423 if (ret == -ENOMEM) 6424 mas_dup_free(&new_mas); 6425 } 6426 6427 mas_unlock(&new_mas); 6428 return ret; 6429 } 6430 EXPORT_SYMBOL(mtree_dup); 6431 6432 /** 6433 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6434 * @mt: The maple tree 6435 * 6436 * Note: Does not handle locking. 6437 */ 6438 void __mt_destroy(struct maple_tree *mt) 6439 { 6440 void *root = mt_root_locked(mt); 6441 6442 rcu_assign_pointer(mt->ma_root, NULL); 6443 if (xa_is_node(root)) 6444 mte_destroy_walk(root, mt); 6445 6446 mt->ma_flags = mt_attr(mt); 6447 } 6448 EXPORT_SYMBOL_GPL(__mt_destroy); 6449 6450 /** 6451 * mtree_destroy() - Destroy a maple tree 6452 * @mt: The maple tree 6453 * 6454 * Frees all resources used by the tree. Handles locking. 6455 */ 6456 void mtree_destroy(struct maple_tree *mt) 6457 { 6458 mtree_lock(mt); 6459 __mt_destroy(mt); 6460 mtree_unlock(mt); 6461 } 6462 EXPORT_SYMBOL(mtree_destroy); 6463 6464 /** 6465 * mt_find() - Search from the start up until an entry is found. 6466 * @mt: The maple tree 6467 * @index: Pointer which contains the start location of the search 6468 * @max: The maximum value of the search range 6469 * 6470 * Takes RCU read lock internally to protect the search, which does not 6471 * protect the returned pointer after dropping RCU read lock. 6472 * See also: Documentation/core-api/maple_tree.rst 6473 * 6474 * In case that an entry is found @index is updated to point to the next 6475 * possible entry independent whether the found entry is occupying a 6476 * single index or a range if indices. 
6477 * 6478 * Return: The entry at or after the @index or %NULL 6479 */ 6480 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6481 { 6482 MA_STATE(mas, mt, *index, *index); 6483 void *entry; 6484 #ifdef CONFIG_DEBUG_MAPLE_TREE 6485 unsigned long copy = *index; 6486 #endif 6487 6488 trace_ma_read(__func__, &mas); 6489 6490 if ((*index) > max) 6491 return NULL; 6492 6493 rcu_read_lock(); 6494 retry: 6495 entry = mas_state_walk(&mas); 6496 if (mas_is_start(&mas)) 6497 goto retry; 6498 6499 if (unlikely(xa_is_zero(entry))) 6500 entry = NULL; 6501 6502 if (entry) 6503 goto unlock; 6504 6505 while (mas_is_active(&mas) && (mas.last < max)) { 6506 entry = mas_next_slot(&mas, max, false); 6507 if (likely(entry && !xa_is_zero(entry))) 6508 break; 6509 } 6510 6511 if (unlikely(xa_is_zero(entry))) 6512 entry = NULL; 6513 unlock: 6514 rcu_read_unlock(); 6515 if (likely(entry)) { 6516 *index = mas.last + 1; 6517 #ifdef CONFIG_DEBUG_MAPLE_TREE 6518 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6519 pr_err("index not increased! %lx <= %lx\n", 6520 *index, copy); 6521 #endif 6522 } 6523 6524 return entry; 6525 } 6526 EXPORT_SYMBOL(mt_find); 6527 6528 /** 6529 * mt_find_after() - Search from the start up until an entry is found. 6530 * @mt: The maple tree 6531 * @index: Pointer which contains the start location of the search 6532 * @max: The maximum value to check 6533 * 6534 * Same as mt_find() except that it checks @index for 0 before 6535 * searching. If @index == 0, the search is aborted. This covers a wrap 6536 * around of @index to 0 in an iterator loop. 6537 * 6538 * Return: The entry at or after the @index or %NULL 6539 */ 6540 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6541 unsigned long max) 6542 { 6543 if (!(*index)) 6544 return NULL; 6545 6546 return mt_find(mt, index, max); 6547 } 6548 EXPORT_SYMBOL(mt_find_after); 6549 6550 #ifdef CONFIG_DEBUG_MAPLE_TREE 6551 atomic_t maple_tree_tests_run; 6552 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6553 atomic_t maple_tree_tests_passed; 6554 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6555 6556 #ifndef __KERNEL__ 6557 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6558 void mt_set_non_kernel(unsigned int val) 6559 { 6560 kmem_cache_set_non_kernel(maple_node_cache, val); 6561 } 6562 6563 extern void kmem_cache_set_callback(struct kmem_cache *cachep, 6564 void (*callback)(void *)); 6565 void mt_set_callback(void (*callback)(void *)) 6566 { 6567 kmem_cache_set_callback(maple_node_cache, callback); 6568 } 6569 6570 extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); 6571 void mt_set_private(void *private) 6572 { 6573 kmem_cache_set_private(maple_node_cache, private); 6574 } 6575 6576 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6577 unsigned long mt_get_alloc_size(void) 6578 { 6579 return kmem_cache_get_alloc(maple_node_cache); 6580 } 6581 6582 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6583 void mt_zero_nr_tallocated(void) 6584 { 6585 kmem_cache_zero_nr_tallocated(maple_node_cache); 6586 } 6587 6588 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6589 unsigned int mt_nr_tallocated(void) 6590 { 6591 return kmem_cache_nr_tallocated(maple_node_cache); 6592 } 6593 6594 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6595 unsigned int mt_nr_allocated(void) 6596 { 6597 return kmem_cache_nr_allocated(maple_node_cache); 6598 } 6599 6600 void mt_cache_shrink(void) 6601 { 6602 } 6603 
#else 6604 /* 6605 * mt_cache_shrink() - For testing, don't use this. 6606 * 6607 * Certain testcases can trigger an OOM when combined with other memory 6608 * debugging configuration options. This function is used to reduce the 6609 * possibility of an out of memory even due to kmem_cache objects remaining 6610 * around for longer than usual. 6611 */ 6612 void mt_cache_shrink(void) 6613 { 6614 kmem_cache_shrink(maple_node_cache); 6615 6616 } 6617 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6618 6619 #endif /* not defined __KERNEL__ */ 6620 /* 6621 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6622 * @mas: The maple state 6623 * @offset: The offset into the slot array to fetch. 6624 * 6625 * Return: The entry stored at @offset. 6626 */ 6627 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6628 unsigned char offset) 6629 { 6630 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6631 offset); 6632 } 6633 6634 /* Depth first search, post-order */ 6635 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6636 { 6637 6638 struct maple_enode *p, *mn = mas->node; 6639 unsigned long p_min, p_max; 6640 6641 mas_next_node(mas, mas_mn(mas), max); 6642 if (!mas_is_overflow(mas)) 6643 return; 6644 6645 if (mte_is_root(mn)) 6646 return; 6647 6648 mas->node = mn; 6649 mas_ascend(mas); 6650 do { 6651 p = mas->node; 6652 p_min = mas->min; 6653 p_max = mas->max; 6654 mas_prev_node(mas, 0); 6655 } while (!mas_is_underflow(mas)); 6656 6657 mas->node = p; 6658 mas->max = p_max; 6659 mas->min = p_min; 6660 } 6661 6662 /* Tree validations */ 6663 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6664 unsigned long min, unsigned long max, unsigned int depth, 6665 enum mt_dump_format format); 6666 static void mt_dump_range(unsigned long min, unsigned long max, 6667 unsigned int depth, enum mt_dump_format format) 6668 { 6669 static const char spaces[] = " "; 6670 6671 switch(format) { 6672 case mt_dump_hex: 6673 if (min == max) 6674 pr_info("%.*s%lx: ", depth * 2, spaces, min); 6675 else 6676 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 6677 break; 6678 case mt_dump_dec: 6679 if (min == max) 6680 pr_info("%.*s%lu: ", depth * 2, spaces, min); 6681 else 6682 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 6683 } 6684 } 6685 6686 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 6687 unsigned int depth, enum mt_dump_format format) 6688 { 6689 mt_dump_range(min, max, depth, format); 6690 6691 if (xa_is_value(entry)) 6692 pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), 6693 xa_to_value(entry), entry); 6694 else if (xa_is_zero(entry)) 6695 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 6696 else if (mt_is_reserved(entry)) 6697 pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); 6698 else 6699 pr_cont(PTR_FMT "\n", entry); 6700 } 6701 6702 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 6703 unsigned long min, unsigned long max, unsigned int depth, 6704 enum mt_dump_format format) 6705 { 6706 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 6707 bool leaf = mte_is_leaf(entry); 6708 unsigned long first = min; 6709 int i; 6710 6711 pr_cont(" contents: "); 6712 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 6713 switch(format) { 6714 case mt_dump_hex: 6715 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 6716 break; 6717 case mt_dump_dec: 6718 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 6719 } 6720 } 6721 pr_cont(PTR_FMT 
"\n", node->slot[i]); 6722 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 6723 unsigned long last = max; 6724 6725 if (i < (MAPLE_RANGE64_SLOTS - 1)) 6726 last = node->pivot[i]; 6727 else if (!node->slot[i] && max != mt_node_max(entry)) 6728 break; 6729 if (last == 0 && i > 0) 6730 break; 6731 if (leaf) 6732 mt_dump_entry(mt_slot(mt, node->slot, i), 6733 first, last, depth + 1, format); 6734 else if (node->slot[i]) 6735 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6736 first, last, depth + 1, format); 6737 6738 if (last == max) 6739 break; 6740 if (last > max) { 6741 switch(format) { 6742 case mt_dump_hex: 6743 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 6744 node, last, max, i); 6745 break; 6746 case mt_dump_dec: 6747 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 6748 node, last, max, i); 6749 } 6750 } 6751 first = last + 1; 6752 } 6753 } 6754 6755 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 6756 unsigned long min, unsigned long max, unsigned int depth, 6757 enum mt_dump_format format) 6758 { 6759 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 6760 unsigned long first = min; 6761 int i; 6762 6763 pr_cont(" contents: "); 6764 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6765 switch (format) { 6766 case mt_dump_hex: 6767 pr_cont("%lx ", node->gap[i]); 6768 break; 6769 case mt_dump_dec: 6770 pr_cont("%lu ", node->gap[i]); 6771 } 6772 } 6773 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 6774 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 6775 switch (format) { 6776 case mt_dump_hex: 6777 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 6778 break; 6779 case mt_dump_dec: 6780 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 6781 } 6782 } 6783 pr_cont(PTR_FMT "\n", node->slot[i]); 6784 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6785 unsigned long last = max; 6786 6787 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 6788 last = node->pivot[i]; 6789 else if (!node->slot[i]) 6790 break; 6791 if (last == 0 && i > 0) 6792 break; 6793 if (node->slot[i]) 6794 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6795 first, last, depth + 1, format); 6796 6797 if (last == max) 6798 break; 6799 if (last > max) { 6800 switch(format) { 6801 case mt_dump_hex: 6802 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 6803 node, last, max, i); 6804 break; 6805 case mt_dump_dec: 6806 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 6807 node, last, max, i); 6808 } 6809 } 6810 first = last + 1; 6811 } 6812 } 6813 6814 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6815 unsigned long min, unsigned long max, unsigned int depth, 6816 enum mt_dump_format format) 6817 { 6818 struct maple_node *node = mte_to_node(entry); 6819 unsigned int type = mte_node_type(entry); 6820 unsigned int i; 6821 6822 mt_dump_range(min, max, depth, format); 6823 6824 pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, 6825 depth, type, node ? 
node->parent : NULL); 6826 switch (type) { 6827 case maple_dense: 6828 pr_cont("\n"); 6829 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 6830 if (min + i > max) 6831 pr_cont("OUT OF RANGE: "); 6832 mt_dump_entry(mt_slot(mt, node->slot, i), 6833 min + i, min + i, depth, format); 6834 } 6835 break; 6836 case maple_leaf_64: 6837 case maple_range_64: 6838 mt_dump_range64(mt, entry, min, max, depth, format); 6839 break; 6840 case maple_arange_64: 6841 mt_dump_arange64(mt, entry, min, max, depth, format); 6842 break; 6843 6844 default: 6845 pr_cont(" UNKNOWN TYPE\n"); 6846 } 6847 } 6848 6849 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 6850 { 6851 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 6852 6853 pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", 6854 mt, mt->ma_flags, mt_height(mt), entry); 6855 if (xa_is_node(entry)) 6856 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 6857 else if (entry) 6858 mt_dump_entry(entry, 0, 0, 0, format); 6859 else 6860 pr_info("(empty)\n"); 6861 } 6862 EXPORT_SYMBOL_GPL(mt_dump); 6863 6864 /* 6865 * Calculate the maximum gap in a node and check if that's what is reported in 6866 * the parent (unless root). 6867 */ 6868 static void mas_validate_gaps(struct ma_state *mas) 6869 { 6870 struct maple_enode *mte = mas->node; 6871 struct maple_node *p_mn, *node = mte_to_node(mte); 6872 enum maple_type mt = mte_node_type(mas->node); 6873 unsigned long gap = 0, max_gap = 0; 6874 unsigned long p_end, p_start = mas->min; 6875 unsigned char p_slot, offset; 6876 unsigned long *gaps = NULL; 6877 unsigned long *pivots = ma_pivots(node, mt); 6878 unsigned int i; 6879 6880 if (ma_is_dense(mt)) { 6881 for (i = 0; i < mt_slot_count(mte); i++) { 6882 if (mas_get_slot(mas, i)) { 6883 if (gap > max_gap) 6884 max_gap = gap; 6885 gap = 0; 6886 continue; 6887 } 6888 gap++; 6889 } 6890 goto counted; 6891 } 6892 6893 gaps = ma_gaps(node, mt); 6894 for (i = 0; i < mt_slot_count(mte); i++) { 6895 p_end = mas_safe_pivot(mas, pivots, i, mt); 6896 6897 if (!gaps) { 6898 if (!mas_get_slot(mas, i)) 6899 gap = p_end - p_start + 1; 6900 } else { 6901 void *entry = mas_get_slot(mas, i); 6902 6903 gap = gaps[i]; 6904 MT_BUG_ON(mas->tree, !entry); 6905 6906 if (gap > p_end - p_start + 1) { 6907 pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", 6908 mas_mn(mas), i, gap, p_end, p_start, 6909 p_end - p_start + 1); 6910 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 6911 } 6912 } 6913 6914 if (gap > max_gap) 6915 max_gap = gap; 6916 6917 p_start = p_end + 1; 6918 if (p_end >= mas->max) 6919 break; 6920 } 6921 6922 counted: 6923 if (mt == maple_arange_64) { 6924 MT_BUG_ON(mas->tree, !gaps); 6925 offset = ma_meta_gap(node); 6926 if (offset > i) { 6927 pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); 6928 MT_BUG_ON(mas->tree, 1); 6929 } 6930 6931 if (gaps[offset] != max_gap) { 6932 pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", 6933 node, offset, max_gap); 6934 MT_BUG_ON(mas->tree, 1); 6935 } 6936 6937 for (i++ ; i < mt_slot_count(mte); i++) { 6938 if (gaps[i] != 0) { 6939 pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", 6940 node, i); 6941 MT_BUG_ON(mas->tree, 1); 6942 } 6943 } 6944 } 6945 6946 if (mte_is_root(mte)) 6947 return; 6948 6949 p_slot = mte_parent_slot(mas->node); 6950 p_mn = mte_parent(mte); 6951 MT_BUG_ON(mas->tree, max_gap > mas->max); 6952 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 6953 pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); 6954 
mt_dump(mas->tree, mt_dump_hex);
6955 MT_BUG_ON(mas->tree, 1);
6956 }
6957 }
6958
6959 static void mas_validate_parent_slot(struct ma_state *mas)
6960 {
6961 struct maple_node *parent;
6962 struct maple_enode *node;
6963 enum maple_type p_type;
6964 unsigned char p_slot;
6965 void __rcu **slots;
6966 int i;
6967
6968 if (mte_is_root(mas->node))
6969 return;
6970
6971 p_slot = mte_parent_slot(mas->node);
6972 p_type = mas_parent_type(mas, mas->node);
6973 parent = mte_parent(mas->node);
6974 slots = ma_slots(parent, p_type);
6975 MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
6976
6977 /* Check prev/next parent slot for duplicate node entry */
6978
6979 for (i = 0; i < mt_slots[p_type]; i++) {
6980 node = mas_slot(mas, slots, i);
6981 if (i == p_slot) {
6982 if (node != mas->node)
6983 pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n",
6984 parent, i, mas_mn(mas));
6985 MT_BUG_ON(mas->tree, node != mas->node);
6986 } else if (node == mas->node) {
6987 pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n",
6988 mas_mn(mas), parent, i, p_slot);
6989 MT_BUG_ON(mas->tree, node == mas->node);
6990 }
6991 }
6992 }
6993
6994 static void mas_validate_child_slot(struct ma_state *mas)
6995 {
6996 enum maple_type type = mte_node_type(mas->node);
6997 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
6998 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
6999 struct maple_enode *child;
7000 unsigned char i;
7001
7002 if (mte_is_leaf(mas->node))
7003 return;
7004
7005 for (i = 0; i < mt_slots[type]; i++) {
7006 child = mas_slot(mas, slots, i);
7007
7008 if (!child) {
7009 pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n",
7010 mas_mn(mas), i);
7011 MT_BUG_ON(mas->tree, 1);
7012 }
7013
7014 if (mte_parent_slot(child) != i) {
7015 pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n",
7016 mas_mn(mas), i, mte_to_node(child),
7017 mte_parent_slot(child));
7018 MT_BUG_ON(mas->tree, 1);
7019 }
7020
7021 if (mte_parent(child) != mte_to_node(mas->node)) {
7022 pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n",
7023 mte_to_node(child), mte_parent(child),
7024 mte_to_node(mas->node));
7025 MT_BUG_ON(mas->tree, 1);
7026 }
7027
7028 if (i < mt_pivots[type] && pivots[i] == mas->max)
7029 break;
7030 }
7031 }
7032
7033 /*
7034 * Validate that all pivots are within mas->min and mas->max, that the
7035 * metadata ends where the maximum ends, and that there are no slots or
7036 * pivots set outside of the end of the data.
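* Anything found past the recorded end of data would mean an earlier write
* left the node only partially cleaned up.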
7037 */ 7038 static void mas_validate_limits(struct ma_state *mas) 7039 { 7040 int i; 7041 unsigned long prev_piv = 0; 7042 enum maple_type type = mte_node_type(mas->node); 7043 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7044 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7045 7046 for (i = 0; i < mt_slots[type]; i++) { 7047 unsigned long piv; 7048 7049 piv = mas_safe_pivot(mas, pivots, i, type); 7050 7051 if (!piv && (i != 0)) { 7052 pr_err("Missing node limit pivot at " PTR_FMT "[%u]", 7053 mas_mn(mas), i); 7054 MAS_WARN_ON(mas, 1); 7055 } 7056 7057 if (prev_piv > piv) { 7058 pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", 7059 mas_mn(mas), i, piv, prev_piv); 7060 MAS_WARN_ON(mas, piv < prev_piv); 7061 } 7062 7063 if (piv < mas->min) { 7064 pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, 7065 piv, mas->min); 7066 MAS_WARN_ON(mas, piv < mas->min); 7067 } 7068 if (piv > mas->max) { 7069 pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, 7070 piv, mas->max); 7071 MAS_WARN_ON(mas, piv > mas->max); 7072 } 7073 prev_piv = piv; 7074 if (piv == mas->max) 7075 break; 7076 } 7077 7078 if (mas_data_end(mas) != i) { 7079 pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", 7080 mas_mn(mas), mas_data_end(mas), i); 7081 MT_BUG_ON(mas->tree, 1); 7082 } 7083 7084 for (i += 1; i < mt_slots[type]; i++) { 7085 void *entry = mas_slot(mas, slots, i); 7086 7087 if (entry && (i != mt_slots[type] - 1)) { 7088 pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", 7089 mas_mn(mas), i, entry); 7090 MT_BUG_ON(mas->tree, entry != NULL); 7091 } 7092 7093 if (i < mt_pivots[type]) { 7094 unsigned long piv = pivots[i]; 7095 7096 if (!piv) 7097 continue; 7098 7099 pr_err(PTR_FMT "[%u] should not have piv %lu\n", 7100 mas_mn(mas), i, piv); 7101 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7102 } 7103 } 7104 } 7105 7106 static void mt_validate_nulls(struct maple_tree *mt) 7107 { 7108 void *entry, *last = (void *)1; 7109 unsigned char offset = 0; 7110 void __rcu **slots; 7111 MA_STATE(mas, mt, 0, 0); 7112 7113 mas_start(&mas); 7114 if (mas_is_none(&mas) || (mas_is_ptr(&mas))) 7115 return; 7116 7117 while (!mte_is_leaf(mas.node)) 7118 mas_descend(&mas); 7119 7120 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7121 do { 7122 entry = mas_slot(&mas, slots, offset); 7123 if (!last && !entry) { 7124 pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", 7125 mas_mn(&mas), offset); 7126 } 7127 MT_BUG_ON(mt, !last && !entry); 7128 last = entry; 7129 if (offset == mas_data_end(&mas)) { 7130 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7131 if (mas_is_overflow(&mas)) 7132 return; 7133 offset = 0; 7134 slots = ma_slots(mte_to_node(mas.node), 7135 mte_node_type(mas.node)); 7136 } else { 7137 offset++; 7138 } 7139 7140 } while (!mas_is_overflow(&mas)); 7141 } 7142 7143 /* 7144 * validate a maple tree by checking: 7145 * 1. The limits (pivots are within mas->min to mas->max) 7146 * 2. 
The gap is correctly set in the parents 7147 */ 7148 void mt_validate(struct maple_tree *mt) 7149 __must_hold(mas->tree->ma_lock) 7150 { 7151 unsigned char end; 7152 7153 MA_STATE(mas, mt, 0, 0); 7154 mas_start(&mas); 7155 if (!mas_is_active(&mas)) 7156 return; 7157 7158 while (!mte_is_leaf(mas.node)) 7159 mas_descend(&mas); 7160 7161 while (!mas_is_overflow(&mas)) { 7162 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7163 end = mas_data_end(&mas); 7164 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7165 (!mte_is_root(mas.node)))) { 7166 pr_err("Invalid size %u of " PTR_FMT "\n", 7167 end, mas_mn(&mas)); 7168 } 7169 7170 mas_validate_parent_slot(&mas); 7171 mas_validate_limits(&mas); 7172 mas_validate_child_slot(&mas); 7173 if (mt_is_alloc(mt)) 7174 mas_validate_gaps(&mas); 7175 mas_dfs_postorder(&mas, ULONG_MAX); 7176 } 7177 mt_validate_nulls(mt); 7178 } 7179 EXPORT_SYMBOL_GPL(mt_validate); 7180 7181 void mas_dump(const struct ma_state *mas) 7182 { 7183 pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", 7184 mas->tree, mas->node); 7185 switch (mas->status) { 7186 case ma_active: 7187 pr_err("(ma_active)"); 7188 break; 7189 case ma_none: 7190 pr_err("(ma_none)"); 7191 break; 7192 case ma_root: 7193 pr_err("(ma_root)"); 7194 break; 7195 case ma_start: 7196 pr_err("(ma_start) "); 7197 break; 7198 case ma_pause: 7199 pr_err("(ma_pause) "); 7200 break; 7201 case ma_overflow: 7202 pr_err("(ma_overflow) "); 7203 break; 7204 case ma_underflow: 7205 pr_err("(ma_underflow) "); 7206 break; 7207 case ma_error: 7208 pr_err("(ma_error) "); 7209 break; 7210 } 7211 7212 pr_err("Store Type: "); 7213 switch (mas->store_type) { 7214 case wr_invalid: 7215 pr_err("invalid store type\n"); 7216 break; 7217 case wr_new_root: 7218 pr_err("new_root\n"); 7219 break; 7220 case wr_store_root: 7221 pr_err("store_root\n"); 7222 break; 7223 case wr_exact_fit: 7224 pr_err("exact_fit\n"); 7225 break; 7226 case wr_split_store: 7227 pr_err("split_store\n"); 7228 break; 7229 case wr_slot_store: 7230 pr_err("slot_store\n"); 7231 break; 7232 case wr_append: 7233 pr_err("append\n"); 7234 break; 7235 case wr_node_store: 7236 pr_err("node_store\n"); 7237 break; 7238 case wr_spanning_store: 7239 pr_err("spanning_store\n"); 7240 break; 7241 case wr_rebalance: 7242 pr_err("rebalance\n"); 7243 break; 7244 } 7245 7246 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, 7247 mas->index, mas->last); 7248 pr_err(" min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n", 7249 mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth, 7250 mas->mas_flags); 7251 if (mas->index > mas->last) 7252 pr_err("Check index & last\n"); 7253 } 7254 EXPORT_SYMBOL_GPL(mas_dump); 7255 7256 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7257 { 7258 pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", 7259 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7260 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7261 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, 7262 wr_mas->end_piv); 7263 } 7264 EXPORT_SYMBOL_GPL(mas_wr_dump); 7265 7266 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7267