1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Maple Tree implementation 4 * Copyright (c) 2018-2022 Oracle Corporation 5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com> 6 * Matthew Wilcox <willy@infradead.org> 7 * Copyright (c) 2023 ByteDance 8 * Author: Peng Zhang <zhangpeng.00@bytedance.com> 9 */ 10 11 /* 12 * DOC: Interesting implementation details of the Maple Tree 13 * 14 * Each node type has a number of slots for entries and a number of slots for 15 * pivots. In the case of dense nodes, the pivots are implied by the position 16 * and are simply the slot index + the minimum of the node. 17 * 18 * In regular B-Tree terms, pivots are called keys. The term pivot is used to 19 * indicate that the tree is specifying ranges. Pivots may appear in the 20 * subtree with an entry attached to the value whereas keys are unique to a 21 * specific position of a B-tree. Pivot values are inclusive of the slot with 22 * the same index. 23 * 24 * 25 * The following illustrates the layout of a range64 nodes slots and pivots. 26 * 27 * 28 * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 | 29 * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ 30 * │ │ │ │ │ │ │ │ └─ Implied maximum 31 * │ │ │ │ │ │ │ └─ Pivot 14 32 * │ │ │ │ │ │ └─ Pivot 13 33 * │ │ │ │ │ └─ Pivot 12 34 * │ │ │ │ └─ Pivot 11 35 * │ │ │ └─ Pivot 2 36 * │ │ └─ Pivot 1 37 * │ └─ Pivot 0 38 * └─ Implied minimum 39 * 40 * Slot contents: 41 * Internal (non-leaf) nodes contain pointers to other nodes. 42 * Leaf nodes contain entries. 43 * 44 * The location of interest is often referred to as an offset. All offsets have 45 * a slot, but the last offset has an implied pivot from the node above (or 46 * UINT_MAX for the root node. 47 * 48 * Ranges complicate certain write activities. When modifying any of 49 * the B-tree variants, it is known that one entry will either be added or 50 * deleted. When modifying the Maple Tree, one store operation may overwrite 51 * the entire data set, or one half of the tree, or the middle half of the tree. 52 * 53 */ 54 55 56 #include <linux/maple_tree.h> 57 #include <linux/xarray.h> 58 #include <linux/types.h> 59 #include <linux/export.h> 60 #include <linux/slab.h> 61 #include <linux/limits.h> 62 #include <asm/barrier.h> 63 64 #define CREATE_TRACE_POINTS 65 #include <trace/events/maple_tree.h> 66 67 /* 68 * Kernel pointer hashing renders much of the maple tree dump useless as tagged 69 * pointers get hashed to arbitrary values. 70 * 71 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is 72 * permissible to bypass this. Otherwise remain cautious and retain the hashing. 73 * 74 * Userland doesn't know about %px so also use %p there. 75 */ 76 #if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE) 77 #define PTR_FMT "%px" 78 #else 79 #define PTR_FMT "%p" 80 #endif 81 82 #define MA_ROOT_PARENT 1 83 84 /* 85 * Maple state flags 86 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation 87 */ 88 #define MA_STATE_PREALLOC 1 89 90 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) 91 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) 92 #define ma_mnode_ptr(x) ((struct maple_node *)(x)) 93 #define ma_enode_ptr(x) ((struct maple_enode *)(x)) 94 static struct kmem_cache *maple_node_cache; 95 96 #ifdef CONFIG_DEBUG_MAPLE_TREE 97 static const unsigned long mt_max[] = { 98 [maple_dense] = MAPLE_NODE_SLOTS, 99 [maple_leaf_64] = ULONG_MAX, 100 [maple_range_64] = ULONG_MAX, 101 [maple_arange_64] = ULONG_MAX, 102 }; 103 #define mt_node_max(x) mt_max[mte_node_type(x)] 104 #endif 105 106 static const unsigned char mt_slots[] = { 107 [maple_dense] = MAPLE_NODE_SLOTS, 108 [maple_leaf_64] = MAPLE_RANGE64_SLOTS, 109 [maple_range_64] = MAPLE_RANGE64_SLOTS, 110 [maple_arange_64] = MAPLE_ARANGE64_SLOTS, 111 }; 112 #define mt_slot_count(x) mt_slots[mte_node_type(x)] 113 114 static const unsigned char mt_pivots[] = { 115 [maple_dense] = 0, 116 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1, 117 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1, 118 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1, 119 }; 120 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)] 121 122 static const unsigned char mt_min_slots[] = { 123 [maple_dense] = MAPLE_NODE_SLOTS / 2, 124 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 125 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, 126 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1, 127 }; 128 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)] 129 130 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2) 131 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) 132 133 struct maple_big_node { 134 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; 135 union { 136 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; 137 struct { 138 unsigned long padding[MAPLE_BIG_NODE_GAPS]; 139 unsigned long gap[MAPLE_BIG_NODE_GAPS]; 140 }; 141 }; 142 unsigned char b_end; 143 enum maple_type type; 144 }; 145 146 /* 147 * The maple_subtree_state is used to build a tree to replace a segment of an 148 * existing tree in a more atomic way. Any walkers of the older tree will hit a 149 * dead node and restart on updates. 150 */ 151 struct maple_subtree_state { 152 struct ma_state *orig_l; /* Original left side of subtree */ 153 struct ma_state *orig_r; /* Original right side of subtree */ 154 struct ma_state *l; /* New left side of subtree */ 155 struct ma_state *m; /* New middle of subtree (rare) */ 156 struct ma_state *r; /* New right side of subtree */ 157 struct ma_topiary *free; /* nodes to be freed */ 158 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 159 struct maple_big_node *bn; 160 }; 161 162 #ifdef CONFIG_KASAN_STACK 163 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 164 #define noinline_for_kasan noinline_for_stack 165 #else 166 #define noinline_for_kasan inline 167 #endif 168 169 /* Functions */ 170 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 171 { 172 return kmem_cache_alloc(maple_node_cache, gfp); 173 } 174 175 static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) 176 { 177 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); 178 } 179 180 static inline void mt_free_one(struct maple_node *node) 181 { 182 kmem_cache_free(maple_node_cache, node); 183 } 184 185 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 186 { 187 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 188 } 189 190 static void mt_free_rcu(struct rcu_head *head) 191 { 192 struct maple_node *node = container_of(head, struct maple_node, rcu); 193 194 kmem_cache_free(maple_node_cache, node); 195 } 196 197 /* 198 * ma_free_rcu() - Use rcu callback to free a maple node 199 * @node: The node to free 200 * 201 * The maple tree uses the parent pointer to indicate this node is no longer in 202 * use and will be freed. 203 */ 204 static void ma_free_rcu(struct maple_node *node) 205 { 206 WARN_ON(node->parent != ma_parent_ptr(node)); 207 call_rcu(&node->rcu, mt_free_rcu); 208 } 209 210 static void mt_set_height(struct maple_tree *mt, unsigned char height) 211 { 212 unsigned int new_flags = mt->ma_flags; 213 214 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 215 MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX); 216 new_flags |= height << MT_FLAGS_HEIGHT_OFFSET; 217 mt->ma_flags = new_flags; 218 } 219 220 static unsigned int mas_mt_height(struct ma_state *mas) 221 { 222 return mt_height(mas->tree); 223 } 224 225 static inline unsigned int mt_attr(struct maple_tree *mt) 226 { 227 return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK; 228 } 229 230 static __always_inline enum maple_type mte_node_type( 231 const struct maple_enode *entry) 232 { 233 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 234 MAPLE_NODE_TYPE_MASK; 235 } 236 237 static __always_inline bool ma_is_dense(const enum maple_type type) 238 { 239 return type < maple_leaf_64; 240 } 241 242 static __always_inline bool ma_is_leaf(const enum maple_type type) 243 { 244 return type < maple_range_64; 245 } 246 247 static __always_inline bool mte_is_leaf(const struct maple_enode *entry) 248 { 249 return ma_is_leaf(mte_node_type(entry)); 250 } 251 252 /* 253 * We also reserve values with the bottom two bits set to '10' which are 254 * below 4096 255 */ 256 static __always_inline bool mt_is_reserved(const void *entry) 257 { 258 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 259 xa_is_internal(entry); 260 } 261 262 static __always_inline void mas_set_err(struct ma_state *mas, long err) 263 { 264 mas->node = MA_ERROR(err); 265 mas->status = ma_error; 266 } 267 268 static __always_inline bool mas_is_ptr(const struct ma_state *mas) 269 { 270 return mas->status == ma_root; 271 } 272 273 static __always_inline bool mas_is_start(const struct ma_state *mas) 274 { 275 return mas->status == ma_start; 276 } 277 278 static __always_inline bool mas_is_none(const struct ma_state *mas) 279 { 280 return mas->status == ma_none; 281 } 282 283 static __always_inline bool mas_is_paused(const struct ma_state *mas) 284 { 285 return mas->status == ma_pause; 286 } 287 288 static __always_inline bool mas_is_overflow(struct ma_state *mas) 289 { 290 return mas->status == ma_overflow; 291 } 292 293 static inline bool mas_is_underflow(struct ma_state *mas) 294 { 295 return mas->status == ma_underflow; 296 } 297 298 static __always_inline struct maple_node *mte_to_node( 299 const struct maple_enode *entry) 300 { 301 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 302 } 303 304 /* 305 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 306 * @entry: The maple encoded node 307 * 308 * Return: a maple topiary pointer 309 */ 310 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 311 { 312 return (struct maple_topiary *) 313 ((unsigned long)entry & ~MAPLE_NODE_MASK); 314 } 315 316 /* 317 * mas_mn() - Get the maple state node. 318 * @mas: The maple state 319 * 320 * Return: the maple node (not encoded - bare pointer). 321 */ 322 static inline struct maple_node *mas_mn(const struct ma_state *mas) 323 { 324 return mte_to_node(mas->node); 325 } 326 327 /* 328 * mte_set_node_dead() - Set a maple encoded node as dead. 329 * @mn: The maple encoded node. 330 */ 331 static inline void mte_set_node_dead(struct maple_enode *mn) 332 { 333 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 334 smp_wmb(); /* Needed for RCU */ 335 } 336 337 /* Bit 1 indicates the root is a node */ 338 #define MAPLE_ROOT_NODE 0x02 339 /* maple_type stored bit 3-6 */ 340 #define MAPLE_ENODE_TYPE_SHIFT 0x03 341 /* Bit 2 means a NULL somewhere below */ 342 #define MAPLE_ENODE_NULL 0x04 343 344 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 345 enum maple_type type) 346 { 347 return (void *)((unsigned long)node | 348 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 349 } 350 351 static inline void *mte_mk_root(const struct maple_enode *node) 352 { 353 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 354 } 355 356 static inline void *mte_safe_root(const struct maple_enode *node) 357 { 358 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 359 } 360 361 static inline void __maybe_unused *mte_set_full(const struct maple_enode *node) 362 { 363 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 364 } 365 366 static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node) 367 { 368 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 369 } 370 371 static inline bool __maybe_unused mte_has_null(const struct maple_enode *node) 372 { 373 return (unsigned long)node & MAPLE_ENODE_NULL; 374 } 375 376 static __always_inline bool ma_is_root(struct maple_node *node) 377 { 378 return ((unsigned long)node->parent & MA_ROOT_PARENT); 379 } 380 381 static __always_inline bool mte_is_root(const struct maple_enode *node) 382 { 383 return ma_is_root(mte_to_node(node)); 384 } 385 386 static inline bool mas_is_root_limits(const struct ma_state *mas) 387 { 388 return !mas->min && mas->max == ULONG_MAX; 389 } 390 391 static __always_inline bool mt_is_alloc(struct maple_tree *mt) 392 { 393 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 394 } 395 396 /* 397 * The Parent Pointer 398 * Excluding root, the parent pointer is 256B aligned like all other tree nodes. 399 * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 400 * bit values need an extra bit to store the offset. This extra bit comes from 401 * a reuse of the last bit in the node type. This is possible by using bit 1 to 402 * indicate if bit 2 is part of the type or the slot. 403 * 404 * Note types: 405 * 0x??1 = Root 406 * 0x?00 = 16 bit nodes 407 * 0x010 = 32 bit nodes 408 * 0x110 = 64 bit nodes 409 * 410 * Slot size and alignment 411 * 0b??1 : Root 412 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7 413 * 0b010 : 32 bit values, type in 0-2, slot in 3-7 414 * 0b110 : 64 bit values, type in 0-2, slot in 3-7 415 */ 416 417 #define MAPLE_PARENT_ROOT 0x01 418 419 #define MAPLE_PARENT_SLOT_SHIFT 0x03 420 #define MAPLE_PARENT_SLOT_MASK 0xF8 421 422 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 423 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC 424 425 #define MAPLE_PARENT_RANGE64 0x06 426 #define MAPLE_PARENT_RANGE32 0x04 427 #define MAPLE_PARENT_NOT_RANGE16 0x02 428 429 /* 430 * mte_parent_shift() - Get the parent shift for the slot storage. 431 * @parent: The parent pointer cast as an unsigned long 432 * Return: The shift into that pointer to the star to of the slot 433 */ 434 static inline unsigned long mte_parent_shift(unsigned long parent) 435 { 436 /* Note bit 1 == 0 means 16B */ 437 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 438 return MAPLE_PARENT_SLOT_SHIFT; 439 440 return MAPLE_PARENT_16B_SLOT_SHIFT; 441 } 442 443 /* 444 * mte_parent_slot_mask() - Get the slot mask for the parent. 445 * @parent: The parent pointer cast as an unsigned long. 446 * Return: The slot mask for that parent. 447 */ 448 static inline unsigned long mte_parent_slot_mask(unsigned long parent) 449 { 450 /* Note bit 1 == 0 means 16B */ 451 if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) 452 return MAPLE_PARENT_SLOT_MASK; 453 454 return MAPLE_PARENT_16B_SLOT_MASK; 455 } 456 457 /* 458 * mas_parent_type() - Return the maple_type of the parent from the stored 459 * parent type. 460 * @mas: The maple state 461 * @enode: The maple_enode to extract the parent's enum 462 * Return: The node->parent maple_type 463 */ 464 static inline 465 enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) 466 { 467 unsigned long p_type; 468 469 p_type = (unsigned long)mte_to_node(enode)->parent; 470 if (WARN_ON(p_type & MAPLE_PARENT_ROOT)) 471 return 0; 472 473 p_type &= MAPLE_NODE_MASK; 474 p_type &= ~mte_parent_slot_mask(p_type); 475 switch (p_type) { 476 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ 477 if (mt_is_alloc(mas->tree)) 478 return maple_arange_64; 479 return maple_range_64; 480 } 481 482 return 0; 483 } 484 485 /* 486 * mas_set_parent() - Set the parent node and encode the slot 487 * @mas: The maple state 488 * @enode: The encoded maple node. 489 * @parent: The encoded maple node that is the parent of @enode. 490 * @slot: The slot that @enode resides in @parent. 491 * 492 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the 493 * parent type. 494 */ 495 static inline 496 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 497 const struct maple_enode *parent, unsigned char slot) 498 { 499 unsigned long val = (unsigned long)parent; 500 unsigned long shift; 501 unsigned long type; 502 enum maple_type p_type = mte_node_type(parent); 503 504 MAS_BUG_ON(mas, p_type == maple_dense); 505 MAS_BUG_ON(mas, p_type == maple_leaf_64); 506 507 switch (p_type) { 508 case maple_range_64: 509 case maple_arange_64: 510 shift = MAPLE_PARENT_SLOT_SHIFT; 511 type = MAPLE_PARENT_RANGE64; 512 break; 513 default: 514 case maple_dense: 515 case maple_leaf_64: 516 shift = type = 0; 517 break; 518 } 519 520 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 521 val |= (slot << shift) | type; 522 mte_to_node(enode)->parent = ma_parent_ptr(val); 523 } 524 525 /* 526 * mte_parent_slot() - get the parent slot of @enode. 527 * @enode: The encoded maple node. 528 * 529 * Return: The slot in the parent node where @enode resides. 530 */ 531 static __always_inline 532 unsigned int mte_parent_slot(const struct maple_enode *enode) 533 { 534 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 535 536 if (unlikely(val & MA_ROOT_PARENT)) 537 return 0; 538 539 /* 540 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 541 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 542 */ 543 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 544 } 545 546 /* 547 * mte_parent() - Get the parent of @node. 548 * @enode: The encoded maple node. 549 * 550 * Return: The parent maple node. 551 */ 552 static __always_inline 553 struct maple_node *mte_parent(const struct maple_enode *enode) 554 { 555 return (void *)((unsigned long) 556 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 557 } 558 559 /* 560 * ma_dead_node() - check if the @enode is dead. 561 * @enode: The encoded maple node 562 * 563 * Return: true if dead, false otherwise. 564 */ 565 static __always_inline bool ma_dead_node(const struct maple_node *node) 566 { 567 struct maple_node *parent; 568 569 /* Do not reorder reads from the node prior to the parent check */ 570 smp_rmb(); 571 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 572 return (parent == node); 573 } 574 575 /* 576 * mte_dead_node() - check if the @enode is dead. 577 * @enode: The encoded maple node 578 * 579 * Return: true if dead, false otherwise. 580 */ 581 static __always_inline bool mte_dead_node(const struct maple_enode *enode) 582 { 583 struct maple_node *node; 584 585 node = mte_to_node(enode); 586 return ma_dead_node(node); 587 } 588 589 /* 590 * mas_allocated() - Get the number of nodes allocated in a maple state. 591 * @mas: The maple state 592 * 593 * The ma_state alloc member is overloaded to hold a pointer to the first 594 * allocated node or to the number of requested nodes to allocate. If bit 0 is 595 * set, then the alloc contains the number of requested nodes. If there is an 596 * allocated node, then the total allocated nodes is in that node. 597 * 598 * Return: The total number of nodes allocated 599 */ 600 static inline unsigned long mas_allocated(const struct ma_state *mas) 601 { 602 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) 603 return 0; 604 605 return mas->alloc->total; 606 } 607 608 /* 609 * mas_set_alloc_req() - Set the requested number of allocations. 610 * @mas: the maple state 611 * @count: the number of allocations. 612 * 613 * The requested number of allocations is either in the first allocated node, 614 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is 615 * no allocated node. Set the request either in the node or do the necessary 616 * encoding to store in @mas->alloc directly. 617 */ 618 static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) 619 { 620 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { 621 if (!count) 622 mas->alloc = NULL; 623 else 624 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); 625 return; 626 } 627 628 mas->alloc->request_count = count; 629 } 630 631 /* 632 * mas_alloc_req() - get the requested number of allocations. 633 * @mas: The maple state 634 * 635 * The alloc count is either stored directly in @mas, or in 636 * @mas->alloc->request_count if there is at least one node allocated. Decode 637 * the request count if it's stored directly in @mas->alloc. 638 * 639 * Return: The allocation request count. 640 */ 641 static inline unsigned int mas_alloc_req(const struct ma_state *mas) 642 { 643 if ((unsigned long)mas->alloc & 0x1) 644 return (unsigned long)(mas->alloc) >> 1; 645 else if (mas->alloc) 646 return mas->alloc->request_count; 647 return 0; 648 } 649 650 /* 651 * ma_pivots() - Get a pointer to the maple node pivots. 652 * @node: the maple node 653 * @type: the node type 654 * 655 * In the event of a dead node, this array may be %NULL 656 * 657 * Return: A pointer to the maple node pivots 658 */ 659 static inline unsigned long *ma_pivots(struct maple_node *node, 660 enum maple_type type) 661 { 662 switch (type) { 663 case maple_arange_64: 664 return node->ma64.pivot; 665 case maple_range_64: 666 case maple_leaf_64: 667 return node->mr64.pivot; 668 case maple_dense: 669 return NULL; 670 } 671 return NULL; 672 } 673 674 /* 675 * ma_gaps() - Get a pointer to the maple node gaps. 676 * @node: the maple node 677 * @type: the node type 678 * 679 * Return: A pointer to the maple node gaps 680 */ 681 static inline unsigned long *ma_gaps(struct maple_node *node, 682 enum maple_type type) 683 { 684 switch (type) { 685 case maple_arange_64: 686 return node->ma64.gap; 687 case maple_range_64: 688 case maple_leaf_64: 689 case maple_dense: 690 return NULL; 691 } 692 return NULL; 693 } 694 695 /* 696 * mas_safe_pivot() - get the pivot at @piv or mas->max. 697 * @mas: The maple state 698 * @pivots: The pointer to the maple node pivots 699 * @piv: The pivot to fetch 700 * @type: The maple node type 701 * 702 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 703 * otherwise. 704 */ 705 static __always_inline unsigned long 706 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 707 unsigned char piv, enum maple_type type) 708 { 709 if (piv >= mt_pivots[type]) 710 return mas->max; 711 712 return pivots[piv]; 713 } 714 715 /* 716 * mas_safe_min() - Return the minimum for a given offset. 717 * @mas: The maple state 718 * @pivots: The pointer to the maple node pivots 719 * @offset: The offset into the pivot array 720 * 721 * Return: The minimum range value that is contained in @offset. 722 */ 723 static inline unsigned long 724 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 725 { 726 if (likely(offset)) 727 return pivots[offset - 1] + 1; 728 729 return mas->min; 730 } 731 732 /* 733 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 734 * @mn: The encoded maple node 735 * @piv: The pivot offset 736 * @val: The value of the pivot 737 */ 738 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 739 unsigned long val) 740 { 741 struct maple_node *node = mte_to_node(mn); 742 enum maple_type type = mte_node_type(mn); 743 744 BUG_ON(piv >= mt_pivots[type]); 745 switch (type) { 746 case maple_range_64: 747 case maple_leaf_64: 748 node->mr64.pivot[piv] = val; 749 break; 750 case maple_arange_64: 751 node->ma64.pivot[piv] = val; 752 break; 753 case maple_dense: 754 break; 755 } 756 757 } 758 759 /* 760 * ma_slots() - Get a pointer to the maple node slots. 761 * @mn: The maple node 762 * @mt: The maple node type 763 * 764 * Return: A pointer to the maple node slots 765 */ 766 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 767 { 768 switch (mt) { 769 case maple_arange_64: 770 return mn->ma64.slot; 771 case maple_range_64: 772 case maple_leaf_64: 773 return mn->mr64.slot; 774 case maple_dense: 775 return mn->slot; 776 } 777 778 return NULL; 779 } 780 781 static inline bool mt_write_locked(const struct maple_tree *mt) 782 { 783 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 784 lockdep_is_held(&mt->ma_lock); 785 } 786 787 static __always_inline bool mt_locked(const struct maple_tree *mt) 788 { 789 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 790 lockdep_is_held(&mt->ma_lock); 791 } 792 793 static __always_inline void *mt_slot(const struct maple_tree *mt, 794 void __rcu **slots, unsigned char offset) 795 { 796 return rcu_dereference_check(slots[offset], mt_locked(mt)); 797 } 798 799 static __always_inline void *mt_slot_locked(struct maple_tree *mt, 800 void __rcu **slots, unsigned char offset) 801 { 802 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 803 } 804 /* 805 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 806 * @mas: The maple state 807 * @slots: The pointer to the slots 808 * @offset: The offset into the slots array to fetch 809 * 810 * Return: The entry stored in @slots at the @offset. 811 */ 812 static __always_inline void *mas_slot_locked(struct ma_state *mas, 813 void __rcu **slots, unsigned char offset) 814 { 815 return mt_slot_locked(mas->tree, slots, offset); 816 } 817 818 /* 819 * mas_slot() - Get the slot value when not holding the maple tree lock. 820 * @mas: The maple state 821 * @slots: The pointer to the slots 822 * @offset: The offset into the slots array to fetch 823 * 824 * Return: The entry stored in @slots at the @offset 825 */ 826 static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 827 unsigned char offset) 828 { 829 return mt_slot(mas->tree, slots, offset); 830 } 831 832 /* 833 * mas_root() - Get the maple tree root. 834 * @mas: The maple state. 835 * 836 * Return: The pointer to the root of the tree 837 */ 838 static __always_inline void *mas_root(struct ma_state *mas) 839 { 840 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 841 } 842 843 static inline void *mt_root_locked(struct maple_tree *mt) 844 { 845 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 846 } 847 848 /* 849 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 850 * @mas: The maple state. 851 * 852 * Return: The pointer to the root of the tree 853 */ 854 static inline void *mas_root_locked(struct ma_state *mas) 855 { 856 return mt_root_locked(mas->tree); 857 } 858 859 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 860 enum maple_type mt) 861 { 862 switch (mt) { 863 case maple_arange_64: 864 return &mn->ma64.meta; 865 default: 866 return &mn->mr64.meta; 867 } 868 } 869 870 /* 871 * ma_set_meta() - Set the metadata information of a node. 872 * @mn: The maple node 873 * @mt: The maple node type 874 * @offset: The offset of the highest sub-gap in this node. 875 * @end: The end of the data in this node. 876 */ 877 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 878 unsigned char offset, unsigned char end) 879 { 880 struct maple_metadata *meta = ma_meta(mn, mt); 881 882 meta->gap = offset; 883 meta->end = end; 884 } 885 886 /* 887 * mt_clear_meta() - clear the metadata information of a node, if it exists 888 * @mt: The maple tree 889 * @mn: The maple node 890 * @type: The maple node type 891 */ 892 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 893 enum maple_type type) 894 { 895 struct maple_metadata *meta; 896 unsigned long *pivots; 897 void __rcu **slots; 898 void *next; 899 900 switch (type) { 901 case maple_range_64: 902 pivots = mn->mr64.pivot; 903 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 904 slots = mn->mr64.slot; 905 next = mt_slot_locked(mt, slots, 906 MAPLE_RANGE64_SLOTS - 1); 907 if (unlikely((mte_to_node(next) && 908 mte_node_type(next)))) 909 return; /* no metadata, could be node */ 910 } 911 fallthrough; 912 case maple_arange_64: 913 meta = ma_meta(mn, type); 914 break; 915 default: 916 return; 917 } 918 919 meta->gap = 0; 920 meta->end = 0; 921 } 922 923 /* 924 * ma_meta_end() - Get the data end of a node from the metadata 925 * @mn: The maple node 926 * @mt: The maple node type 927 */ 928 static inline unsigned char ma_meta_end(struct maple_node *mn, 929 enum maple_type mt) 930 { 931 struct maple_metadata *meta = ma_meta(mn, mt); 932 933 return meta->end; 934 } 935 936 /* 937 * ma_meta_gap() - Get the largest gap location of a node from the metadata 938 * @mn: The maple node 939 */ 940 static inline unsigned char ma_meta_gap(struct maple_node *mn) 941 { 942 return mn->ma64.meta.gap; 943 } 944 945 /* 946 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 947 * @mn: The maple node 948 * @mt: The maple node type 949 * @offset: The location of the largest gap. 950 */ 951 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 952 unsigned char offset) 953 { 954 955 struct maple_metadata *meta = ma_meta(mn, mt); 956 957 meta->gap = offset; 958 } 959 960 /* 961 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 962 * @mat: the ma_topiary, a linked list of dead nodes. 963 * @dead_enode: the node to be marked as dead and added to the tail of the list 964 * 965 * Add the @dead_enode to the linked list in @mat. 966 */ 967 static inline void mat_add(struct ma_topiary *mat, 968 struct maple_enode *dead_enode) 969 { 970 mte_set_node_dead(dead_enode); 971 mte_to_mat(dead_enode)->next = NULL; 972 if (!mat->tail) { 973 mat->tail = mat->head = dead_enode; 974 return; 975 } 976 977 mte_to_mat(mat->tail)->next = dead_enode; 978 mat->tail = dead_enode; 979 } 980 981 static void mt_free_walk(struct rcu_head *head); 982 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 983 bool free); 984 /* 985 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 986 * @mas: the maple state 987 * @mat: the ma_topiary linked list of dead nodes to free. 988 * 989 * Destroy walk a dead list. 990 */ 991 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 992 { 993 struct maple_enode *next; 994 struct maple_node *node; 995 bool in_rcu = mt_in_rcu(mas->tree); 996 997 while (mat->head) { 998 next = mte_to_mat(mat->head)->next; 999 node = mte_to_node(mat->head); 1000 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 1001 if (in_rcu) 1002 call_rcu(&node->rcu, mt_free_walk); 1003 mat->head = next; 1004 } 1005 } 1006 /* 1007 * mas_descend() - Descend into the slot stored in the ma_state. 1008 * @mas: the maple state. 1009 * 1010 * Note: Not RCU safe, only use in write side or debug code. 1011 */ 1012 static inline void mas_descend(struct ma_state *mas) 1013 { 1014 enum maple_type type; 1015 unsigned long *pivots; 1016 struct maple_node *node; 1017 void __rcu **slots; 1018 1019 node = mas_mn(mas); 1020 type = mte_node_type(mas->node); 1021 pivots = ma_pivots(node, type); 1022 slots = ma_slots(node, type); 1023 1024 if (mas->offset) 1025 mas->min = pivots[mas->offset - 1] + 1; 1026 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 1027 mas->node = mas_slot(mas, slots, mas->offset); 1028 } 1029 1030 /* 1031 * mas_ascend() - Walk up a level of the tree. 1032 * @mas: The maple state 1033 * 1034 * Sets the @mas->max and @mas->min for the parent node of mas->node. This 1035 * may cause several levels of walking up to find the correct min and max. 1036 * May find a dead node which will cause a premature return. 1037 * Return: 1 on dead node, 0 otherwise 1038 */ 1039 static int mas_ascend(struct ma_state *mas) 1040 { 1041 struct maple_enode *p_enode; /* parent enode. */ 1042 struct maple_enode *a_enode; /* ancestor enode. */ 1043 struct maple_node *a_node; /* ancestor node. */ 1044 struct maple_node *p_node; /* parent node. */ 1045 unsigned char a_slot; 1046 enum maple_type a_type; 1047 unsigned long min, max; 1048 unsigned long *pivots; 1049 bool set_max = false, set_min = false; 1050 1051 a_node = mas_mn(mas); 1052 if (ma_is_root(a_node)) { 1053 mas->offset = 0; 1054 return 0; 1055 } 1056 1057 p_node = mte_parent(mas->node); 1058 if (unlikely(a_node == p_node)) 1059 return 1; 1060 1061 a_type = mas_parent_type(mas, mas->node); 1062 mas->offset = mte_parent_slot(mas->node); 1063 a_enode = mt_mk_node(p_node, a_type); 1064 1065 /* Check to make sure all parent information is still accurate */ 1066 if (p_node != mte_parent(mas->node)) 1067 return 1; 1068 1069 mas->node = a_enode; 1070 1071 if (mte_is_root(a_enode)) { 1072 mas->max = ULONG_MAX; 1073 mas->min = 0; 1074 return 0; 1075 } 1076 1077 min = 0; 1078 max = ULONG_MAX; 1079 1080 /* 1081 * !mas->offset implies that parent node min == mas->min. 1082 * mas->offset > 0 implies that we need to walk up to find the 1083 * implied pivot min. 1084 */ 1085 if (!mas->offset) { 1086 min = mas->min; 1087 set_min = true; 1088 } 1089 1090 if (mas->max == ULONG_MAX) 1091 set_max = true; 1092 1093 do { 1094 p_enode = a_enode; 1095 a_type = mas_parent_type(mas, p_enode); 1096 a_node = mte_parent(p_enode); 1097 a_slot = mte_parent_slot(p_enode); 1098 a_enode = mt_mk_node(a_node, a_type); 1099 pivots = ma_pivots(a_node, a_type); 1100 1101 if (unlikely(ma_dead_node(a_node))) 1102 return 1; 1103 1104 if (!set_min && a_slot) { 1105 set_min = true; 1106 min = pivots[a_slot - 1] + 1; 1107 } 1108 1109 if (!set_max && a_slot < mt_pivots[a_type]) { 1110 set_max = true; 1111 max = pivots[a_slot]; 1112 } 1113 1114 if (unlikely(ma_dead_node(a_node))) 1115 return 1; 1116 1117 if (unlikely(ma_is_root(a_node))) 1118 break; 1119 1120 } while (!set_min || !set_max); 1121 1122 mas->max = max; 1123 mas->min = min; 1124 return 0; 1125 } 1126 1127 /* 1128 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1129 * @mas: The maple state 1130 * 1131 * Return: A pointer to a maple node. 1132 */ 1133 static inline struct maple_node *mas_pop_node(struct ma_state *mas) 1134 { 1135 struct maple_alloc *ret, *node = mas->alloc; 1136 unsigned long total = mas_allocated(mas); 1137 unsigned int req = mas_alloc_req(mas); 1138 1139 /* nothing or a request pending. */ 1140 if (WARN_ON(!total)) 1141 return NULL; 1142 1143 if (total == 1) { 1144 /* single allocation in this ma_state */ 1145 mas->alloc = NULL; 1146 ret = node; 1147 goto single_node; 1148 } 1149 1150 if (node->node_count == 1) { 1151 /* Single allocation in this node. */ 1152 mas->alloc = node->slot[0]; 1153 mas->alloc->total = node->total - 1; 1154 ret = node; 1155 goto new_head; 1156 } 1157 node->total--; 1158 ret = node->slot[--node->node_count]; 1159 node->slot[node->node_count] = NULL; 1160 1161 single_node: 1162 new_head: 1163 if (req) { 1164 req++; 1165 mas_set_alloc_req(mas, req); 1166 } 1167 1168 memset(ret, 0, sizeof(*ret)); 1169 return (struct maple_node *)ret; 1170 } 1171 1172 /* 1173 * mas_push_node() - Push a node back on the maple state allocation. 1174 * @mas: The maple state 1175 * @used: The used maple node 1176 * 1177 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and 1178 * requested node count as necessary. 1179 */ 1180 static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) 1181 { 1182 struct maple_alloc *reuse = (struct maple_alloc *)used; 1183 struct maple_alloc *head = mas->alloc; 1184 unsigned long count; 1185 unsigned int requested = mas_alloc_req(mas); 1186 1187 count = mas_allocated(mas); 1188 1189 reuse->request_count = 0; 1190 reuse->node_count = 0; 1191 if (count) { 1192 if (head->node_count < MAPLE_ALLOC_SLOTS) { 1193 head->slot[head->node_count++] = reuse; 1194 head->total++; 1195 goto done; 1196 } 1197 reuse->slot[0] = head; 1198 reuse->node_count = 1; 1199 } 1200 1201 reuse->total = count + 1; 1202 mas->alloc = reuse; 1203 done: 1204 if (requested > 1) 1205 mas_set_alloc_req(mas, requested - 1); 1206 } 1207 1208 /* 1209 * mas_alloc_nodes() - Allocate nodes into a maple state 1210 * @mas: The maple state 1211 * @gfp: The GFP Flags 1212 */ 1213 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1214 { 1215 struct maple_alloc *node; 1216 unsigned long allocated = mas_allocated(mas); 1217 unsigned int requested = mas_alloc_req(mas); 1218 unsigned int count; 1219 void **slots = NULL; 1220 unsigned int max_req = 0; 1221 1222 if (!requested) 1223 return; 1224 1225 mas_set_alloc_req(mas, 0); 1226 if (mas->mas_flags & MA_STATE_PREALLOC) { 1227 if (allocated) 1228 return; 1229 WARN_ON(!allocated); 1230 } 1231 1232 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { 1233 node = (struct maple_alloc *)mt_alloc_one(gfp); 1234 if (!node) 1235 goto nomem_one; 1236 1237 if (allocated) { 1238 node->slot[0] = mas->alloc; 1239 node->node_count = 1; 1240 } else { 1241 node->node_count = 0; 1242 } 1243 1244 mas->alloc = node; 1245 node->total = ++allocated; 1246 node->request_count = 0; 1247 requested--; 1248 } 1249 1250 node = mas->alloc; 1251 while (requested) { 1252 max_req = MAPLE_ALLOC_SLOTS - node->node_count; 1253 slots = (void **)&node->slot[node->node_count]; 1254 max_req = min(requested, max_req); 1255 count = mt_alloc_bulk(gfp, max_req, slots); 1256 if (!count) 1257 goto nomem_bulk; 1258 1259 if (node->node_count == 0) { 1260 node->slot[0]->node_count = 0; 1261 node->slot[0]->request_count = 0; 1262 } 1263 1264 node->node_count += count; 1265 allocated += count; 1266 /* find a non-full node*/ 1267 do { 1268 node = node->slot[0]; 1269 } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS)); 1270 requested -= count; 1271 } 1272 mas->alloc->total = allocated; 1273 return; 1274 1275 nomem_bulk: 1276 /* Clean up potential freed allocations on bulk failure */ 1277 memset(slots, 0, max_req * sizeof(unsigned long)); 1278 mas->alloc->total = allocated; 1279 nomem_one: 1280 mas_set_alloc_req(mas, requested); 1281 mas_set_err(mas, -ENOMEM); 1282 } 1283 1284 /* 1285 * mas_free() - Free an encoded maple node 1286 * @mas: The maple state 1287 * @used: The encoded maple node to free. 1288 * 1289 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1290 * otherwise. 1291 */ 1292 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1293 { 1294 struct maple_node *tmp = mte_to_node(used); 1295 1296 if (mt_in_rcu(mas->tree)) 1297 ma_free_rcu(tmp); 1298 else 1299 mas_push_node(mas, tmp); 1300 } 1301 1302 /* 1303 * mas_node_count_gfp() - Check if enough nodes are allocated and request more 1304 * if there is not enough nodes. 1305 * @mas: The maple state 1306 * @count: The number of nodes needed 1307 * @gfp: the gfp flags 1308 */ 1309 static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) 1310 { 1311 unsigned long allocated = mas_allocated(mas); 1312 1313 if (allocated < count) { 1314 mas_set_alloc_req(mas, count - allocated); 1315 mas_alloc_nodes(mas, gfp); 1316 } 1317 } 1318 1319 /* 1320 * mas_node_count() - Check if enough nodes are allocated and request more if 1321 * there is not enough nodes. 1322 * @mas: The maple state 1323 * @count: The number of nodes needed 1324 * 1325 * Note: Uses GFP_NOWAIT for gfp flags. 1326 */ 1327 static void mas_node_count(struct ma_state *mas, int count) 1328 { 1329 return mas_node_count_gfp(mas, count, GFP_NOWAIT); 1330 } 1331 1332 /* 1333 * mas_start() - Sets up maple state for operations. 1334 * @mas: The maple state. 1335 * 1336 * If mas->status == ma_start, then set the min, max and depth to 1337 * defaults. 1338 * 1339 * Return: 1340 * - If mas->node is an error or not mas_start, return NULL. 1341 * - If it's an empty tree: NULL & mas->status == ma_none 1342 * - If it's a single entry: The entry & mas->status == ma_root 1343 * - If it's a tree: NULL & mas->status == ma_active 1344 */ 1345 static inline struct maple_enode *mas_start(struct ma_state *mas) 1346 { 1347 if (likely(mas_is_start(mas))) { 1348 struct maple_enode *root; 1349 1350 mas->min = 0; 1351 mas->max = ULONG_MAX; 1352 1353 retry: 1354 mas->depth = 0; 1355 root = mas_root(mas); 1356 /* Tree with nodes */ 1357 if (likely(xa_is_node(root))) { 1358 mas->depth = 0; 1359 mas->status = ma_active; 1360 mas->node = mte_safe_root(root); 1361 mas->offset = 0; 1362 if (mte_dead_node(mas->node)) 1363 goto retry; 1364 1365 return NULL; 1366 } 1367 1368 mas->node = NULL; 1369 /* empty tree */ 1370 if (unlikely(!root)) { 1371 mas->status = ma_none; 1372 mas->offset = MAPLE_NODE_SLOTS; 1373 return NULL; 1374 } 1375 1376 /* Single entry tree */ 1377 mas->status = ma_root; 1378 mas->offset = MAPLE_NODE_SLOTS; 1379 1380 /* Single entry tree. */ 1381 if (mas->index > 0) 1382 return NULL; 1383 1384 return root; 1385 } 1386 1387 return NULL; 1388 } 1389 1390 /* 1391 * ma_data_end() - Find the end of the data in a node. 1392 * @node: The maple node 1393 * @type: The maple node type 1394 * @pivots: The array of pivots in the node 1395 * @max: The maximum value in the node 1396 * 1397 * Uses metadata to find the end of the data when possible. 1398 * Return: The zero indexed last slot with data (may be null). 1399 */ 1400 static __always_inline unsigned char ma_data_end(struct maple_node *node, 1401 enum maple_type type, unsigned long *pivots, unsigned long max) 1402 { 1403 unsigned char offset; 1404 1405 if (!pivots) 1406 return 0; 1407 1408 if (type == maple_arange_64) 1409 return ma_meta_end(node, type); 1410 1411 offset = mt_pivots[type] - 1; 1412 if (likely(!pivots[offset])) 1413 return ma_meta_end(node, type); 1414 1415 if (likely(pivots[offset] == max)) 1416 return offset; 1417 1418 return mt_pivots[type]; 1419 } 1420 1421 /* 1422 * mas_data_end() - Find the end of the data (slot). 1423 * @mas: the maple state 1424 * 1425 * This method is optimized to check the metadata of a node if the node type 1426 * supports data end metadata. 1427 * 1428 * Return: The zero indexed last slot with data (may be null). 1429 */ 1430 static inline unsigned char mas_data_end(struct ma_state *mas) 1431 { 1432 enum maple_type type; 1433 struct maple_node *node; 1434 unsigned char offset; 1435 unsigned long *pivots; 1436 1437 type = mte_node_type(mas->node); 1438 node = mas_mn(mas); 1439 if (type == maple_arange_64) 1440 return ma_meta_end(node, type); 1441 1442 pivots = ma_pivots(node, type); 1443 if (unlikely(ma_dead_node(node))) 1444 return 0; 1445 1446 offset = mt_pivots[type] - 1; 1447 if (likely(!pivots[offset])) 1448 return ma_meta_end(node, type); 1449 1450 if (likely(pivots[offset] == mas->max)) 1451 return offset; 1452 1453 return mt_pivots[type]; 1454 } 1455 1456 /* 1457 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1458 * @mas: the maple state 1459 * 1460 * Return: The maximum gap in the leaf. 1461 */ 1462 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1463 { 1464 enum maple_type mt; 1465 unsigned long pstart, gap, max_gap; 1466 struct maple_node *mn; 1467 unsigned long *pivots; 1468 void __rcu **slots; 1469 unsigned char i; 1470 unsigned char max_piv; 1471 1472 mt = mte_node_type(mas->node); 1473 mn = mas_mn(mas); 1474 slots = ma_slots(mn, mt); 1475 max_gap = 0; 1476 if (unlikely(ma_is_dense(mt))) { 1477 gap = 0; 1478 for (i = 0; i < mt_slots[mt]; i++) { 1479 if (slots[i]) { 1480 if (gap > max_gap) 1481 max_gap = gap; 1482 gap = 0; 1483 } else { 1484 gap++; 1485 } 1486 } 1487 if (gap > max_gap) 1488 max_gap = gap; 1489 return max_gap; 1490 } 1491 1492 /* 1493 * Check the first implied pivot optimizes the loop below and slot 1 may 1494 * be skipped if there is a gap in slot 0. 1495 */ 1496 pivots = ma_pivots(mn, mt); 1497 if (likely(!slots[0])) { 1498 max_gap = pivots[0] - mas->min + 1; 1499 i = 2; 1500 } else { 1501 i = 1; 1502 } 1503 1504 /* reduce max_piv as the special case is checked before the loop */ 1505 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1506 /* 1507 * Check end implied pivot which can only be a gap on the right most 1508 * node. 1509 */ 1510 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1511 gap = ULONG_MAX - pivots[max_piv]; 1512 if (gap > max_gap) 1513 max_gap = gap; 1514 1515 if (max_gap > pivots[max_piv] - mas->min) 1516 return max_gap; 1517 } 1518 1519 for (; i <= max_piv; i++) { 1520 /* data == no gap. */ 1521 if (likely(slots[i])) 1522 continue; 1523 1524 pstart = pivots[i - 1]; 1525 gap = pivots[i] - pstart; 1526 if (gap > max_gap) 1527 max_gap = gap; 1528 1529 /* There cannot be two gaps in a row. */ 1530 i++; 1531 } 1532 return max_gap; 1533 } 1534 1535 /* 1536 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1537 * @node: The maple node 1538 * @gaps: The pointer to the gaps 1539 * @mt: The maple node type 1540 * @off: Pointer to store the offset location of the gap. 1541 * 1542 * Uses the metadata data end to scan backwards across set gaps. 1543 * 1544 * Return: The maximum gap value 1545 */ 1546 static inline unsigned long 1547 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1548 unsigned char *off) 1549 { 1550 unsigned char offset, i; 1551 unsigned long max_gap = 0; 1552 1553 i = offset = ma_meta_end(node, mt); 1554 do { 1555 if (gaps[i] > max_gap) { 1556 max_gap = gaps[i]; 1557 offset = i; 1558 } 1559 } while (i--); 1560 1561 *off = offset; 1562 return max_gap; 1563 } 1564 1565 /* 1566 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1567 * @mas: The maple state. 1568 * 1569 * Return: The gap value. 1570 */ 1571 static inline unsigned long mas_max_gap(struct ma_state *mas) 1572 { 1573 unsigned long *gaps; 1574 unsigned char offset; 1575 enum maple_type mt; 1576 struct maple_node *node; 1577 1578 mt = mte_node_type(mas->node); 1579 if (ma_is_leaf(mt)) 1580 return mas_leaf_max_gap(mas); 1581 1582 node = mas_mn(mas); 1583 MAS_BUG_ON(mas, mt != maple_arange_64); 1584 offset = ma_meta_gap(node); 1585 gaps = ma_gaps(node, mt); 1586 return gaps[offset]; 1587 } 1588 1589 /* 1590 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1591 * @mas: The maple state 1592 * @offset: The gap offset in the parent to set 1593 * @new: The new gap value. 1594 * 1595 * Set the parent gap then continue to set the gap upwards, using the metadata 1596 * of the parent to see if it is necessary to check the node above. 1597 */ 1598 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1599 unsigned long new) 1600 { 1601 unsigned long meta_gap = 0; 1602 struct maple_node *pnode; 1603 struct maple_enode *penode; 1604 unsigned long *pgaps; 1605 unsigned char meta_offset; 1606 enum maple_type pmt; 1607 1608 pnode = mte_parent(mas->node); 1609 pmt = mas_parent_type(mas, mas->node); 1610 penode = mt_mk_node(pnode, pmt); 1611 pgaps = ma_gaps(pnode, pmt); 1612 1613 ascend: 1614 MAS_BUG_ON(mas, pmt != maple_arange_64); 1615 meta_offset = ma_meta_gap(pnode); 1616 meta_gap = pgaps[meta_offset]; 1617 1618 pgaps[offset] = new; 1619 1620 if (meta_gap == new) 1621 return; 1622 1623 if (offset != meta_offset) { 1624 if (meta_gap > new) 1625 return; 1626 1627 ma_set_meta_gap(pnode, pmt, offset); 1628 } else if (new < meta_gap) { 1629 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1630 ma_set_meta_gap(pnode, pmt, meta_offset); 1631 } 1632 1633 if (ma_is_root(pnode)) 1634 return; 1635 1636 /* Go to the parent node. */ 1637 pnode = mte_parent(penode); 1638 pmt = mas_parent_type(mas, penode); 1639 pgaps = ma_gaps(pnode, pmt); 1640 offset = mte_parent_slot(penode); 1641 penode = mt_mk_node(pnode, pmt); 1642 goto ascend; 1643 } 1644 1645 /* 1646 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1647 * @mas: the maple state. 1648 */ 1649 static inline void mas_update_gap(struct ma_state *mas) 1650 { 1651 unsigned char pslot; 1652 unsigned long p_gap; 1653 unsigned long max_gap; 1654 1655 if (!mt_is_alloc(mas->tree)) 1656 return; 1657 1658 if (mte_is_root(mas->node)) 1659 return; 1660 1661 max_gap = mas_max_gap(mas); 1662 1663 pslot = mte_parent_slot(mas->node); 1664 p_gap = ma_gaps(mte_parent(mas->node), 1665 mas_parent_type(mas, mas->node))[pslot]; 1666 1667 if (p_gap != max_gap) 1668 mas_parent_gap(mas, pslot, max_gap); 1669 } 1670 1671 /* 1672 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1673 * @parent with the slot encoded. 1674 * @mas: the maple state (for the tree) 1675 * @parent: the maple encoded node containing the children. 1676 */ 1677 static inline void mas_adopt_children(struct ma_state *mas, 1678 struct maple_enode *parent) 1679 { 1680 enum maple_type type = mte_node_type(parent); 1681 struct maple_node *node = mte_to_node(parent); 1682 void __rcu **slots = ma_slots(node, type); 1683 unsigned long *pivots = ma_pivots(node, type); 1684 struct maple_enode *child; 1685 unsigned char offset; 1686 1687 offset = ma_data_end(node, type, pivots, mas->max); 1688 do { 1689 child = mas_slot_locked(mas, slots, offset); 1690 mas_set_parent(mas, child, parent, offset); 1691 } while (offset--); 1692 } 1693 1694 /* 1695 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1696 * node as dead. 1697 * @mas: the maple state with the new node 1698 * @old_enode: The old maple encoded node to replace. 1699 * @new_height: if we are inserting a root node, update the height of the tree 1700 */ 1701 static inline void mas_put_in_tree(struct ma_state *mas, 1702 struct maple_enode *old_enode, char new_height) 1703 __must_hold(mas->tree->ma_lock) 1704 { 1705 unsigned char offset; 1706 void __rcu **slots; 1707 1708 if (mte_is_root(mas->node)) { 1709 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1710 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1711 mt_set_height(mas->tree, new_height); 1712 } else { 1713 1714 offset = mte_parent_slot(mas->node); 1715 slots = ma_slots(mte_parent(mas->node), 1716 mas_parent_type(mas, mas->node)); 1717 rcu_assign_pointer(slots[offset], mas->node); 1718 } 1719 1720 mte_set_node_dead(old_enode); 1721 } 1722 1723 /* 1724 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1725 * dead, and freeing it. 1726 * the parent encoding to locate the maple node in the tree. 1727 * @mas: the ma_state with @mas->node pointing to the new node. 1728 * @old_enode: The old maple encoded node. 1729 * @new_height: The new height of the tree as a result of the operation 1730 */ 1731 static inline void mas_replace_node(struct ma_state *mas, 1732 struct maple_enode *old_enode, unsigned char new_height) 1733 __must_hold(mas->tree->ma_lock) 1734 { 1735 mas_put_in_tree(mas, old_enode, new_height); 1736 mas_free(mas, old_enode); 1737 } 1738 1739 /* 1740 * mas_find_child() - Find a child who has the parent @mas->node. 1741 * @mas: the maple state with the parent. 1742 * @child: the maple state to store the child. 1743 */ 1744 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) 1745 __must_hold(mas->tree->ma_lock) 1746 { 1747 enum maple_type mt; 1748 unsigned char offset; 1749 unsigned char end; 1750 unsigned long *pivots; 1751 struct maple_enode *entry; 1752 struct maple_node *node; 1753 void __rcu **slots; 1754 1755 mt = mte_node_type(mas->node); 1756 node = mas_mn(mas); 1757 slots = ma_slots(node, mt); 1758 pivots = ma_pivots(node, mt); 1759 end = ma_data_end(node, mt, pivots, mas->max); 1760 for (offset = mas->offset; offset <= end; offset++) { 1761 entry = mas_slot_locked(mas, slots, offset); 1762 if (mte_parent(entry) == node) { 1763 *child = *mas; 1764 mas->offset = offset + 1; 1765 child->offset = offset; 1766 mas_descend(child); 1767 child->offset = 0; 1768 return true; 1769 } 1770 } 1771 return false; 1772 } 1773 1774 /* 1775 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1776 * old data or set b_node->b_end. 1777 * @b_node: the maple_big_node 1778 * @shift: the shift count 1779 */ 1780 static inline void mab_shift_right(struct maple_big_node *b_node, 1781 unsigned char shift) 1782 { 1783 unsigned long size = b_node->b_end * sizeof(unsigned long); 1784 1785 memmove(b_node->pivot + shift, b_node->pivot, size); 1786 memmove(b_node->slot + shift, b_node->slot, size); 1787 if (b_node->type == maple_arange_64) 1788 memmove(b_node->gap + shift, b_node->gap, size); 1789 } 1790 1791 /* 1792 * mab_middle_node() - Check if a middle node is needed (unlikely) 1793 * @b_node: the maple_big_node that contains the data. 1794 * @split: the potential split location 1795 * @slot_count: the size that can be stored in a single node being considered. 1796 * 1797 * Return: true if a middle node is required. 1798 */ 1799 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1800 unsigned char slot_count) 1801 { 1802 unsigned char size = b_node->b_end; 1803 1804 if (size >= 2 * slot_count) 1805 return true; 1806 1807 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1808 return true; 1809 1810 return false; 1811 } 1812 1813 /* 1814 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1815 * @b_node: the maple_big_node with the data 1816 * @split: the suggested split location 1817 * @slot_count: the number of slots in the node being considered. 1818 * 1819 * Return: the split location. 1820 */ 1821 static inline int mab_no_null_split(struct maple_big_node *b_node, 1822 unsigned char split, unsigned char slot_count) 1823 { 1824 if (!b_node->slot[split]) { 1825 /* 1826 * If the split is less than the max slot && the right side will 1827 * still be sufficient, then increment the split on NULL. 1828 */ 1829 if ((split < slot_count - 1) && 1830 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1831 split++; 1832 else 1833 split--; 1834 } 1835 return split; 1836 } 1837 1838 /* 1839 * mab_calc_split() - Calculate the split location and if there needs to be two 1840 * splits. 1841 * @mas: The maple state 1842 * @bn: The maple_big_node with the data 1843 * @mid_split: The second split, if required. 0 otherwise. 1844 * 1845 * Return: The first split location. The middle split is set in @mid_split. 1846 */ 1847 static inline int mab_calc_split(struct ma_state *mas, 1848 struct maple_big_node *bn, unsigned char *mid_split) 1849 { 1850 unsigned char b_end = bn->b_end; 1851 int split = b_end / 2; /* Assume equal split. */ 1852 unsigned char slot_count = mt_slots[bn->type]; 1853 1854 /* 1855 * To support gap tracking, all NULL entries are kept together and a node cannot 1856 * end on a NULL entry, with the exception of the left-most leaf. The 1857 * limitation means that the split of a node must be checked for this condition 1858 * and be able to put more data in one direction or the other. 1859 * 1860 * Although extremely rare, it is possible to enter what is known as the 3-way 1861 * split scenario. The 3-way split comes about by means of a store of a range 1862 * that overwrites the end and beginning of two full nodes. The result is a set 1863 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can 1864 * also be located in different parent nodes which are also full. This can 1865 * carry upwards all the way to the root in the worst case. 1866 */ 1867 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1868 split = b_end / 3; 1869 *mid_split = split * 2; 1870 } else { 1871 *mid_split = 0; 1872 } 1873 1874 /* Avoid ending a node on a NULL entry */ 1875 split = mab_no_null_split(bn, split, slot_count); 1876 1877 if (unlikely(*mid_split)) 1878 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1879 1880 return split; 1881 } 1882 1883 /* 1884 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1885 * and set @b_node->b_end to the next free slot. 1886 * @mas: The maple state 1887 * @mas_start: The starting slot to copy 1888 * @mas_end: The end slot to copy (inclusively) 1889 * @b_node: The maple_big_node to place the data 1890 * @mab_start: The starting location in maple_big_node to store the data. 1891 */ 1892 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1893 unsigned char mas_end, struct maple_big_node *b_node, 1894 unsigned char mab_start) 1895 { 1896 enum maple_type mt; 1897 struct maple_node *node; 1898 void __rcu **slots; 1899 unsigned long *pivots, *gaps; 1900 int i = mas_start, j = mab_start; 1901 unsigned char piv_end; 1902 1903 node = mas_mn(mas); 1904 mt = mte_node_type(mas->node); 1905 pivots = ma_pivots(node, mt); 1906 if (!i) { 1907 b_node->pivot[j] = pivots[i++]; 1908 if (unlikely(i > mas_end)) 1909 goto complete; 1910 j++; 1911 } 1912 1913 piv_end = min(mas_end, mt_pivots[mt]); 1914 for (; i < piv_end; i++, j++) { 1915 b_node->pivot[j] = pivots[i]; 1916 if (unlikely(!b_node->pivot[j])) 1917 goto complete; 1918 1919 if (unlikely(mas->max == b_node->pivot[j])) 1920 goto complete; 1921 } 1922 1923 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1924 1925 complete: 1926 b_node->b_end = ++j; 1927 j -= mab_start; 1928 slots = ma_slots(node, mt); 1929 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1930 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1931 gaps = ma_gaps(node, mt); 1932 memcpy(b_node->gap + mab_start, gaps + mas_start, 1933 sizeof(unsigned long) * j); 1934 } 1935 } 1936 1937 /* 1938 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1939 * @node: The maple node 1940 * @mt: The maple type 1941 * @end: The node end 1942 */ 1943 static inline void mas_leaf_set_meta(struct maple_node *node, 1944 enum maple_type mt, unsigned char end) 1945 { 1946 if (end < mt_slots[mt] - 1) 1947 ma_set_meta(node, mt, 0, end); 1948 } 1949 1950 /* 1951 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 1952 * @b_node: the maple_big_node that has the data 1953 * @mab_start: the start location in @b_node. 1954 * @mab_end: The end location in @b_node (inclusively) 1955 * @mas: The maple state with the maple encoded node. 1956 */ 1957 static inline void mab_mas_cp(struct maple_big_node *b_node, 1958 unsigned char mab_start, unsigned char mab_end, 1959 struct ma_state *mas, bool new_max) 1960 { 1961 int i, j = 0; 1962 enum maple_type mt = mte_node_type(mas->node); 1963 struct maple_node *node = mte_to_node(mas->node); 1964 void __rcu **slots = ma_slots(node, mt); 1965 unsigned long *pivots = ma_pivots(node, mt); 1966 unsigned long *gaps = NULL; 1967 unsigned char end; 1968 1969 if (mab_end - mab_start > mt_pivots[mt]) 1970 mab_end--; 1971 1972 if (!pivots[mt_pivots[mt] - 1]) 1973 slots[mt_pivots[mt]] = NULL; 1974 1975 i = mab_start; 1976 do { 1977 pivots[j++] = b_node->pivot[i++]; 1978 } while (i <= mab_end && likely(b_node->pivot[i])); 1979 1980 memcpy(slots, b_node->slot + mab_start, 1981 sizeof(void *) * (i - mab_start)); 1982 1983 if (new_max) 1984 mas->max = b_node->pivot[i - 1]; 1985 1986 end = j - 1; 1987 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 1988 unsigned long max_gap = 0; 1989 unsigned char offset = 0; 1990 1991 gaps = ma_gaps(node, mt); 1992 do { 1993 gaps[--j] = b_node->gap[--i]; 1994 if (gaps[j] > max_gap) { 1995 offset = j; 1996 max_gap = gaps[j]; 1997 } 1998 } while (j); 1999 2000 ma_set_meta(node, mt, offset, end); 2001 } else { 2002 mas_leaf_set_meta(node, mt, end); 2003 } 2004 } 2005 2006 /* 2007 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2008 * data from a maple encoded node. 2009 * @wr_mas: the maple write state 2010 * @b_node: the maple_big_node to fill with data 2011 * @offset_end: the offset to end copying 2012 * 2013 * Return: The actual end of the data stored in @b_node 2014 */ 2015 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 2016 struct maple_big_node *b_node, unsigned char offset_end) 2017 { 2018 unsigned char slot; 2019 unsigned char b_end; 2020 /* Possible underflow of piv will wrap back to 0 before use. */ 2021 unsigned long piv; 2022 struct ma_state *mas = wr_mas->mas; 2023 2024 b_node->type = wr_mas->type; 2025 b_end = 0; 2026 slot = mas->offset; 2027 if (slot) { 2028 /* Copy start data up to insert. */ 2029 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 2030 b_end = b_node->b_end; 2031 piv = b_node->pivot[b_end - 1]; 2032 } else 2033 piv = mas->min - 1; 2034 2035 if (piv + 1 < mas->index) { 2036 /* Handle range starting after old range */ 2037 b_node->slot[b_end] = wr_mas->content; 2038 if (!wr_mas->content) 2039 b_node->gap[b_end] = mas->index - 1 - piv; 2040 b_node->pivot[b_end++] = mas->index - 1; 2041 } 2042 2043 /* Store the new entry. */ 2044 mas->offset = b_end; 2045 b_node->slot[b_end] = wr_mas->entry; 2046 b_node->pivot[b_end] = mas->last; 2047 2048 /* Appended. */ 2049 if (mas->last >= mas->max) 2050 goto b_end; 2051 2052 /* Handle new range ending before old range ends */ 2053 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2054 if (piv > mas->last) { 2055 if (offset_end != slot) 2056 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2057 offset_end); 2058 2059 b_node->slot[++b_end] = wr_mas->content; 2060 if (!wr_mas->content) 2061 b_node->gap[b_end] = piv - mas->last + 1; 2062 b_node->pivot[b_end] = piv; 2063 } 2064 2065 slot = offset_end + 1; 2066 if (slot > mas->end) 2067 goto b_end; 2068 2069 /* Copy end data to the end of the node. */ 2070 mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end); 2071 b_node->b_end--; 2072 return; 2073 2074 b_end: 2075 b_node->b_end = b_end; 2076 } 2077 2078 /* 2079 * mas_prev_sibling() - Find the previous node with the same parent. 2080 * @mas: the maple state 2081 * 2082 * Return: True if there is a previous sibling, false otherwise. 2083 */ 2084 static inline bool mas_prev_sibling(struct ma_state *mas) 2085 { 2086 unsigned int p_slot = mte_parent_slot(mas->node); 2087 2088 /* For root node, p_slot is set to 0 by mte_parent_slot(). */ 2089 if (!p_slot) 2090 return false; 2091 2092 mas_ascend(mas); 2093 mas->offset = p_slot - 1; 2094 mas_descend(mas); 2095 return true; 2096 } 2097 2098 /* 2099 * mas_next_sibling() - Find the next node with the same parent. 2100 * @mas: the maple state 2101 * 2102 * Return: true if there is a next sibling, false otherwise. 2103 */ 2104 static inline bool mas_next_sibling(struct ma_state *mas) 2105 { 2106 MA_STATE(parent, mas->tree, mas->index, mas->last); 2107 2108 if (mte_is_root(mas->node)) 2109 return false; 2110 2111 parent = *mas; 2112 mas_ascend(&parent); 2113 parent.offset = mte_parent_slot(mas->node) + 1; 2114 if (parent.offset > mas_data_end(&parent)) 2115 return false; 2116 2117 *mas = parent; 2118 mas_descend(mas); 2119 return true; 2120 } 2121 2122 /* 2123 * mas_node_or_none() - Set the enode and state. 2124 * @mas: the maple state 2125 * @enode: The encoded maple node. 2126 * 2127 * Set the node to the enode and the status. 2128 */ 2129 static inline void mas_node_or_none(struct ma_state *mas, 2130 struct maple_enode *enode) 2131 { 2132 if (enode) { 2133 mas->node = enode; 2134 mas->status = ma_active; 2135 } else { 2136 mas->node = NULL; 2137 mas->status = ma_none; 2138 } 2139 } 2140 2141 /* 2142 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 2143 * If @mas->index cannot be found within the containing 2144 * node, we traverse to the last entry in the node. 2145 * @wr_mas: The maple write state 2146 * 2147 * Uses mas_slot_locked() and does not need to worry about dead nodes. 2148 */ 2149 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 2150 { 2151 struct ma_state *mas = wr_mas->mas; 2152 unsigned char count, offset; 2153 2154 if (unlikely(ma_is_dense(wr_mas->type))) { 2155 wr_mas->r_max = wr_mas->r_min = mas->index; 2156 mas->offset = mas->index = mas->min; 2157 return; 2158 } 2159 2160 wr_mas->node = mas_mn(wr_mas->mas); 2161 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2162 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, 2163 wr_mas->pivots, mas->max); 2164 offset = mas->offset; 2165 2166 while (offset < count && mas->index > wr_mas->pivots[offset]) 2167 offset++; 2168 2169 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; 2170 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2171 wr_mas->offset_end = mas->offset = offset; 2172 } 2173 2174 /* 2175 * mast_rebalance_next() - Rebalance against the next node 2176 * @mast: The maple subtree state 2177 */ 2178 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2179 { 2180 unsigned char b_end = mast->bn->b_end; 2181 2182 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2183 mast->bn, b_end); 2184 mast->orig_r->last = mast->orig_r->max; 2185 } 2186 2187 /* 2188 * mast_rebalance_prev() - Rebalance against the previous node 2189 * @mast: The maple subtree state 2190 */ 2191 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2192 { 2193 unsigned char end = mas_data_end(mast->orig_l) + 1; 2194 unsigned char b_end = mast->bn->b_end; 2195 2196 mab_shift_right(mast->bn, end); 2197 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2198 mast->l->min = mast->orig_l->min; 2199 mast->orig_l->index = mast->orig_l->min; 2200 mast->bn->b_end = end + b_end; 2201 mast->l->offset += end; 2202 } 2203 2204 /* 2205 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2206 * the node to the right. Checking the nodes to the right then the left at each 2207 * level upwards until root is reached. 2208 * Data is copied into the @mast->bn. 2209 * @mast: The maple_subtree_state. 2210 */ 2211 static inline 2212 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2213 { 2214 struct ma_state r_tmp = *mast->orig_r; 2215 struct ma_state l_tmp = *mast->orig_l; 2216 unsigned char depth = 0; 2217 2218 do { 2219 mas_ascend(mast->orig_r); 2220 mas_ascend(mast->orig_l); 2221 depth++; 2222 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2223 mast->orig_r->offset++; 2224 do { 2225 mas_descend(mast->orig_r); 2226 mast->orig_r->offset = 0; 2227 } while (--depth); 2228 2229 mast_rebalance_next(mast); 2230 *mast->orig_l = l_tmp; 2231 return true; 2232 } else if (mast->orig_l->offset != 0) { 2233 mast->orig_l->offset--; 2234 do { 2235 mas_descend(mast->orig_l); 2236 mast->orig_l->offset = 2237 mas_data_end(mast->orig_l); 2238 } while (--depth); 2239 2240 mast_rebalance_prev(mast); 2241 *mast->orig_r = r_tmp; 2242 return true; 2243 } 2244 } while (!mte_is_root(mast->orig_r->node)); 2245 2246 *mast->orig_r = r_tmp; 2247 *mast->orig_l = l_tmp; 2248 return false; 2249 } 2250 2251 /* 2252 * mast_ascend() - Ascend the original left and right maple states. 2253 * @mast: the maple subtree state. 2254 * 2255 * Ascend the original left and right sides. Set the offsets to point to the 2256 * data already in the new tree (@mast->l and @mast->r). 2257 */ 2258 static inline void mast_ascend(struct maple_subtree_state *mast) 2259 { 2260 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2261 mas_ascend(mast->orig_l); 2262 mas_ascend(mast->orig_r); 2263 2264 mast->orig_r->offset = 0; 2265 mast->orig_r->index = mast->r->max; 2266 /* last should be larger than or equal to index */ 2267 if (mast->orig_r->last < mast->orig_r->index) 2268 mast->orig_r->last = mast->orig_r->index; 2269 2270 wr_mas.type = mte_node_type(mast->orig_r->node); 2271 mas_wr_node_walk(&wr_mas); 2272 /* Set up the left side of things */ 2273 mast->orig_l->offset = 0; 2274 mast->orig_l->index = mast->l->min; 2275 wr_mas.mas = mast->orig_l; 2276 wr_mas.type = mte_node_type(mast->orig_l->node); 2277 mas_wr_node_walk(&wr_mas); 2278 2279 mast->bn->type = wr_mas.type; 2280 } 2281 2282 /* 2283 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2284 * @mas: the maple state with the allocations. 2285 * @b_node: the maple_big_node with the type encoding. 2286 * 2287 * Use the node type from the maple_big_node to allocate a new node from the 2288 * ma_state. This function exists mainly for code readability. 2289 * 2290 * Return: A new maple encoded node 2291 */ 2292 static inline struct maple_enode 2293 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2294 { 2295 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2296 } 2297 2298 /* 2299 * mas_mab_to_node() - Set up right and middle nodes 2300 * 2301 * @mas: the maple state that contains the allocations. 2302 * @b_node: the node which contains the data. 2303 * @left: The pointer which will have the left node 2304 * @right: The pointer which may have the right node 2305 * @middle: the pointer which may have the middle node (rare) 2306 * @mid_split: the split location for the middle node 2307 * 2308 * Return: the split of left. 2309 */ 2310 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2311 struct maple_big_node *b_node, struct maple_enode **left, 2312 struct maple_enode **right, struct maple_enode **middle, 2313 unsigned char *mid_split) 2314 { 2315 unsigned char split = 0; 2316 unsigned char slot_count = mt_slots[b_node->type]; 2317 2318 *left = mas_new_ma_node(mas, b_node); 2319 *right = NULL; 2320 *middle = NULL; 2321 *mid_split = 0; 2322 2323 if (b_node->b_end < slot_count) { 2324 split = b_node->b_end; 2325 } else { 2326 split = mab_calc_split(mas, b_node, mid_split); 2327 *right = mas_new_ma_node(mas, b_node); 2328 } 2329 2330 if (*mid_split) 2331 *middle = mas_new_ma_node(mas, b_node); 2332 2333 return split; 2334 2335 } 2336 2337 /* 2338 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2339 * pointer. 2340 * @b_node: the big node to add the entry 2341 * @mas: the maple state to get the pivot (mas->max) 2342 * @entry: the entry to add, if NULL nothing happens. 2343 */ 2344 static inline void mab_set_b_end(struct maple_big_node *b_node, 2345 struct ma_state *mas, 2346 void *entry) 2347 { 2348 if (!entry) 2349 return; 2350 2351 b_node->slot[b_node->b_end] = entry; 2352 if (mt_is_alloc(mas->tree)) 2353 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2354 b_node->pivot[b_node->b_end++] = mas->max; 2355 } 2356 2357 /* 2358 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent 2359 * of @mas->node to either @left or @right, depending on @slot and @split 2360 * 2361 * @mas: the maple state with the node that needs a parent 2362 * @left: possible parent 1 2363 * @right: possible parent 2 2364 * @slot: the slot the mas->node was placed 2365 * @split: the split location between @left and @right 2366 */ 2367 static inline void mas_set_split_parent(struct ma_state *mas, 2368 struct maple_enode *left, 2369 struct maple_enode *right, 2370 unsigned char *slot, unsigned char split) 2371 { 2372 if (mas_is_none(mas)) 2373 return; 2374 2375 if ((*slot) <= split) 2376 mas_set_parent(mas, mas->node, left, *slot); 2377 else if (right) 2378 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2379 2380 (*slot)++; 2381 } 2382 2383 /* 2384 * mte_mid_split_check() - Check if the next node passes the mid-split 2385 * @l: Pointer to left encoded maple node. 2386 * @m: Pointer to middle encoded maple node. 2387 * @r: Pointer to right encoded maple node. 2388 * @slot: The offset 2389 * @split: The split location. 2390 * @mid_split: The middle split. 2391 */ 2392 static inline void mte_mid_split_check(struct maple_enode **l, 2393 struct maple_enode **r, 2394 struct maple_enode *right, 2395 unsigned char slot, 2396 unsigned char *split, 2397 unsigned char mid_split) 2398 { 2399 if (*r == right) 2400 return; 2401 2402 if (slot < mid_split) 2403 return; 2404 2405 *l = *r; 2406 *r = right; 2407 *split = mid_split; 2408 } 2409 2410 /* 2411 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2412 * is taken from @mast->l. 2413 * @mast: the maple subtree state 2414 * @left: the left node 2415 * @right: the right node 2416 * @split: the split location. 2417 */ 2418 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2419 struct maple_enode *left, 2420 struct maple_enode *middle, 2421 struct maple_enode *right, 2422 unsigned char split, 2423 unsigned char mid_split) 2424 { 2425 unsigned char slot; 2426 struct maple_enode *l = left; 2427 struct maple_enode *r = right; 2428 2429 if (mas_is_none(mast->l)) 2430 return; 2431 2432 if (middle) 2433 r = middle; 2434 2435 slot = mast->l->offset; 2436 2437 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2438 mas_set_split_parent(mast->l, l, r, &slot, split); 2439 2440 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2441 mas_set_split_parent(mast->m, l, r, &slot, split); 2442 2443 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2444 mas_set_split_parent(mast->r, l, r, &slot, split); 2445 } 2446 2447 /* 2448 * mas_topiary_node() - Dispose of a single node 2449 * @mas: The maple state for pushing nodes 2450 * @in_rcu: If the tree is in rcu mode 2451 * 2452 * The node will either be RCU freed or pushed back on the maple state. 2453 */ 2454 static inline void mas_topiary_node(struct ma_state *mas, 2455 struct ma_state *tmp_mas, bool in_rcu) 2456 { 2457 struct maple_node *tmp; 2458 struct maple_enode *enode; 2459 2460 if (mas_is_none(tmp_mas)) 2461 return; 2462 2463 enode = tmp_mas->node; 2464 tmp = mte_to_node(enode); 2465 mte_set_node_dead(enode); 2466 if (in_rcu) 2467 ma_free_rcu(tmp); 2468 else 2469 mas_push_node(mas, tmp); 2470 } 2471 2472 /* 2473 * mas_topiary_replace() - Replace the data with new data, then repair the 2474 * parent links within the new tree. Iterate over the dead sub-tree and collect 2475 * the dead subtrees and topiary the nodes that are no longer of use. 2476 * 2477 * The new tree will have up to three children with the correct parent. Keep 2478 * track of the new entries as they need to be followed to find the next level 2479 * of new entries. 2480 * 2481 * The old tree will have up to three children with the old parent. Keep track 2482 * of the old entries as they may have more nodes below replaced. Nodes within 2483 * [index, last] are dead subtrees, others need to be freed and followed. 2484 * 2485 * @mas: The maple state pointing at the new data 2486 * @old_enode: The maple encoded node being replaced 2487 * @new_height: The new height of the tree as a result of the operation 2488 * 2489 */ 2490 static inline void mas_topiary_replace(struct ma_state *mas, 2491 struct maple_enode *old_enode, unsigned char new_height) 2492 { 2493 struct ma_state tmp[3], tmp_next[3]; 2494 MA_TOPIARY(subtrees, mas->tree); 2495 bool in_rcu; 2496 int i, n; 2497 2498 /* Place data in tree & then mark node as old */ 2499 mas_put_in_tree(mas, old_enode, new_height); 2500 2501 /* Update the parent pointers in the tree */ 2502 tmp[0] = *mas; 2503 tmp[0].offset = 0; 2504 tmp[1].status = ma_none; 2505 tmp[2].status = ma_none; 2506 while (!mte_is_leaf(tmp[0].node)) { 2507 n = 0; 2508 for (i = 0; i < 3; i++) { 2509 if (mas_is_none(&tmp[i])) 2510 continue; 2511 2512 while (n < 3) { 2513 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2514 break; 2515 n++; 2516 } 2517 2518 mas_adopt_children(&tmp[i], tmp[i].node); 2519 } 2520 2521 if (MAS_WARN_ON(mas, n == 0)) 2522 break; 2523 2524 while (n < 3) 2525 tmp_next[n++].status = ma_none; 2526 2527 for (i = 0; i < 3; i++) 2528 tmp[i] = tmp_next[i]; 2529 } 2530 2531 /* Collect the old nodes that need to be discarded */ 2532 if (mte_is_leaf(old_enode)) 2533 return mas_free(mas, old_enode); 2534 2535 tmp[0] = *mas; 2536 tmp[0].offset = 0; 2537 tmp[0].node = old_enode; 2538 tmp[1].status = ma_none; 2539 tmp[2].status = ma_none; 2540 in_rcu = mt_in_rcu(mas->tree); 2541 do { 2542 n = 0; 2543 for (i = 0; i < 3; i++) { 2544 if (mas_is_none(&tmp[i])) 2545 continue; 2546 2547 while (n < 3) { 2548 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2549 break; 2550 2551 if ((tmp_next[n].min >= tmp_next->index) && 2552 (tmp_next[n].max <= tmp_next->last)) { 2553 mat_add(&subtrees, tmp_next[n].node); 2554 tmp_next[n].status = ma_none; 2555 } else { 2556 n++; 2557 } 2558 } 2559 } 2560 2561 if (MAS_WARN_ON(mas, n == 0)) 2562 break; 2563 2564 while (n < 3) 2565 tmp_next[n++].status = ma_none; 2566 2567 for (i = 0; i < 3; i++) { 2568 mas_topiary_node(mas, &tmp[i], in_rcu); 2569 tmp[i] = tmp_next[i]; 2570 } 2571 } while (!mte_is_leaf(tmp[0].node)); 2572 2573 for (i = 0; i < 3; i++) 2574 mas_topiary_node(mas, &tmp[i], in_rcu); 2575 2576 mas_mat_destroy(mas, &subtrees); 2577 } 2578 2579 /* 2580 * mas_wmb_replace() - Write memory barrier and replace 2581 * @mas: The maple state 2582 * @old_enode: The old maple encoded node that is being replaced. 2583 * @new_height: The new height of the tree as a result of the operation 2584 * 2585 * Updates gap as necessary. 2586 */ 2587 static inline void mas_wmb_replace(struct ma_state *mas, 2588 struct maple_enode *old_enode, unsigned char new_height) 2589 { 2590 /* Insert the new data in the tree */ 2591 mas_topiary_replace(mas, old_enode, new_height); 2592 2593 if (mte_is_leaf(mas->node)) 2594 return; 2595 2596 mas_update_gap(mas); 2597 } 2598 2599 /* 2600 * mast_cp_to_nodes() - Copy data out to nodes. 2601 * @mast: The maple subtree state 2602 * @left: The left encoded maple node 2603 * @middle: The middle encoded maple node 2604 * @right: The right encoded maple node 2605 * @split: The location to split between left and (middle ? middle : right) 2606 * @mid_split: The location to split between middle and right. 2607 */ 2608 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2609 struct maple_enode *left, struct maple_enode *middle, 2610 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2611 { 2612 bool new_lmax = true; 2613 2614 mas_node_or_none(mast->l, left); 2615 mas_node_or_none(mast->m, middle); 2616 mas_node_or_none(mast->r, right); 2617 2618 mast->l->min = mast->orig_l->min; 2619 if (split == mast->bn->b_end) { 2620 mast->l->max = mast->orig_r->max; 2621 new_lmax = false; 2622 } 2623 2624 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2625 2626 if (middle) { 2627 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2628 mast->m->min = mast->bn->pivot[split] + 1; 2629 split = mid_split; 2630 } 2631 2632 mast->r->max = mast->orig_r->max; 2633 if (right) { 2634 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2635 mast->r->min = mast->bn->pivot[split] + 1; 2636 } 2637 } 2638 2639 /* 2640 * mast_combine_cp_left - Copy in the original left side of the tree into the 2641 * combined data set in the maple subtree state big node. 2642 * @mast: The maple subtree state 2643 */ 2644 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2645 { 2646 unsigned char l_slot = mast->orig_l->offset; 2647 2648 if (!l_slot) 2649 return; 2650 2651 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2652 } 2653 2654 /* 2655 * mast_combine_cp_right: Copy in the original right side of the tree into the 2656 * combined data set in the maple subtree state big node. 2657 * @mast: The maple subtree state 2658 */ 2659 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2660 { 2661 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2662 return; 2663 2664 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2665 mt_slot_count(mast->orig_r->node), mast->bn, 2666 mast->bn->b_end); 2667 mast->orig_r->last = mast->orig_r->max; 2668 } 2669 2670 /* 2671 * mast_sufficient: Check if the maple subtree state has enough data in the big 2672 * node to create at least one sufficient node 2673 * @mast: the maple subtree state 2674 */ 2675 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2676 { 2677 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2678 return true; 2679 2680 return false; 2681 } 2682 2683 /* 2684 * mast_overflow: Check if there is too much data in the subtree state for a 2685 * single node. 2686 * @mast: The maple subtree state 2687 */ 2688 static inline bool mast_overflow(struct maple_subtree_state *mast) 2689 { 2690 if (mast->bn->b_end > mt_slot_count(mast->orig_l->node)) 2691 return true; 2692 2693 return false; 2694 } 2695 2696 static inline void *mtree_range_walk(struct ma_state *mas) 2697 { 2698 unsigned long *pivots; 2699 unsigned char offset; 2700 struct maple_node *node; 2701 struct maple_enode *next, *last; 2702 enum maple_type type; 2703 void __rcu **slots; 2704 unsigned char end; 2705 unsigned long max, min; 2706 unsigned long prev_max, prev_min; 2707 2708 next = mas->node; 2709 min = mas->min; 2710 max = mas->max; 2711 do { 2712 last = next; 2713 node = mte_to_node(next); 2714 type = mte_node_type(next); 2715 pivots = ma_pivots(node, type); 2716 end = ma_data_end(node, type, pivots, max); 2717 prev_min = min; 2718 prev_max = max; 2719 if (pivots[0] >= mas->index) { 2720 offset = 0; 2721 max = pivots[0]; 2722 goto next; 2723 } 2724 2725 offset = 1; 2726 while (offset < end) { 2727 if (pivots[offset] >= mas->index) { 2728 max = pivots[offset]; 2729 break; 2730 } 2731 offset++; 2732 } 2733 2734 min = pivots[offset - 1] + 1; 2735 next: 2736 slots = ma_slots(node, type); 2737 next = mt_slot(mas->tree, slots, offset); 2738 if (unlikely(ma_dead_node(node))) 2739 goto dead_node; 2740 } while (!ma_is_leaf(type)); 2741 2742 mas->end = end; 2743 mas->offset = offset; 2744 mas->index = min; 2745 mas->last = max; 2746 mas->min = prev_min; 2747 mas->max = prev_max; 2748 mas->node = last; 2749 return (void *)next; 2750 2751 dead_node: 2752 mas_reset(mas); 2753 return NULL; 2754 } 2755 2756 /* 2757 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2758 * @mas: The starting maple state 2759 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2760 * @count: The estimated count of iterations needed. 2761 * 2762 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2763 * is hit. First @b_node is split into two entries which are inserted into the 2764 * next iteration of the loop. @b_node is returned populated with the final 2765 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2766 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2767 * to account of what has been copied into the new sub-tree. The update of 2768 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2769 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2770 * the new sub-tree in case the sub-tree becomes the full tree. 2771 */ 2772 static void mas_spanning_rebalance(struct ma_state *mas, 2773 struct maple_subtree_state *mast, unsigned char count) 2774 { 2775 unsigned char split, mid_split; 2776 unsigned char slot = 0; 2777 unsigned char new_height = 0; /* used if node is a new root */ 2778 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2779 struct maple_enode *old_enode; 2780 2781 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2782 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2783 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2784 2785 /* 2786 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2787 * Rebalancing is done by use of the ``struct maple_topiary``. 2788 */ 2789 mast->l = &l_mas; 2790 mast->m = &m_mas; 2791 mast->r = &r_mas; 2792 l_mas.status = r_mas.status = m_mas.status = ma_none; 2793 2794 /* Check if this is not root and has sufficient data. */ 2795 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2796 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2797 mast_spanning_rebalance(mast); 2798 2799 /* 2800 * Each level of the tree is examined and balanced, pushing data to the left or 2801 * right, or rebalancing against left or right nodes is employed to avoid 2802 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2803 * the tree is created, there may be a mix of new and old nodes. The old nodes 2804 * will have the incorrect parent pointers and currently be in two trees: the 2805 * original tree and the partially new tree. To remedy the parent pointers in 2806 * the old tree, the new data is swapped into the active tree and a walk down 2807 * the tree is performed and the parent pointers are updated. 2808 * See mas_topiary_replace() for more information. 2809 */ 2810 while (count--) { 2811 mast->bn->b_end--; 2812 mast->bn->type = mte_node_type(mast->orig_l->node); 2813 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2814 &mid_split); 2815 mast_set_split_parents(mast, left, middle, right, split, 2816 mid_split); 2817 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2818 new_height++; 2819 2820 /* 2821 * Copy data from next level in the tree to mast->bn from next 2822 * iteration 2823 */ 2824 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2825 mast->bn->type = mte_node_type(left); 2826 2827 /* Root already stored in l->node. */ 2828 if (mas_is_root_limits(mast->l)) 2829 goto new_root; 2830 2831 mast_ascend(mast); 2832 mast_combine_cp_left(mast); 2833 l_mas.offset = mast->bn->b_end; 2834 mab_set_b_end(mast->bn, &l_mas, left); 2835 mab_set_b_end(mast->bn, &m_mas, middle); 2836 mab_set_b_end(mast->bn, &r_mas, right); 2837 2838 /* Copy anything necessary out of the right node. */ 2839 mast_combine_cp_right(mast); 2840 mast->orig_l->last = mast->orig_l->max; 2841 2842 if (mast_sufficient(mast)) { 2843 if (mast_overflow(mast)) 2844 continue; 2845 2846 if (mast->orig_l->node == mast->orig_r->node) { 2847 /* 2848 * The data in b_node should be stored in one 2849 * node and in the tree 2850 */ 2851 slot = mast->l->offset; 2852 break; 2853 } 2854 2855 continue; 2856 } 2857 2858 /* May be a new root stored in mast->bn */ 2859 if (mas_is_root_limits(mast->orig_l)) 2860 break; 2861 2862 mast_spanning_rebalance(mast); 2863 2864 /* rebalancing from other nodes may require another loop. */ 2865 if (!count) 2866 count++; 2867 } 2868 2869 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2870 mte_node_type(mast->orig_l->node)); 2871 2872 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2873 new_height++; 2874 mas_set_parent(mas, left, l_mas.node, slot); 2875 if (middle) 2876 mas_set_parent(mas, middle, l_mas.node, ++slot); 2877 2878 if (right) 2879 mas_set_parent(mas, right, l_mas.node, ++slot); 2880 2881 if (mas_is_root_limits(mast->l)) { 2882 new_root: 2883 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2884 while (!mte_is_root(mast->orig_l->node)) 2885 mast_ascend(mast); 2886 } else { 2887 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2888 } 2889 2890 old_enode = mast->orig_l->node; 2891 mas->depth = l_mas.depth; 2892 mas->node = l_mas.node; 2893 mas->min = l_mas.min; 2894 mas->max = l_mas.max; 2895 mas->offset = l_mas.offset; 2896 mas_wmb_replace(mas, old_enode, new_height); 2897 mtree_range_walk(mas); 2898 return; 2899 } 2900 2901 /* 2902 * mas_rebalance() - Rebalance a given node. 2903 * @mas: The maple state 2904 * @b_node: The big maple node. 2905 * 2906 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2907 * Continue upwards until tree is sufficient. 2908 */ 2909 static inline void mas_rebalance(struct ma_state *mas, 2910 struct maple_big_node *b_node) 2911 { 2912 char empty_count = mas_mt_height(mas); 2913 struct maple_subtree_state mast; 2914 unsigned char shift, b_end = ++b_node->b_end; 2915 2916 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2917 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2918 2919 trace_ma_op(__func__, mas); 2920 2921 /* 2922 * Rebalancing occurs if a node is insufficient. Data is rebalanced 2923 * against the node to the right if it exists, otherwise the node to the 2924 * left of this node is rebalanced against this node. If rebalancing 2925 * causes just one node to be produced instead of two, then the parent 2926 * is also examined and rebalanced if it is insufficient. Every level 2927 * tries to combine the data in the same way. If one node contains the 2928 * entire range of the tree, then that node is used as a new root node. 2929 */ 2930 2931 mast.orig_l = &l_mas; 2932 mast.orig_r = &r_mas; 2933 mast.bn = b_node; 2934 mast.bn->type = mte_node_type(mas->node); 2935 2936 l_mas = r_mas = *mas; 2937 2938 if (mas_next_sibling(&r_mas)) { 2939 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 2940 r_mas.last = r_mas.index = r_mas.max; 2941 } else { 2942 mas_prev_sibling(&l_mas); 2943 shift = mas_data_end(&l_mas) + 1; 2944 mab_shift_right(b_node, shift); 2945 mas->offset += shift; 2946 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 2947 b_node->b_end = shift + b_end; 2948 l_mas.index = l_mas.last = l_mas.min; 2949 } 2950 2951 return mas_spanning_rebalance(mas, &mast, empty_count); 2952 } 2953 2954 /* 2955 * mas_split_final_node() - Split the final node in a subtree operation. 2956 * @mast: the maple subtree state 2957 * @mas: The maple state 2958 */ 2959 static inline void mas_split_final_node(struct maple_subtree_state *mast, 2960 struct ma_state *mas) 2961 { 2962 struct maple_enode *ancestor; 2963 2964 if (mte_is_root(mas->node)) { 2965 if (mt_is_alloc(mas->tree)) 2966 mast->bn->type = maple_arange_64; 2967 else 2968 mast->bn->type = maple_range_64; 2969 } 2970 /* 2971 * Only a single node is used here, could be root. 2972 * The Big_node data should just fit in a single node. 2973 */ 2974 ancestor = mas_new_ma_node(mas, mast->bn); 2975 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 2976 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 2977 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 2978 2979 mast->l->node = ancestor; 2980 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 2981 mas->offset = mast->bn->b_end - 1; 2982 } 2983 2984 /* 2985 * mast_fill_bnode() - Copy data into the big node in the subtree state 2986 * @mast: The maple subtree state 2987 * @mas: the maple state 2988 * @skip: The number of entries to skip for new nodes insertion. 2989 */ 2990 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 2991 struct ma_state *mas, 2992 unsigned char skip) 2993 { 2994 bool cp = true; 2995 unsigned char split; 2996 2997 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2998 2999 if (mte_is_root(mas->node)) { 3000 cp = false; 3001 } else { 3002 mas_ascend(mas); 3003 mas->offset = mte_parent_slot(mas->node); 3004 } 3005 3006 if (cp && mast->l->offset) 3007 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 3008 3009 split = mast->bn->b_end; 3010 mab_set_b_end(mast->bn, mast->l, mast->l->node); 3011 mast->r->offset = mast->bn->b_end; 3012 mab_set_b_end(mast->bn, mast->r, mast->r->node); 3013 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 3014 cp = false; 3015 3016 if (cp) 3017 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 3018 mast->bn, mast->bn->b_end); 3019 3020 mast->bn->b_end--; 3021 mast->bn->type = mte_node_type(mas->node); 3022 } 3023 3024 /* 3025 * mast_split_data() - Split the data in the subtree state big node into regular 3026 * nodes. 3027 * @mast: The maple subtree state 3028 * @mas: The maple state 3029 * @split: The location to split the big node 3030 */ 3031 static inline void mast_split_data(struct maple_subtree_state *mast, 3032 struct ma_state *mas, unsigned char split) 3033 { 3034 unsigned char p_slot; 3035 3036 mab_mas_cp(mast->bn, 0, split, mast->l, true); 3037 mte_set_pivot(mast->r->node, 0, mast->r->max); 3038 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 3039 mast->l->offset = mte_parent_slot(mas->node); 3040 mast->l->max = mast->bn->pivot[split]; 3041 mast->r->min = mast->l->max + 1; 3042 if (mte_is_leaf(mas->node)) 3043 return; 3044 3045 p_slot = mast->orig_l->offset; 3046 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 3047 &p_slot, split); 3048 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 3049 &p_slot, split); 3050 } 3051 3052 /* 3053 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 3054 * data to the right or left node if there is room. 3055 * @mas: The maple state 3056 * @mast: The maple subtree state 3057 * @left: Push left or not. 3058 * 3059 * Keeping the height of the tree low means faster lookups. 3060 * 3061 * Return: True if pushed, false otherwise. 3062 */ 3063 static inline bool mas_push_data(struct ma_state *mas, 3064 struct maple_subtree_state *mast, bool left) 3065 { 3066 unsigned char slot_total = mast->bn->b_end; 3067 unsigned char end, space, split; 3068 3069 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 3070 tmp_mas = *mas; 3071 tmp_mas.depth = mast->l->depth; 3072 3073 if (left && !mas_prev_sibling(&tmp_mas)) 3074 return false; 3075 else if (!left && !mas_next_sibling(&tmp_mas)) 3076 return false; 3077 3078 end = mas_data_end(&tmp_mas); 3079 slot_total += end; 3080 space = 2 * mt_slot_count(mas->node) - 2; 3081 /* -2 instead of -1 to ensure there isn't a triple split */ 3082 if (ma_is_leaf(mast->bn->type)) 3083 space--; 3084 3085 if (mas->max == ULONG_MAX) 3086 space--; 3087 3088 if (slot_total >= space) 3089 return false; 3090 3091 /* Get the data; Fill mast->bn */ 3092 mast->bn->b_end++; 3093 if (left) { 3094 mab_shift_right(mast->bn, end + 1); 3095 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 3096 mast->bn->b_end = slot_total + 1; 3097 } else { 3098 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 3099 } 3100 3101 /* Configure mast for splitting of mast->bn */ 3102 split = mt_slots[mast->bn->type] - 2; 3103 if (left) { 3104 /* Switch mas to prev node */ 3105 *mas = tmp_mas; 3106 /* Start using mast->l for the left side. */ 3107 tmp_mas.node = mast->l->node; 3108 *mast->l = tmp_mas; 3109 } else { 3110 tmp_mas.node = mast->r->node; 3111 *mast->r = tmp_mas; 3112 split = slot_total - split; 3113 } 3114 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 3115 /* Update parent slot for split calculation. */ 3116 if (left) 3117 mast->orig_l->offset += end + 1; 3118 3119 mast_split_data(mast, mas, split); 3120 mast_fill_bnode(mast, mas, 2); 3121 mas_split_final_node(mast, mas); 3122 return true; 3123 } 3124 3125 /* 3126 * mas_split() - Split data that is too big for one node into two. 3127 * @mas: The maple state 3128 * @b_node: The maple big node 3129 */ 3130 static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) 3131 { 3132 struct maple_subtree_state mast; 3133 int height = 0; 3134 unsigned int orig_height = mas_mt_height(mas); 3135 unsigned char mid_split, split = 0; 3136 struct maple_enode *old; 3137 3138 /* 3139 * Splitting is handled differently from any other B-tree; the Maple 3140 * Tree splits upwards. Splitting up means that the split operation 3141 * occurs when the walk of the tree hits the leaves and not on the way 3142 * down. The reason for splitting up is that it is impossible to know 3143 * how much space will be needed until the leaf is (or leaves are) 3144 * reached. Since overwriting data is allowed and a range could 3145 * overwrite more than one range or result in changing one entry into 3 3146 * entries, it is impossible to know if a split is required until the 3147 * data is examined. 3148 * 3149 * Splitting is a balancing act between keeping allocations to a minimum 3150 * and avoiding a 'jitter' event where a tree is expanded to make room 3151 * for an entry followed by a contraction when the entry is removed. To 3152 * accomplish the balance, there are empty slots remaining in both left 3153 * and right nodes after a split. 3154 */ 3155 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3156 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3157 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3158 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3159 3160 trace_ma_op(__func__, mas); 3161 3162 mast.l = &l_mas; 3163 mast.r = &r_mas; 3164 mast.orig_l = &prev_l_mas; 3165 mast.orig_r = &prev_r_mas; 3166 mast.bn = b_node; 3167 3168 while (height++ <= orig_height) { 3169 if (mt_slots[b_node->type] > b_node->b_end) { 3170 mas_split_final_node(&mast, mas); 3171 break; 3172 } 3173 3174 l_mas = r_mas = *mas; 3175 l_mas.node = mas_new_ma_node(mas, b_node); 3176 r_mas.node = mas_new_ma_node(mas, b_node); 3177 /* 3178 * Another way that 'jitter' is avoided is to terminate a split up early if the 3179 * left or right node has space to spare. This is referred to as "pushing left" 3180 * or "pushing right" and is similar to the B* tree, except the nodes left or 3181 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3182 * is a significant savings. 3183 */ 3184 /* Try to push left. */ 3185 if (mas_push_data(mas, &mast, true)) { 3186 height++; 3187 break; 3188 } 3189 /* Try to push right. */ 3190 if (mas_push_data(mas, &mast, false)) { 3191 height++; 3192 break; 3193 } 3194 3195 split = mab_calc_split(mas, b_node, &mid_split); 3196 mast_split_data(&mast, mas, split); 3197 /* 3198 * Usually correct, mab_mas_cp in the above call overwrites 3199 * r->max. 3200 */ 3201 mast.r->max = mas->max; 3202 mast_fill_bnode(&mast, mas, 1); 3203 prev_l_mas = *mast.l; 3204 prev_r_mas = *mast.r; 3205 } 3206 3207 /* Set the original node as dead */ 3208 old = mas->node; 3209 mas->node = l_mas.node; 3210 mas_wmb_replace(mas, old, height); 3211 mtree_range_walk(mas); 3212 return; 3213 } 3214 3215 /* 3216 * mas_commit_b_node() - Commit the big node into the tree. 3217 * @wr_mas: The maple write state 3218 * @b_node: The maple big node 3219 */ 3220 static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, 3221 struct maple_big_node *b_node) 3222 { 3223 enum store_type type = wr_mas->mas->store_type; 3224 3225 WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store); 3226 3227 if (type == wr_rebalance) 3228 return mas_rebalance(wr_mas->mas, b_node); 3229 3230 return mas_split(wr_mas->mas, b_node); 3231 } 3232 3233 /* 3234 * mas_root_expand() - Expand a root to a node 3235 * @mas: The maple state 3236 * @entry: The entry to store into the tree 3237 */ 3238 static inline void mas_root_expand(struct ma_state *mas, void *entry) 3239 { 3240 void *contents = mas_root_locked(mas); 3241 enum maple_type type = maple_leaf_64; 3242 struct maple_node *node; 3243 void __rcu **slots; 3244 unsigned long *pivots; 3245 int slot = 0; 3246 3247 node = mas_pop_node(mas); 3248 pivots = ma_pivots(node, type); 3249 slots = ma_slots(node, type); 3250 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3251 mas->node = mt_mk_node(node, type); 3252 mas->status = ma_active; 3253 3254 if (mas->index) { 3255 if (contents) { 3256 rcu_assign_pointer(slots[slot], contents); 3257 if (likely(mas->index > 1)) 3258 slot++; 3259 } 3260 pivots[slot++] = mas->index - 1; 3261 } 3262 3263 rcu_assign_pointer(slots[slot], entry); 3264 mas->offset = slot; 3265 pivots[slot] = mas->last; 3266 if (mas->last != ULONG_MAX) 3267 pivots[++slot] = ULONG_MAX; 3268 3269 mt_set_height(mas->tree, 1); 3270 ma_set_meta(node, maple_leaf_64, 0, slot); 3271 /* swap the new root into the tree */ 3272 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3273 return; 3274 } 3275 3276 /* 3277 * mas_store_root() - Storing value into root. 3278 * @mas: The maple state 3279 * @entry: The entry to store. 3280 * 3281 * There is no root node now and we are storing a value into the root - this 3282 * function either assigns the pointer or expands into a node. 3283 */ 3284 static inline void mas_store_root(struct ma_state *mas, void *entry) 3285 { 3286 if (!entry) { 3287 if (!mas->index) 3288 rcu_assign_pointer(mas->tree->ma_root, NULL); 3289 } else if (likely((mas->last != 0) || (mas->index != 0))) 3290 mas_root_expand(mas, entry); 3291 else if (((unsigned long) (entry) & 3) == 2) 3292 mas_root_expand(mas, entry); 3293 else { 3294 rcu_assign_pointer(mas->tree->ma_root, entry); 3295 mas->status = ma_start; 3296 } 3297 } 3298 3299 /* 3300 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3301 * spans the node. 3302 * @wr_mas: The maple write state 3303 * 3304 * Spanning writes are writes that start in one node and end in another OR if 3305 * the write of a %NULL will cause the node to end with a %NULL. 3306 * 3307 * Return: True if this is a spanning write, false otherwise. 3308 */ 3309 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3310 { 3311 unsigned long max = wr_mas->r_max; 3312 unsigned long last = wr_mas->mas->last; 3313 enum maple_type type = wr_mas->type; 3314 void *entry = wr_mas->entry; 3315 3316 /* Contained in this pivot, fast path */ 3317 if (last < max) 3318 return false; 3319 3320 if (ma_is_leaf(type)) { 3321 max = wr_mas->mas->max; 3322 if (last < max) 3323 return false; 3324 } 3325 3326 if (last == max) { 3327 /* 3328 * The last entry of leaf node cannot be NULL unless it is the 3329 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 3330 */ 3331 if (entry || last == ULONG_MAX) 3332 return false; 3333 } 3334 3335 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3336 return true; 3337 } 3338 3339 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3340 { 3341 wr_mas->type = mte_node_type(wr_mas->mas->node); 3342 mas_wr_node_walk(wr_mas); 3343 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3344 } 3345 3346 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3347 { 3348 wr_mas->mas->max = wr_mas->r_max; 3349 wr_mas->mas->min = wr_mas->r_min; 3350 wr_mas->mas->node = wr_mas->content; 3351 wr_mas->mas->offset = 0; 3352 wr_mas->mas->depth++; 3353 } 3354 /* 3355 * mas_wr_walk() - Walk the tree for a write. 3356 * @wr_mas: The maple write state 3357 * 3358 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3359 * 3360 * Return: True if it's contained in a node, false on spanning write. 3361 */ 3362 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3363 { 3364 struct ma_state *mas = wr_mas->mas; 3365 3366 while (true) { 3367 mas_wr_walk_descend(wr_mas); 3368 if (unlikely(mas_is_span_wr(wr_mas))) 3369 return false; 3370 3371 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3372 mas->offset); 3373 if (ma_is_leaf(wr_mas->type)) 3374 return true; 3375 3376 if (mas->end < mt_slots[wr_mas->type] - 1) 3377 wr_mas->vacant_height = mas->depth + 1; 3378 3379 if (ma_is_root(mas_mn(mas))) { 3380 /* root needs more than 2 entries to be sufficient + 1 */ 3381 if (mas->end > 2) 3382 wr_mas->sufficient_height = 1; 3383 } else if (mas->end > mt_min_slots[wr_mas->type] + 1) 3384 wr_mas->sufficient_height = mas->depth + 1; 3385 3386 mas_wr_walk_traverse(wr_mas); 3387 } 3388 3389 return true; 3390 } 3391 3392 static void mas_wr_walk_index(struct ma_wr_state *wr_mas) 3393 { 3394 struct ma_state *mas = wr_mas->mas; 3395 3396 while (true) { 3397 mas_wr_walk_descend(wr_mas); 3398 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3399 mas->offset); 3400 if (ma_is_leaf(wr_mas->type)) 3401 return; 3402 mas_wr_walk_traverse(wr_mas); 3403 } 3404 } 3405 /* 3406 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 3407 * @l_wr_mas: The left maple write state 3408 * @r_wr_mas: The right maple write state 3409 */ 3410 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3411 struct ma_wr_state *r_wr_mas) 3412 { 3413 struct ma_state *r_mas = r_wr_mas->mas; 3414 struct ma_state *l_mas = l_wr_mas->mas; 3415 unsigned char l_slot; 3416 3417 l_slot = l_mas->offset; 3418 if (!l_wr_mas->content) 3419 l_mas->index = l_wr_mas->r_min; 3420 3421 if ((l_mas->index == l_wr_mas->r_min) && 3422 (l_slot && 3423 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3424 if (l_slot > 1) 3425 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3426 else 3427 l_mas->index = l_mas->min; 3428 3429 l_mas->offset = l_slot - 1; 3430 } 3431 3432 if (!r_wr_mas->content) { 3433 if (r_mas->last < r_wr_mas->r_max) 3434 r_mas->last = r_wr_mas->r_max; 3435 r_mas->offset++; 3436 } else if ((r_mas->last == r_wr_mas->r_max) && 3437 (r_mas->last < r_mas->max) && 3438 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3439 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3440 r_wr_mas->type, r_mas->offset + 1); 3441 r_mas->offset++; 3442 } 3443 } 3444 3445 static inline void *mas_state_walk(struct ma_state *mas) 3446 { 3447 void *entry; 3448 3449 entry = mas_start(mas); 3450 if (mas_is_none(mas)) 3451 return NULL; 3452 3453 if (mas_is_ptr(mas)) 3454 return entry; 3455 3456 return mtree_range_walk(mas); 3457 } 3458 3459 /* 3460 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3461 * to date. 3462 * 3463 * @mas: The maple state. 3464 * 3465 * Note: Leaves mas in undesirable state. 3466 * Return: The entry for @mas->index or %NULL on dead node. 3467 */ 3468 static inline void *mtree_lookup_walk(struct ma_state *mas) 3469 { 3470 unsigned long *pivots; 3471 unsigned char offset; 3472 struct maple_node *node; 3473 struct maple_enode *next; 3474 enum maple_type type; 3475 void __rcu **slots; 3476 unsigned char end; 3477 3478 next = mas->node; 3479 do { 3480 node = mte_to_node(next); 3481 type = mte_node_type(next); 3482 pivots = ma_pivots(node, type); 3483 end = mt_pivots[type]; 3484 offset = 0; 3485 do { 3486 if (pivots[offset] >= mas->index) 3487 break; 3488 } while (++offset < end); 3489 3490 slots = ma_slots(node, type); 3491 next = mt_slot(mas->tree, slots, offset); 3492 if (unlikely(ma_dead_node(node))) 3493 goto dead_node; 3494 } while (!ma_is_leaf(type)); 3495 3496 return (void *)next; 3497 3498 dead_node: 3499 mas_reset(mas); 3500 return NULL; 3501 } 3502 3503 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 3504 /* 3505 * mas_new_root() - Create a new root node that only contains the entry passed 3506 * in. 3507 * @mas: The maple state 3508 * @entry: The entry to store. 3509 * 3510 * Only valid when the index == 0 and the last == ULONG_MAX 3511 */ 3512 static inline void mas_new_root(struct ma_state *mas, void *entry) 3513 { 3514 struct maple_enode *root = mas_root_locked(mas); 3515 enum maple_type type = maple_leaf_64; 3516 struct maple_node *node; 3517 void __rcu **slots; 3518 unsigned long *pivots; 3519 3520 WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); 3521 3522 if (!entry) { 3523 mt_set_height(mas->tree, 0); 3524 rcu_assign_pointer(mas->tree->ma_root, entry); 3525 mas->status = ma_start; 3526 goto done; 3527 } 3528 3529 node = mas_pop_node(mas); 3530 pivots = ma_pivots(node, type); 3531 slots = ma_slots(node, type); 3532 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3533 mas->node = mt_mk_node(node, type); 3534 mas->status = ma_active; 3535 rcu_assign_pointer(slots[0], entry); 3536 pivots[0] = mas->last; 3537 mt_set_height(mas->tree, 1); 3538 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3539 3540 done: 3541 if (xa_is_node(root)) 3542 mte_destroy_walk(root, mas->tree); 3543 3544 return; 3545 } 3546 /* 3547 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3548 * and new nodes where necessary, then place the sub-tree in the actual tree. 3549 * Note that mas is expected to point to the node which caused the store to 3550 * span. 3551 * @wr_mas: The maple write state 3552 */ 3553 static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3554 { 3555 struct maple_subtree_state mast; 3556 struct maple_big_node b_node; 3557 struct ma_state *mas; 3558 unsigned char height; 3559 3560 /* Left and Right side of spanning store */ 3561 MA_STATE(l_mas, NULL, 0, 0); 3562 MA_STATE(r_mas, NULL, 0, 0); 3563 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3564 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3565 3566 /* 3567 * A store operation that spans multiple nodes is called a spanning 3568 * store and is handled early in the store call stack by the function 3569 * mas_is_span_wr(). When a spanning store is identified, the maple 3570 * state is duplicated. The first maple state walks the left tree path 3571 * to ``index``, the duplicate walks the right tree path to ``last``. 3572 * The data in the two nodes are combined into a single node, two nodes, 3573 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3574 * written to the last entry of a node is considered a spanning store as 3575 * a rebalance is required for the operation to complete and an overflow 3576 * of data may happen. 3577 */ 3578 mas = wr_mas->mas; 3579 trace_ma_op(__func__, mas); 3580 3581 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3582 return mas_new_root(mas, wr_mas->entry); 3583 /* 3584 * Node rebalancing may occur due to this store, so there may be three new 3585 * entries per level plus a new root. 3586 */ 3587 height = mas_mt_height(mas); 3588 3589 /* 3590 * Set up right side. Need to get to the next offset after the spanning 3591 * store to ensure it's not NULL and to combine both the next node and 3592 * the node with the start together. 3593 */ 3594 r_mas = *mas; 3595 /* Avoid overflow, walk to next slot in the tree. */ 3596 if (r_mas.last + 1) 3597 r_mas.last++; 3598 3599 r_mas.index = r_mas.last; 3600 mas_wr_walk_index(&r_wr_mas); 3601 r_mas.last = r_mas.index = mas->last; 3602 3603 /* Set up left side. */ 3604 l_mas = *mas; 3605 mas_wr_walk_index(&l_wr_mas); 3606 3607 if (!wr_mas->entry) { 3608 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3609 mas->offset = l_mas.offset; 3610 mas->index = l_mas.index; 3611 mas->last = l_mas.last = r_mas.last; 3612 } 3613 3614 /* expanding NULLs may make this cover the entire range */ 3615 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3616 mas_set_range(mas, 0, ULONG_MAX); 3617 return mas_new_root(mas, wr_mas->entry); 3618 } 3619 3620 memset(&b_node, 0, sizeof(struct maple_big_node)); 3621 /* Copy l_mas and store the value in b_node. */ 3622 mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); 3623 /* Copy r_mas into b_node if there is anything to copy. */ 3624 if (r_mas.max > r_mas.last) 3625 mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, 3626 &b_node, b_node.b_end + 1); 3627 else 3628 b_node.b_end++; 3629 3630 /* Stop spanning searches by searching for just index. */ 3631 l_mas.index = l_mas.last = mas->index; 3632 3633 mast.bn = &b_node; 3634 mast.orig_l = &l_mas; 3635 mast.orig_r = &r_mas; 3636 /* Combine l_mas and r_mas and split them up evenly again. */ 3637 return mas_spanning_rebalance(mas, &mast, height + 1); 3638 } 3639 3640 /* 3641 * mas_wr_node_store() - Attempt to store the value in a node 3642 * @wr_mas: The maple write state 3643 * 3644 * Attempts to reuse the node, but may allocate. 3645 */ 3646 static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, 3647 unsigned char new_end) 3648 { 3649 struct ma_state *mas = wr_mas->mas; 3650 void __rcu **dst_slots; 3651 unsigned long *dst_pivots; 3652 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3653 struct maple_node reuse, *newnode; 3654 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3655 bool in_rcu = mt_in_rcu(mas->tree); 3656 unsigned char height = mas_mt_height(mas); 3657 3658 if (mas->last == wr_mas->end_piv) 3659 offset_end++; /* don't copy this offset */ 3660 3661 /* set up node. */ 3662 if (in_rcu) { 3663 newnode = mas_pop_node(mas); 3664 } else { 3665 memset(&reuse, 0, sizeof(struct maple_node)); 3666 newnode = &reuse; 3667 } 3668 3669 newnode->parent = mas_mn(mas)->parent; 3670 dst_pivots = ma_pivots(newnode, wr_mas->type); 3671 dst_slots = ma_slots(newnode, wr_mas->type); 3672 /* Copy from start to insert point */ 3673 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3674 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3675 3676 /* Handle insert of new range starting after old range */ 3677 if (wr_mas->r_min < mas->index) { 3678 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3679 dst_pivots[mas->offset++] = mas->index - 1; 3680 } 3681 3682 /* Store the new entry and range end. */ 3683 if (mas->offset < node_pivots) 3684 dst_pivots[mas->offset] = mas->last; 3685 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3686 3687 /* 3688 * this range wrote to the end of the node or it overwrote the rest of 3689 * the data 3690 */ 3691 if (offset_end > mas->end) 3692 goto done; 3693 3694 dst_offset = mas->offset + 1; 3695 /* Copy to the end of node if necessary. */ 3696 copy_size = mas->end - offset_end + 1; 3697 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3698 sizeof(void *) * copy_size); 3699 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3700 sizeof(unsigned long) * (copy_size - 1)); 3701 3702 if (new_end < node_pivots) 3703 dst_pivots[new_end] = mas->max; 3704 3705 done: 3706 mas_leaf_set_meta(newnode, maple_leaf_64, new_end); 3707 if (in_rcu) { 3708 struct maple_enode *old_enode = mas->node; 3709 3710 mas->node = mt_mk_node(newnode, wr_mas->type); 3711 mas_replace_node(mas, old_enode, height); 3712 } else { 3713 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3714 } 3715 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3716 mas_update_gap(mas); 3717 mas->end = new_end; 3718 return; 3719 } 3720 3721 /* 3722 * mas_wr_slot_store: Attempt to store a value in a slot. 3723 * @wr_mas: the maple write state 3724 */ 3725 static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) 3726 { 3727 struct ma_state *mas = wr_mas->mas; 3728 unsigned char offset = mas->offset; 3729 void __rcu **slots = wr_mas->slots; 3730 bool gap = false; 3731 3732 gap |= !mt_slot_locked(mas->tree, slots, offset); 3733 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 3734 3735 if (wr_mas->offset_end - offset == 1) { 3736 if (mas->index == wr_mas->r_min) { 3737 /* Overwriting the range and a part of the next one */ 3738 rcu_assign_pointer(slots[offset], wr_mas->entry); 3739 wr_mas->pivots[offset] = mas->last; 3740 } else { 3741 /* Overwriting a part of the range and the next one */ 3742 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3743 wr_mas->pivots[offset] = mas->index - 1; 3744 mas->offset++; /* Keep mas accurate. */ 3745 } 3746 } else { 3747 WARN_ON_ONCE(mt_in_rcu(mas->tree)); 3748 /* 3749 * Expand the range, only partially overwriting the previous and 3750 * next ranges 3751 */ 3752 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 3753 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3754 wr_mas->pivots[offset] = mas->index - 1; 3755 wr_mas->pivots[offset + 1] = mas->last; 3756 mas->offset++; /* Keep mas accurate. */ 3757 } 3758 3759 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3760 /* 3761 * Only update gap when the new entry is empty or there is an empty 3762 * entry in the original two ranges. 3763 */ 3764 if (!wr_mas->entry || gap) 3765 mas_update_gap(mas); 3766 3767 return; 3768 } 3769 3770 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 3771 { 3772 struct ma_state *mas = wr_mas->mas; 3773 3774 if (!wr_mas->slots[wr_mas->offset_end]) { 3775 /* If this one is null, the next and prev are not */ 3776 mas->last = wr_mas->end_piv; 3777 } else { 3778 /* Check next slot(s) if we are overwriting the end */ 3779 if ((mas->last == wr_mas->end_piv) && 3780 (mas->end != wr_mas->offset_end) && 3781 !wr_mas->slots[wr_mas->offset_end + 1]) { 3782 wr_mas->offset_end++; 3783 if (wr_mas->offset_end == mas->end) 3784 mas->last = mas->max; 3785 else 3786 mas->last = wr_mas->pivots[wr_mas->offset_end]; 3787 wr_mas->end_piv = mas->last; 3788 } 3789 } 3790 3791 if (!wr_mas->content) { 3792 /* If this one is null, the next and prev are not */ 3793 mas->index = wr_mas->r_min; 3794 } else { 3795 /* Check prev slot if we are overwriting the start */ 3796 if (mas->index == wr_mas->r_min && mas->offset && 3797 !wr_mas->slots[mas->offset - 1]) { 3798 mas->offset--; 3799 wr_mas->r_min = mas->index = 3800 mas_safe_min(mas, wr_mas->pivots, mas->offset); 3801 wr_mas->r_max = wr_mas->pivots[mas->offset]; 3802 } 3803 } 3804 } 3805 3806 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 3807 { 3808 while ((wr_mas->offset_end < wr_mas->mas->end) && 3809 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 3810 wr_mas->offset_end++; 3811 3812 if (wr_mas->offset_end < wr_mas->mas->end) 3813 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 3814 else 3815 wr_mas->end_piv = wr_mas->mas->max; 3816 } 3817 3818 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 3819 { 3820 struct ma_state *mas = wr_mas->mas; 3821 unsigned char new_end = mas->end + 2; 3822 3823 new_end -= wr_mas->offset_end - mas->offset; 3824 if (wr_mas->r_min == mas->index) 3825 new_end--; 3826 3827 if (wr_mas->end_piv == mas->last) 3828 new_end--; 3829 3830 return new_end; 3831 } 3832 3833 /* 3834 * mas_wr_append: Attempt to append 3835 * @wr_mas: the maple write state 3836 * @new_end: The end of the node after the modification 3837 * 3838 * This is currently unsafe in rcu mode since the end of the node may be cached 3839 * by readers while the node contents may be updated which could result in 3840 * inaccurate information. 3841 */ 3842 static inline void mas_wr_append(struct ma_wr_state *wr_mas, 3843 unsigned char new_end) 3844 { 3845 struct ma_state *mas = wr_mas->mas; 3846 void __rcu **slots; 3847 unsigned char end = mas->end; 3848 3849 if (new_end < mt_pivots[wr_mas->type]) { 3850 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 3851 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 3852 } 3853 3854 slots = wr_mas->slots; 3855 if (new_end == end + 1) { 3856 if (mas->last == wr_mas->r_max) { 3857 /* Append to end of range */ 3858 rcu_assign_pointer(slots[new_end], wr_mas->entry); 3859 wr_mas->pivots[end] = mas->index - 1; 3860 mas->offset = new_end; 3861 } else { 3862 /* Append to start of range */ 3863 rcu_assign_pointer(slots[new_end], wr_mas->content); 3864 wr_mas->pivots[end] = mas->last; 3865 rcu_assign_pointer(slots[end], wr_mas->entry); 3866 } 3867 } else { 3868 /* Append to the range without touching any boundaries. */ 3869 rcu_assign_pointer(slots[new_end], wr_mas->content); 3870 wr_mas->pivots[end + 1] = mas->last; 3871 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 3872 wr_mas->pivots[end] = mas->index - 1; 3873 mas->offset = end + 1; 3874 } 3875 3876 if (!wr_mas->content || !wr_mas->entry) 3877 mas_update_gap(mas); 3878 3879 mas->end = new_end; 3880 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 3881 return; 3882 } 3883 3884 /* 3885 * mas_wr_bnode() - Slow path for a modification. 3886 * @wr_mas: The write maple state 3887 * 3888 * This is where split, rebalance end up. 3889 */ 3890 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 3891 { 3892 struct maple_big_node b_node; 3893 3894 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 3895 memset(&b_node, 0, sizeof(struct maple_big_node)); 3896 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 3897 mas_commit_b_node(wr_mas, &b_node); 3898 } 3899 3900 /* 3901 * mas_wr_store_entry() - Internal call to store a value 3902 * @wr_mas: The maple write state 3903 */ 3904 static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas) 3905 { 3906 struct ma_state *mas = wr_mas->mas; 3907 unsigned char new_end = mas_wr_new_end(wr_mas); 3908 3909 switch (mas->store_type) { 3910 case wr_exact_fit: 3911 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 3912 if (!!wr_mas->entry ^ !!wr_mas->content) 3913 mas_update_gap(mas); 3914 break; 3915 case wr_append: 3916 mas_wr_append(wr_mas, new_end); 3917 break; 3918 case wr_slot_store: 3919 mas_wr_slot_store(wr_mas); 3920 break; 3921 case wr_node_store: 3922 mas_wr_node_store(wr_mas, new_end); 3923 break; 3924 case wr_spanning_store: 3925 mas_wr_spanning_store(wr_mas); 3926 break; 3927 case wr_split_store: 3928 case wr_rebalance: 3929 mas_wr_bnode(wr_mas); 3930 break; 3931 case wr_new_root: 3932 mas_new_root(mas, wr_mas->entry); 3933 break; 3934 case wr_store_root: 3935 mas_store_root(mas, wr_mas->entry); 3936 break; 3937 case wr_invalid: 3938 MT_BUG_ON(mas->tree, 1); 3939 } 3940 3941 return; 3942 } 3943 3944 static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas) 3945 { 3946 struct ma_state *mas = wr_mas->mas; 3947 3948 if (!mas_is_active(mas)) { 3949 if (mas_is_start(mas)) 3950 goto set_content; 3951 3952 if (unlikely(mas_is_paused(mas))) 3953 goto reset; 3954 3955 if (unlikely(mas_is_none(mas))) 3956 goto reset; 3957 3958 if (unlikely(mas_is_overflow(mas))) 3959 goto reset; 3960 3961 if (unlikely(mas_is_underflow(mas))) 3962 goto reset; 3963 } 3964 3965 /* 3966 * A less strict version of mas_is_span_wr() where we allow spanning 3967 * writes within this node. This is to stop partial walks in 3968 * mas_prealloc() from being reset. 3969 */ 3970 if (mas->last > mas->max) 3971 goto reset; 3972 3973 if (wr_mas->entry) 3974 goto set_content; 3975 3976 if (mte_is_leaf(mas->node) && mas->last == mas->max) 3977 goto reset; 3978 3979 goto set_content; 3980 3981 reset: 3982 mas_reset(mas); 3983 set_content: 3984 wr_mas->content = mas_start(mas); 3985 } 3986 3987 /** 3988 * mas_prealloc_calc() - Calculate number of nodes needed for a 3989 * given store oepration 3990 * @wr_mas: The maple write state 3991 * @entry: The entry to store into the tree 3992 * 3993 * Return: Number of nodes required for preallocation. 3994 */ 3995 static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry) 3996 { 3997 struct ma_state *mas = wr_mas->mas; 3998 unsigned char height = mas_mt_height(mas); 3999 int ret = height * 3 + 1; 4000 unsigned char delta = height - wr_mas->vacant_height; 4001 4002 switch (mas->store_type) { 4003 case wr_exact_fit: 4004 case wr_append: 4005 case wr_slot_store: 4006 ret = 0; 4007 break; 4008 case wr_spanning_store: 4009 if (wr_mas->sufficient_height < wr_mas->vacant_height) 4010 ret = (height - wr_mas->sufficient_height) * 3 + 1; 4011 else 4012 ret = delta * 3 + 1; 4013 break; 4014 case wr_split_store: 4015 ret = delta * 2 + 1; 4016 break; 4017 case wr_rebalance: 4018 if (wr_mas->sufficient_height < wr_mas->vacant_height) 4019 ret = (height - wr_mas->sufficient_height) * 2 + 1; 4020 else 4021 ret = delta * 2 + 1; 4022 break; 4023 case wr_node_store: 4024 ret = mt_in_rcu(mas->tree) ? 1 : 0; 4025 break; 4026 case wr_new_root: 4027 ret = 1; 4028 break; 4029 case wr_store_root: 4030 if (likely((mas->last != 0) || (mas->index != 0))) 4031 ret = 1; 4032 else if (((unsigned long) (entry) & 3) == 2) 4033 ret = 1; 4034 else 4035 ret = 0; 4036 break; 4037 case wr_invalid: 4038 WARN_ON_ONCE(1); 4039 } 4040 4041 return ret; 4042 } 4043 4044 /* 4045 * mas_wr_store_type() - Determine the store type for a given 4046 * store operation. 4047 * @wr_mas: The maple write state 4048 * 4049 * Return: the type of store needed for the operation 4050 */ 4051 static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) 4052 { 4053 struct ma_state *mas = wr_mas->mas; 4054 unsigned char new_end; 4055 4056 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 4057 return wr_store_root; 4058 4059 if (unlikely(!mas_wr_walk(wr_mas))) 4060 return wr_spanning_store; 4061 4062 /* At this point, we are at the leaf node that needs to be altered. */ 4063 mas_wr_end_piv(wr_mas); 4064 if (!wr_mas->entry) 4065 mas_wr_extend_null(wr_mas); 4066 4067 if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) 4068 return wr_exact_fit; 4069 4070 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 4071 return wr_new_root; 4072 4073 new_end = mas_wr_new_end(wr_mas); 4074 /* Potential spanning rebalance collapsing a node */ 4075 if (new_end < mt_min_slots[wr_mas->type]) { 4076 if (!mte_is_root(mas->node)) 4077 return wr_rebalance; 4078 return wr_node_store; 4079 } 4080 4081 if (new_end >= mt_slots[wr_mas->type]) 4082 return wr_split_store; 4083 4084 if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) 4085 return wr_append; 4086 4087 if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || 4088 (wr_mas->offset_end - mas->offset == 1))) 4089 return wr_slot_store; 4090 4091 return wr_node_store; 4092 } 4093 4094 /** 4095 * mas_wr_preallocate() - Preallocate enough nodes for a store operation 4096 * @wr_mas: The maple write state 4097 * @entry: The entry that will be stored 4098 * 4099 */ 4100 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) 4101 { 4102 int request; 4103 4104 mas_wr_prealloc_setup(wr_mas); 4105 wr_mas->mas->store_type = mas_wr_store_type(wr_mas); 4106 request = mas_prealloc_calc(wr_mas, entry); 4107 if (!request) 4108 return; 4109 4110 mas_node_count(wr_mas->mas, request); 4111 } 4112 4113 /** 4114 * mas_insert() - Internal call to insert a value 4115 * @mas: The maple state 4116 * @entry: The entry to store 4117 * 4118 * Return: %NULL or the contents that already exists at the requested index 4119 * otherwise. The maple state needs to be checked for error conditions. 4120 */ 4121 static inline void *mas_insert(struct ma_state *mas, void *entry) 4122 { 4123 MA_WR_STATE(wr_mas, mas, entry); 4124 4125 /* 4126 * Inserting a new range inserts either 0, 1, or 2 pivots within the 4127 * tree. If the insert fits exactly into an existing gap with a value 4128 * of NULL, then the slot only needs to be written with the new value. 4129 * If the range being inserted is adjacent to another range, then only a 4130 * single pivot needs to be inserted (as well as writing the entry). If 4131 * the new range is within a gap but does not touch any other ranges, 4132 * then two pivots need to be inserted: the start - 1, and the end. As 4133 * usual, the entry must be written. Most operations require a new node 4134 * to be allocated and replace an existing node to ensure RCU safety, 4135 * when in RCU mode. The exception to requiring a newly allocated node 4136 * is when inserting at the end of a node (appending). When done 4137 * carefully, appending can reuse the node in place. 4138 */ 4139 wr_mas.content = mas_start(mas); 4140 if (wr_mas.content) 4141 goto exists; 4142 4143 mas_wr_preallocate(&wr_mas, entry); 4144 if (mas_is_err(mas)) 4145 return NULL; 4146 4147 /* spanning writes always overwrite something */ 4148 if (mas->store_type == wr_spanning_store) 4149 goto exists; 4150 4151 /* At this point, we are at the leaf node that needs to be altered. */ 4152 if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) { 4153 wr_mas.offset_end = mas->offset; 4154 wr_mas.end_piv = wr_mas.r_max; 4155 4156 if (wr_mas.content || (mas->last > wr_mas.r_max)) 4157 goto exists; 4158 } 4159 4160 mas_wr_store_entry(&wr_mas); 4161 return wr_mas.content; 4162 4163 exists: 4164 mas_set_err(mas, -EEXIST); 4165 return wr_mas.content; 4166 4167 } 4168 4169 /** 4170 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry 4171 * @mas: The maple state. 4172 * @startp: Pointer to ID. 4173 * @range_lo: Lower bound of range to search. 4174 * @range_hi: Upper bound of range to search. 4175 * @entry: The entry to store. 4176 * @next: Pointer to next ID to allocate. 4177 * @gfp: The GFP_FLAGS to use for allocations. 4178 * 4179 * Return: 0 if the allocation succeeded without wrapping, 1 if the 4180 * allocation succeeded after wrapping, or -EBUSY if there are no 4181 * free entries. 4182 */ 4183 int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, 4184 void *entry, unsigned long range_lo, unsigned long range_hi, 4185 unsigned long *next, gfp_t gfp) 4186 { 4187 unsigned long min = range_lo; 4188 int ret = 0; 4189 4190 range_lo = max(min, *next); 4191 ret = mas_empty_area(mas, range_lo, range_hi, 1); 4192 if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) { 4193 mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED; 4194 ret = 1; 4195 } 4196 if (ret < 0 && range_lo > min) { 4197 mas_reset(mas); 4198 ret = mas_empty_area(mas, min, range_hi, 1); 4199 if (ret == 0) 4200 ret = 1; 4201 } 4202 if (ret < 0) 4203 return ret; 4204 4205 do { 4206 mas_insert(mas, entry); 4207 } while (mas_nomem(mas, gfp)); 4208 if (mas_is_err(mas)) 4209 return xa_err(mas->node); 4210 4211 *startp = mas->index; 4212 *next = *startp + 1; 4213 if (*next == 0) 4214 mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; 4215 4216 mas_destroy(mas); 4217 return ret; 4218 } 4219 EXPORT_SYMBOL(mas_alloc_cyclic); 4220 4221 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4222 { 4223 retry: 4224 mas_set(mas, index); 4225 mas_state_walk(mas); 4226 if (mas_is_start(mas)) 4227 goto retry; 4228 } 4229 4230 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, 4231 struct maple_node *node, const unsigned long index) 4232 { 4233 if (unlikely(ma_dead_node(node))) { 4234 mas_rewalk(mas, index); 4235 return true; 4236 } 4237 return false; 4238 } 4239 4240 /* 4241 * mas_prev_node() - Find the prev non-null entry at the same level in the 4242 * tree. The prev value will be mas->node[mas->offset] or the status will be 4243 * ma_none. 4244 * @mas: The maple state 4245 * @min: The lower limit to search 4246 * 4247 * The prev node value will be mas->node[mas->offset] or the status will be 4248 * ma_none. 4249 * Return: 1 if the node is dead, 0 otherwise. 4250 */ 4251 static int mas_prev_node(struct ma_state *mas, unsigned long min) 4252 { 4253 enum maple_type mt; 4254 int offset, level; 4255 void __rcu **slots; 4256 struct maple_node *node; 4257 unsigned long *pivots; 4258 unsigned long max; 4259 4260 node = mas_mn(mas); 4261 if (!mas->min) 4262 goto no_entry; 4263 4264 max = mas->min - 1; 4265 if (max < min) 4266 goto no_entry; 4267 4268 level = 0; 4269 do { 4270 if (ma_is_root(node)) 4271 goto no_entry; 4272 4273 /* Walk up. */ 4274 if (unlikely(mas_ascend(mas))) 4275 return 1; 4276 offset = mas->offset; 4277 level++; 4278 node = mas_mn(mas); 4279 } while (!offset); 4280 4281 offset--; 4282 mt = mte_node_type(mas->node); 4283 while (level > 1) { 4284 level--; 4285 slots = ma_slots(node, mt); 4286 mas->node = mas_slot(mas, slots, offset); 4287 if (unlikely(ma_dead_node(node))) 4288 return 1; 4289 4290 mt = mte_node_type(mas->node); 4291 node = mas_mn(mas); 4292 pivots = ma_pivots(node, mt); 4293 offset = ma_data_end(node, mt, pivots, max); 4294 if (unlikely(ma_dead_node(node))) 4295 return 1; 4296 } 4297 4298 slots = ma_slots(node, mt); 4299 mas->node = mas_slot(mas, slots, offset); 4300 pivots = ma_pivots(node, mt); 4301 if (unlikely(ma_dead_node(node))) 4302 return 1; 4303 4304 if (likely(offset)) 4305 mas->min = pivots[offset - 1] + 1; 4306 mas->max = max; 4307 mas->offset = mas_data_end(mas); 4308 if (unlikely(mte_dead_node(mas->node))) 4309 return 1; 4310 4311 mas->end = mas->offset; 4312 return 0; 4313 4314 no_entry: 4315 if (unlikely(ma_dead_node(node))) 4316 return 1; 4317 4318 mas->status = ma_underflow; 4319 return 0; 4320 } 4321 4322 /* 4323 * mas_prev_slot() - Get the entry in the previous slot 4324 * 4325 * @mas: The maple state 4326 * @min: The minimum starting range 4327 * @empty: Can be empty 4328 * 4329 * Return: The entry in the previous slot which is possibly NULL 4330 */ 4331 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4332 { 4333 void *entry; 4334 void __rcu **slots; 4335 unsigned long pivot; 4336 enum maple_type type; 4337 unsigned long *pivots; 4338 struct maple_node *node; 4339 unsigned long save_point = mas->index; 4340 4341 retry: 4342 node = mas_mn(mas); 4343 type = mte_node_type(mas->node); 4344 pivots = ma_pivots(node, type); 4345 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4346 goto retry; 4347 4348 if (mas->min <= min) { 4349 pivot = mas_safe_min(mas, pivots, mas->offset); 4350 4351 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4352 goto retry; 4353 4354 if (pivot <= min) 4355 goto underflow; 4356 } 4357 4358 again: 4359 if (likely(mas->offset)) { 4360 mas->offset--; 4361 mas->last = mas->index - 1; 4362 mas->index = mas_safe_min(mas, pivots, mas->offset); 4363 } else { 4364 if (mas->index <= min) 4365 goto underflow; 4366 4367 if (mas_prev_node(mas, min)) { 4368 mas_rewalk(mas, save_point); 4369 goto retry; 4370 } 4371 4372 if (WARN_ON_ONCE(mas_is_underflow(mas))) 4373 return NULL; 4374 4375 mas->last = mas->max; 4376 node = mas_mn(mas); 4377 type = mte_node_type(mas->node); 4378 pivots = ma_pivots(node, type); 4379 mas->index = pivots[mas->offset - 1] + 1; 4380 } 4381 4382 slots = ma_slots(node, type); 4383 entry = mas_slot(mas, slots, mas->offset); 4384 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4385 goto retry; 4386 4387 if (likely(entry)) 4388 return entry; 4389 4390 if (!empty) { 4391 if (mas->index <= min) 4392 goto underflow; 4393 4394 goto again; 4395 } 4396 4397 return entry; 4398 4399 underflow: 4400 mas->status = ma_underflow; 4401 return NULL; 4402 } 4403 4404 /* 4405 * mas_next_node() - Get the next node at the same level in the tree. 4406 * @mas: The maple state 4407 * @node: The maple node 4408 * @max: The maximum pivot value to check. 4409 * 4410 * The next value will be mas->node[mas->offset] or the status will have 4411 * overflowed. 4412 * Return: 1 on dead node, 0 otherwise. 4413 */ 4414 static int mas_next_node(struct ma_state *mas, struct maple_node *node, 4415 unsigned long max) 4416 { 4417 unsigned long min; 4418 unsigned long *pivots; 4419 struct maple_enode *enode; 4420 struct maple_node *tmp; 4421 int level = 0; 4422 unsigned char node_end; 4423 enum maple_type mt; 4424 void __rcu **slots; 4425 4426 if (mas->max >= max) 4427 goto overflow; 4428 4429 min = mas->max + 1; 4430 level = 0; 4431 do { 4432 if (ma_is_root(node)) 4433 goto overflow; 4434 4435 /* Walk up. */ 4436 if (unlikely(mas_ascend(mas))) 4437 return 1; 4438 4439 level++; 4440 node = mas_mn(mas); 4441 mt = mte_node_type(mas->node); 4442 pivots = ma_pivots(node, mt); 4443 node_end = ma_data_end(node, mt, pivots, mas->max); 4444 if (unlikely(ma_dead_node(node))) 4445 return 1; 4446 4447 } while (unlikely(mas->offset == node_end)); 4448 4449 slots = ma_slots(node, mt); 4450 mas->offset++; 4451 enode = mas_slot(mas, slots, mas->offset); 4452 if (unlikely(ma_dead_node(node))) 4453 return 1; 4454 4455 if (level > 1) 4456 mas->offset = 0; 4457 4458 while (unlikely(level > 1)) { 4459 level--; 4460 mas->node = enode; 4461 node = mas_mn(mas); 4462 mt = mte_node_type(mas->node); 4463 slots = ma_slots(node, mt); 4464 enode = mas_slot(mas, slots, 0); 4465 if (unlikely(ma_dead_node(node))) 4466 return 1; 4467 } 4468 4469 if (!mas->offset) 4470 pivots = ma_pivots(node, mt); 4471 4472 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4473 tmp = mte_to_node(enode); 4474 mt = mte_node_type(enode); 4475 pivots = ma_pivots(tmp, mt); 4476 mas->end = ma_data_end(tmp, mt, pivots, mas->max); 4477 if (unlikely(ma_dead_node(node))) 4478 return 1; 4479 4480 mas->node = enode; 4481 mas->min = min; 4482 return 0; 4483 4484 overflow: 4485 if (unlikely(ma_dead_node(node))) 4486 return 1; 4487 4488 mas->status = ma_overflow; 4489 return 0; 4490 } 4491 4492 /* 4493 * mas_next_slot() - Get the entry in the next slot 4494 * 4495 * @mas: The maple state 4496 * @max: The maximum starting range 4497 * @empty: Can be empty 4498 * 4499 * Return: The entry in the next slot which is possibly NULL 4500 */ 4501 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4502 { 4503 void __rcu **slots; 4504 unsigned long *pivots; 4505 unsigned long pivot; 4506 enum maple_type type; 4507 struct maple_node *node; 4508 unsigned long save_point = mas->last; 4509 void *entry; 4510 4511 retry: 4512 node = mas_mn(mas); 4513 type = mte_node_type(mas->node); 4514 pivots = ma_pivots(node, type); 4515 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4516 goto retry; 4517 4518 if (mas->max >= max) { 4519 if (likely(mas->offset < mas->end)) 4520 pivot = pivots[mas->offset]; 4521 else 4522 pivot = mas->max; 4523 4524 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4525 goto retry; 4526 4527 if (pivot >= max) { /* Was at the limit, next will extend beyond */ 4528 mas->status = ma_overflow; 4529 return NULL; 4530 } 4531 } 4532 4533 if (likely(mas->offset < mas->end)) { 4534 mas->index = pivots[mas->offset] + 1; 4535 again: 4536 mas->offset++; 4537 if (likely(mas->offset < mas->end)) 4538 mas->last = pivots[mas->offset]; 4539 else 4540 mas->last = mas->max; 4541 } else { 4542 if (mas->last >= max) { 4543 mas->status = ma_overflow; 4544 return NULL; 4545 } 4546 4547 if (mas_next_node(mas, node, max)) { 4548 mas_rewalk(mas, save_point); 4549 goto retry; 4550 } 4551 4552 if (WARN_ON_ONCE(mas_is_overflow(mas))) 4553 return NULL; 4554 4555 mas->offset = 0; 4556 mas->index = mas->min; 4557 node = mas_mn(mas); 4558 type = mte_node_type(mas->node); 4559 pivots = ma_pivots(node, type); 4560 mas->last = pivots[0]; 4561 } 4562 4563 slots = ma_slots(node, type); 4564 entry = mt_slot(mas->tree, slots, mas->offset); 4565 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4566 goto retry; 4567 4568 if (entry) 4569 return entry; 4570 4571 4572 if (!empty) { 4573 if (mas->last >= max) { 4574 mas->status = ma_overflow; 4575 return NULL; 4576 } 4577 4578 mas->index = mas->last + 1; 4579 goto again; 4580 } 4581 4582 return entry; 4583 } 4584 4585 /* 4586 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4587 * highest gap address of a given size in a given node and descend. 4588 * @mas: The maple state 4589 * @size: The needed size. 4590 * 4591 * Return: True if found in a leaf, false otherwise. 4592 * 4593 */ 4594 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4595 unsigned long *gap_min, unsigned long *gap_max) 4596 { 4597 enum maple_type type = mte_node_type(mas->node); 4598 struct maple_node *node = mas_mn(mas); 4599 unsigned long *pivots, *gaps; 4600 void __rcu **slots; 4601 unsigned long gap = 0; 4602 unsigned long max, min; 4603 unsigned char offset; 4604 4605 if (unlikely(mas_is_err(mas))) 4606 return true; 4607 4608 if (ma_is_dense(type)) { 4609 /* dense nodes. */ 4610 mas->offset = (unsigned char)(mas->index - mas->min); 4611 return true; 4612 } 4613 4614 pivots = ma_pivots(node, type); 4615 slots = ma_slots(node, type); 4616 gaps = ma_gaps(node, type); 4617 offset = mas->offset; 4618 min = mas_safe_min(mas, pivots, offset); 4619 /* Skip out of bounds. */ 4620 while (mas->last < min) 4621 min = mas_safe_min(mas, pivots, --offset); 4622 4623 max = mas_safe_pivot(mas, pivots, offset, type); 4624 while (mas->index <= max) { 4625 gap = 0; 4626 if (gaps) 4627 gap = gaps[offset]; 4628 else if (!mas_slot(mas, slots, offset)) 4629 gap = max - min + 1; 4630 4631 if (gap) { 4632 if ((size <= gap) && (size <= mas->last - min + 1)) 4633 break; 4634 4635 if (!gaps) { 4636 /* Skip the next slot, it cannot be a gap. */ 4637 if (offset < 2) 4638 goto ascend; 4639 4640 offset -= 2; 4641 max = pivots[offset]; 4642 min = mas_safe_min(mas, pivots, offset); 4643 continue; 4644 } 4645 } 4646 4647 if (!offset) 4648 goto ascend; 4649 4650 offset--; 4651 max = min - 1; 4652 min = mas_safe_min(mas, pivots, offset); 4653 } 4654 4655 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4656 goto no_space; 4657 4658 if (unlikely(ma_is_leaf(type))) { 4659 mas->offset = offset; 4660 *gap_min = min; 4661 *gap_max = min + gap - 1; 4662 return true; 4663 } 4664 4665 /* descend, only happens under lock. */ 4666 mas->node = mas_slot(mas, slots, offset); 4667 mas->min = min; 4668 mas->max = max; 4669 mas->offset = mas_data_end(mas); 4670 return false; 4671 4672 ascend: 4673 if (!mte_is_root(mas->node)) 4674 return false; 4675 4676 no_space: 4677 mas_set_err(mas, -EBUSY); 4678 return false; 4679 } 4680 4681 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4682 { 4683 enum maple_type type = mte_node_type(mas->node); 4684 unsigned long pivot, min, gap = 0; 4685 unsigned char offset, data_end; 4686 unsigned long *gaps, *pivots; 4687 void __rcu **slots; 4688 struct maple_node *node; 4689 bool found = false; 4690 4691 if (ma_is_dense(type)) { 4692 mas->offset = (unsigned char)(mas->index - mas->min); 4693 return true; 4694 } 4695 4696 node = mas_mn(mas); 4697 pivots = ma_pivots(node, type); 4698 slots = ma_slots(node, type); 4699 gaps = ma_gaps(node, type); 4700 offset = mas->offset; 4701 min = mas_safe_min(mas, pivots, offset); 4702 data_end = ma_data_end(node, type, pivots, mas->max); 4703 for (; offset <= data_end; offset++) { 4704 pivot = mas_safe_pivot(mas, pivots, offset, type); 4705 4706 /* Not within lower bounds */ 4707 if (mas->index > pivot) 4708 goto next_slot; 4709 4710 if (gaps) 4711 gap = gaps[offset]; 4712 else if (!mas_slot(mas, slots, offset)) 4713 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4714 else 4715 goto next_slot; 4716 4717 if (gap >= size) { 4718 if (ma_is_leaf(type)) { 4719 found = true; 4720 break; 4721 } 4722 4723 mas->node = mas_slot(mas, slots, offset); 4724 mas->min = min; 4725 mas->max = pivot; 4726 offset = 0; 4727 break; 4728 } 4729 next_slot: 4730 min = pivot + 1; 4731 if (mas->last <= pivot) { 4732 mas_set_err(mas, -EBUSY); 4733 return true; 4734 } 4735 } 4736 4737 mas->offset = offset; 4738 return found; 4739 } 4740 4741 /** 4742 * mas_walk() - Search for @mas->index in the tree. 4743 * @mas: The maple state. 4744 * 4745 * mas->index and mas->last will be set to the range if there is a value. If 4746 * mas->status is ma_none, reset to ma_start 4747 * 4748 * Return: the entry at the location or %NULL. 4749 */ 4750 void *mas_walk(struct ma_state *mas) 4751 { 4752 void *entry; 4753 4754 if (!mas_is_active(mas) && !mas_is_start(mas)) 4755 mas->status = ma_start; 4756 retry: 4757 entry = mas_state_walk(mas); 4758 if (mas_is_start(mas)) { 4759 goto retry; 4760 } else if (mas_is_none(mas)) { 4761 mas->index = 0; 4762 mas->last = ULONG_MAX; 4763 } else if (mas_is_ptr(mas)) { 4764 if (!mas->index) { 4765 mas->last = 0; 4766 return entry; 4767 } 4768 4769 mas->index = 1; 4770 mas->last = ULONG_MAX; 4771 mas->status = ma_none; 4772 return NULL; 4773 } 4774 4775 return entry; 4776 } 4777 EXPORT_SYMBOL_GPL(mas_walk); 4778 4779 static inline bool mas_rewind_node(struct ma_state *mas) 4780 { 4781 unsigned char slot; 4782 4783 do { 4784 if (mte_is_root(mas->node)) { 4785 slot = mas->offset; 4786 if (!slot) 4787 return false; 4788 } else { 4789 mas_ascend(mas); 4790 slot = mas->offset; 4791 } 4792 } while (!slot); 4793 4794 mas->offset = --slot; 4795 return true; 4796 } 4797 4798 /* 4799 * mas_skip_node() - Internal function. Skip over a node. 4800 * @mas: The maple state. 4801 * 4802 * Return: true if there is another node, false otherwise. 4803 */ 4804 static inline bool mas_skip_node(struct ma_state *mas) 4805 { 4806 if (mas_is_err(mas)) 4807 return false; 4808 4809 do { 4810 if (mte_is_root(mas->node)) { 4811 if (mas->offset >= mas_data_end(mas)) { 4812 mas_set_err(mas, -EBUSY); 4813 return false; 4814 } 4815 } else { 4816 mas_ascend(mas); 4817 } 4818 } while (mas->offset >= mas_data_end(mas)); 4819 4820 mas->offset++; 4821 return true; 4822 } 4823 4824 /* 4825 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4826 * @size 4827 * @mas: The maple state 4828 * @size: The size of the gap required 4829 * 4830 * Search between @mas->index and @mas->last for a gap of @size. 4831 */ 4832 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4833 { 4834 struct maple_enode *last = NULL; 4835 4836 /* 4837 * There are 4 options: 4838 * go to child (descend) 4839 * go back to parent (ascend) 4840 * no gap found. (return, error == -EBUSY) 4841 * found the gap. (return) 4842 */ 4843 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4844 if (last == mas->node) 4845 mas_skip_node(mas); 4846 else 4847 last = mas->node; 4848 } 4849 } 4850 4851 /* 4852 * mas_sparse_area() - Internal function. Return upper or lower limit when 4853 * searching for a gap in an empty tree. 4854 * @mas: The maple state 4855 * @min: the minimum range 4856 * @max: The maximum range 4857 * @size: The size of the gap 4858 * @fwd: Searching forward or back 4859 */ 4860 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 4861 unsigned long max, unsigned long size, bool fwd) 4862 { 4863 if (!unlikely(mas_is_none(mas)) && min == 0) { 4864 min++; 4865 /* 4866 * At this time, min is increased, we need to recheck whether 4867 * the size is satisfied. 4868 */ 4869 if (min > max || max - min + 1 < size) 4870 return -EBUSY; 4871 } 4872 /* mas_is_ptr */ 4873 4874 if (fwd) { 4875 mas->index = min; 4876 mas->last = min + size - 1; 4877 } else { 4878 mas->last = max; 4879 mas->index = max - size + 1; 4880 } 4881 return 0; 4882 } 4883 4884 /* 4885 * mas_empty_area() - Get the lowest address within the range that is 4886 * sufficient for the size requested. 4887 * @mas: The maple state 4888 * @min: The lowest value of the range 4889 * @max: The highest value of the range 4890 * @size: The size needed 4891 */ 4892 int mas_empty_area(struct ma_state *mas, unsigned long min, 4893 unsigned long max, unsigned long size) 4894 { 4895 unsigned char offset; 4896 unsigned long *pivots; 4897 enum maple_type mt; 4898 struct maple_node *node; 4899 4900 if (min > max) 4901 return -EINVAL; 4902 4903 if (size == 0 || max - min < size - 1) 4904 return -EINVAL; 4905 4906 if (mas_is_start(mas)) 4907 mas_start(mas); 4908 else if (mas->offset >= 2) 4909 mas->offset -= 2; 4910 else if (!mas_skip_node(mas)) 4911 return -EBUSY; 4912 4913 /* Empty set */ 4914 if (mas_is_none(mas) || mas_is_ptr(mas)) 4915 return mas_sparse_area(mas, min, max, size, true); 4916 4917 /* The start of the window can only be within these values */ 4918 mas->index = min; 4919 mas->last = max; 4920 mas_awalk(mas, size); 4921 4922 if (unlikely(mas_is_err(mas))) 4923 return xa_err(mas->node); 4924 4925 offset = mas->offset; 4926 node = mas_mn(mas); 4927 mt = mte_node_type(mas->node); 4928 pivots = ma_pivots(node, mt); 4929 min = mas_safe_min(mas, pivots, offset); 4930 if (mas->index < min) 4931 mas->index = min; 4932 mas->last = mas->index + size - 1; 4933 mas->end = ma_data_end(node, mt, pivots, mas->max); 4934 return 0; 4935 } 4936 EXPORT_SYMBOL_GPL(mas_empty_area); 4937 4938 /* 4939 * mas_empty_area_rev() - Get the highest address within the range that is 4940 * sufficient for the size requested. 4941 * @mas: The maple state 4942 * @min: The lowest value of the range 4943 * @max: The highest value of the range 4944 * @size: The size needed 4945 */ 4946 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 4947 unsigned long max, unsigned long size) 4948 { 4949 struct maple_enode *last = mas->node; 4950 4951 if (min > max) 4952 return -EINVAL; 4953 4954 if (size == 0 || max - min < size - 1) 4955 return -EINVAL; 4956 4957 if (mas_is_start(mas)) 4958 mas_start(mas); 4959 else if ((mas->offset < 2) && (!mas_rewind_node(mas))) 4960 return -EBUSY; 4961 4962 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 4963 return mas_sparse_area(mas, min, max, size, false); 4964 else if (mas->offset >= 2) 4965 mas->offset -= 2; 4966 else 4967 mas->offset = mas_data_end(mas); 4968 4969 4970 /* The start of the window can only be within these values. */ 4971 mas->index = min; 4972 mas->last = max; 4973 4974 while (!mas_rev_awalk(mas, size, &min, &max)) { 4975 if (last == mas->node) { 4976 if (!mas_rewind_node(mas)) 4977 return -EBUSY; 4978 } else { 4979 last = mas->node; 4980 } 4981 } 4982 4983 if (mas_is_err(mas)) 4984 return xa_err(mas->node); 4985 4986 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 4987 return -EBUSY; 4988 4989 /* Trim the upper limit to the max. */ 4990 if (max < mas->last) 4991 mas->last = max; 4992 4993 mas->index = mas->last - size + 1; 4994 mas->end = mas_data_end(mas); 4995 return 0; 4996 } 4997 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 4998 4999 /* 5000 * mte_dead_leaves() - Mark all leaves of a node as dead. 5001 * @enode: the encoded node 5002 * @mt: the maple tree 5003 * @slots: Pointer to the slot array 5004 * 5005 * Must hold the write lock. 5006 * 5007 * Return: The number of leaves marked as dead. 5008 */ 5009 static inline 5010 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 5011 void __rcu **slots) 5012 { 5013 struct maple_node *node; 5014 enum maple_type type; 5015 void *entry; 5016 int offset; 5017 5018 for (offset = 0; offset < mt_slot_count(enode); offset++) { 5019 entry = mt_slot(mt, slots, offset); 5020 type = mte_node_type(entry); 5021 node = mte_to_node(entry); 5022 /* Use both node and type to catch LE & BE metadata */ 5023 if (!node || !type) 5024 break; 5025 5026 mte_set_node_dead(entry); 5027 node->type = type; 5028 rcu_assign_pointer(slots[offset], node); 5029 } 5030 5031 return offset; 5032 } 5033 5034 /** 5035 * mte_dead_walk() - Walk down a dead tree to just before the leaves 5036 * @enode: The maple encoded node 5037 * @offset: The starting offset 5038 * 5039 * Note: This can only be used from the RCU callback context. 5040 */ 5041 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 5042 { 5043 struct maple_node *node, *next; 5044 void __rcu **slots = NULL; 5045 5046 next = mte_to_node(*enode); 5047 do { 5048 *enode = ma_enode_ptr(next); 5049 node = mte_to_node(*enode); 5050 slots = ma_slots(node, node->type); 5051 next = rcu_dereference_protected(slots[offset], 5052 lock_is_held(&rcu_callback_map)); 5053 offset = 0; 5054 } while (!ma_is_leaf(next->type)); 5055 5056 return slots; 5057 } 5058 5059 /** 5060 * mt_free_walk() - Walk & free a tree in the RCU callback context 5061 * @head: The RCU head that's within the node. 5062 * 5063 * Note: This can only be used from the RCU callback context. 5064 */ 5065 static void mt_free_walk(struct rcu_head *head) 5066 { 5067 void __rcu **slots; 5068 struct maple_node *node, *start; 5069 struct maple_enode *enode; 5070 unsigned char offset; 5071 enum maple_type type; 5072 5073 node = container_of(head, struct maple_node, rcu); 5074 5075 if (ma_is_leaf(node->type)) 5076 goto free_leaf; 5077 5078 start = node; 5079 enode = mt_mk_node(node, node->type); 5080 slots = mte_dead_walk(&enode, 0); 5081 node = mte_to_node(enode); 5082 do { 5083 mt_free_bulk(node->slot_len, slots); 5084 offset = node->parent_slot + 1; 5085 enode = node->piv_parent; 5086 if (mte_to_node(enode) == node) 5087 goto free_leaf; 5088 5089 type = mte_node_type(enode); 5090 slots = ma_slots(mte_to_node(enode), type); 5091 if ((offset < mt_slots[type]) && 5092 rcu_dereference_protected(slots[offset], 5093 lock_is_held(&rcu_callback_map))) 5094 slots = mte_dead_walk(&enode, offset); 5095 node = mte_to_node(enode); 5096 } while ((node != start) || (node->slot_len < offset)); 5097 5098 slots = ma_slots(node, node->type); 5099 mt_free_bulk(node->slot_len, slots); 5100 5101 free_leaf: 5102 mt_free_rcu(&node->rcu); 5103 } 5104 5105 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 5106 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 5107 { 5108 struct maple_node *node; 5109 struct maple_enode *next = *enode; 5110 void __rcu **slots = NULL; 5111 enum maple_type type; 5112 unsigned char next_offset = 0; 5113 5114 do { 5115 *enode = next; 5116 node = mte_to_node(*enode); 5117 type = mte_node_type(*enode); 5118 slots = ma_slots(node, type); 5119 next = mt_slot_locked(mt, slots, next_offset); 5120 if ((mte_dead_node(next))) 5121 next = mt_slot_locked(mt, slots, ++next_offset); 5122 5123 mte_set_node_dead(*enode); 5124 node->type = type; 5125 node->piv_parent = prev; 5126 node->parent_slot = offset; 5127 offset = next_offset; 5128 next_offset = 0; 5129 prev = *enode; 5130 } while (!mte_is_leaf(next)); 5131 5132 return slots; 5133 } 5134 5135 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 5136 bool free) 5137 { 5138 void __rcu **slots; 5139 struct maple_node *node = mte_to_node(enode); 5140 struct maple_enode *start; 5141 5142 if (mte_is_leaf(enode)) { 5143 mte_set_node_dead(enode); 5144 node->type = mte_node_type(enode); 5145 goto free_leaf; 5146 } 5147 5148 start = enode; 5149 slots = mte_destroy_descend(&enode, mt, start, 0); 5150 node = mte_to_node(enode); // Updated in the above call. 5151 do { 5152 enum maple_type type; 5153 unsigned char offset; 5154 struct maple_enode *parent, *tmp; 5155 5156 node->slot_len = mte_dead_leaves(enode, mt, slots); 5157 if (free) 5158 mt_free_bulk(node->slot_len, slots); 5159 offset = node->parent_slot + 1; 5160 enode = node->piv_parent; 5161 if (mte_to_node(enode) == node) 5162 goto free_leaf; 5163 5164 type = mte_node_type(enode); 5165 slots = ma_slots(mte_to_node(enode), type); 5166 if (offset >= mt_slots[type]) 5167 goto next; 5168 5169 tmp = mt_slot_locked(mt, slots, offset); 5170 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5171 parent = enode; 5172 enode = tmp; 5173 slots = mte_destroy_descend(&enode, mt, parent, offset); 5174 } 5175 next: 5176 node = mte_to_node(enode); 5177 } while (start != enode); 5178 5179 node = mte_to_node(enode); 5180 node->slot_len = mte_dead_leaves(enode, mt, slots); 5181 if (free) 5182 mt_free_bulk(node->slot_len, slots); 5183 5184 free_leaf: 5185 if (free) 5186 mt_free_rcu(&node->rcu); 5187 else 5188 mt_clear_meta(mt, node, node->type); 5189 } 5190 5191 /* 5192 * mte_destroy_walk() - Free a tree or sub-tree. 5193 * @enode: the encoded maple node (maple_enode) to start 5194 * @mt: the tree to free - needed for node types. 5195 * 5196 * Must hold the write lock. 5197 */ 5198 static inline void mte_destroy_walk(struct maple_enode *enode, 5199 struct maple_tree *mt) 5200 { 5201 struct maple_node *node = mte_to_node(enode); 5202 5203 if (mt_in_rcu(mt)) { 5204 mt_destroy_walk(enode, mt, false); 5205 call_rcu(&node->rcu, mt_free_walk); 5206 } else { 5207 mt_destroy_walk(enode, mt, true); 5208 } 5209 } 5210 /* Interface */ 5211 5212 /** 5213 * mas_store() - Store an @entry. 5214 * @mas: The maple state. 5215 * @entry: The entry to store. 5216 * 5217 * The @mas->index and @mas->last is used to set the range for the @entry. 5218 * 5219 * Return: the first entry between mas->index and mas->last or %NULL. 5220 */ 5221 void *mas_store(struct ma_state *mas, void *entry) 5222 { 5223 int request; 5224 MA_WR_STATE(wr_mas, mas, entry); 5225 5226 trace_ma_write(__func__, mas, 0, entry); 5227 #ifdef CONFIG_DEBUG_MAPLE_TREE 5228 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5229 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, 5230 entry); 5231 5232 if (mas->index > mas->last) { 5233 mas_set_err(mas, -EINVAL); 5234 return NULL; 5235 } 5236 5237 #endif 5238 5239 /* 5240 * Storing is the same operation as insert with the added caveat that it 5241 * can overwrite entries. Although this seems simple enough, one may 5242 * want to examine what happens if a single store operation was to 5243 * overwrite multiple entries within a self-balancing B-Tree. 5244 */ 5245 mas_wr_prealloc_setup(&wr_mas); 5246 mas->store_type = mas_wr_store_type(&wr_mas); 5247 if (mas->mas_flags & MA_STATE_PREALLOC) { 5248 mas_wr_store_entry(&wr_mas); 5249 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5250 return wr_mas.content; 5251 } 5252 5253 request = mas_prealloc_calc(&wr_mas, entry); 5254 if (!request) 5255 goto store; 5256 5257 mas_node_count(mas, request); 5258 if (mas_is_err(mas)) 5259 return NULL; 5260 5261 store: 5262 mas_wr_store_entry(&wr_mas); 5263 mas_destroy(mas); 5264 return wr_mas.content; 5265 } 5266 EXPORT_SYMBOL_GPL(mas_store); 5267 5268 /** 5269 * mas_store_gfp() - Store a value into the tree. 5270 * @mas: The maple state 5271 * @entry: The entry to store 5272 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5273 * 5274 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5275 * be allocated. 5276 */ 5277 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5278 { 5279 unsigned long index = mas->index; 5280 unsigned long last = mas->last; 5281 MA_WR_STATE(wr_mas, mas, entry); 5282 int ret = 0; 5283 5284 retry: 5285 mas_wr_preallocate(&wr_mas, entry); 5286 if (unlikely(mas_nomem(mas, gfp))) { 5287 if (!entry) 5288 __mas_set_range(mas, index, last); 5289 goto retry; 5290 } 5291 5292 if (mas_is_err(mas)) { 5293 ret = xa_err(mas->node); 5294 goto out; 5295 } 5296 5297 mas_wr_store_entry(&wr_mas); 5298 out: 5299 mas_destroy(mas); 5300 return ret; 5301 } 5302 EXPORT_SYMBOL_GPL(mas_store_gfp); 5303 5304 /** 5305 * mas_store_prealloc() - Store a value into the tree using memory 5306 * preallocated in the maple state. 5307 * @mas: The maple state 5308 * @entry: The entry to store. 5309 */ 5310 void mas_store_prealloc(struct ma_state *mas, void *entry) 5311 { 5312 MA_WR_STATE(wr_mas, mas, entry); 5313 5314 if (mas->store_type == wr_store_root) { 5315 mas_wr_prealloc_setup(&wr_mas); 5316 goto store; 5317 } 5318 5319 mas_wr_walk_descend(&wr_mas); 5320 if (mas->store_type != wr_spanning_store) { 5321 /* set wr_mas->content to current slot */ 5322 wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset); 5323 mas_wr_end_piv(&wr_mas); 5324 } 5325 5326 store: 5327 trace_ma_write(__func__, mas, 0, entry); 5328 mas_wr_store_entry(&wr_mas); 5329 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5330 mas_destroy(mas); 5331 } 5332 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5333 5334 /** 5335 * mas_preallocate() - Preallocate enough nodes for a store operation 5336 * @mas: The maple state 5337 * @entry: The entry that will be stored 5338 * @gfp: The GFP_FLAGS to use for allocations. 5339 * 5340 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5341 */ 5342 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5343 { 5344 MA_WR_STATE(wr_mas, mas, entry); 5345 int ret = 0; 5346 int request; 5347 5348 mas_wr_prealloc_setup(&wr_mas); 5349 mas->store_type = mas_wr_store_type(&wr_mas); 5350 request = mas_prealloc_calc(&wr_mas, entry); 5351 if (!request) 5352 goto set_flag; 5353 5354 mas->mas_flags &= ~MA_STATE_PREALLOC; 5355 mas_node_count_gfp(mas, request, gfp); 5356 if (mas_is_err(mas)) { 5357 mas_set_alloc_req(mas, 0); 5358 ret = xa_err(mas->node); 5359 mas_destroy(mas); 5360 mas_reset(mas); 5361 return ret; 5362 } 5363 5364 set_flag: 5365 mas->mas_flags |= MA_STATE_PREALLOC; 5366 return ret; 5367 } 5368 EXPORT_SYMBOL_GPL(mas_preallocate); 5369 5370 /* 5371 * mas_destroy() - destroy a maple state. 5372 * @mas: The maple state 5373 * 5374 * Upon completion, check the left-most node and rebalance against the node to 5375 * the right if necessary. Frees any allocated nodes associated with this maple 5376 * state. 5377 */ 5378 void mas_destroy(struct ma_state *mas) 5379 { 5380 struct maple_alloc *node; 5381 unsigned long total; 5382 5383 mas->mas_flags &= ~MA_STATE_PREALLOC; 5384 5385 total = mas_allocated(mas); 5386 while (total) { 5387 node = mas->alloc; 5388 mas->alloc = node->slot[0]; 5389 if (node->node_count > 1) { 5390 size_t count = node->node_count - 1; 5391 5392 mt_free_bulk(count, (void __rcu **)&node->slot[1]); 5393 total -= count; 5394 } 5395 mt_free_one(ma_mnode_ptr(node)); 5396 total--; 5397 } 5398 5399 mas->alloc = NULL; 5400 } 5401 EXPORT_SYMBOL_GPL(mas_destroy); 5402 5403 static void mas_may_activate(struct ma_state *mas) 5404 { 5405 if (!mas->node) { 5406 mas->status = ma_start; 5407 } else if (mas->index > mas->max || mas->index < mas->min) { 5408 mas->status = ma_start; 5409 } else { 5410 mas->status = ma_active; 5411 } 5412 } 5413 5414 static bool mas_next_setup(struct ma_state *mas, unsigned long max, 5415 void **entry) 5416 { 5417 bool was_none = mas_is_none(mas); 5418 5419 if (unlikely(mas->last >= max)) { 5420 mas->status = ma_overflow; 5421 return true; 5422 } 5423 5424 switch (mas->status) { 5425 case ma_active: 5426 return false; 5427 case ma_none: 5428 fallthrough; 5429 case ma_pause: 5430 mas->status = ma_start; 5431 fallthrough; 5432 case ma_start: 5433 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5434 break; 5435 case ma_overflow: 5436 /* Overflowed before, but the max changed */ 5437 mas_may_activate(mas); 5438 break; 5439 case ma_underflow: 5440 /* The user expects the mas to be one before where it is */ 5441 mas_may_activate(mas); 5442 *entry = mas_walk(mas); 5443 if (*entry) 5444 return true; 5445 break; 5446 case ma_root: 5447 break; 5448 case ma_error: 5449 return true; 5450 } 5451 5452 if (likely(mas_is_active(mas))) /* Fast path */ 5453 return false; 5454 5455 if (mas_is_ptr(mas)) { 5456 *entry = NULL; 5457 if (was_none && mas->index == 0) { 5458 mas->index = mas->last = 0; 5459 return true; 5460 } 5461 mas->index = 1; 5462 mas->last = ULONG_MAX; 5463 mas->status = ma_none; 5464 return true; 5465 } 5466 5467 if (mas_is_none(mas)) 5468 return true; 5469 5470 return false; 5471 } 5472 5473 /** 5474 * mas_next() - Get the next entry. 5475 * @mas: The maple state 5476 * @max: The maximum index to check. 5477 * 5478 * Returns the next entry after @mas->index. 5479 * Must hold rcu_read_lock or the write lock. 5480 * Can return the zero entry. 5481 * 5482 * Return: The next entry or %NULL 5483 */ 5484 void *mas_next(struct ma_state *mas, unsigned long max) 5485 { 5486 void *entry = NULL; 5487 5488 if (mas_next_setup(mas, max, &entry)) 5489 return entry; 5490 5491 /* Retries on dead nodes handled by mas_next_slot */ 5492 return mas_next_slot(mas, max, false); 5493 } 5494 EXPORT_SYMBOL_GPL(mas_next); 5495 5496 /** 5497 * mas_next_range() - Advance the maple state to the next range 5498 * @mas: The maple state 5499 * @max: The maximum index to check. 5500 * 5501 * Sets @mas->index and @mas->last to the range. 5502 * Must hold rcu_read_lock or the write lock. 5503 * Can return the zero entry. 5504 * 5505 * Return: The next entry or %NULL 5506 */ 5507 void *mas_next_range(struct ma_state *mas, unsigned long max) 5508 { 5509 void *entry = NULL; 5510 5511 if (mas_next_setup(mas, max, &entry)) 5512 return entry; 5513 5514 /* Retries on dead nodes handled by mas_next_slot */ 5515 return mas_next_slot(mas, max, true); 5516 } 5517 EXPORT_SYMBOL_GPL(mas_next_range); 5518 5519 /** 5520 * mt_next() - get the next value in the maple tree 5521 * @mt: The maple tree 5522 * @index: The start index 5523 * @max: The maximum index to check 5524 * 5525 * Takes RCU read lock internally to protect the search, which does not 5526 * protect the returned pointer after dropping RCU read lock. 5527 * See also: Documentation/core-api/maple_tree.rst 5528 * 5529 * Return: The entry higher than @index or %NULL if nothing is found. 5530 */ 5531 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5532 { 5533 void *entry = NULL; 5534 MA_STATE(mas, mt, index, index); 5535 5536 rcu_read_lock(); 5537 entry = mas_next(&mas, max); 5538 rcu_read_unlock(); 5539 return entry; 5540 } 5541 EXPORT_SYMBOL_GPL(mt_next); 5542 5543 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) 5544 { 5545 if (unlikely(mas->index <= min)) { 5546 mas->status = ma_underflow; 5547 return true; 5548 } 5549 5550 switch (mas->status) { 5551 case ma_active: 5552 return false; 5553 case ma_start: 5554 break; 5555 case ma_none: 5556 fallthrough; 5557 case ma_pause: 5558 mas->status = ma_start; 5559 break; 5560 case ma_underflow: 5561 /* underflowed before but the min changed */ 5562 mas_may_activate(mas); 5563 break; 5564 case ma_overflow: 5565 /* User expects mas to be one after where it is */ 5566 mas_may_activate(mas); 5567 *entry = mas_walk(mas); 5568 if (*entry) 5569 return true; 5570 break; 5571 case ma_root: 5572 break; 5573 case ma_error: 5574 return true; 5575 } 5576 5577 if (mas_is_start(mas)) 5578 mas_walk(mas); 5579 5580 if (unlikely(mas_is_ptr(mas))) { 5581 if (!mas->index) { 5582 mas->status = ma_none; 5583 return true; 5584 } 5585 mas->index = mas->last = 0; 5586 *entry = mas_root(mas); 5587 return true; 5588 } 5589 5590 if (mas_is_none(mas)) { 5591 if (mas->index) { 5592 /* Walked to out-of-range pointer? */ 5593 mas->index = mas->last = 0; 5594 mas->status = ma_root; 5595 *entry = mas_root(mas); 5596 return true; 5597 } 5598 return true; 5599 } 5600 5601 return false; 5602 } 5603 5604 /** 5605 * mas_prev() - Get the previous entry 5606 * @mas: The maple state 5607 * @min: The minimum value to check. 5608 * 5609 * Must hold rcu_read_lock or the write lock. 5610 * Will reset mas to ma_start if the status is ma_none. Will stop on not 5611 * searchable nodes. 5612 * 5613 * Return: the previous value or %NULL. 5614 */ 5615 void *mas_prev(struct ma_state *mas, unsigned long min) 5616 { 5617 void *entry = NULL; 5618 5619 if (mas_prev_setup(mas, min, &entry)) 5620 return entry; 5621 5622 return mas_prev_slot(mas, min, false); 5623 } 5624 EXPORT_SYMBOL_GPL(mas_prev); 5625 5626 /** 5627 * mas_prev_range() - Advance to the previous range 5628 * @mas: The maple state 5629 * @min: The minimum value to check. 5630 * 5631 * Sets @mas->index and @mas->last to the range. 5632 * Must hold rcu_read_lock or the write lock. 5633 * Will reset mas to ma_start if the node is ma_none. Will stop on not 5634 * searchable nodes. 5635 * 5636 * Return: the previous value or %NULL. 5637 */ 5638 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5639 { 5640 void *entry = NULL; 5641 5642 if (mas_prev_setup(mas, min, &entry)) 5643 return entry; 5644 5645 return mas_prev_slot(mas, min, true); 5646 } 5647 EXPORT_SYMBOL_GPL(mas_prev_range); 5648 5649 /** 5650 * mt_prev() - get the previous value in the maple tree 5651 * @mt: The maple tree 5652 * @index: The start index 5653 * @min: The minimum index to check 5654 * 5655 * Takes RCU read lock internally to protect the search, which does not 5656 * protect the returned pointer after dropping RCU read lock. 5657 * See also: Documentation/core-api/maple_tree.rst 5658 * 5659 * Return: The entry before @index or %NULL if nothing is found. 5660 */ 5661 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5662 { 5663 void *entry = NULL; 5664 MA_STATE(mas, mt, index, index); 5665 5666 rcu_read_lock(); 5667 entry = mas_prev(&mas, min); 5668 rcu_read_unlock(); 5669 return entry; 5670 } 5671 EXPORT_SYMBOL_GPL(mt_prev); 5672 5673 /** 5674 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5675 * @mas: The maple state to pause 5676 * 5677 * Some users need to pause a walk and drop the lock they're holding in 5678 * order to yield to a higher priority thread or carry out an operation 5679 * on an entry. Those users should call this function before they drop 5680 * the lock. It resets the @mas to be suitable for the next iteration 5681 * of the loop after the user has reacquired the lock. If most entries 5682 * found during a walk require you to call mas_pause(), the mt_for_each() 5683 * iterator may be more appropriate. 5684 * 5685 */ 5686 void mas_pause(struct ma_state *mas) 5687 { 5688 mas->status = ma_pause; 5689 mas->node = NULL; 5690 } 5691 EXPORT_SYMBOL_GPL(mas_pause); 5692 5693 /** 5694 * mas_find_setup() - Internal function to set up mas_find*(). 5695 * @mas: The maple state 5696 * @max: The maximum index 5697 * @entry: Pointer to the entry 5698 * 5699 * Returns: True if entry is the answer, false otherwise. 5700 */ 5701 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) 5702 { 5703 switch (mas->status) { 5704 case ma_active: 5705 if (mas->last < max) 5706 return false; 5707 return true; 5708 case ma_start: 5709 break; 5710 case ma_pause: 5711 if (unlikely(mas->last >= max)) 5712 return true; 5713 5714 mas->index = ++mas->last; 5715 mas->status = ma_start; 5716 break; 5717 case ma_none: 5718 if (unlikely(mas->last >= max)) 5719 return true; 5720 5721 mas->index = mas->last; 5722 mas->status = ma_start; 5723 break; 5724 case ma_underflow: 5725 /* mas is pointing at entry before unable to go lower */ 5726 if (unlikely(mas->index >= max)) { 5727 mas->status = ma_overflow; 5728 return true; 5729 } 5730 5731 mas_may_activate(mas); 5732 *entry = mas_walk(mas); 5733 if (*entry) 5734 return true; 5735 break; 5736 case ma_overflow: 5737 if (unlikely(mas->last >= max)) 5738 return true; 5739 5740 mas_may_activate(mas); 5741 *entry = mas_walk(mas); 5742 if (*entry) 5743 return true; 5744 break; 5745 case ma_root: 5746 break; 5747 case ma_error: 5748 return true; 5749 } 5750 5751 if (mas_is_start(mas)) { 5752 /* First run or continue */ 5753 if (mas->index > max) 5754 return true; 5755 5756 *entry = mas_walk(mas); 5757 if (*entry) 5758 return true; 5759 5760 } 5761 5762 if (unlikely(mas_is_ptr(mas))) 5763 goto ptr_out_of_range; 5764 5765 if (unlikely(mas_is_none(mas))) 5766 return true; 5767 5768 if (mas->index == max) 5769 return true; 5770 5771 return false; 5772 5773 ptr_out_of_range: 5774 mas->status = ma_none; 5775 mas->index = 1; 5776 mas->last = ULONG_MAX; 5777 return true; 5778 } 5779 5780 /** 5781 * mas_find() - On the first call, find the entry at or after mas->index up to 5782 * %max. Otherwise, find the entry after mas->index. 5783 * @mas: The maple state 5784 * @max: The maximum value to check. 5785 * 5786 * Must hold rcu_read_lock or the write lock. 5787 * If an entry exists, last and index are updated accordingly. 5788 * May set @mas->status to ma_overflow. 5789 * 5790 * Return: The entry or %NULL. 5791 */ 5792 void *mas_find(struct ma_state *mas, unsigned long max) 5793 { 5794 void *entry = NULL; 5795 5796 if (mas_find_setup(mas, max, &entry)) 5797 return entry; 5798 5799 /* Retries on dead nodes handled by mas_next_slot */ 5800 entry = mas_next_slot(mas, max, false); 5801 /* Ignore overflow */ 5802 mas->status = ma_active; 5803 return entry; 5804 } 5805 EXPORT_SYMBOL_GPL(mas_find); 5806 5807 /** 5808 * mas_find_range() - On the first call, find the entry at or after 5809 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 5810 * @mas: The maple state 5811 * @max: The maximum value to check. 5812 * 5813 * Must hold rcu_read_lock or the write lock. 5814 * If an entry exists, last and index are updated accordingly. 5815 * May set @mas->status to ma_overflow. 5816 * 5817 * Return: The entry or %NULL. 5818 */ 5819 void *mas_find_range(struct ma_state *mas, unsigned long max) 5820 { 5821 void *entry = NULL; 5822 5823 if (mas_find_setup(mas, max, &entry)) 5824 return entry; 5825 5826 /* Retries on dead nodes handled by mas_next_slot */ 5827 return mas_next_slot(mas, max, true); 5828 } 5829 EXPORT_SYMBOL_GPL(mas_find_range); 5830 5831 /** 5832 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 5833 * @mas: The maple state 5834 * @min: The minimum index 5835 * @entry: Pointer to the entry 5836 * 5837 * Returns: True if entry is the answer, false otherwise. 5838 */ 5839 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 5840 void **entry) 5841 { 5842 5843 switch (mas->status) { 5844 case ma_active: 5845 goto active; 5846 case ma_start: 5847 break; 5848 case ma_pause: 5849 if (unlikely(mas->index <= min)) { 5850 mas->status = ma_underflow; 5851 return true; 5852 } 5853 mas->last = --mas->index; 5854 mas->status = ma_start; 5855 break; 5856 case ma_none: 5857 if (mas->index <= min) 5858 goto none; 5859 5860 mas->last = mas->index; 5861 mas->status = ma_start; 5862 break; 5863 case ma_overflow: /* user expects the mas to be one after where it is */ 5864 if (unlikely(mas->index <= min)) { 5865 mas->status = ma_underflow; 5866 return true; 5867 } 5868 5869 mas->status = ma_active; 5870 break; 5871 case ma_underflow: /* user expects the mas to be one before where it is */ 5872 if (unlikely(mas->index <= min)) 5873 return true; 5874 5875 mas->status = ma_active; 5876 break; 5877 case ma_root: 5878 break; 5879 case ma_error: 5880 return true; 5881 } 5882 5883 if (mas_is_start(mas)) { 5884 /* First run or continue */ 5885 if (mas->index < min) 5886 return true; 5887 5888 *entry = mas_walk(mas); 5889 if (*entry) 5890 return true; 5891 } 5892 5893 if (unlikely(mas_is_ptr(mas))) 5894 goto none; 5895 5896 if (unlikely(mas_is_none(mas))) { 5897 /* 5898 * Walked to the location, and there was nothing so the previous 5899 * location is 0. 5900 */ 5901 mas->last = mas->index = 0; 5902 mas->status = ma_root; 5903 *entry = mas_root(mas); 5904 return true; 5905 } 5906 5907 active: 5908 if (mas->index < min) 5909 return true; 5910 5911 return false; 5912 5913 none: 5914 mas->status = ma_none; 5915 return true; 5916 } 5917 5918 /** 5919 * mas_find_rev: On the first call, find the first non-null entry at or below 5920 * mas->index down to %min. Otherwise find the first non-null entry below 5921 * mas->index down to %min. 5922 * @mas: The maple state 5923 * @min: The minimum value to check. 5924 * 5925 * Must hold rcu_read_lock or the write lock. 5926 * If an entry exists, last and index are updated accordingly. 5927 * May set @mas->status to ma_underflow. 5928 * 5929 * Return: The entry or %NULL. 5930 */ 5931 void *mas_find_rev(struct ma_state *mas, unsigned long min) 5932 { 5933 void *entry = NULL; 5934 5935 if (mas_find_rev_setup(mas, min, &entry)) 5936 return entry; 5937 5938 /* Retries on dead nodes handled by mas_prev_slot */ 5939 return mas_prev_slot(mas, min, false); 5940 5941 } 5942 EXPORT_SYMBOL_GPL(mas_find_rev); 5943 5944 /** 5945 * mas_find_range_rev: On the first call, find the first non-null entry at or 5946 * below mas->index down to %min. Otherwise advance to the previous slot after 5947 * mas->index down to %min. 5948 * @mas: The maple state 5949 * @min: The minimum value to check. 5950 * 5951 * Must hold rcu_read_lock or the write lock. 5952 * If an entry exists, last and index are updated accordingly. 5953 * May set @mas->status to ma_underflow. 5954 * 5955 * Return: The entry or %NULL. 5956 */ 5957 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 5958 { 5959 void *entry = NULL; 5960 5961 if (mas_find_rev_setup(mas, min, &entry)) 5962 return entry; 5963 5964 /* Retries on dead nodes handled by mas_prev_slot */ 5965 return mas_prev_slot(mas, min, true); 5966 } 5967 EXPORT_SYMBOL_GPL(mas_find_range_rev); 5968 5969 /** 5970 * mas_erase() - Find the range in which index resides and erase the entire 5971 * range. 5972 * @mas: The maple state 5973 * 5974 * Must hold the write lock. 5975 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 5976 * erases that range. 5977 * 5978 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 5979 */ 5980 void *mas_erase(struct ma_state *mas) 5981 { 5982 void *entry; 5983 unsigned long index = mas->index; 5984 MA_WR_STATE(wr_mas, mas, NULL); 5985 5986 if (!mas_is_active(mas) || !mas_is_start(mas)) 5987 mas->status = ma_start; 5988 5989 write_retry: 5990 entry = mas_state_walk(mas); 5991 if (!entry) 5992 return NULL; 5993 5994 /* Must reset to ensure spanning writes of last slot are detected */ 5995 mas_reset(mas); 5996 mas_wr_preallocate(&wr_mas, NULL); 5997 if (mas_nomem(mas, GFP_KERNEL)) { 5998 /* in case the range of entry changed when unlocked */ 5999 mas->index = mas->last = index; 6000 goto write_retry; 6001 } 6002 6003 if (mas_is_err(mas)) 6004 goto out; 6005 6006 mas_wr_store_entry(&wr_mas); 6007 out: 6008 mas_destroy(mas); 6009 return entry; 6010 } 6011 EXPORT_SYMBOL_GPL(mas_erase); 6012 6013 /** 6014 * mas_nomem() - Check if there was an error allocating and do the allocation 6015 * if necessary If there are allocations, then free them. 6016 * @mas: The maple state 6017 * @gfp: The GFP_FLAGS to use for allocations 6018 * Return: true on allocation, false otherwise. 6019 */ 6020 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 6021 __must_hold(mas->tree->ma_lock) 6022 { 6023 if (likely(mas->node != MA_ERROR(-ENOMEM))) 6024 return false; 6025 6026 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 6027 mtree_unlock(mas->tree); 6028 mas_alloc_nodes(mas, gfp); 6029 mtree_lock(mas->tree); 6030 } else { 6031 mas_alloc_nodes(mas, gfp); 6032 } 6033 6034 if (!mas_allocated(mas)) 6035 return false; 6036 6037 mas->status = ma_start; 6038 return true; 6039 } 6040 6041 void __init maple_tree_init(void) 6042 { 6043 struct kmem_cache_args args = { 6044 .align = sizeof(struct maple_node), 6045 .sheaf_capacity = 32, 6046 }; 6047 6048 maple_node_cache = kmem_cache_create("maple_node", 6049 sizeof(struct maple_node), &args, 6050 SLAB_PANIC); 6051 } 6052 6053 /** 6054 * mtree_load() - Load a value stored in a maple tree 6055 * @mt: The maple tree 6056 * @index: The index to load 6057 * 6058 * Return: the entry or %NULL 6059 */ 6060 void *mtree_load(struct maple_tree *mt, unsigned long index) 6061 { 6062 MA_STATE(mas, mt, index, index); 6063 void *entry; 6064 6065 trace_ma_read(__func__, &mas); 6066 rcu_read_lock(); 6067 retry: 6068 entry = mas_start(&mas); 6069 if (unlikely(mas_is_none(&mas))) 6070 goto unlock; 6071 6072 if (unlikely(mas_is_ptr(&mas))) { 6073 if (index) 6074 entry = NULL; 6075 6076 goto unlock; 6077 } 6078 6079 entry = mtree_lookup_walk(&mas); 6080 if (!entry && unlikely(mas_is_start(&mas))) 6081 goto retry; 6082 unlock: 6083 rcu_read_unlock(); 6084 if (xa_is_zero(entry)) 6085 return NULL; 6086 6087 return entry; 6088 } 6089 EXPORT_SYMBOL(mtree_load); 6090 6091 /** 6092 * mtree_store_range() - Store an entry at a given range. 6093 * @mt: The maple tree 6094 * @index: The start of the range 6095 * @last: The end of the range 6096 * @entry: The entry to store 6097 * @gfp: The GFP_FLAGS to use for allocations 6098 * 6099 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6100 * be allocated. 6101 */ 6102 int mtree_store_range(struct maple_tree *mt, unsigned long index, 6103 unsigned long last, void *entry, gfp_t gfp) 6104 { 6105 MA_STATE(mas, mt, index, last); 6106 int ret = 0; 6107 6108 trace_ma_write(__func__, &mas, 0, entry); 6109 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6110 return -EINVAL; 6111 6112 if (index > last) 6113 return -EINVAL; 6114 6115 mtree_lock(mt); 6116 ret = mas_store_gfp(&mas, entry, gfp); 6117 mtree_unlock(mt); 6118 6119 return ret; 6120 } 6121 EXPORT_SYMBOL(mtree_store_range); 6122 6123 /** 6124 * mtree_store() - Store an entry at a given index. 6125 * @mt: The maple tree 6126 * @index: The index to store the value 6127 * @entry: The entry to store 6128 * @gfp: The GFP_FLAGS to use for allocations 6129 * 6130 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6131 * be allocated. 6132 */ 6133 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 6134 gfp_t gfp) 6135 { 6136 return mtree_store_range(mt, index, index, entry, gfp); 6137 } 6138 EXPORT_SYMBOL(mtree_store); 6139 6140 /** 6141 * mtree_insert_range() - Insert an entry at a given range if there is no value. 6142 * @mt: The maple tree 6143 * @first: The start of the range 6144 * @last: The end of the range 6145 * @entry: The entry to store 6146 * @gfp: The GFP_FLAGS to use for allocations. 6147 * 6148 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6149 * request, -ENOMEM if memory could not be allocated. 6150 */ 6151 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 6152 unsigned long last, void *entry, gfp_t gfp) 6153 { 6154 MA_STATE(ms, mt, first, last); 6155 int ret = 0; 6156 6157 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6158 return -EINVAL; 6159 6160 if (first > last) 6161 return -EINVAL; 6162 6163 mtree_lock(mt); 6164 retry: 6165 mas_insert(&ms, entry); 6166 if (mas_nomem(&ms, gfp)) 6167 goto retry; 6168 6169 mtree_unlock(mt); 6170 if (mas_is_err(&ms)) 6171 ret = xa_err(ms.node); 6172 6173 mas_destroy(&ms); 6174 return ret; 6175 } 6176 EXPORT_SYMBOL(mtree_insert_range); 6177 6178 /** 6179 * mtree_insert() - Insert an entry at a given index if there is no value. 6180 * @mt: The maple tree 6181 * @index : The index to store the value 6182 * @entry: The entry to store 6183 * @gfp: The GFP_FLAGS to use for allocations. 6184 * 6185 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6186 * request, -ENOMEM if memory could not be allocated. 6187 */ 6188 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6189 gfp_t gfp) 6190 { 6191 return mtree_insert_range(mt, index, index, entry, gfp); 6192 } 6193 EXPORT_SYMBOL(mtree_insert); 6194 6195 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6196 void *entry, unsigned long size, unsigned long min, 6197 unsigned long max, gfp_t gfp) 6198 { 6199 int ret = 0; 6200 6201 MA_STATE(mas, mt, 0, 0); 6202 if (!mt_is_alloc(mt)) 6203 return -EINVAL; 6204 6205 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6206 return -EINVAL; 6207 6208 mtree_lock(mt); 6209 retry: 6210 ret = mas_empty_area(&mas, min, max, size); 6211 if (ret) 6212 goto unlock; 6213 6214 mas_insert(&mas, entry); 6215 /* 6216 * mas_nomem() may release the lock, causing the allocated area 6217 * to be unavailable, so try to allocate a free area again. 6218 */ 6219 if (mas_nomem(&mas, gfp)) 6220 goto retry; 6221 6222 if (mas_is_err(&mas)) 6223 ret = xa_err(mas.node); 6224 else 6225 *startp = mas.index; 6226 6227 unlock: 6228 mtree_unlock(mt); 6229 mas_destroy(&mas); 6230 return ret; 6231 } 6232 EXPORT_SYMBOL(mtree_alloc_range); 6233 6234 /** 6235 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree. 6236 * @mt: The maple tree. 6237 * @startp: Pointer to ID. 6238 * @range_lo: Lower bound of range to search. 6239 * @range_hi: Upper bound of range to search. 6240 * @entry: The entry to store. 6241 * @next: Pointer to next ID to allocate. 6242 * @gfp: The GFP_FLAGS to use for allocations. 6243 * 6244 * Finds an empty entry in @mt after @next, stores the new index into 6245 * the @id pointer, stores the entry at that index, then updates @next. 6246 * 6247 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag. 6248 * 6249 * Context: Any context. Takes and releases the mt.lock. May sleep if 6250 * the @gfp flags permit. 6251 * 6252 * Return: 0 if the allocation succeeded without wrapping, 1 if the 6253 * allocation succeeded after wrapping, -ENOMEM if memory could not be 6254 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no 6255 * free entries. 6256 */ 6257 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, 6258 void *entry, unsigned long range_lo, unsigned long range_hi, 6259 unsigned long *next, gfp_t gfp) 6260 { 6261 int ret; 6262 6263 MA_STATE(mas, mt, 0, 0); 6264 6265 if (!mt_is_alloc(mt)) 6266 return -EINVAL; 6267 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6268 return -EINVAL; 6269 mtree_lock(mt); 6270 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, 6271 next, gfp); 6272 mtree_unlock(mt); 6273 return ret; 6274 } 6275 EXPORT_SYMBOL(mtree_alloc_cyclic); 6276 6277 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6278 void *entry, unsigned long size, unsigned long min, 6279 unsigned long max, gfp_t gfp) 6280 { 6281 int ret = 0; 6282 6283 MA_STATE(mas, mt, 0, 0); 6284 if (!mt_is_alloc(mt)) 6285 return -EINVAL; 6286 6287 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6288 return -EINVAL; 6289 6290 mtree_lock(mt); 6291 retry: 6292 ret = mas_empty_area_rev(&mas, min, max, size); 6293 if (ret) 6294 goto unlock; 6295 6296 mas_insert(&mas, entry); 6297 /* 6298 * mas_nomem() may release the lock, causing the allocated area 6299 * to be unavailable, so try to allocate a free area again. 6300 */ 6301 if (mas_nomem(&mas, gfp)) 6302 goto retry; 6303 6304 if (mas_is_err(&mas)) 6305 ret = xa_err(mas.node); 6306 else 6307 *startp = mas.index; 6308 6309 unlock: 6310 mtree_unlock(mt); 6311 mas_destroy(&mas); 6312 return ret; 6313 } 6314 EXPORT_SYMBOL(mtree_alloc_rrange); 6315 6316 /** 6317 * mtree_erase() - Find an index and erase the entire range. 6318 * @mt: The maple tree 6319 * @index: The index to erase 6320 * 6321 * Erasing is the same as a walk to an entry then a store of a NULL to that 6322 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6323 * 6324 * Return: The entry stored at the @index or %NULL 6325 */ 6326 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6327 { 6328 void *entry = NULL; 6329 6330 MA_STATE(mas, mt, index, index); 6331 trace_ma_op(__func__, &mas); 6332 6333 mtree_lock(mt); 6334 entry = mas_erase(&mas); 6335 mtree_unlock(mt); 6336 6337 return entry; 6338 } 6339 EXPORT_SYMBOL(mtree_erase); 6340 6341 /* 6342 * mas_dup_free() - Free an incomplete duplication of a tree. 6343 * @mas: The maple state of a incomplete tree. 6344 * 6345 * The parameter @mas->node passed in indicates that the allocation failed on 6346 * this node. This function frees all nodes starting from @mas->node in the 6347 * reverse order of mas_dup_build(). There is no need to hold the source tree 6348 * lock at this time. 6349 */ 6350 static void mas_dup_free(struct ma_state *mas) 6351 { 6352 struct maple_node *node; 6353 enum maple_type type; 6354 void __rcu **slots; 6355 unsigned char count, i; 6356 6357 /* Maybe the first node allocation failed. */ 6358 if (mas_is_none(mas)) 6359 return; 6360 6361 while (!mte_is_root(mas->node)) { 6362 mas_ascend(mas); 6363 if (mas->offset) { 6364 mas->offset--; 6365 do { 6366 mas_descend(mas); 6367 mas->offset = mas_data_end(mas); 6368 } while (!mte_is_leaf(mas->node)); 6369 6370 mas_ascend(mas); 6371 } 6372 6373 node = mte_to_node(mas->node); 6374 type = mte_node_type(mas->node); 6375 slots = ma_slots(node, type); 6376 count = mas_data_end(mas) + 1; 6377 for (i = 0; i < count; i++) 6378 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; 6379 mt_free_bulk(count, slots); 6380 } 6381 6382 node = mte_to_node(mas->node); 6383 mt_free_one(node); 6384 } 6385 6386 /* 6387 * mas_copy_node() - Copy a maple node and replace the parent. 6388 * @mas: The maple state of source tree. 6389 * @new_mas: The maple state of new tree. 6390 * @parent: The parent of the new node. 6391 * 6392 * Copy @mas->node to @new_mas->node, set @parent to be the parent of 6393 * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM. 6394 */ 6395 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, 6396 struct maple_pnode *parent) 6397 { 6398 struct maple_node *node = mte_to_node(mas->node); 6399 struct maple_node *new_node = mte_to_node(new_mas->node); 6400 unsigned long val; 6401 6402 /* Copy the node completely. */ 6403 memcpy(new_node, node, sizeof(struct maple_node)); 6404 /* Update the parent node pointer. */ 6405 val = (unsigned long)node->parent & MAPLE_NODE_MASK; 6406 new_node->parent = ma_parent_ptr(val | (unsigned long)parent); 6407 } 6408 6409 /* 6410 * mas_dup_alloc() - Allocate child nodes for a maple node. 6411 * @mas: The maple state of source tree. 6412 * @new_mas: The maple state of new tree. 6413 * @gfp: The GFP_FLAGS to use for allocations. 6414 * 6415 * This function allocates child nodes for @new_mas->node during the duplication 6416 * process. If memory allocation fails, @mas is set to -ENOMEM. 6417 */ 6418 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, 6419 gfp_t gfp) 6420 { 6421 struct maple_node *node = mte_to_node(mas->node); 6422 struct maple_node *new_node = mte_to_node(new_mas->node); 6423 enum maple_type type; 6424 unsigned char request, count, i; 6425 void __rcu **slots; 6426 void __rcu **new_slots; 6427 unsigned long val; 6428 6429 /* Allocate memory for child nodes. */ 6430 type = mte_node_type(mas->node); 6431 new_slots = ma_slots(new_node, type); 6432 request = mas_data_end(mas) + 1; 6433 count = mt_alloc_bulk(gfp, request, (void **)new_slots); 6434 if (unlikely(count < request)) { 6435 memset(new_slots, 0, request * sizeof(void *)); 6436 mas_set_err(mas, -ENOMEM); 6437 return; 6438 } 6439 6440 /* Restore node type information in slots. */ 6441 slots = ma_slots(node, type); 6442 for (i = 0; i < count; i++) { 6443 val = (unsigned long)mt_slot_locked(mas->tree, slots, i); 6444 val &= MAPLE_NODE_MASK; 6445 ((unsigned long *)new_slots)[i] |= val; 6446 } 6447 } 6448 6449 /* 6450 * mas_dup_build() - Build a new maple tree from a source tree 6451 * @mas: The maple state of source tree, need to be in MAS_START state. 6452 * @new_mas: The maple state of new tree, need to be in MAS_START state. 6453 * @gfp: The GFP_FLAGS to use for allocations. 6454 * 6455 * This function builds a new tree in DFS preorder. If the memory allocation 6456 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the 6457 * last node. mas_dup_free() will free the incomplete duplication of a tree. 6458 * 6459 * Note that the attributes of the two trees need to be exactly the same, and the 6460 * new tree needs to be empty, otherwise -EINVAL will be set in @mas. 6461 */ 6462 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, 6463 gfp_t gfp) 6464 { 6465 struct maple_node *node; 6466 struct maple_pnode *parent = NULL; 6467 struct maple_enode *root; 6468 enum maple_type type; 6469 6470 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || 6471 unlikely(!mtree_empty(new_mas->tree))) { 6472 mas_set_err(mas, -EINVAL); 6473 return; 6474 } 6475 6476 root = mas_start(mas); 6477 if (mas_is_ptr(mas) || mas_is_none(mas)) 6478 goto set_new_tree; 6479 6480 node = mt_alloc_one(gfp); 6481 if (!node) { 6482 new_mas->status = ma_none; 6483 mas_set_err(mas, -ENOMEM); 6484 return; 6485 } 6486 6487 type = mte_node_type(mas->node); 6488 root = mt_mk_node(node, type); 6489 new_mas->node = root; 6490 new_mas->min = 0; 6491 new_mas->max = ULONG_MAX; 6492 root = mte_mk_root(root); 6493 while (1) { 6494 mas_copy_node(mas, new_mas, parent); 6495 if (!mte_is_leaf(mas->node)) { 6496 /* Only allocate child nodes for non-leaf nodes. */ 6497 mas_dup_alloc(mas, new_mas, gfp); 6498 if (unlikely(mas_is_err(mas))) 6499 return; 6500 } else { 6501 /* 6502 * This is the last leaf node and duplication is 6503 * completed. 6504 */ 6505 if (mas->max == ULONG_MAX) 6506 goto done; 6507 6508 /* This is not the last leaf node and needs to go up. */ 6509 do { 6510 mas_ascend(mas); 6511 mas_ascend(new_mas); 6512 } while (mas->offset == mas_data_end(mas)); 6513 6514 /* Move to the next subtree. */ 6515 mas->offset++; 6516 new_mas->offset++; 6517 } 6518 6519 mas_descend(mas); 6520 parent = ma_parent_ptr(mte_to_node(new_mas->node)); 6521 mas_descend(new_mas); 6522 mas->offset = 0; 6523 new_mas->offset = 0; 6524 } 6525 done: 6526 /* Specially handle the parent of the root node. */ 6527 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); 6528 set_new_tree: 6529 /* Make them the same height */ 6530 new_mas->tree->ma_flags = mas->tree->ma_flags; 6531 rcu_assign_pointer(new_mas->tree->ma_root, root); 6532 } 6533 6534 /** 6535 * __mt_dup(): Duplicate an entire maple tree 6536 * @mt: The source maple tree 6537 * @new: The new maple tree 6538 * @gfp: The GFP_FLAGS to use for allocations 6539 * 6540 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6541 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6542 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6543 * source node except for all the addresses stored in it. It will be faster than 6544 * traversing all elements in the source tree and inserting them one by one into 6545 * the new tree. 6546 * The user needs to ensure that the attributes of the source tree and the new 6547 * tree are the same, and the new tree needs to be an empty tree, otherwise 6548 * -EINVAL will be returned. 6549 * Note that the user needs to manually lock the source tree and the new tree. 6550 * 6551 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6552 * the attributes of the two trees are different or the new tree is not an empty 6553 * tree. 6554 */ 6555 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6556 { 6557 int ret = 0; 6558 MA_STATE(mas, mt, 0, 0); 6559 MA_STATE(new_mas, new, 0, 0); 6560 6561 mas_dup_build(&mas, &new_mas, gfp); 6562 if (unlikely(mas_is_err(&mas))) { 6563 ret = xa_err(mas.node); 6564 if (ret == -ENOMEM) 6565 mas_dup_free(&new_mas); 6566 } 6567 6568 return ret; 6569 } 6570 EXPORT_SYMBOL(__mt_dup); 6571 6572 /** 6573 * mtree_dup(): Duplicate an entire maple tree 6574 * @mt: The source maple tree 6575 * @new: The new maple tree 6576 * @gfp: The GFP_FLAGS to use for allocations 6577 * 6578 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6579 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6580 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6581 * source node except for all the addresses stored in it. It will be faster than 6582 * traversing all elements in the source tree and inserting them one by one into 6583 * the new tree. 6584 * The user needs to ensure that the attributes of the source tree and the new 6585 * tree are the same, and the new tree needs to be an empty tree, otherwise 6586 * -EINVAL will be returned. 6587 * 6588 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6589 * the attributes of the two trees are different or the new tree is not an empty 6590 * tree. 6591 */ 6592 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6593 { 6594 int ret = 0; 6595 MA_STATE(mas, mt, 0, 0); 6596 MA_STATE(new_mas, new, 0, 0); 6597 6598 mas_lock(&new_mas); 6599 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); 6600 mas_dup_build(&mas, &new_mas, gfp); 6601 mas_unlock(&mas); 6602 if (unlikely(mas_is_err(&mas))) { 6603 ret = xa_err(mas.node); 6604 if (ret == -ENOMEM) 6605 mas_dup_free(&new_mas); 6606 } 6607 6608 mas_unlock(&new_mas); 6609 return ret; 6610 } 6611 EXPORT_SYMBOL(mtree_dup); 6612 6613 /** 6614 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6615 * @mt: The maple tree 6616 * 6617 * Note: Does not handle locking. 6618 */ 6619 void __mt_destroy(struct maple_tree *mt) 6620 { 6621 void *root = mt_root_locked(mt); 6622 6623 rcu_assign_pointer(mt->ma_root, NULL); 6624 if (xa_is_node(root)) 6625 mte_destroy_walk(root, mt); 6626 6627 mt->ma_flags = mt_attr(mt); 6628 } 6629 EXPORT_SYMBOL_GPL(__mt_destroy); 6630 6631 /** 6632 * mtree_destroy() - Destroy a maple tree 6633 * @mt: The maple tree 6634 * 6635 * Frees all resources used by the tree. Handles locking. 6636 */ 6637 void mtree_destroy(struct maple_tree *mt) 6638 { 6639 mtree_lock(mt); 6640 __mt_destroy(mt); 6641 mtree_unlock(mt); 6642 } 6643 EXPORT_SYMBOL(mtree_destroy); 6644 6645 /** 6646 * mt_find() - Search from the start up until an entry is found. 6647 * @mt: The maple tree 6648 * @index: Pointer which contains the start location of the search 6649 * @max: The maximum value of the search range 6650 * 6651 * Takes RCU read lock internally to protect the search, which does not 6652 * protect the returned pointer after dropping RCU read lock. 6653 * See also: Documentation/core-api/maple_tree.rst 6654 * 6655 * In case that an entry is found @index is updated to point to the next 6656 * possible entry independent whether the found entry is occupying a 6657 * single index or a range if indices. 6658 * 6659 * Return: The entry at or after the @index or %NULL 6660 */ 6661 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6662 { 6663 MA_STATE(mas, mt, *index, *index); 6664 void *entry; 6665 #ifdef CONFIG_DEBUG_MAPLE_TREE 6666 unsigned long copy = *index; 6667 #endif 6668 6669 trace_ma_read(__func__, &mas); 6670 6671 if ((*index) > max) 6672 return NULL; 6673 6674 rcu_read_lock(); 6675 retry: 6676 entry = mas_state_walk(&mas); 6677 if (mas_is_start(&mas)) 6678 goto retry; 6679 6680 if (unlikely(xa_is_zero(entry))) 6681 entry = NULL; 6682 6683 if (entry) 6684 goto unlock; 6685 6686 while (mas_is_active(&mas) && (mas.last < max)) { 6687 entry = mas_next_slot(&mas, max, false); 6688 if (likely(entry && !xa_is_zero(entry))) 6689 break; 6690 } 6691 6692 if (unlikely(xa_is_zero(entry))) 6693 entry = NULL; 6694 unlock: 6695 rcu_read_unlock(); 6696 if (likely(entry)) { 6697 *index = mas.last + 1; 6698 #ifdef CONFIG_DEBUG_MAPLE_TREE 6699 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6700 pr_err("index not increased! %lx <= %lx\n", 6701 *index, copy); 6702 #endif 6703 } 6704 6705 return entry; 6706 } 6707 EXPORT_SYMBOL(mt_find); 6708 6709 /** 6710 * mt_find_after() - Search from the start up until an entry is found. 6711 * @mt: The maple tree 6712 * @index: Pointer which contains the start location of the search 6713 * @max: The maximum value to check 6714 * 6715 * Same as mt_find() except that it checks @index for 0 before 6716 * searching. If @index == 0, the search is aborted. This covers a wrap 6717 * around of @index to 0 in an iterator loop. 6718 * 6719 * Return: The entry at or after the @index or %NULL 6720 */ 6721 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6722 unsigned long max) 6723 { 6724 if (!(*index)) 6725 return NULL; 6726 6727 return mt_find(mt, index, max); 6728 } 6729 EXPORT_SYMBOL(mt_find_after); 6730 6731 #ifdef CONFIG_DEBUG_MAPLE_TREE 6732 atomic_t maple_tree_tests_run; 6733 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6734 atomic_t maple_tree_tests_passed; 6735 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6736 6737 #ifndef __KERNEL__ 6738 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6739 void mt_set_non_kernel(unsigned int val) 6740 { 6741 kmem_cache_set_non_kernel(maple_node_cache, val); 6742 } 6743 6744 extern void kmem_cache_set_callback(struct kmem_cache *cachep, 6745 void (*callback)(void *)); 6746 void mt_set_callback(void (*callback)(void *)) 6747 { 6748 kmem_cache_set_callback(maple_node_cache, callback); 6749 } 6750 6751 extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); 6752 void mt_set_private(void *private) 6753 { 6754 kmem_cache_set_private(maple_node_cache, private); 6755 } 6756 6757 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6758 unsigned long mt_get_alloc_size(void) 6759 { 6760 return kmem_cache_get_alloc(maple_node_cache); 6761 } 6762 6763 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6764 void mt_zero_nr_tallocated(void) 6765 { 6766 kmem_cache_zero_nr_tallocated(maple_node_cache); 6767 } 6768 6769 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6770 unsigned int mt_nr_tallocated(void) 6771 { 6772 return kmem_cache_nr_tallocated(maple_node_cache); 6773 } 6774 6775 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6776 unsigned int mt_nr_allocated(void) 6777 { 6778 return kmem_cache_nr_allocated(maple_node_cache); 6779 } 6780 6781 void mt_cache_shrink(void) 6782 { 6783 } 6784 #else 6785 /* 6786 * mt_cache_shrink() - For testing, don't use this. 6787 * 6788 * Certain testcases can trigger an OOM when combined with other memory 6789 * debugging configuration options. This function is used to reduce the 6790 * possibility of an out of memory even due to kmem_cache objects remaining 6791 * around for longer than usual. 6792 */ 6793 void mt_cache_shrink(void) 6794 { 6795 kmem_cache_shrink(maple_node_cache); 6796 6797 } 6798 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6799 6800 #endif /* not defined __KERNEL__ */ 6801 /* 6802 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6803 * @mas: The maple state 6804 * @offset: The offset into the slot array to fetch. 6805 * 6806 * Return: The entry stored at @offset. 6807 */ 6808 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6809 unsigned char offset) 6810 { 6811 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6812 offset); 6813 } 6814 6815 /* Depth first search, post-order */ 6816 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6817 { 6818 6819 struct maple_enode *p, *mn = mas->node; 6820 unsigned long p_min, p_max; 6821 6822 mas_next_node(mas, mas_mn(mas), max); 6823 if (!mas_is_overflow(mas)) 6824 return; 6825 6826 if (mte_is_root(mn)) 6827 return; 6828 6829 mas->node = mn; 6830 mas_ascend(mas); 6831 do { 6832 p = mas->node; 6833 p_min = mas->min; 6834 p_max = mas->max; 6835 mas_prev_node(mas, 0); 6836 } while (!mas_is_underflow(mas)); 6837 6838 mas->node = p; 6839 mas->max = p_max; 6840 mas->min = p_min; 6841 } 6842 6843 /* Tree validations */ 6844 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6845 unsigned long min, unsigned long max, unsigned int depth, 6846 enum mt_dump_format format); 6847 static void mt_dump_range(unsigned long min, unsigned long max, 6848 unsigned int depth, enum mt_dump_format format) 6849 { 6850 static const char spaces[] = " "; 6851 6852 switch(format) { 6853 case mt_dump_hex: 6854 if (min == max) 6855 pr_info("%.*s%lx: ", depth * 2, spaces, min); 6856 else 6857 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 6858 break; 6859 case mt_dump_dec: 6860 if (min == max) 6861 pr_info("%.*s%lu: ", depth * 2, spaces, min); 6862 else 6863 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 6864 } 6865 } 6866 6867 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 6868 unsigned int depth, enum mt_dump_format format) 6869 { 6870 mt_dump_range(min, max, depth, format); 6871 6872 if (xa_is_value(entry)) 6873 pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), 6874 xa_to_value(entry), entry); 6875 else if (xa_is_zero(entry)) 6876 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 6877 else if (mt_is_reserved(entry)) 6878 pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); 6879 else 6880 pr_cont(PTR_FMT "\n", entry); 6881 } 6882 6883 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 6884 unsigned long min, unsigned long max, unsigned int depth, 6885 enum mt_dump_format format) 6886 { 6887 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 6888 bool leaf = mte_is_leaf(entry); 6889 unsigned long first = min; 6890 int i; 6891 6892 pr_cont(" contents: "); 6893 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 6894 switch(format) { 6895 case mt_dump_hex: 6896 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 6897 break; 6898 case mt_dump_dec: 6899 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 6900 } 6901 } 6902 pr_cont(PTR_FMT "\n", node->slot[i]); 6903 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 6904 unsigned long last = max; 6905 6906 if (i < (MAPLE_RANGE64_SLOTS - 1)) 6907 last = node->pivot[i]; 6908 else if (!node->slot[i] && max != mt_node_max(entry)) 6909 break; 6910 if (last == 0 && i > 0) 6911 break; 6912 if (leaf) 6913 mt_dump_entry(mt_slot(mt, node->slot, i), 6914 first, last, depth + 1, format); 6915 else if (node->slot[i]) 6916 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6917 first, last, depth + 1, format); 6918 6919 if (last == max) 6920 break; 6921 if (last > max) { 6922 switch(format) { 6923 case mt_dump_hex: 6924 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 6925 node, last, max, i); 6926 break; 6927 case mt_dump_dec: 6928 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 6929 node, last, max, i); 6930 } 6931 } 6932 first = last + 1; 6933 } 6934 } 6935 6936 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 6937 unsigned long min, unsigned long max, unsigned int depth, 6938 enum mt_dump_format format) 6939 { 6940 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 6941 unsigned long first = min; 6942 int i; 6943 6944 pr_cont(" contents: "); 6945 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6946 switch (format) { 6947 case mt_dump_hex: 6948 pr_cont("%lx ", node->gap[i]); 6949 break; 6950 case mt_dump_dec: 6951 pr_cont("%lu ", node->gap[i]); 6952 } 6953 } 6954 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 6955 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 6956 switch (format) { 6957 case mt_dump_hex: 6958 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 6959 break; 6960 case mt_dump_dec: 6961 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 6962 } 6963 } 6964 pr_cont(PTR_FMT "\n", node->slot[i]); 6965 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 6966 unsigned long last = max; 6967 6968 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 6969 last = node->pivot[i]; 6970 else if (!node->slot[i]) 6971 break; 6972 if (last == 0 && i > 0) 6973 break; 6974 if (node->slot[i]) 6975 mt_dump_node(mt, mt_slot(mt, node->slot, i), 6976 first, last, depth + 1, format); 6977 6978 if (last == max) 6979 break; 6980 if (last > max) { 6981 switch(format) { 6982 case mt_dump_hex: 6983 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 6984 node, last, max, i); 6985 break; 6986 case mt_dump_dec: 6987 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 6988 node, last, max, i); 6989 } 6990 } 6991 first = last + 1; 6992 } 6993 } 6994 6995 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6996 unsigned long min, unsigned long max, unsigned int depth, 6997 enum mt_dump_format format) 6998 { 6999 struct maple_node *node = mte_to_node(entry); 7000 unsigned int type = mte_node_type(entry); 7001 unsigned int i; 7002 7003 mt_dump_range(min, max, depth, format); 7004 7005 pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, 7006 depth, type, node ? node->parent : NULL); 7007 switch (type) { 7008 case maple_dense: 7009 pr_cont("\n"); 7010 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 7011 if (min + i > max) 7012 pr_cont("OUT OF RANGE: "); 7013 mt_dump_entry(mt_slot(mt, node->slot, i), 7014 min + i, min + i, depth, format); 7015 } 7016 break; 7017 case maple_leaf_64: 7018 case maple_range_64: 7019 mt_dump_range64(mt, entry, min, max, depth, format); 7020 break; 7021 case maple_arange_64: 7022 mt_dump_arange64(mt, entry, min, max, depth, format); 7023 break; 7024 7025 default: 7026 pr_cont(" UNKNOWN TYPE\n"); 7027 } 7028 } 7029 7030 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 7031 { 7032 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 7033 7034 pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", 7035 mt, mt->ma_flags, mt_height(mt), entry); 7036 if (xa_is_node(entry)) 7037 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 7038 else if (entry) 7039 mt_dump_entry(entry, 0, 0, 0, format); 7040 else 7041 pr_info("(empty)\n"); 7042 } 7043 EXPORT_SYMBOL_GPL(mt_dump); 7044 7045 /* 7046 * Calculate the maximum gap in a node and check if that's what is reported in 7047 * the parent (unless root). 7048 */ 7049 static void mas_validate_gaps(struct ma_state *mas) 7050 { 7051 struct maple_enode *mte = mas->node; 7052 struct maple_node *p_mn, *node = mte_to_node(mte); 7053 enum maple_type mt = mte_node_type(mas->node); 7054 unsigned long gap = 0, max_gap = 0; 7055 unsigned long p_end, p_start = mas->min; 7056 unsigned char p_slot, offset; 7057 unsigned long *gaps = NULL; 7058 unsigned long *pivots = ma_pivots(node, mt); 7059 unsigned int i; 7060 7061 if (ma_is_dense(mt)) { 7062 for (i = 0; i < mt_slot_count(mte); i++) { 7063 if (mas_get_slot(mas, i)) { 7064 if (gap > max_gap) 7065 max_gap = gap; 7066 gap = 0; 7067 continue; 7068 } 7069 gap++; 7070 } 7071 goto counted; 7072 } 7073 7074 gaps = ma_gaps(node, mt); 7075 for (i = 0; i < mt_slot_count(mte); i++) { 7076 p_end = mas_safe_pivot(mas, pivots, i, mt); 7077 7078 if (!gaps) { 7079 if (!mas_get_slot(mas, i)) 7080 gap = p_end - p_start + 1; 7081 } else { 7082 void *entry = mas_get_slot(mas, i); 7083 7084 gap = gaps[i]; 7085 MT_BUG_ON(mas->tree, !entry); 7086 7087 if (gap > p_end - p_start + 1) { 7088 pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", 7089 mas_mn(mas), i, gap, p_end, p_start, 7090 p_end - p_start + 1); 7091 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 7092 } 7093 } 7094 7095 if (gap > max_gap) 7096 max_gap = gap; 7097 7098 p_start = p_end + 1; 7099 if (p_end >= mas->max) 7100 break; 7101 } 7102 7103 counted: 7104 if (mt == maple_arange_64) { 7105 MT_BUG_ON(mas->tree, !gaps); 7106 offset = ma_meta_gap(node); 7107 if (offset > i) { 7108 pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); 7109 MT_BUG_ON(mas->tree, 1); 7110 } 7111 7112 if (gaps[offset] != max_gap) { 7113 pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", 7114 node, offset, max_gap); 7115 MT_BUG_ON(mas->tree, 1); 7116 } 7117 7118 for (i++ ; i < mt_slot_count(mte); i++) { 7119 if (gaps[i] != 0) { 7120 pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", 7121 node, i); 7122 MT_BUG_ON(mas->tree, 1); 7123 } 7124 } 7125 } 7126 7127 if (mte_is_root(mte)) 7128 return; 7129 7130 p_slot = mte_parent_slot(mas->node); 7131 p_mn = mte_parent(mte); 7132 MT_BUG_ON(mas->tree, max_gap > mas->max); 7133 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 7134 pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); 7135 mt_dump(mas->tree, mt_dump_hex); 7136 MT_BUG_ON(mas->tree, 1); 7137 } 7138 } 7139 7140 static void mas_validate_parent_slot(struct ma_state *mas) 7141 { 7142 struct maple_node *parent; 7143 struct maple_enode *node; 7144 enum maple_type p_type; 7145 unsigned char p_slot; 7146 void __rcu **slots; 7147 int i; 7148 7149 if (mte_is_root(mas->node)) 7150 return; 7151 7152 p_slot = mte_parent_slot(mas->node); 7153 p_type = mas_parent_type(mas, mas->node); 7154 parent = mte_parent(mas->node); 7155 slots = ma_slots(parent, p_type); 7156 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 7157 7158 /* Check prev/next parent slot for duplicate node entry */ 7159 7160 for (i = 0; i < mt_slots[p_type]; i++) { 7161 node = mas_slot(mas, slots, i); 7162 if (i == p_slot) { 7163 if (node != mas->node) 7164 pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", 7165 parent, i, mas_mn(mas)); 7166 MT_BUG_ON(mas->tree, node != mas->node); 7167 } else if (node == mas->node) { 7168 pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", 7169 mas_mn(mas), parent, i, p_slot); 7170 MT_BUG_ON(mas->tree, node == mas->node); 7171 } 7172 } 7173 } 7174 7175 static void mas_validate_child_slot(struct ma_state *mas) 7176 { 7177 enum maple_type type = mte_node_type(mas->node); 7178 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7179 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 7180 struct maple_enode *child; 7181 unsigned char i; 7182 7183 if (mte_is_leaf(mas->node)) 7184 return; 7185 7186 for (i = 0; i < mt_slots[type]; i++) { 7187 child = mas_slot(mas, slots, i); 7188 7189 if (!child) { 7190 pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", 7191 mas_mn(mas), i); 7192 MT_BUG_ON(mas->tree, 1); 7193 } 7194 7195 if (mte_parent_slot(child) != i) { 7196 pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", 7197 mas_mn(mas), i, mte_to_node(child), 7198 mte_parent_slot(child)); 7199 MT_BUG_ON(mas->tree, 1); 7200 } 7201 7202 if (mte_parent(child) != mte_to_node(mas->node)) { 7203 pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", 7204 mte_to_node(child), mte_parent(child), 7205 mte_to_node(mas->node)); 7206 MT_BUG_ON(mas->tree, 1); 7207 } 7208 7209 if (i < mt_pivots[type] && pivots[i] == mas->max) 7210 break; 7211 } 7212 } 7213 7214 /* 7215 * Validate all pivots are within mas->min and mas->max, check metadata ends 7216 * where the maximum ends and ensure there is no slots or pivots set outside of 7217 * the end of the data. 7218 */ 7219 static void mas_validate_limits(struct ma_state *mas) 7220 { 7221 int i; 7222 unsigned long prev_piv = 0; 7223 enum maple_type type = mte_node_type(mas->node); 7224 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7225 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7226 7227 for (i = 0; i < mt_slots[type]; i++) { 7228 unsigned long piv; 7229 7230 piv = mas_safe_pivot(mas, pivots, i, type); 7231 7232 if (!piv && (i != 0)) { 7233 pr_err("Missing node limit pivot at " PTR_FMT "[%u]", 7234 mas_mn(mas), i); 7235 MAS_WARN_ON(mas, 1); 7236 } 7237 7238 if (prev_piv > piv) { 7239 pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", 7240 mas_mn(mas), i, piv, prev_piv); 7241 MAS_WARN_ON(mas, piv < prev_piv); 7242 } 7243 7244 if (piv < mas->min) { 7245 pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, 7246 piv, mas->min); 7247 MAS_WARN_ON(mas, piv < mas->min); 7248 } 7249 if (piv > mas->max) { 7250 pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, 7251 piv, mas->max); 7252 MAS_WARN_ON(mas, piv > mas->max); 7253 } 7254 prev_piv = piv; 7255 if (piv == mas->max) 7256 break; 7257 } 7258 7259 if (mas_data_end(mas) != i) { 7260 pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", 7261 mas_mn(mas), mas_data_end(mas), i); 7262 MT_BUG_ON(mas->tree, 1); 7263 } 7264 7265 for (i += 1; i < mt_slots[type]; i++) { 7266 void *entry = mas_slot(mas, slots, i); 7267 7268 if (entry && (i != mt_slots[type] - 1)) { 7269 pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", 7270 mas_mn(mas), i, entry); 7271 MT_BUG_ON(mas->tree, entry != NULL); 7272 } 7273 7274 if (i < mt_pivots[type]) { 7275 unsigned long piv = pivots[i]; 7276 7277 if (!piv) 7278 continue; 7279 7280 pr_err(PTR_FMT "[%u] should not have piv %lu\n", 7281 mas_mn(mas), i, piv); 7282 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7283 } 7284 } 7285 } 7286 7287 static void mt_validate_nulls(struct maple_tree *mt) 7288 { 7289 void *entry, *last = (void *)1; 7290 unsigned char offset = 0; 7291 void __rcu **slots; 7292 MA_STATE(mas, mt, 0, 0); 7293 7294 mas_start(&mas); 7295 if (mas_is_none(&mas) || (mas_is_ptr(&mas))) 7296 return; 7297 7298 while (!mte_is_leaf(mas.node)) 7299 mas_descend(&mas); 7300 7301 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7302 do { 7303 entry = mas_slot(&mas, slots, offset); 7304 if (!last && !entry) { 7305 pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", 7306 mas_mn(&mas), offset); 7307 } 7308 MT_BUG_ON(mt, !last && !entry); 7309 last = entry; 7310 if (offset == mas_data_end(&mas)) { 7311 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7312 if (mas_is_overflow(&mas)) 7313 return; 7314 offset = 0; 7315 slots = ma_slots(mte_to_node(mas.node), 7316 mte_node_type(mas.node)); 7317 } else { 7318 offset++; 7319 } 7320 7321 } while (!mas_is_overflow(&mas)); 7322 } 7323 7324 /* 7325 * validate a maple tree by checking: 7326 * 1. The limits (pivots are within mas->min to mas->max) 7327 * 2. The gap is correctly set in the parents 7328 */ 7329 void mt_validate(struct maple_tree *mt) 7330 __must_hold(mas->tree->ma_lock) 7331 { 7332 unsigned char end; 7333 7334 MA_STATE(mas, mt, 0, 0); 7335 mas_start(&mas); 7336 if (!mas_is_active(&mas)) 7337 return; 7338 7339 while (!mte_is_leaf(mas.node)) 7340 mas_descend(&mas); 7341 7342 while (!mas_is_overflow(&mas)) { 7343 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7344 end = mas_data_end(&mas); 7345 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7346 (!mte_is_root(mas.node)))) { 7347 pr_err("Invalid size %u of " PTR_FMT "\n", 7348 end, mas_mn(&mas)); 7349 } 7350 7351 mas_validate_parent_slot(&mas); 7352 mas_validate_limits(&mas); 7353 mas_validate_child_slot(&mas); 7354 if (mt_is_alloc(mt)) 7355 mas_validate_gaps(&mas); 7356 mas_dfs_postorder(&mas, ULONG_MAX); 7357 } 7358 mt_validate_nulls(mt); 7359 } 7360 EXPORT_SYMBOL_GPL(mt_validate); 7361 7362 void mas_dump(const struct ma_state *mas) 7363 { 7364 pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", 7365 mas->tree, mas->node); 7366 switch (mas->status) { 7367 case ma_active: 7368 pr_err("(ma_active)"); 7369 break; 7370 case ma_none: 7371 pr_err("(ma_none)"); 7372 break; 7373 case ma_root: 7374 pr_err("(ma_root)"); 7375 break; 7376 case ma_start: 7377 pr_err("(ma_start) "); 7378 break; 7379 case ma_pause: 7380 pr_err("(ma_pause) "); 7381 break; 7382 case ma_overflow: 7383 pr_err("(ma_overflow) "); 7384 break; 7385 case ma_underflow: 7386 pr_err("(ma_underflow) "); 7387 break; 7388 case ma_error: 7389 pr_err("(ma_error) "); 7390 break; 7391 } 7392 7393 pr_err("Store Type: "); 7394 switch (mas->store_type) { 7395 case wr_invalid: 7396 pr_err("invalid store type\n"); 7397 break; 7398 case wr_new_root: 7399 pr_err("new_root\n"); 7400 break; 7401 case wr_store_root: 7402 pr_err("store_root\n"); 7403 break; 7404 case wr_exact_fit: 7405 pr_err("exact_fit\n"); 7406 break; 7407 case wr_split_store: 7408 pr_err("split_store\n"); 7409 break; 7410 case wr_slot_store: 7411 pr_err("slot_store\n"); 7412 break; 7413 case wr_append: 7414 pr_err("append\n"); 7415 break; 7416 case wr_node_store: 7417 pr_err("node_store\n"); 7418 break; 7419 case wr_spanning_store: 7420 pr_err("spanning_store\n"); 7421 break; 7422 case wr_rebalance: 7423 pr_err("rebalance\n"); 7424 break; 7425 } 7426 7427 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, 7428 mas->index, mas->last); 7429 pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n", 7430 mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); 7431 if (mas->index > mas->last) 7432 pr_err("Check index & last\n"); 7433 } 7434 EXPORT_SYMBOL_GPL(mas_dump); 7435 7436 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7437 { 7438 pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", 7439 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7440 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7441 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, 7442 wr_mas->end_piv); 7443 } 7444 EXPORT_SYMBOL_GPL(mas_wr_dump); 7445 7446 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7447