// SPDX-License-Identifier: GPL-2.0+
/*
 * Maple Tree implementation
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 * Copyright (c) 2023 ByteDance
 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
 */

/*
 * DOC: Interesting implementation details of the Maple Tree
 *
 * Each node type has a number of slots for entries and a number of pivots.
 * In the case of dense nodes, the pivots are implied by the position and are
 * simply the slot index + the minimum of the node.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges.  A pivot value may appear again
 * in the subtree with an entry attached to it, whereas keys are unique to a
 * specific position of a B-tree.  Pivot values are inclusive of the slot with
 * the same index.
 *
 *
 * The following illustrates the layout of a range64 node's slots and pivots.
 *
 *
 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
 *           │   │   │   │     │    │    │    │    └─ Implied maximum
 *           │   │   │   │     │    │    │    └─ Pivot 14
 *           │   │   │   │     │    │    └─ Pivot 13
 *           │   │   │   │     │    └─ Pivot 12
 *           │   │   │   │     └─ Pivot 11
 *           │   │   │   └─ Pivot 2
 *           │   │   └─ Pivot 1
 *           │   └─ Pivot 0
 *           └─  Implied minimum
 *
 * Slot contents:
 *  Internal (non-leaf) nodes contain pointers to other nodes.
 *  Leaf nodes contain entries.
 *
 * The location of interest is often referred to as an offset.  All offsets
 * have a slot, but the last offset has an implied pivot from the node above
 * (or ULONG_MAX for the root node).
 *
 * Ranges complicate certain write activities.  When modifying any of the
 * B-tree variants, it is known that one entry will either be added or deleted.
 * When modifying the Maple Tree, one store operation may overwrite the entire
 * data set, one half of the tree, or the middle half of the tree.
 *
 */


#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

/*
 * Kernel pointer hashing renders much of the maple tree dump useless as tagged
 * pointers get hashed to arbitrary values.
 *
 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
 * permissible to bypass this.  Otherwise remain cautious and retain the
 * hashing.
 *
 * Userland doesn't know about %px so also use %p there.
 */
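/*
 * Illustrative sketch, not part of the original source: how a slot index in a
 * range64 node maps onto a range, as described in the DOC comment above.  Slot
 * @offset covers [pivot[offset - 1] + 1, pivot[offset]], with the node's
 * minimum implied below slot 0 and the node's maximum implied after the last
 * pivot.  The helper name and signature are invented for illustration only and
 * ignore the data end of the node.
 */
static inline void __maybe_unused
mt_example_range64_bounds(const struct maple_range_64 *mr64,
			  unsigned char offset, unsigned long node_min,
			  unsigned long node_max, unsigned long *range_min,
			  unsigned long *range_max)
{
	/* The implied minimum stands in for the "pivot" below slot 0. */
	*range_min = offset ? mr64->pivot[offset - 1] + 1 : node_min;

	/* The last slot has no pivot of its own; it ends at the implied maximum. */
	if (offset >= MAPLE_RANGE64_SLOTS - 1)
		*range_max = node_max;
	else
		*range_max = mr64->pivot[offset];
}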
#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE)
#define PTR_FMT "%px"
#else
#define PTR_FMT "%p"
#endif

#define MA_ROOT_PARENT 1

/*
 * Maple state flags
 * * MA_STATE_BULK		- Bulk insert mode
 * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
 * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
 */
#define MA_STATE_BULK		1
#define MA_STATE_REBALANCE	2
#define MA_STATE_PREALLOC	4

#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
#define ma_mnode_ptr(x) ((struct maple_node *)(x))
#define ma_enode_ptr(x) ((struct maple_enode *)(x))
static struct kmem_cache *maple_node_cache;

#ifdef CONFIG_DEBUG_MAPLE_TREE
static const unsigned long mt_max[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= ULONG_MAX,
	[maple_range_64]	= ULONG_MAX,
	[maple_arange_64]	= ULONG_MAX,
};
#define mt_node_max(x) mt_max[mte_node_type(x)]
#endif

static const unsigned char mt_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
};
#define mt_slot_count(x) mt_slots[mte_node_type(x)]

static const unsigned char mt_pivots[] = {
	[maple_dense]		= 0,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
};
#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]

static const unsigned char mt_min_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
};
#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]

#define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
#define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)

struct maple_big_node {
	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
	union {
		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
		struct {
			unsigned long padding[MAPLE_BIG_NODE_GAPS];
			unsigned long gap[MAPLE_BIG_NODE_GAPS];
		};
	};
	unsigned char b_end;
	enum maple_type type;
};

/*
 * The maple_subtree_state is used to build a tree to replace a segment of an
 * existing tree in a more atomic way.  Any walkers of the older tree will hit a
 * dead node and restart on updates.
154 */ 155 struct maple_subtree_state { 156 struct ma_state *orig_l; /* Original left side of subtree */ 157 struct ma_state *orig_r; /* Original right side of subtree */ 158 struct ma_state *l; /* New left side of subtree */ 159 struct ma_state *m; /* New middle of subtree (rare) */ 160 struct ma_state *r; /* New right side of subtree */ 161 struct ma_topiary *free; /* nodes to be freed */ 162 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ 163 struct maple_big_node *bn; 164 }; 165 166 #ifdef CONFIG_KASAN_STACK 167 /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ 168 #define noinline_for_kasan noinline_for_stack 169 #else 170 #define noinline_for_kasan inline 171 #endif 172 173 /* Functions */ 174 static inline struct maple_node *mt_alloc_one(gfp_t gfp) 175 { 176 return kmem_cache_alloc(maple_node_cache, gfp); 177 } 178 179 static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) 180 { 181 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); 182 } 183 184 static inline void mt_free_one(struct maple_node *node) 185 { 186 kmem_cache_free(maple_node_cache, node); 187 } 188 189 static inline void mt_free_bulk(size_t size, void __rcu **nodes) 190 { 191 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); 192 } 193 194 static void mt_free_rcu(struct rcu_head *head) 195 { 196 struct maple_node *node = container_of(head, struct maple_node, rcu); 197 198 kmem_cache_free(maple_node_cache, node); 199 } 200 201 /* 202 * ma_free_rcu() - Use rcu callback to free a maple node 203 * @node: The node to free 204 * 205 * The maple tree uses the parent pointer to indicate this node is no longer in 206 * use and will be freed. 207 */ 208 static void ma_free_rcu(struct maple_node *node) 209 { 210 WARN_ON(node->parent != ma_parent_ptr(node)); 211 call_rcu(&node->rcu, mt_free_rcu); 212 } 213 214 static void mas_set_height(struct ma_state *mas) 215 { 216 unsigned int new_flags = mas->tree->ma_flags; 217 218 new_flags &= ~MT_FLAGS_HEIGHT_MASK; 219 MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX); 220 new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET; 221 mas->tree->ma_flags = new_flags; 222 } 223 224 static unsigned int mas_mt_height(struct ma_state *mas) 225 { 226 return mt_height(mas->tree); 227 } 228 229 static inline unsigned int mt_attr(struct maple_tree *mt) 230 { 231 return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK; 232 } 233 234 static __always_inline enum maple_type mte_node_type( 235 const struct maple_enode *entry) 236 { 237 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & 238 MAPLE_NODE_TYPE_MASK; 239 } 240 241 static __always_inline bool ma_is_dense(const enum maple_type type) 242 { 243 return type < maple_leaf_64; 244 } 245 246 static __always_inline bool ma_is_leaf(const enum maple_type type) 247 { 248 return type < maple_range_64; 249 } 250 251 static __always_inline bool mte_is_leaf(const struct maple_enode *entry) 252 { 253 return ma_is_leaf(mte_node_type(entry)); 254 } 255 256 /* 257 * We also reserve values with the bottom two bits set to '10' which are 258 * below 4096 259 */ 260 static __always_inline bool mt_is_reserved(const void *entry) 261 { 262 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && 263 xa_is_internal(entry); 264 } 265 266 static __always_inline void mas_set_err(struct ma_state *mas, long err) 267 { 268 mas->node = MA_ERROR(err); 269 mas->status = ma_error; 270 } 271 272 static __always_inline bool mas_is_ptr(const struct ma_state *mas) 273 { 274 return mas->status == ma_root; 275 } 276 277 
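/*
 * Illustrative sketch, not part of the original source: the tree height is not
 * stored in its own field but packed into the spare bits of tree->ma_flags,
 * which is what mas_set_height() above writes and mt_height() reads back.  The
 * function name below is invented; it only demonstrates that the pack/unpack
 * round-trips a depth that fits within MT_FLAGS_HEIGHT_MASK.
 */
static inline bool __maybe_unused
mt_example_height_roundtrip(struct maple_tree *mt, unsigned int depth)
{
	unsigned int saved = mt->ma_flags;
	bool match;

	/* Same packing as mas_set_height(). */
	mt->ma_flags &= ~MT_FLAGS_HEIGHT_MASK;
	mt->ma_flags |= depth << MT_FLAGS_HEIGHT_OFFSET;

	/* mt_height() undoes the packing: mask, then shift back down. */
	match = (mt_height(mt) == depth);

	mt->ma_flags = saved;	/* leave the tree untouched */
	return match;
}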
static __always_inline bool mas_is_start(const struct ma_state *mas) 278 { 279 return mas->status == ma_start; 280 } 281 282 static __always_inline bool mas_is_none(const struct ma_state *mas) 283 { 284 return mas->status == ma_none; 285 } 286 287 static __always_inline bool mas_is_paused(const struct ma_state *mas) 288 { 289 return mas->status == ma_pause; 290 } 291 292 static __always_inline bool mas_is_overflow(struct ma_state *mas) 293 { 294 return mas->status == ma_overflow; 295 } 296 297 static inline bool mas_is_underflow(struct ma_state *mas) 298 { 299 return mas->status == ma_underflow; 300 } 301 302 static __always_inline struct maple_node *mte_to_node( 303 const struct maple_enode *entry) 304 { 305 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); 306 } 307 308 /* 309 * mte_to_mat() - Convert a maple encoded node to a maple topiary node. 310 * @entry: The maple encoded node 311 * 312 * Return: a maple topiary pointer 313 */ 314 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) 315 { 316 return (struct maple_topiary *) 317 ((unsigned long)entry & ~MAPLE_NODE_MASK); 318 } 319 320 /* 321 * mas_mn() - Get the maple state node. 322 * @mas: The maple state 323 * 324 * Return: the maple node (not encoded - bare pointer). 325 */ 326 static inline struct maple_node *mas_mn(const struct ma_state *mas) 327 { 328 return mte_to_node(mas->node); 329 } 330 331 /* 332 * mte_set_node_dead() - Set a maple encoded node as dead. 333 * @mn: The maple encoded node. 334 */ 335 static inline void mte_set_node_dead(struct maple_enode *mn) 336 { 337 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); 338 smp_wmb(); /* Needed for RCU */ 339 } 340 341 /* Bit 1 indicates the root is a node */ 342 #define MAPLE_ROOT_NODE 0x02 343 /* maple_type stored bit 3-6 */ 344 #define MAPLE_ENODE_TYPE_SHIFT 0x03 345 /* Bit 2 means a NULL somewhere below */ 346 #define MAPLE_ENODE_NULL 0x04 347 348 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, 349 enum maple_type type) 350 { 351 return (void *)((unsigned long)node | 352 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); 353 } 354 355 static inline void *mte_mk_root(const struct maple_enode *node) 356 { 357 return (void *)((unsigned long)node | MAPLE_ROOT_NODE); 358 } 359 360 static inline void *mte_safe_root(const struct maple_enode *node) 361 { 362 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); 363 } 364 365 static inline void __maybe_unused *mte_set_full(const struct maple_enode *node) 366 { 367 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); 368 } 369 370 static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node) 371 { 372 return (void *)((unsigned long)node | MAPLE_ENODE_NULL); 373 } 374 375 static inline bool __maybe_unused mte_has_null(const struct maple_enode *node) 376 { 377 return (unsigned long)node & MAPLE_ENODE_NULL; 378 } 379 380 static __always_inline bool ma_is_root(struct maple_node *node) 381 { 382 return ((unsigned long)node->parent & MA_ROOT_PARENT); 383 } 384 385 static __always_inline bool mte_is_root(const struct maple_enode *node) 386 { 387 return ma_is_root(mte_to_node(node)); 388 } 389 390 static inline bool mas_is_root_limits(const struct ma_state *mas) 391 { 392 return !mas->min && mas->max == ULONG_MAX; 393 } 394 395 static __always_inline bool mt_is_alloc(struct maple_tree *mt) 396 { 397 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); 398 } 399 400 /* 401 * The Parent Pointer 402 * Excluding root, the parent 
pointer is 256B aligned like all other tree nodes.
 * When storing 32 or 64 bit values, the offset can fit into 5 bits.  The 16
 * bit values need an extra bit to store the offset.  This extra bit comes from
 * a reuse of the last bit in the node type.  This is possible by using bit 1 to
 * indicate if bit 2 is part of the type or the slot.
 *
 * Node types:
 *  0x??1 = Root
 *  0x?00 = 16 bit nodes
 *  0x010 = 32 bit nodes
 *  0x110 = 64 bit nodes
 *
 * Slot size and alignment
 *  0b??1 : Root
 *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
 *  0b010 : 32 bit values, type in 0-2, slot in 3-7
 *  0b110 : 64 bit values, type in 0-2, slot in 3-7
 */

#define MAPLE_PARENT_ROOT		0x01

#define MAPLE_PARENT_SLOT_SHIFT		0x03
#define MAPLE_PARENT_SLOT_MASK		0xF8

#define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
#define MAPLE_PARENT_16B_SLOT_MASK	0xFC

#define MAPLE_PARENT_RANGE64		0x06
#define MAPLE_PARENT_RANGE32		0x04
#define MAPLE_PARENT_NOT_RANGE16	0x02

/*
 * mte_parent_shift() - Get the parent shift for the slot storage.
 * @parent: The parent pointer cast as an unsigned long
 * Return: The shift into that pointer to the start of the slot
 */
static inline unsigned long mte_parent_shift(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_SHIFT;

	return MAPLE_PARENT_16B_SLOT_SHIFT;
}

/*
 * mte_parent_slot_mask() - Get the slot mask for the parent.
 * @parent: The parent pointer cast as an unsigned long.
 * Return: The slot mask for that parent.
 */
static inline unsigned long mte_parent_slot_mask(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_MASK;

	return MAPLE_PARENT_16B_SLOT_MASK;
}

/*
 * mas_parent_type() - Return the maple_type of the parent from the stored
 * parent type.
 * @mas: The maple state
 * @enode: The maple_enode to extract the parent's enum
 * Return: The node->parent maple_type
 */
static inline
enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
{
	unsigned long p_type;

	p_type = (unsigned long)mte_to_node(enode)->parent;
	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
		return 0;

	p_type &= MAPLE_NODE_MASK;
	p_type &= ~mte_parent_slot_mask(p_type);
	switch (p_type) {
	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
		if (mt_is_alloc(mas->tree))
			return maple_arange_64;
		return maple_range_64;
	}

	return 0;
}

/*
 * mas_set_parent() - Set the parent node and encode the slot
 * @mas: The maple state
 * @enode: The encoded maple node.
 * @parent: The encoded maple node that is the parent of @enode.
 * @slot: The slot that @enode resides in @parent.
 *
 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
 * parent type.
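 *
 * For illustration only (not part of the original comment): with a 64 bit
 * range parent node whose bare address is P and @slot == 3, the value stored
 * in the child's parent field is
 *
 *	val = (P & ~MAPLE_NODE_MASK) | (3 << MAPLE_PARENT_SLOT_SHIFT) |
 *	      MAPLE_PARENT_RANGE64;
 *
 * mte_parent() later recovers P by clearing MAPLE_NODE_MASK, and
 * mte_parent_slot() recovers 3 by masking the slot bits and shifting them back
 * down by mte_parent_shift(val).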
498 */ 499 static inline 500 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 501 const struct maple_enode *parent, unsigned char slot) 502 { 503 unsigned long val = (unsigned long)parent; 504 unsigned long shift; 505 unsigned long type; 506 enum maple_type p_type = mte_node_type(parent); 507 508 MAS_BUG_ON(mas, p_type == maple_dense); 509 MAS_BUG_ON(mas, p_type == maple_leaf_64); 510 511 switch (p_type) { 512 case maple_range_64: 513 case maple_arange_64: 514 shift = MAPLE_PARENT_SLOT_SHIFT; 515 type = MAPLE_PARENT_RANGE64; 516 break; 517 default: 518 case maple_dense: 519 case maple_leaf_64: 520 shift = type = 0; 521 break; 522 } 523 524 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 525 val |= (slot << shift) | type; 526 mte_to_node(enode)->parent = ma_parent_ptr(val); 527 } 528 529 /* 530 * mte_parent_slot() - get the parent slot of @enode. 531 * @enode: The encoded maple node. 532 * 533 * Return: The slot in the parent node where @enode resides. 534 */ 535 static __always_inline 536 unsigned int mte_parent_slot(const struct maple_enode *enode) 537 { 538 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 539 540 if (unlikely(val & MA_ROOT_PARENT)) 541 return 0; 542 543 /* 544 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 545 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 546 */ 547 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 548 } 549 550 /* 551 * mte_parent() - Get the parent of @node. 552 * @enode: The encoded maple node. 553 * 554 * Return: The parent maple node. 555 */ 556 static __always_inline 557 struct maple_node *mte_parent(const struct maple_enode *enode) 558 { 559 return (void *)((unsigned long) 560 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 561 } 562 563 /* 564 * ma_dead_node() - check if the @enode is dead. 565 * @enode: The encoded maple node 566 * 567 * Return: true if dead, false otherwise. 568 */ 569 static __always_inline bool ma_dead_node(const struct maple_node *node) 570 { 571 struct maple_node *parent; 572 573 /* Do not reorder reads from the node prior to the parent check */ 574 smp_rmb(); 575 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 576 return (parent == node); 577 } 578 579 /* 580 * mte_dead_node() - check if the @enode is dead. 581 * @enode: The encoded maple node 582 * 583 * Return: true if dead, false otherwise. 584 */ 585 static __always_inline bool mte_dead_node(const struct maple_enode *enode) 586 { 587 struct maple_node *node; 588 589 node = mte_to_node(enode); 590 return ma_dead_node(node); 591 } 592 593 /* 594 * mas_allocated() - Get the number of nodes allocated in a maple state. 595 * @mas: The maple state 596 * 597 * The ma_state alloc member is overloaded to hold a pointer to the first 598 * allocated node or to the number of requested nodes to allocate. If bit 0 is 599 * set, then the alloc contains the number of requested nodes. If there is an 600 * allocated node, then the total allocated nodes is in that node. 601 * 602 * Return: The total number of nodes allocated 603 */ 604 static inline unsigned long mas_allocated(const struct ma_state *mas) 605 { 606 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) 607 return 0; 608 609 return mas->alloc->total; 610 } 611 612 /* 613 * mas_set_alloc_req() - Set the requested number of allocations. 614 * @mas: the maple state 615 * @count: the number of allocations. 
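 *
 * For illustration only (not part of the original comment): with no node yet
 * allocated, a request for 3 nodes is stored directly in @mas->alloc as the
 * encoded value ((3 << 1) | 1) == 0x7, and mas_alloc_req() decodes it by
 * shifting right by one.  Once a node has been allocated, the same request is
 * kept in mas->alloc->request_count instead.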
616 * 617 * The requested number of allocations is either in the first allocated node, 618 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is 619 * no allocated node. Set the request either in the node or do the necessary 620 * encoding to store in @mas->alloc directly. 621 */ 622 static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) 623 { 624 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { 625 if (!count) 626 mas->alloc = NULL; 627 else 628 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); 629 return; 630 } 631 632 mas->alloc->request_count = count; 633 } 634 635 /* 636 * mas_alloc_req() - get the requested number of allocations. 637 * @mas: The maple state 638 * 639 * The alloc count is either stored directly in @mas, or in 640 * @mas->alloc->request_count if there is at least one node allocated. Decode 641 * the request count if it's stored directly in @mas->alloc. 642 * 643 * Return: The allocation request count. 644 */ 645 static inline unsigned int mas_alloc_req(const struct ma_state *mas) 646 { 647 if ((unsigned long)mas->alloc & 0x1) 648 return (unsigned long)(mas->alloc) >> 1; 649 else if (mas->alloc) 650 return mas->alloc->request_count; 651 return 0; 652 } 653 654 /* 655 * ma_pivots() - Get a pointer to the maple node pivots. 656 * @node: the maple node 657 * @type: the node type 658 * 659 * In the event of a dead node, this array may be %NULL 660 * 661 * Return: A pointer to the maple node pivots 662 */ 663 static inline unsigned long *ma_pivots(struct maple_node *node, 664 enum maple_type type) 665 { 666 switch (type) { 667 case maple_arange_64: 668 return node->ma64.pivot; 669 case maple_range_64: 670 case maple_leaf_64: 671 return node->mr64.pivot; 672 case maple_dense: 673 return NULL; 674 } 675 return NULL; 676 } 677 678 /* 679 * ma_gaps() - Get a pointer to the maple node gaps. 680 * @node: the maple node 681 * @type: the node type 682 * 683 * Return: A pointer to the maple node gaps 684 */ 685 static inline unsigned long *ma_gaps(struct maple_node *node, 686 enum maple_type type) 687 { 688 switch (type) { 689 case maple_arange_64: 690 return node->ma64.gap; 691 case maple_range_64: 692 case maple_leaf_64: 693 case maple_dense: 694 return NULL; 695 } 696 return NULL; 697 } 698 699 /* 700 * mas_safe_pivot() - get the pivot at @piv or mas->max. 701 * @mas: The maple state 702 * @pivots: The pointer to the maple node pivots 703 * @piv: The pivot to fetch 704 * @type: The maple node type 705 * 706 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 707 * otherwise. 708 */ 709 static __always_inline unsigned long 710 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 711 unsigned char piv, enum maple_type type) 712 { 713 if (piv >= mt_pivots[type]) 714 return mas->max; 715 716 return pivots[piv]; 717 } 718 719 /* 720 * mas_safe_min() - Return the minimum for a given offset. 721 * @mas: The maple state 722 * @pivots: The pointer to the maple node pivots 723 * @offset: The offset into the pivot array 724 * 725 * Return: The minimum range value that is contained in @offset. 726 */ 727 static inline unsigned long 728 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 729 { 730 if (likely(offset)) 731 return pivots[offset - 1] + 1; 732 733 return mas->min; 734 } 735 736 /* 737 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 
738 * @mn: The encoded maple node 739 * @piv: The pivot offset 740 * @val: The value of the pivot 741 */ 742 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 743 unsigned long val) 744 { 745 struct maple_node *node = mte_to_node(mn); 746 enum maple_type type = mte_node_type(mn); 747 748 BUG_ON(piv >= mt_pivots[type]); 749 switch (type) { 750 case maple_range_64: 751 case maple_leaf_64: 752 node->mr64.pivot[piv] = val; 753 break; 754 case maple_arange_64: 755 node->ma64.pivot[piv] = val; 756 break; 757 case maple_dense: 758 break; 759 } 760 761 } 762 763 /* 764 * ma_slots() - Get a pointer to the maple node slots. 765 * @mn: The maple node 766 * @mt: The maple node type 767 * 768 * Return: A pointer to the maple node slots 769 */ 770 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 771 { 772 switch (mt) { 773 case maple_arange_64: 774 return mn->ma64.slot; 775 case maple_range_64: 776 case maple_leaf_64: 777 return mn->mr64.slot; 778 case maple_dense: 779 return mn->slot; 780 } 781 782 return NULL; 783 } 784 785 static inline bool mt_write_locked(const struct maple_tree *mt) 786 { 787 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 788 lockdep_is_held(&mt->ma_lock); 789 } 790 791 static __always_inline bool mt_locked(const struct maple_tree *mt) 792 { 793 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 794 lockdep_is_held(&mt->ma_lock); 795 } 796 797 static __always_inline void *mt_slot(const struct maple_tree *mt, 798 void __rcu **slots, unsigned char offset) 799 { 800 return rcu_dereference_check(slots[offset], mt_locked(mt)); 801 } 802 803 static __always_inline void *mt_slot_locked(struct maple_tree *mt, 804 void __rcu **slots, unsigned char offset) 805 { 806 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 807 } 808 /* 809 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 810 * @mas: The maple state 811 * @slots: The pointer to the slots 812 * @offset: The offset into the slots array to fetch 813 * 814 * Return: The entry stored in @slots at the @offset. 815 */ 816 static __always_inline void *mas_slot_locked(struct ma_state *mas, 817 void __rcu **slots, unsigned char offset) 818 { 819 return mt_slot_locked(mas->tree, slots, offset); 820 } 821 822 /* 823 * mas_slot() - Get the slot value when not holding the maple tree lock. 824 * @mas: The maple state 825 * @slots: The pointer to the slots 826 * @offset: The offset into the slots array to fetch 827 * 828 * Return: The entry stored in @slots at the @offset 829 */ 830 static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 831 unsigned char offset) 832 { 833 return mt_slot(mas->tree, slots, offset); 834 } 835 836 /* 837 * mas_root() - Get the maple tree root. 838 * @mas: The maple state. 839 * 840 * Return: The pointer to the root of the tree 841 */ 842 static __always_inline void *mas_root(struct ma_state *mas) 843 { 844 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 845 } 846 847 static inline void *mt_root_locked(struct maple_tree *mt) 848 { 849 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 850 } 851 852 /* 853 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 854 * @mas: The maple state. 
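 *
 * For illustration only (not part of the original comment): as with the slot
 * accessors above, the root has a checked variant for readers and a protected
 * variant for writers, roughly:
 *
 *	rcu_read_lock();
 *	root = mas_root(&mas);		// reader: rcu_dereference_check()
 *	rcu_read_unlock();
 *
 *	mtree_lock(mas.tree);
 *	root = mas_root_locked(&mas);	// writer: lock must be held
 *	mtree_unlock(mas.tree);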
855 * 856 * Return: The pointer to the root of the tree 857 */ 858 static inline void *mas_root_locked(struct ma_state *mas) 859 { 860 return mt_root_locked(mas->tree); 861 } 862 863 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 864 enum maple_type mt) 865 { 866 switch (mt) { 867 case maple_arange_64: 868 return &mn->ma64.meta; 869 default: 870 return &mn->mr64.meta; 871 } 872 } 873 874 /* 875 * ma_set_meta() - Set the metadata information of a node. 876 * @mn: The maple node 877 * @mt: The maple node type 878 * @offset: The offset of the highest sub-gap in this node. 879 * @end: The end of the data in this node. 880 */ 881 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 882 unsigned char offset, unsigned char end) 883 { 884 struct maple_metadata *meta = ma_meta(mn, mt); 885 886 meta->gap = offset; 887 meta->end = end; 888 } 889 890 /* 891 * mt_clear_meta() - clear the metadata information of a node, if it exists 892 * @mt: The maple tree 893 * @mn: The maple node 894 * @type: The maple node type 895 */ 896 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 897 enum maple_type type) 898 { 899 struct maple_metadata *meta; 900 unsigned long *pivots; 901 void __rcu **slots; 902 void *next; 903 904 switch (type) { 905 case maple_range_64: 906 pivots = mn->mr64.pivot; 907 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 908 slots = mn->mr64.slot; 909 next = mt_slot_locked(mt, slots, 910 MAPLE_RANGE64_SLOTS - 1); 911 if (unlikely((mte_to_node(next) && 912 mte_node_type(next)))) 913 return; /* no metadata, could be node */ 914 } 915 fallthrough; 916 case maple_arange_64: 917 meta = ma_meta(mn, type); 918 break; 919 default: 920 return; 921 } 922 923 meta->gap = 0; 924 meta->end = 0; 925 } 926 927 /* 928 * ma_meta_end() - Get the data end of a node from the metadata 929 * @mn: The maple node 930 * @mt: The maple node type 931 */ 932 static inline unsigned char ma_meta_end(struct maple_node *mn, 933 enum maple_type mt) 934 { 935 struct maple_metadata *meta = ma_meta(mn, mt); 936 937 return meta->end; 938 } 939 940 /* 941 * ma_meta_gap() - Get the largest gap location of a node from the metadata 942 * @mn: The maple node 943 */ 944 static inline unsigned char ma_meta_gap(struct maple_node *mn) 945 { 946 return mn->ma64.meta.gap; 947 } 948 949 /* 950 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 951 * @mn: The maple node 952 * @mt: The maple node type 953 * @offset: The location of the largest gap. 954 */ 955 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 956 unsigned char offset) 957 { 958 959 struct maple_metadata *meta = ma_meta(mn, mt); 960 961 meta->gap = offset; 962 } 963 964 /* 965 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 966 * @mat: the ma_topiary, a linked list of dead nodes. 967 * @dead_enode: the node to be marked as dead and added to the tail of the list 968 * 969 * Add the @dead_enode to the linked list in @mat. 
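 *
 * For illustration only (not part of the original comment): a write that
 * replaces part of the tree collects the displaced nodes rather than freeing
 * them immediately, roughly:
 *
 *	struct ma_topiary mat = { .mtree = mas->tree };
 *
 *	mat_add(&mat, old_enode);	// mark dead and append to the list
 *	...
 *	mas_mat_destroy(mas, &mat);	// walk and free, via RCU when shared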
970 */ 971 static inline void mat_add(struct ma_topiary *mat, 972 struct maple_enode *dead_enode) 973 { 974 mte_set_node_dead(dead_enode); 975 mte_to_mat(dead_enode)->next = NULL; 976 if (!mat->tail) { 977 mat->tail = mat->head = dead_enode; 978 return; 979 } 980 981 mte_to_mat(mat->tail)->next = dead_enode; 982 mat->tail = dead_enode; 983 } 984 985 static void mt_free_walk(struct rcu_head *head); 986 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 987 bool free); 988 /* 989 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 990 * @mas: the maple state 991 * @mat: the ma_topiary linked list of dead nodes to free. 992 * 993 * Destroy walk a dead list. 994 */ 995 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 996 { 997 struct maple_enode *next; 998 struct maple_node *node; 999 bool in_rcu = mt_in_rcu(mas->tree); 1000 1001 while (mat->head) { 1002 next = mte_to_mat(mat->head)->next; 1003 node = mte_to_node(mat->head); 1004 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 1005 if (in_rcu) 1006 call_rcu(&node->rcu, mt_free_walk); 1007 mat->head = next; 1008 } 1009 } 1010 /* 1011 * mas_descend() - Descend into the slot stored in the ma_state. 1012 * @mas: the maple state. 1013 * 1014 * Note: Not RCU safe, only use in write side or debug code. 1015 */ 1016 static inline void mas_descend(struct ma_state *mas) 1017 { 1018 enum maple_type type; 1019 unsigned long *pivots; 1020 struct maple_node *node; 1021 void __rcu **slots; 1022 1023 node = mas_mn(mas); 1024 type = mte_node_type(mas->node); 1025 pivots = ma_pivots(node, type); 1026 slots = ma_slots(node, type); 1027 1028 if (mas->offset) 1029 mas->min = pivots[mas->offset - 1] + 1; 1030 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 1031 mas->node = mas_slot(mas, slots, mas->offset); 1032 } 1033 1034 /* 1035 * mte_set_gap() - Set a maple node gap. 1036 * @mn: The encoded maple node 1037 * @gap: The offset of the gap to set 1038 * @val: The gap value 1039 */ 1040 static inline void mte_set_gap(const struct maple_enode *mn, 1041 unsigned char gap, unsigned long val) 1042 { 1043 switch (mte_node_type(mn)) { 1044 default: 1045 break; 1046 case maple_arange_64: 1047 mte_to_node(mn)->ma64.gap[gap] = val; 1048 break; 1049 } 1050 } 1051 1052 /* 1053 * mas_ascend() - Walk up a level of the tree. 1054 * @mas: The maple state 1055 * 1056 * Sets the @mas->max and @mas->min to the correct values when walking up. This 1057 * may cause several levels of walking up to find the correct min and max. 1058 * May find a dead node which will cause a premature return. 1059 * Return: 1 on dead node, 0 otherwise 1060 */ 1061 static int mas_ascend(struct ma_state *mas) 1062 { 1063 struct maple_enode *p_enode; /* parent enode. */ 1064 struct maple_enode *a_enode; /* ancestor enode. */ 1065 struct maple_node *a_node; /* ancestor node. */ 1066 struct maple_node *p_node; /* parent node. 
*/ 1067 unsigned char a_slot; 1068 enum maple_type a_type; 1069 unsigned long min, max; 1070 unsigned long *pivots; 1071 bool set_max = false, set_min = false; 1072 1073 a_node = mas_mn(mas); 1074 if (ma_is_root(a_node)) { 1075 mas->offset = 0; 1076 return 0; 1077 } 1078 1079 p_node = mte_parent(mas->node); 1080 if (unlikely(a_node == p_node)) 1081 return 1; 1082 1083 a_type = mas_parent_type(mas, mas->node); 1084 mas->offset = mte_parent_slot(mas->node); 1085 a_enode = mt_mk_node(p_node, a_type); 1086 1087 /* Check to make sure all parent information is still accurate */ 1088 if (p_node != mte_parent(mas->node)) 1089 return 1; 1090 1091 mas->node = a_enode; 1092 1093 if (mte_is_root(a_enode)) { 1094 mas->max = ULONG_MAX; 1095 mas->min = 0; 1096 return 0; 1097 } 1098 1099 min = 0; 1100 max = ULONG_MAX; 1101 if (!mas->offset) { 1102 min = mas->min; 1103 set_min = true; 1104 } 1105 1106 if (mas->max == ULONG_MAX) 1107 set_max = true; 1108 1109 do { 1110 p_enode = a_enode; 1111 a_type = mas_parent_type(mas, p_enode); 1112 a_node = mte_parent(p_enode); 1113 a_slot = mte_parent_slot(p_enode); 1114 a_enode = mt_mk_node(a_node, a_type); 1115 pivots = ma_pivots(a_node, a_type); 1116 1117 if (unlikely(ma_dead_node(a_node))) 1118 return 1; 1119 1120 if (!set_min && a_slot) { 1121 set_min = true; 1122 min = pivots[a_slot - 1] + 1; 1123 } 1124 1125 if (!set_max && a_slot < mt_pivots[a_type]) { 1126 set_max = true; 1127 max = pivots[a_slot]; 1128 } 1129 1130 if (unlikely(ma_dead_node(a_node))) 1131 return 1; 1132 1133 if (unlikely(ma_is_root(a_node))) 1134 break; 1135 1136 } while (!set_min || !set_max); 1137 1138 mas->max = max; 1139 mas->min = min; 1140 return 0; 1141 } 1142 1143 /* 1144 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1145 * @mas: The maple state 1146 * 1147 * Return: A pointer to a maple node. 1148 */ 1149 static inline struct maple_node *mas_pop_node(struct ma_state *mas) 1150 { 1151 struct maple_alloc *ret, *node = mas->alloc; 1152 unsigned long total = mas_allocated(mas); 1153 unsigned int req = mas_alloc_req(mas); 1154 1155 /* nothing or a request pending. */ 1156 if (WARN_ON(!total)) 1157 return NULL; 1158 1159 if (total == 1) { 1160 /* single allocation in this ma_state */ 1161 mas->alloc = NULL; 1162 ret = node; 1163 goto single_node; 1164 } 1165 1166 if (node->node_count == 1) { 1167 /* Single allocation in this node. */ 1168 mas->alloc = node->slot[0]; 1169 mas->alloc->total = node->total - 1; 1170 ret = node; 1171 goto new_head; 1172 } 1173 node->total--; 1174 ret = node->slot[--node->node_count]; 1175 node->slot[node->node_count] = NULL; 1176 1177 single_node: 1178 new_head: 1179 if (req) { 1180 req++; 1181 mas_set_alloc_req(mas, req); 1182 } 1183 1184 memset(ret, 0, sizeof(*ret)); 1185 return (struct maple_node *)ret; 1186 } 1187 1188 /* 1189 * mas_push_node() - Push a node back on the maple state allocation. 1190 * @mas: The maple state 1191 * @used: The used maple node 1192 * 1193 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and 1194 * requested node count as necessary. 
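 *
 * For illustration only (not part of the original comment): a write path
 * typically sizes its allocation up front and hands back anything unused,
 * roughly:
 *
 *	mas_node_count(&mas, 3);	// request three nodes
 *	if (mas_is_err(&mas))
 *		return;			// -ENOMEM was recorded in the state
 *	node = mas_pop_node(&mas);	// take one, returned zeroed
 *	...
 *	mas_push_node(&mas, node);	// give it back if it was not needed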
1195 */ 1196 static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) 1197 { 1198 struct maple_alloc *reuse = (struct maple_alloc *)used; 1199 struct maple_alloc *head = mas->alloc; 1200 unsigned long count; 1201 unsigned int requested = mas_alloc_req(mas); 1202 1203 count = mas_allocated(mas); 1204 1205 reuse->request_count = 0; 1206 reuse->node_count = 0; 1207 if (count) { 1208 if (head->node_count < MAPLE_ALLOC_SLOTS) { 1209 head->slot[head->node_count++] = reuse; 1210 head->total++; 1211 goto done; 1212 } 1213 reuse->slot[0] = head; 1214 reuse->node_count = 1; 1215 } 1216 1217 reuse->total = count + 1; 1218 mas->alloc = reuse; 1219 done: 1220 if (requested > 1) 1221 mas_set_alloc_req(mas, requested - 1); 1222 } 1223 1224 /* 1225 * mas_alloc_nodes() - Allocate nodes into a maple state 1226 * @mas: The maple state 1227 * @gfp: The GFP Flags 1228 */ 1229 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1230 { 1231 struct maple_alloc *node; 1232 unsigned long allocated = mas_allocated(mas); 1233 unsigned int requested = mas_alloc_req(mas); 1234 unsigned int count; 1235 void **slots = NULL; 1236 unsigned int max_req = 0; 1237 1238 if (!requested) 1239 return; 1240 1241 mas_set_alloc_req(mas, 0); 1242 if (mas->mas_flags & MA_STATE_PREALLOC) { 1243 if (allocated) 1244 return; 1245 WARN_ON(!allocated); 1246 } 1247 1248 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { 1249 node = (struct maple_alloc *)mt_alloc_one(gfp); 1250 if (!node) 1251 goto nomem_one; 1252 1253 if (allocated) { 1254 node->slot[0] = mas->alloc; 1255 node->node_count = 1; 1256 } else { 1257 node->node_count = 0; 1258 } 1259 1260 mas->alloc = node; 1261 node->total = ++allocated; 1262 node->request_count = 0; 1263 requested--; 1264 } 1265 1266 node = mas->alloc; 1267 while (requested) { 1268 max_req = MAPLE_ALLOC_SLOTS - node->node_count; 1269 slots = (void **)&node->slot[node->node_count]; 1270 max_req = min(requested, max_req); 1271 count = mt_alloc_bulk(gfp, max_req, slots); 1272 if (!count) 1273 goto nomem_bulk; 1274 1275 if (node->node_count == 0) { 1276 node->slot[0]->node_count = 0; 1277 node->slot[0]->request_count = 0; 1278 } 1279 1280 node->node_count += count; 1281 allocated += count; 1282 /* find a non-full node*/ 1283 do { 1284 node = node->slot[0]; 1285 } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS)); 1286 requested -= count; 1287 } 1288 mas->alloc->total = allocated; 1289 return; 1290 1291 nomem_bulk: 1292 /* Clean up potential freed allocations on bulk failure */ 1293 memset(slots, 0, max_req * sizeof(unsigned long)); 1294 mas->alloc->total = allocated; 1295 nomem_one: 1296 mas_set_alloc_req(mas, requested); 1297 mas_set_err(mas, -ENOMEM); 1298 } 1299 1300 /* 1301 * mas_free() - Free an encoded maple node 1302 * @mas: The maple state 1303 * @used: The encoded maple node to free. 1304 * 1305 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1306 * otherwise. 1307 */ 1308 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1309 { 1310 struct maple_node *tmp = mte_to_node(used); 1311 1312 if (mt_in_rcu(mas->tree)) 1313 ma_free_rcu(tmp); 1314 else 1315 mas_push_node(mas, tmp); 1316 } 1317 1318 /* 1319 * mas_node_count_gfp() - Check if enough nodes are allocated and request more 1320 * if there is not enough nodes. 
1321 * @mas: The maple state 1322 * @count: The number of nodes needed 1323 * @gfp: the gfp flags 1324 */ 1325 static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) 1326 { 1327 unsigned long allocated = mas_allocated(mas); 1328 1329 if (allocated < count) { 1330 mas_set_alloc_req(mas, count - allocated); 1331 mas_alloc_nodes(mas, gfp); 1332 } 1333 } 1334 1335 /* 1336 * mas_node_count() - Check if enough nodes are allocated and request more if 1337 * there is not enough nodes. 1338 * @mas: The maple state 1339 * @count: The number of nodes needed 1340 * 1341 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. 1342 */ 1343 static void mas_node_count(struct ma_state *mas, int count) 1344 { 1345 return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); 1346 } 1347 1348 /* 1349 * mas_start() - Sets up maple state for operations. 1350 * @mas: The maple state. 1351 * 1352 * If mas->status == ma_start, then set the min, max and depth to 1353 * defaults. 1354 * 1355 * Return: 1356 * - If mas->node is an error or not mas_start, return NULL. 1357 * - If it's an empty tree: NULL & mas->status == ma_none 1358 * - If it's a single entry: The entry & mas->status == ma_root 1359 * - If it's a tree: NULL & mas->status == ma_active 1360 */ 1361 static inline struct maple_enode *mas_start(struct ma_state *mas) 1362 { 1363 if (likely(mas_is_start(mas))) { 1364 struct maple_enode *root; 1365 1366 mas->min = 0; 1367 mas->max = ULONG_MAX; 1368 1369 retry: 1370 mas->depth = 0; 1371 root = mas_root(mas); 1372 /* Tree with nodes */ 1373 if (likely(xa_is_node(root))) { 1374 mas->depth = 1; 1375 mas->status = ma_active; 1376 mas->node = mte_safe_root(root); 1377 mas->offset = 0; 1378 if (mte_dead_node(mas->node)) 1379 goto retry; 1380 1381 return NULL; 1382 } 1383 1384 mas->node = NULL; 1385 /* empty tree */ 1386 if (unlikely(!root)) { 1387 mas->status = ma_none; 1388 mas->offset = MAPLE_NODE_SLOTS; 1389 return NULL; 1390 } 1391 1392 /* Single entry tree */ 1393 mas->status = ma_root; 1394 mas->offset = MAPLE_NODE_SLOTS; 1395 1396 /* Single entry tree. */ 1397 if (mas->index > 0) 1398 return NULL; 1399 1400 return root; 1401 } 1402 1403 return NULL; 1404 } 1405 1406 /* 1407 * ma_data_end() - Find the end of the data in a node. 1408 * @node: The maple node 1409 * @type: The maple node type 1410 * @pivots: The array of pivots in the node 1411 * @max: The maximum value in the node 1412 * 1413 * Uses metadata to find the end of the data when possible. 1414 * Return: The zero indexed last slot with data (may be null). 1415 */ 1416 static __always_inline unsigned char ma_data_end(struct maple_node *node, 1417 enum maple_type type, unsigned long *pivots, unsigned long max) 1418 { 1419 unsigned char offset; 1420 1421 if (!pivots) 1422 return 0; 1423 1424 if (type == maple_arange_64) 1425 return ma_meta_end(node, type); 1426 1427 offset = mt_pivots[type] - 1; 1428 if (likely(!pivots[offset])) 1429 return ma_meta_end(node, type); 1430 1431 if (likely(pivots[offset] == max)) 1432 return offset; 1433 1434 return mt_pivots[type]; 1435 } 1436 1437 /* 1438 * mas_data_end() - Find the end of the data (slot). 1439 * @mas: the maple state 1440 * 1441 * This method is optimized to check the metadata of a node if the node type 1442 * supports data end metadata. 1443 * 1444 * Return: The zero indexed last slot with data (may be null). 
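 *
 * For illustration only (not part of the original comment), for a range64 node
 * with mas->max == 100 (slot counts assume a 64 bit build):
 *  - last pivot == 0:   the node is not full, so the end comes from the
 *			 metadata.
 *  - last pivot == 100: the node ends exactly at its maximum, so the end is
 *			 the last pivot offset (14).
 *  - otherwise:         the final slot is also in use and the end is 15.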
1445 */ 1446 static inline unsigned char mas_data_end(struct ma_state *mas) 1447 { 1448 enum maple_type type; 1449 struct maple_node *node; 1450 unsigned char offset; 1451 unsigned long *pivots; 1452 1453 type = mte_node_type(mas->node); 1454 node = mas_mn(mas); 1455 if (type == maple_arange_64) 1456 return ma_meta_end(node, type); 1457 1458 pivots = ma_pivots(node, type); 1459 if (unlikely(ma_dead_node(node))) 1460 return 0; 1461 1462 offset = mt_pivots[type] - 1; 1463 if (likely(!pivots[offset])) 1464 return ma_meta_end(node, type); 1465 1466 if (likely(pivots[offset] == mas->max)) 1467 return offset; 1468 1469 return mt_pivots[type]; 1470 } 1471 1472 /* 1473 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1474 * @mas: the maple state 1475 * 1476 * Return: The maximum gap in the leaf. 1477 */ 1478 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1479 { 1480 enum maple_type mt; 1481 unsigned long pstart, gap, max_gap; 1482 struct maple_node *mn; 1483 unsigned long *pivots; 1484 void __rcu **slots; 1485 unsigned char i; 1486 unsigned char max_piv; 1487 1488 mt = mte_node_type(mas->node); 1489 mn = mas_mn(mas); 1490 slots = ma_slots(mn, mt); 1491 max_gap = 0; 1492 if (unlikely(ma_is_dense(mt))) { 1493 gap = 0; 1494 for (i = 0; i < mt_slots[mt]; i++) { 1495 if (slots[i]) { 1496 if (gap > max_gap) 1497 max_gap = gap; 1498 gap = 0; 1499 } else { 1500 gap++; 1501 } 1502 } 1503 if (gap > max_gap) 1504 max_gap = gap; 1505 return max_gap; 1506 } 1507 1508 /* 1509 * Check the first implied pivot optimizes the loop below and slot 1 may 1510 * be skipped if there is a gap in slot 0. 1511 */ 1512 pivots = ma_pivots(mn, mt); 1513 if (likely(!slots[0])) { 1514 max_gap = pivots[0] - mas->min + 1; 1515 i = 2; 1516 } else { 1517 i = 1; 1518 } 1519 1520 /* reduce max_piv as the special case is checked before the loop */ 1521 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1522 /* 1523 * Check end implied pivot which can only be a gap on the right most 1524 * node. 1525 */ 1526 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1527 gap = ULONG_MAX - pivots[max_piv]; 1528 if (gap > max_gap) 1529 max_gap = gap; 1530 1531 if (max_gap > pivots[max_piv] - mas->min) 1532 return max_gap; 1533 } 1534 1535 for (; i <= max_piv; i++) { 1536 /* data == no gap. */ 1537 if (likely(slots[i])) 1538 continue; 1539 1540 pstart = pivots[i - 1]; 1541 gap = pivots[i] - pstart; 1542 if (gap > max_gap) 1543 max_gap = gap; 1544 1545 /* There cannot be two gaps in a row. */ 1546 i++; 1547 } 1548 return max_gap; 1549 } 1550 1551 /* 1552 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1553 * @node: The maple node 1554 * @gaps: The pointer to the gaps 1555 * @mt: The maple node type 1556 * @off: Pointer to store the offset location of the gap. 1557 * 1558 * Uses the metadata data end to scan backwards across set gaps. 1559 * 1560 * Return: The maximum gap value 1561 */ 1562 static inline unsigned long 1563 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1564 unsigned char *off) 1565 { 1566 unsigned char offset, i; 1567 unsigned long max_gap = 0; 1568 1569 i = offset = ma_meta_end(node, mt); 1570 do { 1571 if (gaps[i] > max_gap) { 1572 max_gap = gaps[i]; 1573 offset = i; 1574 } 1575 } while (i--); 1576 1577 *off = offset; 1578 return max_gap; 1579 } 1580 1581 /* 1582 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1583 * @mas: The maple state. 1584 * 1585 * Return: The gap value. 
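 *
 * For illustration only (not part of the original comment): in an allocation
 * tree, a leaf spanning 1-30 with pivots {10, 20, 30} and slots {A, NULL, B}
 * has one gap of 10 covering 11-20.  mas_leaf_max_gap() reports 10, and that
 * is the value the parent keeps in its gap array for this child.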
1586 */ 1587 static inline unsigned long mas_max_gap(struct ma_state *mas) 1588 { 1589 unsigned long *gaps; 1590 unsigned char offset; 1591 enum maple_type mt; 1592 struct maple_node *node; 1593 1594 mt = mte_node_type(mas->node); 1595 if (ma_is_leaf(mt)) 1596 return mas_leaf_max_gap(mas); 1597 1598 node = mas_mn(mas); 1599 MAS_BUG_ON(mas, mt != maple_arange_64); 1600 offset = ma_meta_gap(node); 1601 gaps = ma_gaps(node, mt); 1602 return gaps[offset]; 1603 } 1604 1605 /* 1606 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1607 * @mas: The maple state 1608 * @offset: The gap offset in the parent to set 1609 * @new: The new gap value. 1610 * 1611 * Set the parent gap then continue to set the gap upwards, using the metadata 1612 * of the parent to see if it is necessary to check the node above. 1613 */ 1614 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1615 unsigned long new) 1616 { 1617 unsigned long meta_gap = 0; 1618 struct maple_node *pnode; 1619 struct maple_enode *penode; 1620 unsigned long *pgaps; 1621 unsigned char meta_offset; 1622 enum maple_type pmt; 1623 1624 pnode = mte_parent(mas->node); 1625 pmt = mas_parent_type(mas, mas->node); 1626 penode = mt_mk_node(pnode, pmt); 1627 pgaps = ma_gaps(pnode, pmt); 1628 1629 ascend: 1630 MAS_BUG_ON(mas, pmt != maple_arange_64); 1631 meta_offset = ma_meta_gap(pnode); 1632 meta_gap = pgaps[meta_offset]; 1633 1634 pgaps[offset] = new; 1635 1636 if (meta_gap == new) 1637 return; 1638 1639 if (offset != meta_offset) { 1640 if (meta_gap > new) 1641 return; 1642 1643 ma_set_meta_gap(pnode, pmt, offset); 1644 } else if (new < meta_gap) { 1645 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1646 ma_set_meta_gap(pnode, pmt, meta_offset); 1647 } 1648 1649 if (ma_is_root(pnode)) 1650 return; 1651 1652 /* Go to the parent node. */ 1653 pnode = mte_parent(penode); 1654 pmt = mas_parent_type(mas, penode); 1655 pgaps = ma_gaps(pnode, pmt); 1656 offset = mte_parent_slot(penode); 1657 penode = mt_mk_node(pnode, pmt); 1658 goto ascend; 1659 } 1660 1661 /* 1662 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1663 * @mas: the maple state. 1664 */ 1665 static inline void mas_update_gap(struct ma_state *mas) 1666 { 1667 unsigned char pslot; 1668 unsigned long p_gap; 1669 unsigned long max_gap; 1670 1671 if (!mt_is_alloc(mas->tree)) 1672 return; 1673 1674 if (mte_is_root(mas->node)) 1675 return; 1676 1677 max_gap = mas_max_gap(mas); 1678 1679 pslot = mte_parent_slot(mas->node); 1680 p_gap = ma_gaps(mte_parent(mas->node), 1681 mas_parent_type(mas, mas->node))[pslot]; 1682 1683 if (p_gap != max_gap) 1684 mas_parent_gap(mas, pslot, max_gap); 1685 } 1686 1687 /* 1688 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1689 * @parent with the slot encoded. 1690 * @mas: the maple state (for the tree) 1691 * @parent: the maple encoded node containing the children. 
1692 */ 1693 static inline void mas_adopt_children(struct ma_state *mas, 1694 struct maple_enode *parent) 1695 { 1696 enum maple_type type = mte_node_type(parent); 1697 struct maple_node *node = mte_to_node(parent); 1698 void __rcu **slots = ma_slots(node, type); 1699 unsigned long *pivots = ma_pivots(node, type); 1700 struct maple_enode *child; 1701 unsigned char offset; 1702 1703 offset = ma_data_end(node, type, pivots, mas->max); 1704 do { 1705 child = mas_slot_locked(mas, slots, offset); 1706 mas_set_parent(mas, child, parent, offset); 1707 } while (offset--); 1708 } 1709 1710 /* 1711 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1712 * node as dead. 1713 * @mas: the maple state with the new node 1714 * @old_enode: The old maple encoded node to replace. 1715 */ 1716 static inline void mas_put_in_tree(struct ma_state *mas, 1717 struct maple_enode *old_enode) 1718 __must_hold(mas->tree->ma_lock) 1719 { 1720 unsigned char offset; 1721 void __rcu **slots; 1722 1723 if (mte_is_root(mas->node)) { 1724 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1725 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1726 mas_set_height(mas); 1727 } else { 1728 1729 offset = mte_parent_slot(mas->node); 1730 slots = ma_slots(mte_parent(mas->node), 1731 mas_parent_type(mas, mas->node)); 1732 rcu_assign_pointer(slots[offset], mas->node); 1733 } 1734 1735 mte_set_node_dead(old_enode); 1736 } 1737 1738 /* 1739 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1740 * dead, and freeing it. 1741 * the parent encoding to locate the maple node in the tree. 1742 * @mas: the ma_state with @mas->node pointing to the new node. 1743 * @old_enode: The old maple encoded node. 1744 */ 1745 static inline void mas_replace_node(struct ma_state *mas, 1746 struct maple_enode *old_enode) 1747 __must_hold(mas->tree->ma_lock) 1748 { 1749 mas_put_in_tree(mas, old_enode); 1750 mas_free(mas, old_enode); 1751 } 1752 1753 /* 1754 * mas_find_child() - Find a child who has the parent @mas->node. 1755 * @mas: the maple state with the parent. 1756 * @child: the maple state to store the child. 1757 */ 1758 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) 1759 __must_hold(mas->tree->ma_lock) 1760 { 1761 enum maple_type mt; 1762 unsigned char offset; 1763 unsigned char end; 1764 unsigned long *pivots; 1765 struct maple_enode *entry; 1766 struct maple_node *node; 1767 void __rcu **slots; 1768 1769 mt = mte_node_type(mas->node); 1770 node = mas_mn(mas); 1771 slots = ma_slots(node, mt); 1772 pivots = ma_pivots(node, mt); 1773 end = ma_data_end(node, mt, pivots, mas->max); 1774 for (offset = mas->offset; offset <= end; offset++) { 1775 entry = mas_slot_locked(mas, slots, offset); 1776 if (mte_parent(entry) == node) { 1777 *child = *mas; 1778 mas->offset = offset + 1; 1779 child->offset = offset; 1780 mas_descend(child); 1781 child->offset = 0; 1782 return true; 1783 } 1784 } 1785 return false; 1786 } 1787 1788 /* 1789 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1790 * old data or set b_node->b_end. 
1791 * @b_node: the maple_big_node 1792 * @shift: the shift count 1793 */ 1794 static inline void mab_shift_right(struct maple_big_node *b_node, 1795 unsigned char shift) 1796 { 1797 unsigned long size = b_node->b_end * sizeof(unsigned long); 1798 1799 memmove(b_node->pivot + shift, b_node->pivot, size); 1800 memmove(b_node->slot + shift, b_node->slot, size); 1801 if (b_node->type == maple_arange_64) 1802 memmove(b_node->gap + shift, b_node->gap, size); 1803 } 1804 1805 /* 1806 * mab_middle_node() - Check if a middle node is needed (unlikely) 1807 * @b_node: the maple_big_node that contains the data. 1808 * @split: the potential split location 1809 * @slot_count: the size that can be stored in a single node being considered. 1810 * 1811 * Return: true if a middle node is required. 1812 */ 1813 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1814 unsigned char slot_count) 1815 { 1816 unsigned char size = b_node->b_end; 1817 1818 if (size >= 2 * slot_count) 1819 return true; 1820 1821 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1822 return true; 1823 1824 return false; 1825 } 1826 1827 /* 1828 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1829 * @b_node: the maple_big_node with the data 1830 * @split: the suggested split location 1831 * @slot_count: the number of slots in the node being considered. 1832 * 1833 * Return: the split location. 1834 */ 1835 static inline int mab_no_null_split(struct maple_big_node *b_node, 1836 unsigned char split, unsigned char slot_count) 1837 { 1838 if (!b_node->slot[split]) { 1839 /* 1840 * If the split is less than the max slot && the right side will 1841 * still be sufficient, then increment the split on NULL. 1842 */ 1843 if ((split < slot_count - 1) && 1844 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1845 split++; 1846 else 1847 split--; 1848 } 1849 return split; 1850 } 1851 1852 /* 1853 * mab_calc_split() - Calculate the split location and if there needs to be two 1854 * splits. 1855 * @mas: The maple state 1856 * @bn: The maple_big_node with the data 1857 * @mid_split: The second split, if required. 0 otherwise. 1858 * 1859 * Return: The first split location. The middle split is set in @mid_split. 1860 */ 1861 static inline int mab_calc_split(struct ma_state *mas, 1862 struct maple_big_node *bn, unsigned char *mid_split) 1863 { 1864 unsigned char b_end = bn->b_end; 1865 int split = b_end / 2; /* Assume equal split. */ 1866 unsigned char slot_count = mt_slots[bn->type]; 1867 1868 /* 1869 * To support gap tracking, all NULL entries are kept together and a node cannot 1870 * end on a NULL entry, with the exception of the left-most leaf. The 1871 * limitation means that the split of a node must be checked for this condition 1872 * and be able to put more data in one direction or the other. 1873 */ 1874 if (unlikely((mas->mas_flags & MA_STATE_BULK))) { 1875 *mid_split = 0; 1876 split = b_end - mt_min_slots[bn->type]; 1877 1878 if (!ma_is_leaf(bn->type)) 1879 return split; 1880 1881 mas->mas_flags |= MA_STATE_REBALANCE; 1882 if (!bn->slot[split]) 1883 split--; 1884 return split; 1885 } 1886 1887 /* 1888 * Although extremely rare, it is possible to enter what is known as the 3-way 1889 * split scenario. The 3-way split comes about by means of a store of a range 1890 * that overwrites the end and beginning of two full nodes. The result is a set 1891 * of entries that cannot be stored in 2 nodes. 
Sometimes, these two nodes can 1892 * also be located in different parent nodes which are also full. This can 1893 * carry upwards all the way to the root in the worst case. 1894 */ 1895 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1896 split = b_end / 3; 1897 *mid_split = split * 2; 1898 } else { 1899 *mid_split = 0; 1900 } 1901 1902 /* Avoid ending a node on a NULL entry */ 1903 split = mab_no_null_split(bn, split, slot_count); 1904 1905 if (unlikely(*mid_split)) 1906 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1907 1908 return split; 1909 } 1910 1911 /* 1912 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1913 * and set @b_node->b_end to the next free slot. 1914 * @mas: The maple state 1915 * @mas_start: The starting slot to copy 1916 * @mas_end: The end slot to copy (inclusively) 1917 * @b_node: The maple_big_node to place the data 1918 * @mab_start: The starting location in maple_big_node to store the data. 1919 */ 1920 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1921 unsigned char mas_end, struct maple_big_node *b_node, 1922 unsigned char mab_start) 1923 { 1924 enum maple_type mt; 1925 struct maple_node *node; 1926 void __rcu **slots; 1927 unsigned long *pivots, *gaps; 1928 int i = mas_start, j = mab_start; 1929 unsigned char piv_end; 1930 1931 node = mas_mn(mas); 1932 mt = mte_node_type(mas->node); 1933 pivots = ma_pivots(node, mt); 1934 if (!i) { 1935 b_node->pivot[j] = pivots[i++]; 1936 if (unlikely(i > mas_end)) 1937 goto complete; 1938 j++; 1939 } 1940 1941 piv_end = min(mas_end, mt_pivots[mt]); 1942 for (; i < piv_end; i++, j++) { 1943 b_node->pivot[j] = pivots[i]; 1944 if (unlikely(!b_node->pivot[j])) 1945 goto complete; 1946 1947 if (unlikely(mas->max == b_node->pivot[j])) 1948 goto complete; 1949 } 1950 1951 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1952 1953 complete: 1954 b_node->b_end = ++j; 1955 j -= mab_start; 1956 slots = ma_slots(node, mt); 1957 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1958 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1959 gaps = ma_gaps(node, mt); 1960 memcpy(b_node->gap + mab_start, gaps + mas_start, 1961 sizeof(unsigned long) * j); 1962 } 1963 } 1964 1965 /* 1966 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1967 * @node: The maple node 1968 * @mt: The maple type 1969 * @end: The node end 1970 */ 1971 static inline void mas_leaf_set_meta(struct maple_node *node, 1972 enum maple_type mt, unsigned char end) 1973 { 1974 if (end < mt_slots[mt] - 1) 1975 ma_set_meta(node, mt, 0, end); 1976 } 1977 1978 /* 1979 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 1980 * @b_node: the maple_big_node that has the data 1981 * @mab_start: the start location in @b_node. 1982 * @mab_end: The end location in @b_node (inclusively) 1983 * @mas: The maple state with the maple encoded node. 
1984 */ 1985 static inline void mab_mas_cp(struct maple_big_node *b_node, 1986 unsigned char mab_start, unsigned char mab_end, 1987 struct ma_state *mas, bool new_max) 1988 { 1989 int i, j = 0; 1990 enum maple_type mt = mte_node_type(mas->node); 1991 struct maple_node *node = mte_to_node(mas->node); 1992 void __rcu **slots = ma_slots(node, mt); 1993 unsigned long *pivots = ma_pivots(node, mt); 1994 unsigned long *gaps = NULL; 1995 unsigned char end; 1996 1997 if (mab_end - mab_start > mt_pivots[mt]) 1998 mab_end--; 1999 2000 if (!pivots[mt_pivots[mt] - 1]) 2001 slots[mt_pivots[mt]] = NULL; 2002 2003 i = mab_start; 2004 do { 2005 pivots[j++] = b_node->pivot[i++]; 2006 } while (i <= mab_end && likely(b_node->pivot[i])); 2007 2008 memcpy(slots, b_node->slot + mab_start, 2009 sizeof(void *) * (i - mab_start)); 2010 2011 if (new_max) 2012 mas->max = b_node->pivot[i - 1]; 2013 2014 end = j - 1; 2015 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 2016 unsigned long max_gap = 0; 2017 unsigned char offset = 0; 2018 2019 gaps = ma_gaps(node, mt); 2020 do { 2021 gaps[--j] = b_node->gap[--i]; 2022 if (gaps[j] > max_gap) { 2023 offset = j; 2024 max_gap = gaps[j]; 2025 } 2026 } while (j); 2027 2028 ma_set_meta(node, mt, offset, end); 2029 } else { 2030 mas_leaf_set_meta(node, mt, end); 2031 } 2032 } 2033 2034 /* 2035 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 2036 * @mas: The maple state 2037 * @end: The maple node end 2038 * @mt: The maple node type 2039 */ 2040 static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, 2041 enum maple_type mt) 2042 { 2043 if (!(mas->mas_flags & MA_STATE_BULK)) 2044 return; 2045 2046 if (mte_is_root(mas->node)) 2047 return; 2048 2049 if (end > mt_min_slots[mt]) { 2050 mas->mas_flags &= ~MA_STATE_REBALANCE; 2051 return; 2052 } 2053 } 2054 2055 /* 2056 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2057 * data from a maple encoded node. 2058 * @wr_mas: the maple write state 2059 * @b_node: the maple_big_node to fill with data 2060 * @offset_end: the offset to end copying 2061 * 2062 * Return: The actual end of the data stored in @b_node 2063 */ 2064 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 2065 struct maple_big_node *b_node, unsigned char offset_end) 2066 { 2067 unsigned char slot; 2068 unsigned char b_end; 2069 /* Possible underflow of piv will wrap back to 0 before use. */ 2070 unsigned long piv; 2071 struct ma_state *mas = wr_mas->mas; 2072 2073 b_node->type = wr_mas->type; 2074 b_end = 0; 2075 slot = mas->offset; 2076 if (slot) { 2077 /* Copy start data up to insert. */ 2078 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 2079 b_end = b_node->b_end; 2080 piv = b_node->pivot[b_end - 1]; 2081 } else 2082 piv = mas->min - 1; 2083 2084 if (piv + 1 < mas->index) { 2085 /* Handle range starting after old range */ 2086 b_node->slot[b_end] = wr_mas->content; 2087 if (!wr_mas->content) 2088 b_node->gap[b_end] = mas->index - 1 - piv; 2089 b_node->pivot[b_end++] = mas->index - 1; 2090 } 2091 2092 /* Store the new entry. */ 2093 mas->offset = b_end; 2094 b_node->slot[b_end] = wr_mas->entry; 2095 b_node->pivot[b_end] = mas->last; 2096 2097 /* Appended. 
*/ 2098 if (mas->last >= mas->max) 2099 goto b_end; 2100 2101 /* Handle new range ending before old range ends */ 2102 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2103 if (piv > mas->last) { 2104 if (piv == ULONG_MAX) 2105 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); 2106 2107 if (offset_end != slot) 2108 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2109 offset_end); 2110 2111 b_node->slot[++b_end] = wr_mas->content; 2112 if (!wr_mas->content) 2113 b_node->gap[b_end] = piv - mas->last + 1; 2114 b_node->pivot[b_end] = piv; 2115 } 2116 2117 slot = offset_end + 1; 2118 if (slot > mas->end) 2119 goto b_end; 2120 2121 /* Copy end data to the end of the node. */ 2122 mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end); 2123 b_node->b_end--; 2124 return; 2125 2126 b_end: 2127 b_node->b_end = b_end; 2128 } 2129 2130 /* 2131 * mas_prev_sibling() - Find the previous node with the same parent. 2132 * @mas: the maple state 2133 * 2134 * Return: True if there is a previous sibling, false otherwise. 2135 */ 2136 static inline bool mas_prev_sibling(struct ma_state *mas) 2137 { 2138 unsigned int p_slot = mte_parent_slot(mas->node); 2139 2140 /* For root node, p_slot is set to 0 by mte_parent_slot(). */ 2141 if (!p_slot) 2142 return false; 2143 2144 mas_ascend(mas); 2145 mas->offset = p_slot - 1; 2146 mas_descend(mas); 2147 return true; 2148 } 2149 2150 /* 2151 * mas_next_sibling() - Find the next node with the same parent. 2152 * @mas: the maple state 2153 * 2154 * Return: true if there is a next sibling, false otherwise. 2155 */ 2156 static inline bool mas_next_sibling(struct ma_state *mas) 2157 { 2158 MA_STATE(parent, mas->tree, mas->index, mas->last); 2159 2160 if (mte_is_root(mas->node)) 2161 return false; 2162 2163 parent = *mas; 2164 mas_ascend(&parent); 2165 parent.offset = mte_parent_slot(mas->node) + 1; 2166 if (parent.offset > mas_data_end(&parent)) 2167 return false; 2168 2169 *mas = parent; 2170 mas_descend(mas); 2171 return true; 2172 } 2173 2174 /* 2175 * mas_node_or_none() - Set the enode and state. 2176 * @mas: the maple state 2177 * @enode: The encoded maple node. 2178 * 2179 * Set the node to the enode and the status. 2180 */ 2181 static inline void mas_node_or_none(struct ma_state *mas, 2182 struct maple_enode *enode) 2183 { 2184 if (enode) { 2185 mas->node = enode; 2186 mas->status = ma_active; 2187 } else { 2188 mas->node = NULL; 2189 mas->status = ma_none; 2190 } 2191 } 2192 2193 /* 2194 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 2195 * If @mas->index cannot be found within the containing 2196 * node, we traverse to the last entry in the node. 2197 * @wr_mas: The maple write state 2198 * 2199 * Uses mas_slot_locked() and does not need to worry about dead nodes. 2200 */ 2201 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 2202 { 2203 struct ma_state *mas = wr_mas->mas; 2204 unsigned char count, offset; 2205 2206 if (unlikely(ma_is_dense(wr_mas->type))) { 2207 wr_mas->r_max = wr_mas->r_min = mas->index; 2208 mas->offset = mas->index = mas->min; 2209 return; 2210 } 2211 2212 wr_mas->node = mas_mn(wr_mas->mas); 2213 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2214 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, 2215 wr_mas->pivots, mas->max); 2216 offset = mas->offset; 2217 2218 while (offset < count && mas->index > wr_mas->pivots[offset]) 2219 offset++; 2220 2221 wr_mas->r_max = offset < count ? 
wr_mas->pivots[offset] : mas->max; 2222 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2223 wr_mas->offset_end = mas->offset = offset; 2224 } 2225 2226 /* 2227 * mast_rebalance_next() - Rebalance against the next node 2228 * @mast: The maple subtree state 2229 */ 2230 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2231 { 2232 unsigned char b_end = mast->bn->b_end; 2233 2234 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2235 mast->bn, b_end); 2236 mast->orig_r->last = mast->orig_r->max; 2237 } 2238 2239 /* 2240 * mast_rebalance_prev() - Rebalance against the previous node 2241 * @mast: The maple subtree state 2242 */ 2243 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2244 { 2245 unsigned char end = mas_data_end(mast->orig_l) + 1; 2246 unsigned char b_end = mast->bn->b_end; 2247 2248 mab_shift_right(mast->bn, end); 2249 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2250 mast->l->min = mast->orig_l->min; 2251 mast->orig_l->index = mast->orig_l->min; 2252 mast->bn->b_end = end + b_end; 2253 mast->l->offset += end; 2254 } 2255 2256 /* 2257 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2258 * the node to the right. Checking the nodes to the right then the left at each 2259 * level upwards until root is reached. 2260 * Data is copied into the @mast->bn. 2261 * @mast: The maple_subtree_state. 2262 */ 2263 static inline 2264 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2265 { 2266 struct ma_state r_tmp = *mast->orig_r; 2267 struct ma_state l_tmp = *mast->orig_l; 2268 unsigned char depth = 0; 2269 2270 do { 2271 mas_ascend(mast->orig_r); 2272 mas_ascend(mast->orig_l); 2273 depth++; 2274 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2275 mast->orig_r->offset++; 2276 do { 2277 mas_descend(mast->orig_r); 2278 mast->orig_r->offset = 0; 2279 } while (--depth); 2280 2281 mast_rebalance_next(mast); 2282 *mast->orig_l = l_tmp; 2283 return true; 2284 } else if (mast->orig_l->offset != 0) { 2285 mast->orig_l->offset--; 2286 do { 2287 mas_descend(mast->orig_l); 2288 mast->orig_l->offset = 2289 mas_data_end(mast->orig_l); 2290 } while (--depth); 2291 2292 mast_rebalance_prev(mast); 2293 *mast->orig_r = r_tmp; 2294 return true; 2295 } 2296 } while (!mte_is_root(mast->orig_r->node)); 2297 2298 *mast->orig_r = r_tmp; 2299 *mast->orig_l = l_tmp; 2300 return false; 2301 } 2302 2303 /* 2304 * mast_ascend() - Ascend the original left and right maple states. 2305 * @mast: the maple subtree state. 2306 * 2307 * Ascend the original left and right sides. Set the offsets to point to the 2308 * data already in the new tree (@mast->l and @mast->r). 
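 * Both parents are then re-walked with mas_wr_node_walk() so the
 * offsets and @mast->bn->type reflect the parent level.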
2309 */ 2310 static inline void mast_ascend(struct maple_subtree_state *mast) 2311 { 2312 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2313 mas_ascend(mast->orig_l); 2314 mas_ascend(mast->orig_r); 2315 2316 mast->orig_r->offset = 0; 2317 mast->orig_r->index = mast->r->max; 2318 /* last should be larger than or equal to index */ 2319 if (mast->orig_r->last < mast->orig_r->index) 2320 mast->orig_r->last = mast->orig_r->index; 2321 2322 wr_mas.type = mte_node_type(mast->orig_r->node); 2323 mas_wr_node_walk(&wr_mas); 2324 /* Set up the left side of things */ 2325 mast->orig_l->offset = 0; 2326 mast->orig_l->index = mast->l->min; 2327 wr_mas.mas = mast->orig_l; 2328 wr_mas.type = mte_node_type(mast->orig_l->node); 2329 mas_wr_node_walk(&wr_mas); 2330 2331 mast->bn->type = wr_mas.type; 2332 } 2333 2334 /* 2335 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2336 * @mas: the maple state with the allocations. 2337 * @b_node: the maple_big_node with the type encoding. 2338 * 2339 * Use the node type from the maple_big_node to allocate a new node from the 2340 * ma_state. This function exists mainly for code readability. 2341 * 2342 * Return: A new maple encoded node 2343 */ 2344 static inline struct maple_enode 2345 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2346 { 2347 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2348 } 2349 2350 /* 2351 * mas_mab_to_node() - Set up right and middle nodes 2352 * 2353 * @mas: the maple state that contains the allocations. 2354 * @b_node: the node which contains the data. 2355 * @left: The pointer which will have the left node 2356 * @right: The pointer which may have the right node 2357 * @middle: the pointer which may have the middle node (rare) 2358 * @mid_split: the split location for the middle node 2359 * 2360 * Return: the split of left. 2361 */ 2362 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2363 struct maple_big_node *b_node, struct maple_enode **left, 2364 struct maple_enode **right, struct maple_enode **middle, 2365 unsigned char *mid_split) 2366 { 2367 unsigned char split = 0; 2368 unsigned char slot_count = mt_slots[b_node->type]; 2369 2370 *left = mas_new_ma_node(mas, b_node); 2371 *right = NULL; 2372 *middle = NULL; 2373 *mid_split = 0; 2374 2375 if (b_node->b_end < slot_count) { 2376 split = b_node->b_end; 2377 } else { 2378 split = mab_calc_split(mas, b_node, mid_split); 2379 *right = mas_new_ma_node(mas, b_node); 2380 } 2381 2382 if (*mid_split) 2383 *middle = mas_new_ma_node(mas, b_node); 2384 2385 return split; 2386 2387 } 2388 2389 /* 2390 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2391 * pointer. 2392 * @b_node: the big node to add the entry 2393 * @mas: the maple state to get the pivot (mas->max) 2394 * @entry: the entry to add, if NULL nothing happens. 2395 */ 2396 static inline void mab_set_b_end(struct maple_big_node *b_node, 2397 struct ma_state *mas, 2398 void *entry) 2399 { 2400 if (!entry) 2401 return; 2402 2403 b_node->slot[b_node->b_end] = entry; 2404 if (mt_is_alloc(mas->tree)) 2405 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2406 b_node->pivot[b_node->b_end++] = mas->max; 2407 } 2408 2409 /* 2410 * mas_set_split_parent() - combine_then_separate helper function. 
Sets the parent 2411 * of @mas->node to either @left or @right, depending on @slot and @split 2412 * 2413 * @mas: the maple state with the node that needs a parent 2414 * @left: possible parent 1 2415 * @right: possible parent 2 2416 * @slot: the slot the mas->node was placed 2417 * @split: the split location between @left and @right 2418 */ 2419 static inline void mas_set_split_parent(struct ma_state *mas, 2420 struct maple_enode *left, 2421 struct maple_enode *right, 2422 unsigned char *slot, unsigned char split) 2423 { 2424 if (mas_is_none(mas)) 2425 return; 2426 2427 if ((*slot) <= split) 2428 mas_set_parent(mas, mas->node, left, *slot); 2429 else if (right) 2430 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2431 2432 (*slot)++; 2433 } 2434 2435 /* 2436 * mte_mid_split_check() - Check if the next node passes the mid-split 2437 * @l: Pointer to left encoded maple node. 2438 * @m: Pointer to middle encoded maple node. 2439 * @r: Pointer to right encoded maple node. 2440 * @slot: The offset 2441 * @split: The split location. 2442 * @mid_split: The middle split. 2443 */ 2444 static inline void mte_mid_split_check(struct maple_enode **l, 2445 struct maple_enode **r, 2446 struct maple_enode *right, 2447 unsigned char slot, 2448 unsigned char *split, 2449 unsigned char mid_split) 2450 { 2451 if (*r == right) 2452 return; 2453 2454 if (slot < mid_split) 2455 return; 2456 2457 *l = *r; 2458 *r = right; 2459 *split = mid_split; 2460 } 2461 2462 /* 2463 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2464 * is taken from @mast->l. 2465 * @mast: the maple subtree state 2466 * @left: the left node 2467 * @right: the right node 2468 * @split: the split location. 2469 */ 2470 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2471 struct maple_enode *left, 2472 struct maple_enode *middle, 2473 struct maple_enode *right, 2474 unsigned char split, 2475 unsigned char mid_split) 2476 { 2477 unsigned char slot; 2478 struct maple_enode *l = left; 2479 struct maple_enode *r = right; 2480 2481 if (mas_is_none(mast->l)) 2482 return; 2483 2484 if (middle) 2485 r = middle; 2486 2487 slot = mast->l->offset; 2488 2489 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2490 mas_set_split_parent(mast->l, l, r, &slot, split); 2491 2492 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2493 mas_set_split_parent(mast->m, l, r, &slot, split); 2494 2495 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2496 mas_set_split_parent(mast->r, l, r, &slot, split); 2497 } 2498 2499 /* 2500 * mas_topiary_node() - Dispose of a single node 2501 * @mas: The maple state for pushing nodes 2502 * @in_rcu: If the tree is in rcu mode 2503 * 2504 * The node will either be RCU freed or pushed back on the maple state. 2505 */ 2506 static inline void mas_topiary_node(struct ma_state *mas, 2507 struct ma_state *tmp_mas, bool in_rcu) 2508 { 2509 struct maple_node *tmp; 2510 struct maple_enode *enode; 2511 2512 if (mas_is_none(tmp_mas)) 2513 return; 2514 2515 enode = tmp_mas->node; 2516 tmp = mte_to_node(enode); 2517 mte_set_node_dead(enode); 2518 if (in_rcu) 2519 ma_free_rcu(tmp); 2520 else 2521 mas_push_node(mas, tmp); 2522 } 2523 2524 /* 2525 * mas_topiary_replace() - Replace the data with new data, then repair the 2526 * parent links within the new tree. Iterate over the dead sub-tree and collect 2527 * the dead subtrees and topiary the nodes that are no longer of use. 
2528 * 2529 * The new tree will have up to three children with the correct parent. Keep 2530 * track of the new entries as they need to be followed to find the next level 2531 * of new entries. 2532 * 2533 * The old tree will have up to three children with the old parent. Keep track 2534 * of the old entries as they may have more nodes below replaced. Nodes within 2535 * [index, last] are dead subtrees, others need to be freed and followed. 2536 * 2537 * @mas: The maple state pointing at the new data 2538 * @old_enode: The maple encoded node being replaced 2539 * 2540 */ 2541 static inline void mas_topiary_replace(struct ma_state *mas, 2542 struct maple_enode *old_enode) 2543 { 2544 struct ma_state tmp[3], tmp_next[3]; 2545 MA_TOPIARY(subtrees, mas->tree); 2546 bool in_rcu; 2547 int i, n; 2548 2549 /* Place data in tree & then mark node as old */ 2550 mas_put_in_tree(mas, old_enode); 2551 2552 /* Update the parent pointers in the tree */ 2553 tmp[0] = *mas; 2554 tmp[0].offset = 0; 2555 tmp[1].status = ma_none; 2556 tmp[2].status = ma_none; 2557 while (!mte_is_leaf(tmp[0].node)) { 2558 n = 0; 2559 for (i = 0; i < 3; i++) { 2560 if (mas_is_none(&tmp[i])) 2561 continue; 2562 2563 while (n < 3) { 2564 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2565 break; 2566 n++; 2567 } 2568 2569 mas_adopt_children(&tmp[i], tmp[i].node); 2570 } 2571 2572 if (MAS_WARN_ON(mas, n == 0)) 2573 break; 2574 2575 while (n < 3) 2576 tmp_next[n++].status = ma_none; 2577 2578 for (i = 0; i < 3; i++) 2579 tmp[i] = tmp_next[i]; 2580 } 2581 2582 /* Collect the old nodes that need to be discarded */ 2583 if (mte_is_leaf(old_enode)) 2584 return mas_free(mas, old_enode); 2585 2586 tmp[0] = *mas; 2587 tmp[0].offset = 0; 2588 tmp[0].node = old_enode; 2589 tmp[1].status = ma_none; 2590 tmp[2].status = ma_none; 2591 in_rcu = mt_in_rcu(mas->tree); 2592 do { 2593 n = 0; 2594 for (i = 0; i < 3; i++) { 2595 if (mas_is_none(&tmp[i])) 2596 continue; 2597 2598 while (n < 3) { 2599 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2600 break; 2601 2602 if ((tmp_next[n].min >= tmp_next->index) && 2603 (tmp_next[n].max <= tmp_next->last)) { 2604 mat_add(&subtrees, tmp_next[n].node); 2605 tmp_next[n].status = ma_none; 2606 } else { 2607 n++; 2608 } 2609 } 2610 } 2611 2612 if (MAS_WARN_ON(mas, n == 0)) 2613 break; 2614 2615 while (n < 3) 2616 tmp_next[n++].status = ma_none; 2617 2618 for (i = 0; i < 3; i++) { 2619 mas_topiary_node(mas, &tmp[i], in_rcu); 2620 tmp[i] = tmp_next[i]; 2621 } 2622 } while (!mte_is_leaf(tmp[0].node)); 2623 2624 for (i = 0; i < 3; i++) 2625 mas_topiary_node(mas, &tmp[i], in_rcu); 2626 2627 mas_mat_destroy(mas, &subtrees); 2628 } 2629 2630 /* 2631 * mas_wmb_replace() - Write memory barrier and replace 2632 * @mas: The maple state 2633 * @old_enode: The old maple encoded node that is being replaced. 2634 * 2635 * Updates gap as necessary. 2636 */ 2637 static inline void mas_wmb_replace(struct ma_state *mas, 2638 struct maple_enode *old_enode) 2639 { 2640 /* Insert the new data in the tree */ 2641 mas_topiary_replace(mas, old_enode); 2642 2643 if (mte_is_leaf(mas->node)) 2644 return; 2645 2646 mas_update_gap(mas); 2647 } 2648 2649 /* 2650 * mast_cp_to_nodes() - Copy data out to nodes. 2651 * @mast: The maple subtree state 2652 * @left: The left encoded maple node 2653 * @middle: The middle encoded maple node 2654 * @right: The right encoded maple node 2655 * @split: The location to split between left and (middle ? middle : right) 2656 * @mid_split: The location to split between middle and right. 
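 *
 * Offsets [0, @split] of @mast->bn are copied to the left node,
 * (@split, @mid_split] to the middle node when one exists, and the
 * remainder up to b_end to the right node.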
2657 */ 2658 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2659 struct maple_enode *left, struct maple_enode *middle, 2660 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2661 { 2662 bool new_lmax = true; 2663 2664 mas_node_or_none(mast->l, left); 2665 mas_node_or_none(mast->m, middle); 2666 mas_node_or_none(mast->r, right); 2667 2668 mast->l->min = mast->orig_l->min; 2669 if (split == mast->bn->b_end) { 2670 mast->l->max = mast->orig_r->max; 2671 new_lmax = false; 2672 } 2673 2674 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2675 2676 if (middle) { 2677 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2678 mast->m->min = mast->bn->pivot[split] + 1; 2679 split = mid_split; 2680 } 2681 2682 mast->r->max = mast->orig_r->max; 2683 if (right) { 2684 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2685 mast->r->min = mast->bn->pivot[split] + 1; 2686 } 2687 } 2688 2689 /* 2690 * mast_combine_cp_left - Copy in the original left side of the tree into the 2691 * combined data set in the maple subtree state big node. 2692 * @mast: The maple subtree state 2693 */ 2694 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2695 { 2696 unsigned char l_slot = mast->orig_l->offset; 2697 2698 if (!l_slot) 2699 return; 2700 2701 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2702 } 2703 2704 /* 2705 * mast_combine_cp_right: Copy in the original right side of the tree into the 2706 * combined data set in the maple subtree state big node. 2707 * @mast: The maple subtree state 2708 */ 2709 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2710 { 2711 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2712 return; 2713 2714 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2715 mt_slot_count(mast->orig_r->node), mast->bn, 2716 mast->bn->b_end); 2717 mast->orig_r->last = mast->orig_r->max; 2718 } 2719 2720 /* 2721 * mast_sufficient: Check if the maple subtree state has enough data in the big 2722 * node to create at least one sufficient node 2723 * @mast: the maple subtree state 2724 */ 2725 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2726 { 2727 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2728 return true; 2729 2730 return false; 2731 } 2732 2733 /* 2734 * mast_overflow: Check if there is too much data in the subtree state for a 2735 * single node. 
2736 * @mast: The maple subtree state 2737 */ 2738 static inline bool mast_overflow(struct maple_subtree_state *mast) 2739 { 2740 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) 2741 return true; 2742 2743 return false; 2744 } 2745 2746 static inline void *mtree_range_walk(struct ma_state *mas) 2747 { 2748 unsigned long *pivots; 2749 unsigned char offset; 2750 struct maple_node *node; 2751 struct maple_enode *next, *last; 2752 enum maple_type type; 2753 void __rcu **slots; 2754 unsigned char end; 2755 unsigned long max, min; 2756 unsigned long prev_max, prev_min; 2757 2758 next = mas->node; 2759 min = mas->min; 2760 max = mas->max; 2761 do { 2762 last = next; 2763 node = mte_to_node(next); 2764 type = mte_node_type(next); 2765 pivots = ma_pivots(node, type); 2766 end = ma_data_end(node, type, pivots, max); 2767 prev_min = min; 2768 prev_max = max; 2769 if (pivots[0] >= mas->index) { 2770 offset = 0; 2771 max = pivots[0]; 2772 goto next; 2773 } 2774 2775 offset = 1; 2776 while (offset < end) { 2777 if (pivots[offset] >= mas->index) { 2778 max = pivots[offset]; 2779 break; 2780 } 2781 offset++; 2782 } 2783 2784 min = pivots[offset - 1] + 1; 2785 next: 2786 slots = ma_slots(node, type); 2787 next = mt_slot(mas->tree, slots, offset); 2788 if (unlikely(ma_dead_node(node))) 2789 goto dead_node; 2790 } while (!ma_is_leaf(type)); 2791 2792 mas->end = end; 2793 mas->offset = offset; 2794 mas->index = min; 2795 mas->last = max; 2796 mas->min = prev_min; 2797 mas->max = prev_max; 2798 mas->node = last; 2799 return (void *)next; 2800 2801 dead_node: 2802 mas_reset(mas); 2803 return NULL; 2804 } 2805 2806 /* 2807 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2808 * @mas: The starting maple state 2809 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2810 * @count: The estimated count of iterations needed. 2811 * 2812 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2813 * is hit. First @b_node is split into two entries which are inserted into the 2814 * next iteration of the loop. @b_node is returned populated with the final 2815 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2816 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2817 * to account of what has been copied into the new sub-tree. The update of 2818 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2819 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2820 * the new sub-tree in case the sub-tree becomes the full tree. 2821 */ 2822 static void mas_spanning_rebalance(struct ma_state *mas, 2823 struct maple_subtree_state *mast, unsigned char count) 2824 { 2825 unsigned char split, mid_split; 2826 unsigned char slot = 0; 2827 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2828 struct maple_enode *old_enode; 2829 2830 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2831 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2832 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2833 2834 /* 2835 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2836 * Rebalancing is done by use of the ``struct maple_topiary``. 2837 */ 2838 mast->l = &l_mas; 2839 mast->m = &m_mas; 2840 mast->r = &r_mas; 2841 l_mas.status = r_mas.status = m_mas.status = ma_none; 2842 2843 /* Check if this is not root and has sufficient data. 
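 * If it is not the root and the data is insufficient, entries are
 * borrowed from a neighbouring subtree via mast_spanning_rebalance()
 * before the new subtree is built.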
*/ 2844 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2845 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2846 mast_spanning_rebalance(mast); 2847 2848 l_mas.depth = 0; 2849 2850 /* 2851 * Each level of the tree is examined and balanced, pushing data to the left or 2852 * right, or rebalancing against left or right nodes is employed to avoid 2853 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2854 * the tree is created, there may be a mix of new and old nodes. The old nodes 2855 * will have the incorrect parent pointers and currently be in two trees: the 2856 * original tree and the partially new tree. To remedy the parent pointers in 2857 * the old tree, the new data is swapped into the active tree and a walk down 2858 * the tree is performed and the parent pointers are updated. 2859 * See mas_topiary_replace() for more information. 2860 */ 2861 while (count--) { 2862 mast->bn->b_end--; 2863 mast->bn->type = mte_node_type(mast->orig_l->node); 2864 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2865 &mid_split); 2866 mast_set_split_parents(mast, left, middle, right, split, 2867 mid_split); 2868 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2869 2870 /* 2871 * Copy data from next level in the tree to mast->bn from next 2872 * iteration 2873 */ 2874 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2875 mast->bn->type = mte_node_type(left); 2876 l_mas.depth++; 2877 2878 /* Root already stored in l->node. */ 2879 if (mas_is_root_limits(mast->l)) 2880 goto new_root; 2881 2882 mast_ascend(mast); 2883 mast_combine_cp_left(mast); 2884 l_mas.offset = mast->bn->b_end; 2885 mab_set_b_end(mast->bn, &l_mas, left); 2886 mab_set_b_end(mast->bn, &m_mas, middle); 2887 mab_set_b_end(mast->bn, &r_mas, right); 2888 2889 /* Copy anything necessary out of the right node. */ 2890 mast_combine_cp_right(mast); 2891 mast->orig_l->last = mast->orig_l->max; 2892 2893 if (mast_sufficient(mast)) 2894 continue; 2895 2896 if (mast_overflow(mast)) 2897 continue; 2898 2899 /* May be a new root stored in mast->bn */ 2900 if (mas_is_root_limits(mast->orig_l)) 2901 break; 2902 2903 mast_spanning_rebalance(mast); 2904 2905 /* rebalancing from other nodes may require another loop. */ 2906 if (!count) 2907 count++; 2908 } 2909 2910 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2911 mte_node_type(mast->orig_l->node)); 2912 l_mas.depth++; 2913 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2914 mas_set_parent(mas, left, l_mas.node, slot); 2915 if (middle) 2916 mas_set_parent(mas, middle, l_mas.node, ++slot); 2917 2918 if (right) 2919 mas_set_parent(mas, right, l_mas.node, ++slot); 2920 2921 if (mas_is_root_limits(mast->l)) { 2922 new_root: 2923 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2924 while (!mte_is_root(mast->orig_l->node)) 2925 mast_ascend(mast); 2926 } else { 2927 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2928 } 2929 2930 old_enode = mast->orig_l->node; 2931 mas->depth = l_mas.depth; 2932 mas->node = l_mas.node; 2933 mas->min = l_mas.min; 2934 mas->max = l_mas.max; 2935 mas->offset = l_mas.offset; 2936 mas_wmb_replace(mas, old_enode); 2937 mtree_range_walk(mas); 2938 return; 2939 } 2940 2941 /* 2942 * mas_rebalance() - Rebalance a given node. 2943 * @mas: The maple state 2944 * @b_node: The big maple node. 2945 * 2946 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2947 * Continue upwards until tree is sufficient. 
2948 */ 2949 static inline void mas_rebalance(struct ma_state *mas, 2950 struct maple_big_node *b_node) 2951 { 2952 char empty_count = mas_mt_height(mas); 2953 struct maple_subtree_state mast; 2954 unsigned char shift, b_end = ++b_node->b_end; 2955 2956 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2957 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2958 2959 trace_ma_op(__func__, mas); 2960 2961 /* 2962 * Rebalancing occurs if a node is insufficient. Data is rebalanced 2963 * against the node to the right if it exists, otherwise the node to the 2964 * left of this node is rebalanced against this node. If rebalancing 2965 * causes just one node to be produced instead of two, then the parent 2966 * is also examined and rebalanced if it is insufficient. Every level 2967 * tries to combine the data in the same way. If one node contains the 2968 * entire range of the tree, then that node is used as a new root node. 2969 */ 2970 2971 mast.orig_l = &l_mas; 2972 mast.orig_r = &r_mas; 2973 mast.bn = b_node; 2974 mast.bn->type = mte_node_type(mas->node); 2975 2976 l_mas = r_mas = *mas; 2977 2978 if (mas_next_sibling(&r_mas)) { 2979 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 2980 r_mas.last = r_mas.index = r_mas.max; 2981 } else { 2982 mas_prev_sibling(&l_mas); 2983 shift = mas_data_end(&l_mas) + 1; 2984 mab_shift_right(b_node, shift); 2985 mas->offset += shift; 2986 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 2987 b_node->b_end = shift + b_end; 2988 l_mas.index = l_mas.last = l_mas.min; 2989 } 2990 2991 return mas_spanning_rebalance(mas, &mast, empty_count); 2992 } 2993 2994 /* 2995 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple 2996 * state. 2997 * @mas: The maple state 2998 * @end: The end of the left-most node. 2999 * 3000 * During a mass-insert event (such as forking), it may be necessary to 3001 * rebalance the left-most node when it is not sufficient. 3002 */ 3003 static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) 3004 { 3005 enum maple_type mt = mte_node_type(mas->node); 3006 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; 3007 struct maple_enode *eparent, *old_eparent; 3008 unsigned char offset, tmp, split = mt_slots[mt] / 2; 3009 void __rcu **l_slots, **slots; 3010 unsigned long *l_pivs, *pivs, gap; 3011 bool in_rcu = mt_in_rcu(mas->tree); 3012 3013 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3014 3015 l_mas = *mas; 3016 mas_prev_sibling(&l_mas); 3017 3018 /* set up node. 
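 * In RCU mode a new node must be allocated so concurrent readers never
 * see a partially updated node; otherwise the on-stack 'reuse' node is
 * used as scratch space and later copied back over the original.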
*/ 3019 if (in_rcu) { 3020 newnode = mas_pop_node(mas); 3021 } else { 3022 newnode = &reuse; 3023 } 3024 3025 node = mas_mn(mas); 3026 newnode->parent = node->parent; 3027 slots = ma_slots(newnode, mt); 3028 pivs = ma_pivots(newnode, mt); 3029 left = mas_mn(&l_mas); 3030 l_slots = ma_slots(left, mt); 3031 l_pivs = ma_pivots(left, mt); 3032 if (!l_slots[split]) 3033 split++; 3034 tmp = mas_data_end(&l_mas) - split; 3035 3036 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); 3037 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); 3038 pivs[tmp] = l_mas.max; 3039 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); 3040 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); 3041 3042 l_mas.max = l_pivs[split]; 3043 mas->min = l_mas.max + 1; 3044 old_eparent = mt_mk_node(mte_parent(l_mas.node), 3045 mas_parent_type(&l_mas, l_mas.node)); 3046 tmp += end; 3047 if (!in_rcu) { 3048 unsigned char max_p = mt_pivots[mt]; 3049 unsigned char max_s = mt_slots[mt]; 3050 3051 if (tmp < max_p) 3052 memset(pivs + tmp, 0, 3053 sizeof(unsigned long) * (max_p - tmp)); 3054 3055 if (tmp < mt_slots[mt]) 3056 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3057 3058 memcpy(node, newnode, sizeof(struct maple_node)); 3059 ma_set_meta(node, mt, 0, tmp - 1); 3060 mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), 3061 l_pivs[split]); 3062 3063 /* Remove data from l_pivs. */ 3064 tmp = split + 1; 3065 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); 3066 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3067 ma_set_meta(left, mt, 0, split); 3068 eparent = old_eparent; 3069 3070 goto done; 3071 } 3072 3073 /* RCU requires replacing both l_mas, mas, and parent. */ 3074 mas->node = mt_mk_node(newnode, mt); 3075 ma_set_meta(newnode, mt, 0, tmp); 3076 3077 new_left = mas_pop_node(mas); 3078 new_left->parent = left->parent; 3079 mt = mte_node_type(l_mas.node); 3080 slots = ma_slots(new_left, mt); 3081 pivs = ma_pivots(new_left, mt); 3082 memcpy(slots, l_slots, sizeof(void *) * split); 3083 memcpy(pivs, l_pivs, sizeof(unsigned long) * split); 3084 ma_set_meta(new_left, mt, 0, split); 3085 l_mas.node = mt_mk_node(new_left, mt); 3086 3087 /* replace parent. */ 3088 offset = mte_parent_slot(mas->node); 3089 mt = mas_parent_type(&l_mas, l_mas.node); 3090 parent = mas_pop_node(mas); 3091 slots = ma_slots(parent, mt); 3092 pivs = ma_pivots(parent, mt); 3093 memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); 3094 rcu_assign_pointer(slots[offset], mas->node); 3095 rcu_assign_pointer(slots[offset - 1], l_mas.node); 3096 pivs[offset - 1] = l_mas.max; 3097 eparent = mt_mk_node(parent, mt); 3098 done: 3099 gap = mas_leaf_max_gap(mas); 3100 mte_set_gap(eparent, mte_parent_slot(mas->node), gap); 3101 gap = mas_leaf_max_gap(&l_mas); 3102 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); 3103 mas_ascend(mas); 3104 3105 if (in_rcu) { 3106 mas_replace_node(mas, old_eparent); 3107 mas_adopt_children(mas, mas->node); 3108 } 3109 3110 mas_update_gap(mas); 3111 } 3112 3113 /* 3114 * mas_split_final_node() - Split the final node in a subtree operation. 3115 * @mast: the maple subtree state 3116 * @mas: The maple state 3117 * @height: The height of the tree in case it's a new root. 
3118 */ 3119 static inline void mas_split_final_node(struct maple_subtree_state *mast, 3120 struct ma_state *mas, int height) 3121 { 3122 struct maple_enode *ancestor; 3123 3124 if (mte_is_root(mas->node)) { 3125 if (mt_is_alloc(mas->tree)) 3126 mast->bn->type = maple_arange_64; 3127 else 3128 mast->bn->type = maple_range_64; 3129 mas->depth = height; 3130 } 3131 /* 3132 * Only a single node is used here, could be root. 3133 * The Big_node data should just fit in a single node. 3134 */ 3135 ancestor = mas_new_ma_node(mas, mast->bn); 3136 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 3137 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 3138 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 3139 3140 mast->l->node = ancestor; 3141 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 3142 mas->offset = mast->bn->b_end - 1; 3143 } 3144 3145 /* 3146 * mast_fill_bnode() - Copy data into the big node in the subtree state 3147 * @mast: The maple subtree state 3148 * @mas: the maple state 3149 * @skip: The number of entries to skip for new nodes insertion. 3150 */ 3151 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 3152 struct ma_state *mas, 3153 unsigned char skip) 3154 { 3155 bool cp = true; 3156 unsigned char split; 3157 3158 memset(mast->bn, 0, sizeof(struct maple_big_node)); 3159 3160 if (mte_is_root(mas->node)) { 3161 cp = false; 3162 } else { 3163 mas_ascend(mas); 3164 mas->offset = mte_parent_slot(mas->node); 3165 } 3166 3167 if (cp && mast->l->offset) 3168 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 3169 3170 split = mast->bn->b_end; 3171 mab_set_b_end(mast->bn, mast->l, mast->l->node); 3172 mast->r->offset = mast->bn->b_end; 3173 mab_set_b_end(mast->bn, mast->r, mast->r->node); 3174 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 3175 cp = false; 3176 3177 if (cp) 3178 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 3179 mast->bn, mast->bn->b_end); 3180 3181 mast->bn->b_end--; 3182 mast->bn->type = mte_node_type(mas->node); 3183 } 3184 3185 /* 3186 * mast_split_data() - Split the data in the subtree state big node into regular 3187 * nodes. 3188 * @mast: The maple subtree state 3189 * @mas: The maple state 3190 * @split: The location to split the big node 3191 */ 3192 static inline void mast_split_data(struct maple_subtree_state *mast, 3193 struct ma_state *mas, unsigned char split) 3194 { 3195 unsigned char p_slot; 3196 3197 mab_mas_cp(mast->bn, 0, split, mast->l, true); 3198 mte_set_pivot(mast->r->node, 0, mast->r->max); 3199 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 3200 mast->l->offset = mte_parent_slot(mas->node); 3201 mast->l->max = mast->bn->pivot[split]; 3202 mast->r->min = mast->l->max + 1; 3203 if (mte_is_leaf(mas->node)) 3204 return; 3205 3206 p_slot = mast->orig_l->offset; 3207 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 3208 &p_slot, split); 3209 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 3210 &p_slot, split); 3211 } 3212 3213 /* 3214 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 3215 * data to the right or left node if there is room. 3216 * @mas: The maple state 3217 * @height: The current height of the maple state 3218 * @mast: The maple subtree state 3219 * @left: Push left or not. 3220 * 3221 * Keeping the height of the tree low means faster lookups. 3222 * 3223 * Return: True if pushed, false otherwise. 
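 *
 * Pushing is only attempted when the combined data of @mast->bn and
 * the sibling still leaves spare slots in the two resulting nodes;
 * otherwise the caller falls back to a regular split.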
3224 */ 3225 static inline bool mas_push_data(struct ma_state *mas, int height, 3226 struct maple_subtree_state *mast, bool left) 3227 { 3228 unsigned char slot_total = mast->bn->b_end; 3229 unsigned char end, space, split; 3230 3231 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 3232 tmp_mas = *mas; 3233 tmp_mas.depth = mast->l->depth; 3234 3235 if (left && !mas_prev_sibling(&tmp_mas)) 3236 return false; 3237 else if (!left && !mas_next_sibling(&tmp_mas)) 3238 return false; 3239 3240 end = mas_data_end(&tmp_mas); 3241 slot_total += end; 3242 space = 2 * mt_slot_count(mas->node) - 2; 3243 /* -2 instead of -1 to ensure there isn't a triple split */ 3244 if (ma_is_leaf(mast->bn->type)) 3245 space--; 3246 3247 if (mas->max == ULONG_MAX) 3248 space--; 3249 3250 if (slot_total >= space) 3251 return false; 3252 3253 /* Get the data; Fill mast->bn */ 3254 mast->bn->b_end++; 3255 if (left) { 3256 mab_shift_right(mast->bn, end + 1); 3257 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 3258 mast->bn->b_end = slot_total + 1; 3259 } else { 3260 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 3261 } 3262 3263 /* Configure mast for splitting of mast->bn */ 3264 split = mt_slots[mast->bn->type] - 2; 3265 if (left) { 3266 /* Switch mas to prev node */ 3267 *mas = tmp_mas; 3268 /* Start using mast->l for the left side. */ 3269 tmp_mas.node = mast->l->node; 3270 *mast->l = tmp_mas; 3271 } else { 3272 tmp_mas.node = mast->r->node; 3273 *mast->r = tmp_mas; 3274 split = slot_total - split; 3275 } 3276 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 3277 /* Update parent slot for split calculation. */ 3278 if (left) 3279 mast->orig_l->offset += end + 1; 3280 3281 mast_split_data(mast, mas, split); 3282 mast_fill_bnode(mast, mas, 2); 3283 mas_split_final_node(mast, mas, height + 1); 3284 return true; 3285 } 3286 3287 /* 3288 * mas_split() - Split data that is too big for one node into two. 3289 * @mas: The maple state 3290 * @b_node: The maple big node 3291 */ 3292 static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) 3293 { 3294 struct maple_subtree_state mast; 3295 int height = 0; 3296 unsigned char mid_split, split = 0; 3297 struct maple_enode *old; 3298 3299 /* 3300 * Splitting is handled differently from any other B-tree; the Maple 3301 * Tree splits upwards. Splitting up means that the split operation 3302 * occurs when the walk of the tree hits the leaves and not on the way 3303 * down. The reason for splitting up is that it is impossible to know 3304 * how much space will be needed until the leaf is (or leaves are) 3305 * reached. Since overwriting data is allowed and a range could 3306 * overwrite more than one range or result in changing one entry into 3 3307 * entries, it is impossible to know if a split is required until the 3308 * data is examined. 3309 * 3310 * Splitting is a balancing act between keeping allocations to a minimum 3311 * and avoiding a 'jitter' event where a tree is expanded to make room 3312 * for an entry followed by a contraction when the entry is removed. To 3313 * accomplish the balance, there are empty slots remaining in both left 3314 * and right nodes after a split. 
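 * As a rough example, assuming 16-slot leaves, a leaf that overflows to
 * 17 entries is split into two nodes of roughly eight or nine entries
 * each, leaving several free slots on both sides for future stores.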
3315 */ 3316 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3317 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3318 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3319 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3320 3321 trace_ma_op(__func__, mas); 3322 mas->depth = mas_mt_height(mas); 3323 3324 mast.l = &l_mas; 3325 mast.r = &r_mas; 3326 mast.orig_l = &prev_l_mas; 3327 mast.orig_r = &prev_r_mas; 3328 mast.bn = b_node; 3329 3330 while (height++ <= mas->depth) { 3331 if (mt_slots[b_node->type] > b_node->b_end) { 3332 mas_split_final_node(&mast, mas, height); 3333 break; 3334 } 3335 3336 l_mas = r_mas = *mas; 3337 l_mas.node = mas_new_ma_node(mas, b_node); 3338 r_mas.node = mas_new_ma_node(mas, b_node); 3339 /* 3340 * Another way that 'jitter' is avoided is to terminate a split up early if the 3341 * left or right node has space to spare. This is referred to as "pushing left" 3342 * or "pushing right" and is similar to the B* tree, except the nodes left or 3343 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3344 * is a significant savings. 3345 */ 3346 /* Try to push left. */ 3347 if (mas_push_data(mas, height, &mast, true)) 3348 break; 3349 /* Try to push right. */ 3350 if (mas_push_data(mas, height, &mast, false)) 3351 break; 3352 3353 split = mab_calc_split(mas, b_node, &mid_split); 3354 mast_split_data(&mast, mas, split); 3355 /* 3356 * Usually correct, mab_mas_cp in the above call overwrites 3357 * r->max. 3358 */ 3359 mast.r->max = mas->max; 3360 mast_fill_bnode(&mast, mas, 1); 3361 prev_l_mas = *mast.l; 3362 prev_r_mas = *mast.r; 3363 } 3364 3365 /* Set the original node as dead */ 3366 old = mas->node; 3367 mas->node = l_mas.node; 3368 mas_wmb_replace(mas, old); 3369 mtree_range_walk(mas); 3370 return; 3371 } 3372 3373 /* 3374 * mas_commit_b_node() - Commit the big node into the tree. 
3375 * @wr_mas: The maple write state 3376 * @b_node: The maple big node 3377 */ 3378 static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, 3379 struct maple_big_node *b_node) 3380 { 3381 enum store_type type = wr_mas->mas->store_type; 3382 3383 WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store); 3384 3385 if (type == wr_rebalance) 3386 return mas_rebalance(wr_mas->mas, b_node); 3387 3388 return mas_split(wr_mas->mas, b_node); 3389 } 3390 3391 /* 3392 * mas_root_expand() - Expand a root to a node 3393 * @mas: The maple state 3394 * @entry: The entry to store into the tree 3395 */ 3396 static inline void mas_root_expand(struct ma_state *mas, void *entry) 3397 { 3398 void *contents = mas_root_locked(mas); 3399 enum maple_type type = maple_leaf_64; 3400 struct maple_node *node; 3401 void __rcu **slots; 3402 unsigned long *pivots; 3403 int slot = 0; 3404 3405 node = mas_pop_node(mas); 3406 pivots = ma_pivots(node, type); 3407 slots = ma_slots(node, type); 3408 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3409 mas->node = mt_mk_node(node, type); 3410 mas->status = ma_active; 3411 3412 if (mas->index) { 3413 if (contents) { 3414 rcu_assign_pointer(slots[slot], contents); 3415 if (likely(mas->index > 1)) 3416 slot++; 3417 } 3418 pivots[slot++] = mas->index - 1; 3419 } 3420 3421 rcu_assign_pointer(slots[slot], entry); 3422 mas->offset = slot; 3423 pivots[slot] = mas->last; 3424 if (mas->last != ULONG_MAX) 3425 pivots[++slot] = ULONG_MAX; 3426 3427 mas->depth = 1; 3428 mas_set_height(mas); 3429 ma_set_meta(node, maple_leaf_64, 0, slot); 3430 /* swap the new root into the tree */ 3431 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3432 return; 3433 } 3434 3435 /* 3436 * mas_store_root() - Storing value into root. 3437 * @mas: The maple state 3438 * @entry: The entry to store. 3439 * 3440 * There is no root node now and we are storing a value into the root - this 3441 * function either assigns the pointer or expands into a node. 3442 */ 3443 static inline void mas_store_root(struct ma_state *mas, void *entry) 3444 { 3445 if (!entry) { 3446 if (!mas->index) 3447 rcu_assign_pointer(mas->tree->ma_root, NULL); 3448 } else if (likely((mas->last != 0) || (mas->index != 0))) 3449 mas_root_expand(mas, entry); 3450 else if (((unsigned long) (entry) & 3) == 2) 3451 mas_root_expand(mas, entry); 3452 else { 3453 rcu_assign_pointer(mas->tree->ma_root, entry); 3454 mas->status = ma_start; 3455 } 3456 } 3457 3458 /* 3459 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3460 * spans the node. 3461 * @wr_mas: The maple write state 3462 * 3463 * Spanning writes are writes that start in one node and end in another OR if 3464 * the write of a %NULL will cause the node to end with a %NULL. 3465 * 3466 * Return: True if this is a spanning write, false otherwise. 3467 */ 3468 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3469 { 3470 unsigned long max = wr_mas->r_max; 3471 unsigned long last = wr_mas->mas->last; 3472 enum maple_type type = wr_mas->type; 3473 void *entry = wr_mas->entry; 3474 3475 /* Contained in this pivot, fast path */ 3476 if (last < max) 3477 return false; 3478 3479 if (ma_is_leaf(type)) { 3480 max = wr_mas->mas->max; 3481 if (last < max) 3482 return false; 3483 } 3484 3485 if (last == max) { 3486 /* 3487 * The last entry of leaf node cannot be NULL unless it is the 3488 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 
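 * For example, storing NULL over a range that ends exactly at this
 * leaf's max (when that max is not ULONG_MAX) is treated as spanning so
 * the NULL can be merged with the following range and the node does not
 * end on a NULL.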
3489 */ 3490 if (entry || last == ULONG_MAX) 3491 return false; 3492 } 3493 3494 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3495 return true; 3496 } 3497 3498 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3499 { 3500 wr_mas->type = mte_node_type(wr_mas->mas->node); 3501 mas_wr_node_walk(wr_mas); 3502 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3503 } 3504 3505 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3506 { 3507 wr_mas->mas->max = wr_mas->r_max; 3508 wr_mas->mas->min = wr_mas->r_min; 3509 wr_mas->mas->node = wr_mas->content; 3510 wr_mas->mas->offset = 0; 3511 wr_mas->mas->depth++; 3512 } 3513 /* 3514 * mas_wr_walk() - Walk the tree for a write. 3515 * @wr_mas: The maple write state 3516 * 3517 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3518 * 3519 * Return: True if it's contained in a node, false on spanning write. 3520 */ 3521 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3522 { 3523 struct ma_state *mas = wr_mas->mas; 3524 3525 while (true) { 3526 mas_wr_walk_descend(wr_mas); 3527 if (unlikely(mas_is_span_wr(wr_mas))) 3528 return false; 3529 3530 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3531 mas->offset); 3532 if (ma_is_leaf(wr_mas->type)) 3533 return true; 3534 3535 mas_wr_walk_traverse(wr_mas); 3536 } 3537 3538 return true; 3539 } 3540 3541 static void mas_wr_walk_index(struct ma_wr_state *wr_mas) 3542 { 3543 struct ma_state *mas = wr_mas->mas; 3544 3545 while (true) { 3546 mas_wr_walk_descend(wr_mas); 3547 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3548 mas->offset); 3549 if (ma_is_leaf(wr_mas->type)) 3550 return; 3551 mas_wr_walk_traverse(wr_mas); 3552 } 3553 } 3554 /* 3555 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 3556 * @l_wr_mas: The left maple write state 3557 * @r_wr_mas: The right maple write state 3558 */ 3559 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3560 struct ma_wr_state *r_wr_mas) 3561 { 3562 struct ma_state *r_mas = r_wr_mas->mas; 3563 struct ma_state *l_mas = l_wr_mas->mas; 3564 unsigned char l_slot; 3565 3566 l_slot = l_mas->offset; 3567 if (!l_wr_mas->content) 3568 l_mas->index = l_wr_mas->r_min; 3569 3570 if ((l_mas->index == l_wr_mas->r_min) && 3571 (l_slot && 3572 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3573 if (l_slot > 1) 3574 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3575 else 3576 l_mas->index = l_mas->min; 3577 3578 l_mas->offset = l_slot - 1; 3579 } 3580 3581 if (!r_wr_mas->content) { 3582 if (r_mas->last < r_wr_mas->r_max) 3583 r_mas->last = r_wr_mas->r_max; 3584 r_mas->offset++; 3585 } else if ((r_mas->last == r_wr_mas->r_max) && 3586 (r_mas->last < r_mas->max) && 3587 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3588 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3589 r_wr_mas->type, r_mas->offset + 1); 3590 r_mas->offset++; 3591 } 3592 } 3593 3594 static inline void *mas_state_walk(struct ma_state *mas) 3595 { 3596 void *entry; 3597 3598 entry = mas_start(mas); 3599 if (mas_is_none(mas)) 3600 return NULL; 3601 3602 if (mas_is_ptr(mas)) 3603 return entry; 3604 3605 return mtree_range_walk(mas); 3606 } 3607 3608 /* 3609 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3610 * to date. 3611 * 3612 * @mas: The maple state. 3613 * 3614 * Note: Leaves mas in undesirable state. 3615 * Return: The entry for @mas->index or %NULL on dead node. 
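 * A %NULL return caused by a dead node leaves the state reset, so the
 * caller can detect this and restart the walk.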
3616 */ 3617 static inline void *mtree_lookup_walk(struct ma_state *mas) 3618 { 3619 unsigned long *pivots; 3620 unsigned char offset; 3621 struct maple_node *node; 3622 struct maple_enode *next; 3623 enum maple_type type; 3624 void __rcu **slots; 3625 unsigned char end; 3626 3627 next = mas->node; 3628 do { 3629 node = mte_to_node(next); 3630 type = mte_node_type(next); 3631 pivots = ma_pivots(node, type); 3632 end = mt_pivots[type]; 3633 offset = 0; 3634 do { 3635 if (pivots[offset] >= mas->index) 3636 break; 3637 } while (++offset < end); 3638 3639 slots = ma_slots(node, type); 3640 next = mt_slot(mas->tree, slots, offset); 3641 if (unlikely(ma_dead_node(node))) 3642 goto dead_node; 3643 } while (!ma_is_leaf(type)); 3644 3645 return (void *)next; 3646 3647 dead_node: 3648 mas_reset(mas); 3649 return NULL; 3650 } 3651 3652 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 3653 /* 3654 * mas_new_root() - Create a new root node that only contains the entry passed 3655 * in. 3656 * @mas: The maple state 3657 * @entry: The entry to store. 3658 * 3659 * Only valid when the index == 0 and the last == ULONG_MAX 3660 */ 3661 static inline void mas_new_root(struct ma_state *mas, void *entry) 3662 { 3663 struct maple_enode *root = mas_root_locked(mas); 3664 enum maple_type type = maple_leaf_64; 3665 struct maple_node *node; 3666 void __rcu **slots; 3667 unsigned long *pivots; 3668 3669 WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); 3670 3671 if (!entry) { 3672 mas->depth = 0; 3673 mas_set_height(mas); 3674 rcu_assign_pointer(mas->tree->ma_root, entry); 3675 mas->status = ma_start; 3676 goto done; 3677 } 3678 3679 node = mas_pop_node(mas); 3680 pivots = ma_pivots(node, type); 3681 slots = ma_slots(node, type); 3682 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3683 mas->node = mt_mk_node(node, type); 3684 mas->status = ma_active; 3685 rcu_assign_pointer(slots[0], entry); 3686 pivots[0] = mas->last; 3687 mas->depth = 1; 3688 mas_set_height(mas); 3689 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3690 3691 done: 3692 if (xa_is_node(root)) 3693 mte_destroy_walk(root, mas->tree); 3694 3695 return; 3696 } 3697 /* 3698 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3699 * and new nodes where necessary, then place the sub-tree in the actual tree. 3700 * Note that mas is expected to point to the node which caused the store to 3701 * span. 3702 * @wr_mas: The maple write state 3703 */ 3704 static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3705 { 3706 struct maple_subtree_state mast; 3707 struct maple_big_node b_node; 3708 struct ma_state *mas; 3709 unsigned char height; 3710 3711 /* Left and Right side of spanning store */ 3712 MA_STATE(l_mas, NULL, 0, 0); 3713 MA_STATE(r_mas, NULL, 0, 0); 3714 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3715 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3716 3717 /* 3718 * A store operation that spans multiple nodes is called a spanning 3719 * store and is handled early in the store call stack by the function 3720 * mas_is_span_wr(). When a spanning store is identified, the maple 3721 * state is duplicated. The first maple state walks the left tree path 3722 * to ``index``, the duplicate walks the right tree path to ``last``. 3723 * The data in the two nodes are combined into a single node, two nodes, 3724 * or possibly three nodes (see the 3-way split above). 
A ``NULL`` 3725 * written to the last entry of a node is considered a spanning store as 3726 * a rebalance is required for the operation to complete and an overflow 3727 * of data may happen. 3728 */ 3729 mas = wr_mas->mas; 3730 trace_ma_op(__func__, mas); 3731 3732 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3733 return mas_new_root(mas, wr_mas->entry); 3734 /* 3735 * Node rebalancing may occur due to this store, so there may be three new 3736 * entries per level plus a new root. 3737 */ 3738 height = mas_mt_height(mas); 3739 3740 /* 3741 * Set up right side. Need to get to the next offset after the spanning 3742 * store to ensure it's not NULL and to combine both the next node and 3743 * the node with the start together. 3744 */ 3745 r_mas = *mas; 3746 /* Avoid overflow, walk to next slot in the tree. */ 3747 if (r_mas.last + 1) 3748 r_mas.last++; 3749 3750 r_mas.index = r_mas.last; 3751 mas_wr_walk_index(&r_wr_mas); 3752 r_mas.last = r_mas.index = mas->last; 3753 3754 /* Set up left side. */ 3755 l_mas = *mas; 3756 mas_wr_walk_index(&l_wr_mas); 3757 3758 if (!wr_mas->entry) { 3759 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3760 mas->offset = l_mas.offset; 3761 mas->index = l_mas.index; 3762 mas->last = l_mas.last = r_mas.last; 3763 } 3764 3765 /* expanding NULLs may make this cover the entire range */ 3766 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3767 mas_set_range(mas, 0, ULONG_MAX); 3768 return mas_new_root(mas, wr_mas->entry); 3769 } 3770 3771 memset(&b_node, 0, sizeof(struct maple_big_node)); 3772 /* Copy l_mas and store the value in b_node. */ 3773 mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); 3774 /* Copy r_mas into b_node if there is anything to copy. */ 3775 if (r_mas.max > r_mas.last) 3776 mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, 3777 &b_node, b_node.b_end + 1); 3778 else 3779 b_node.b_end++; 3780 3781 /* Stop spanning searches by searching for just index. */ 3782 l_mas.index = l_mas.last = mas->index; 3783 3784 mast.bn = &b_node; 3785 mast.orig_l = &l_mas; 3786 mast.orig_r = &r_mas; 3787 /* Combine l_mas and r_mas and split them up evenly again. */ 3788 return mas_spanning_rebalance(mas, &mast, height + 1); 3789 } 3790 3791 /* 3792 * mas_wr_node_store() - Attempt to store the value in a node 3793 * @wr_mas: The maple write state 3794 * 3795 * Attempts to reuse the node, but may allocate. 3796 */ 3797 static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, 3798 unsigned char new_end) 3799 { 3800 struct ma_state *mas = wr_mas->mas; 3801 void __rcu **dst_slots; 3802 unsigned long *dst_pivots; 3803 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3804 struct maple_node reuse, *newnode; 3805 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3806 bool in_rcu = mt_in_rcu(mas->tree); 3807 3808 if (mas->last == wr_mas->end_piv) 3809 offset_end++; /* don't copy this offset */ 3810 else if (unlikely(wr_mas->r_max == ULONG_MAX)) 3811 mas_bulk_rebalance(mas, mas->end, wr_mas->type); 3812 3813 /* set up node. 
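 * When the tree is in RCU mode the data is rebuilt in a freshly
 * allocated node which is then swapped in, so readers never observe a
 * partial rewrite; otherwise the on-stack 'reuse' node is used as
 * scratch and copied over the existing node at the end.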
*/ 3814 if (in_rcu) { 3815 newnode = mas_pop_node(mas); 3816 } else { 3817 memset(&reuse, 0, sizeof(struct maple_node)); 3818 newnode = &reuse; 3819 } 3820 3821 newnode->parent = mas_mn(mas)->parent; 3822 dst_pivots = ma_pivots(newnode, wr_mas->type); 3823 dst_slots = ma_slots(newnode, wr_mas->type); 3824 /* Copy from start to insert point */ 3825 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3826 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3827 3828 /* Handle insert of new range starting after old range */ 3829 if (wr_mas->r_min < mas->index) { 3830 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3831 dst_pivots[mas->offset++] = mas->index - 1; 3832 } 3833 3834 /* Store the new entry and range end. */ 3835 if (mas->offset < node_pivots) 3836 dst_pivots[mas->offset] = mas->last; 3837 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3838 3839 /* 3840 * this range wrote to the end of the node or it overwrote the rest of 3841 * the data 3842 */ 3843 if (offset_end > mas->end) 3844 goto done; 3845 3846 dst_offset = mas->offset + 1; 3847 /* Copy to the end of node if necessary. */ 3848 copy_size = mas->end - offset_end + 1; 3849 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3850 sizeof(void *) * copy_size); 3851 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3852 sizeof(unsigned long) * (copy_size - 1)); 3853 3854 if (new_end < node_pivots) 3855 dst_pivots[new_end] = mas->max; 3856 3857 done: 3858 mas_leaf_set_meta(newnode, maple_leaf_64, new_end); 3859 if (in_rcu) { 3860 struct maple_enode *old_enode = mas->node; 3861 3862 mas->node = mt_mk_node(newnode, wr_mas->type); 3863 mas_replace_node(mas, old_enode); 3864 } else { 3865 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3866 } 3867 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3868 mas_update_gap(mas); 3869 mas->end = new_end; 3870 return; 3871 } 3872 3873 /* 3874 * mas_wr_slot_store: Attempt to store a value in a slot. 3875 * @wr_mas: the maple write state 3876 */ 3877 static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) 3878 { 3879 struct ma_state *mas = wr_mas->mas; 3880 unsigned char offset = mas->offset; 3881 void __rcu **slots = wr_mas->slots; 3882 bool gap = false; 3883 3884 gap |= !mt_slot_locked(mas->tree, slots, offset); 3885 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 3886 3887 if (wr_mas->offset_end - offset == 1) { 3888 if (mas->index == wr_mas->r_min) { 3889 /* Overwriting the range and a part of the next one */ 3890 rcu_assign_pointer(slots[offset], wr_mas->entry); 3891 wr_mas->pivots[offset] = mas->last; 3892 } else { 3893 /* Overwriting a part of the range and the next one */ 3894 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3895 wr_mas->pivots[offset] = mas->index - 1; 3896 mas->offset++; /* Keep mas accurate. */ 3897 } 3898 } else { 3899 WARN_ON_ONCE(mt_in_rcu(mas->tree)); 3900 /* 3901 * Expand the range, only partially overwriting the previous and 3902 * next ranges 3903 */ 3904 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 3905 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3906 wr_mas->pivots[offset] = mas->index - 1; 3907 wr_mas->pivots[offset + 1] = mas->last; 3908 mas->offset++; /* Keep mas accurate. */ 3909 } 3910 3911 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3912 /* 3913 * Only update gap when the new entry is empty or there is an empty 3914 * entry in the original two ranges. 
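	 * An occupied range overwritten by another occupied entry cannot
	 * change the largest empty gap in the node, so the parent's gap
	 * tracking only needs refreshing when a NULL slot is created or
	 * consumed.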
3915 */ 3916 if (!wr_mas->entry || gap) 3917 mas_update_gap(mas); 3918 3919 return; 3920 } 3921 3922 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 3923 { 3924 struct ma_state *mas = wr_mas->mas; 3925 3926 if (!wr_mas->slots[wr_mas->offset_end]) { 3927 /* If this one is null, the next and prev are not */ 3928 mas->last = wr_mas->end_piv; 3929 } else { 3930 /* Check next slot(s) if we are overwriting the end */ 3931 if ((mas->last == wr_mas->end_piv) && 3932 (mas->end != wr_mas->offset_end) && 3933 !wr_mas->slots[wr_mas->offset_end + 1]) { 3934 wr_mas->offset_end++; 3935 if (wr_mas->offset_end == mas->end) 3936 mas->last = mas->max; 3937 else 3938 mas->last = wr_mas->pivots[wr_mas->offset_end]; 3939 wr_mas->end_piv = mas->last; 3940 } 3941 } 3942 3943 if (!wr_mas->content) { 3944 /* If this one is null, the next and prev are not */ 3945 mas->index = wr_mas->r_min; 3946 } else { 3947 /* Check prev slot if we are overwriting the start */ 3948 if (mas->index == wr_mas->r_min && mas->offset && 3949 !wr_mas->slots[mas->offset - 1]) { 3950 mas->offset--; 3951 wr_mas->r_min = mas->index = 3952 mas_safe_min(mas, wr_mas->pivots, mas->offset); 3953 wr_mas->r_max = wr_mas->pivots[mas->offset]; 3954 } 3955 } 3956 } 3957 3958 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 3959 { 3960 while ((wr_mas->offset_end < wr_mas->mas->end) && 3961 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 3962 wr_mas->offset_end++; 3963 3964 if (wr_mas->offset_end < wr_mas->mas->end) 3965 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 3966 else 3967 wr_mas->end_piv = wr_mas->mas->max; 3968 } 3969 3970 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 3971 { 3972 struct ma_state *mas = wr_mas->mas; 3973 unsigned char new_end = mas->end + 2; 3974 3975 new_end -= wr_mas->offset_end - mas->offset; 3976 if (wr_mas->r_min == mas->index) 3977 new_end--; 3978 3979 if (wr_mas->end_piv == mas->last) 3980 new_end--; 3981 3982 return new_end; 3983 } 3984 3985 /* 3986 * mas_wr_append: Attempt to append 3987 * @wr_mas: the maple write state 3988 * @new_end: The end of the node after the modification 3989 * 3990 * This is currently unsafe in rcu mode since the end of the node may be cached 3991 * by readers while the node contents may be updated which could result in 3992 * inaccurate information. 3993 */ 3994 static inline void mas_wr_append(struct ma_wr_state *wr_mas, 3995 unsigned char new_end) 3996 { 3997 struct ma_state *mas = wr_mas->mas; 3998 void __rcu **slots; 3999 unsigned char end = mas->end; 4000 4001 if (new_end < mt_pivots[wr_mas->type]) { 4002 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 4003 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 4004 } 4005 4006 slots = wr_mas->slots; 4007 if (new_end == end + 1) { 4008 if (mas->last == wr_mas->r_max) { 4009 /* Append to end of range */ 4010 rcu_assign_pointer(slots[new_end], wr_mas->entry); 4011 wr_mas->pivots[end] = mas->index - 1; 4012 mas->offset = new_end; 4013 } else { 4014 /* Append to start of range */ 4015 rcu_assign_pointer(slots[new_end], wr_mas->content); 4016 wr_mas->pivots[end] = mas->last; 4017 rcu_assign_pointer(slots[end], wr_mas->entry); 4018 } 4019 } else { 4020 /* Append to the range without touching any boundaries. 
*/ 4021 rcu_assign_pointer(slots[new_end], wr_mas->content); 4022 wr_mas->pivots[end + 1] = mas->last; 4023 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 4024 wr_mas->pivots[end] = mas->index - 1; 4025 mas->offset = end + 1; 4026 } 4027 4028 if (!wr_mas->content || !wr_mas->entry) 4029 mas_update_gap(mas); 4030 4031 mas->end = new_end; 4032 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 4033 return; 4034 } 4035 4036 /* 4037 * mas_wr_bnode() - Slow path for a modification. 4038 * @wr_mas: The write maple state 4039 * 4040 * This is where split, rebalance end up. 4041 */ 4042 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 4043 { 4044 struct maple_big_node b_node; 4045 4046 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 4047 memset(&b_node, 0, sizeof(struct maple_big_node)); 4048 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 4049 mas_commit_b_node(wr_mas, &b_node); 4050 } 4051 4052 /* 4053 * mas_wr_store_entry() - Internal call to store a value 4054 * @wr_mas: The maple write state 4055 */ 4056 static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas) 4057 { 4058 struct ma_state *mas = wr_mas->mas; 4059 unsigned char new_end = mas_wr_new_end(wr_mas); 4060 4061 switch (mas->store_type) { 4062 case wr_invalid: 4063 MT_BUG_ON(mas->tree, 1); 4064 return; 4065 case wr_new_root: 4066 mas_new_root(mas, wr_mas->entry); 4067 break; 4068 case wr_store_root: 4069 mas_store_root(mas, wr_mas->entry); 4070 break; 4071 case wr_exact_fit: 4072 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 4073 if (!!wr_mas->entry ^ !!wr_mas->content) 4074 mas_update_gap(mas); 4075 break; 4076 case wr_append: 4077 mas_wr_append(wr_mas, new_end); 4078 break; 4079 case wr_slot_store: 4080 mas_wr_slot_store(wr_mas); 4081 break; 4082 case wr_node_store: 4083 mas_wr_node_store(wr_mas, new_end); 4084 break; 4085 case wr_spanning_store: 4086 mas_wr_spanning_store(wr_mas); 4087 break; 4088 case wr_split_store: 4089 case wr_rebalance: 4090 mas_wr_bnode(wr_mas); 4091 break; 4092 } 4093 4094 return; 4095 } 4096 4097 static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas) 4098 { 4099 struct ma_state *mas = wr_mas->mas; 4100 4101 if (!mas_is_active(mas)) { 4102 if (mas_is_start(mas)) 4103 goto set_content; 4104 4105 if (unlikely(mas_is_paused(mas))) 4106 goto reset; 4107 4108 if (unlikely(mas_is_none(mas))) 4109 goto reset; 4110 4111 if (unlikely(mas_is_overflow(mas))) 4112 goto reset; 4113 4114 if (unlikely(mas_is_underflow(mas))) 4115 goto reset; 4116 } 4117 4118 /* 4119 * A less strict version of mas_is_span_wr() where we allow spanning 4120 * writes within this node. This is to stop partial walks in 4121 * mas_prealloc() from being reset. 4122 */ 4123 if (mas->last > mas->max) 4124 goto reset; 4125 4126 if (wr_mas->entry) 4127 goto set_content; 4128 4129 if (mte_is_leaf(mas->node) && mas->last == mas->max) 4130 goto reset; 4131 4132 goto set_content; 4133 4134 reset: 4135 mas_reset(mas); 4136 set_content: 4137 wr_mas->content = mas_start(mas); 4138 } 4139 4140 /** 4141 * mas_prealloc_calc() - Calculate number of nodes needed for a 4142 * given store oepration 4143 * @mas: The maple state 4144 * @entry: The entry to store into the tree 4145 * 4146 * Return: Number of nodes required for preallocation. 
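 *
 * As a worked example, for a tree of height 3 a spanning store may need up
 * to 3 * 3 + 1 = 10 nodes, a split 2 * 3 + 1 = 7 and a rebalance
 * 2 * 3 - 1 = 5, while an exact fit, append or slot store needs none.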
4147 */ 4148 static inline int mas_prealloc_calc(struct ma_state *mas, void *entry) 4149 { 4150 int ret = mas_mt_height(mas) * 3 + 1; 4151 4152 switch (mas->store_type) { 4153 case wr_invalid: 4154 WARN_ON_ONCE(1); 4155 break; 4156 case wr_new_root: 4157 ret = 1; 4158 break; 4159 case wr_store_root: 4160 if (likely((mas->last != 0) || (mas->index != 0))) 4161 ret = 1; 4162 else if (((unsigned long) (entry) & 3) == 2) 4163 ret = 1; 4164 else 4165 ret = 0; 4166 break; 4167 case wr_spanning_store: 4168 ret = mas_mt_height(mas) * 3 + 1; 4169 break; 4170 case wr_split_store: 4171 ret = mas_mt_height(mas) * 2 + 1; 4172 break; 4173 case wr_rebalance: 4174 ret = mas_mt_height(mas) * 2 - 1; 4175 break; 4176 case wr_node_store: 4177 ret = mt_in_rcu(mas->tree) ? 1 : 0; 4178 break; 4179 case wr_append: 4180 case wr_exact_fit: 4181 case wr_slot_store: 4182 ret = 0; 4183 } 4184 4185 return ret; 4186 } 4187 4188 /* 4189 * mas_wr_store_type() - Determine the store type for a given 4190 * store operation. 4191 * @wr_mas: The maple write state 4192 * 4193 * Return: the type of store needed for the operation 4194 */ 4195 static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) 4196 { 4197 struct ma_state *mas = wr_mas->mas; 4198 unsigned char new_end; 4199 4200 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 4201 return wr_store_root; 4202 4203 if (unlikely(!mas_wr_walk(wr_mas))) 4204 return wr_spanning_store; 4205 4206 /* At this point, we are at the leaf node that needs to be altered. */ 4207 mas_wr_end_piv(wr_mas); 4208 if (!wr_mas->entry) 4209 mas_wr_extend_null(wr_mas); 4210 4211 if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) 4212 return wr_exact_fit; 4213 4214 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 4215 return wr_new_root; 4216 4217 new_end = mas_wr_new_end(wr_mas); 4218 /* Potential spanning rebalance collapsing a node */ 4219 if (new_end < mt_min_slots[wr_mas->type]) { 4220 if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) 4221 return wr_rebalance; 4222 return wr_node_store; 4223 } 4224 4225 if (new_end >= mt_slots[wr_mas->type]) 4226 return wr_split_store; 4227 4228 if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) 4229 return wr_append; 4230 4231 if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || 4232 (wr_mas->offset_end - mas->offset == 1))) 4233 return wr_slot_store; 4234 4235 return wr_node_store; 4236 } 4237 4238 /** 4239 * mas_wr_preallocate() - Preallocate enough nodes for a store operation 4240 * @wr_mas: The maple write state 4241 * @entry: The entry that will be stored 4242 * 4243 */ 4244 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) 4245 { 4246 struct ma_state *mas = wr_mas->mas; 4247 int request; 4248 4249 mas_wr_prealloc_setup(wr_mas); 4250 mas->store_type = mas_wr_store_type(wr_mas); 4251 request = mas_prealloc_calc(mas, entry); 4252 if (!request) 4253 return; 4254 4255 mas_node_count(mas, request); 4256 } 4257 4258 /** 4259 * mas_insert() - Internal call to insert a value 4260 * @mas: The maple state 4261 * @entry: The entry to store 4262 * 4263 * Return: %NULL or the contents that already exists at the requested index 4264 * otherwise. The maple state needs to be checked for error conditions. 4265 */ 4266 static inline void *mas_insert(struct ma_state *mas, void *entry) 4267 { 4268 MA_WR_STATE(wr_mas, mas, entry); 4269 4270 /* 4271 * Inserting a new range inserts either 0, 1, or 2 pivots within the 4272 * tree. 
If the insert fits exactly into an existing gap with a value 4273 * of NULL, then the slot only needs to be written with the new value. 4274 * If the range being inserted is adjacent to another range, then only a 4275 * single pivot needs to be inserted (as well as writing the entry). If 4276 * the new range is within a gap but does not touch any other ranges, 4277 * then two pivots need to be inserted: the start - 1, and the end. As 4278 * usual, the entry must be written. Most operations require a new node 4279 * to be allocated and replace an existing node to ensure RCU safety, 4280 * when in RCU mode. The exception to requiring a newly allocated node 4281 * is when inserting at the end of a node (appending). When done 4282 * carefully, appending can reuse the node in place. 4283 */ 4284 wr_mas.content = mas_start(mas); 4285 if (wr_mas.content) 4286 goto exists; 4287 4288 mas_wr_preallocate(&wr_mas, entry); 4289 if (mas_is_err(mas)) 4290 return NULL; 4291 4292 /* spanning writes always overwrite something */ 4293 if (mas->store_type == wr_spanning_store) 4294 goto exists; 4295 4296 /* At this point, we are at the leaf node that needs to be altered. */ 4297 if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) { 4298 wr_mas.offset_end = mas->offset; 4299 wr_mas.end_piv = wr_mas.r_max; 4300 4301 if (wr_mas.content || (mas->last > wr_mas.r_max)) 4302 goto exists; 4303 } 4304 4305 mas_wr_store_entry(&wr_mas); 4306 return wr_mas.content; 4307 4308 exists: 4309 mas_set_err(mas, -EEXIST); 4310 return wr_mas.content; 4311 4312 } 4313 4314 /** 4315 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry 4316 * @mas: The maple state. 4317 * @startp: Pointer to ID. 4318 * @range_lo: Lower bound of range to search. 4319 * @range_hi: Upper bound of range to search. 4320 * @entry: The entry to store. 4321 * @next: Pointer to next ID to allocate. 4322 * @gfp: The GFP_FLAGS to use for allocations. 4323 * 4324 * Return: 0 if the allocation succeeded without wrapping, 1 if the 4325 * allocation succeeded after wrapping, or -EBUSY if there are no 4326 * free entries. 
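 *
 * An illustrative sketch of a caller (my_tree, my_entry and the 1-255 ID
 * space are hypothetical; most users will go through the
 * mtree_alloc_cyclic() wrapper rather than take the lock themselves):
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *	unsigned long id, next = 0;
 *	int ret;
 *
 *	mtree_lock(&my_tree);
 *	ret = mas_alloc_cyclic(&mas, &id, my_entry, 1, 255, &next, GFP_KERNEL);
 *	mtree_unlock(&my_tree);
 *	if (ret >= 0)
 *		pr_debug("got id %lu, wrapped=%d\n", id, ret);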
4327 */ 4328 int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, 4329 void *entry, unsigned long range_lo, unsigned long range_hi, 4330 unsigned long *next, gfp_t gfp) 4331 { 4332 unsigned long min = range_lo; 4333 int ret = 0; 4334 4335 range_lo = max(min, *next); 4336 ret = mas_empty_area(mas, range_lo, range_hi, 1); 4337 if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) { 4338 mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED; 4339 ret = 1; 4340 } 4341 if (ret < 0 && range_lo > min) { 4342 mas_reset(mas); 4343 ret = mas_empty_area(mas, min, range_hi, 1); 4344 if (ret == 0) 4345 ret = 1; 4346 } 4347 if (ret < 0) 4348 return ret; 4349 4350 do { 4351 mas_insert(mas, entry); 4352 } while (mas_nomem(mas, gfp)); 4353 if (mas_is_err(mas)) 4354 return xa_err(mas->node); 4355 4356 *startp = mas->index; 4357 *next = *startp + 1; 4358 if (*next == 0) 4359 mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; 4360 4361 mas_destroy(mas); 4362 return ret; 4363 } 4364 EXPORT_SYMBOL(mas_alloc_cyclic); 4365 4366 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4367 { 4368 retry: 4369 mas_set(mas, index); 4370 mas_state_walk(mas); 4371 if (mas_is_start(mas)) 4372 goto retry; 4373 } 4374 4375 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, 4376 struct maple_node *node, const unsigned long index) 4377 { 4378 if (unlikely(ma_dead_node(node))) { 4379 mas_rewalk(mas, index); 4380 return true; 4381 } 4382 return false; 4383 } 4384 4385 /* 4386 * mas_prev_node() - Find the prev non-null entry at the same level in the 4387 * tree. The prev value will be mas->node[mas->offset] or the status will be 4388 * ma_none. 4389 * @mas: The maple state 4390 * @min: The lower limit to search 4391 * 4392 * The prev node value will be mas->node[mas->offset] or the status will be 4393 * ma_none. 4394 * Return: 1 if the node is dead, 0 otherwise. 4395 */ 4396 static int mas_prev_node(struct ma_state *mas, unsigned long min) 4397 { 4398 enum maple_type mt; 4399 int offset, level; 4400 void __rcu **slots; 4401 struct maple_node *node; 4402 unsigned long *pivots; 4403 unsigned long max; 4404 4405 node = mas_mn(mas); 4406 if (!mas->min) 4407 goto no_entry; 4408 4409 max = mas->min - 1; 4410 if (max < min) 4411 goto no_entry; 4412 4413 level = 0; 4414 do { 4415 if (ma_is_root(node)) 4416 goto no_entry; 4417 4418 /* Walk up. 
*/ 4419 if (unlikely(mas_ascend(mas))) 4420 return 1; 4421 offset = mas->offset; 4422 level++; 4423 node = mas_mn(mas); 4424 } while (!offset); 4425 4426 offset--; 4427 mt = mte_node_type(mas->node); 4428 while (level > 1) { 4429 level--; 4430 slots = ma_slots(node, mt); 4431 mas->node = mas_slot(mas, slots, offset); 4432 if (unlikely(ma_dead_node(node))) 4433 return 1; 4434 4435 mt = mte_node_type(mas->node); 4436 node = mas_mn(mas); 4437 pivots = ma_pivots(node, mt); 4438 offset = ma_data_end(node, mt, pivots, max); 4439 if (unlikely(ma_dead_node(node))) 4440 return 1; 4441 } 4442 4443 slots = ma_slots(node, mt); 4444 mas->node = mas_slot(mas, slots, offset); 4445 pivots = ma_pivots(node, mt); 4446 if (unlikely(ma_dead_node(node))) 4447 return 1; 4448 4449 if (likely(offset)) 4450 mas->min = pivots[offset - 1] + 1; 4451 mas->max = max; 4452 mas->offset = mas_data_end(mas); 4453 if (unlikely(mte_dead_node(mas->node))) 4454 return 1; 4455 4456 mas->end = mas->offset; 4457 return 0; 4458 4459 no_entry: 4460 if (unlikely(ma_dead_node(node))) 4461 return 1; 4462 4463 mas->status = ma_underflow; 4464 return 0; 4465 } 4466 4467 /* 4468 * mas_prev_slot() - Get the entry in the previous slot 4469 * 4470 * @mas: The maple state 4471 * @min: The minimum starting range 4472 * @empty: Can be empty 4473 * 4474 * Return: The entry in the previous slot which is possibly NULL 4475 */ 4476 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4477 { 4478 void *entry; 4479 void __rcu **slots; 4480 unsigned long pivot; 4481 enum maple_type type; 4482 unsigned long *pivots; 4483 struct maple_node *node; 4484 unsigned long save_point = mas->index; 4485 4486 retry: 4487 node = mas_mn(mas); 4488 type = mte_node_type(mas->node); 4489 pivots = ma_pivots(node, type); 4490 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4491 goto retry; 4492 4493 if (mas->min <= min) { 4494 pivot = mas_safe_min(mas, pivots, mas->offset); 4495 4496 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4497 goto retry; 4498 4499 if (pivot <= min) 4500 goto underflow; 4501 } 4502 4503 again: 4504 if (likely(mas->offset)) { 4505 mas->offset--; 4506 mas->last = mas->index - 1; 4507 mas->index = mas_safe_min(mas, pivots, mas->offset); 4508 } else { 4509 if (mas->index <= min) 4510 goto underflow; 4511 4512 if (mas_prev_node(mas, min)) { 4513 mas_rewalk(mas, save_point); 4514 goto retry; 4515 } 4516 4517 if (WARN_ON_ONCE(mas_is_underflow(mas))) 4518 return NULL; 4519 4520 mas->last = mas->max; 4521 node = mas_mn(mas); 4522 type = mte_node_type(mas->node); 4523 pivots = ma_pivots(node, type); 4524 mas->index = pivots[mas->offset - 1] + 1; 4525 } 4526 4527 slots = ma_slots(node, type); 4528 entry = mas_slot(mas, slots, mas->offset); 4529 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4530 goto retry; 4531 4532 4533 if (likely(entry)) 4534 return entry; 4535 4536 if (!empty) { 4537 if (mas->index <= min) { 4538 mas->status = ma_underflow; 4539 return NULL; 4540 } 4541 4542 goto again; 4543 } 4544 4545 return entry; 4546 4547 underflow: 4548 mas->status = ma_underflow; 4549 return NULL; 4550 } 4551 4552 /* 4553 * mas_next_node() - Get the next node at the same level in the tree. 4554 * @mas: The maple state 4555 * @node: The maple node 4556 * @max: The maximum pivot value to check. 4557 * 4558 * The next value will be mas->node[mas->offset] or the status will have 4559 * overflowed. 4560 * Return: 1 on dead node, 0 otherwise. 
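 *
 * This mirrors mas_prev_node(): ascend while sitting in the last slot of
 * each parent, step one slot to the right, then follow the left-most child
 * pointers back down to the original level.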
4561 */ 4562 static int mas_next_node(struct ma_state *mas, struct maple_node *node, 4563 unsigned long max) 4564 { 4565 unsigned long min; 4566 unsigned long *pivots; 4567 struct maple_enode *enode; 4568 struct maple_node *tmp; 4569 int level = 0; 4570 unsigned char node_end; 4571 enum maple_type mt; 4572 void __rcu **slots; 4573 4574 if (mas->max >= max) 4575 goto overflow; 4576 4577 min = mas->max + 1; 4578 level = 0; 4579 do { 4580 if (ma_is_root(node)) 4581 goto overflow; 4582 4583 /* Walk up. */ 4584 if (unlikely(mas_ascend(mas))) 4585 return 1; 4586 4587 level++; 4588 node = mas_mn(mas); 4589 mt = mte_node_type(mas->node); 4590 pivots = ma_pivots(node, mt); 4591 node_end = ma_data_end(node, mt, pivots, mas->max); 4592 if (unlikely(ma_dead_node(node))) 4593 return 1; 4594 4595 } while (unlikely(mas->offset == node_end)); 4596 4597 slots = ma_slots(node, mt); 4598 mas->offset++; 4599 enode = mas_slot(mas, slots, mas->offset); 4600 if (unlikely(ma_dead_node(node))) 4601 return 1; 4602 4603 if (level > 1) 4604 mas->offset = 0; 4605 4606 while (unlikely(level > 1)) { 4607 level--; 4608 mas->node = enode; 4609 node = mas_mn(mas); 4610 mt = mte_node_type(mas->node); 4611 slots = ma_slots(node, mt); 4612 enode = mas_slot(mas, slots, 0); 4613 if (unlikely(ma_dead_node(node))) 4614 return 1; 4615 } 4616 4617 if (!mas->offset) 4618 pivots = ma_pivots(node, mt); 4619 4620 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4621 tmp = mte_to_node(enode); 4622 mt = mte_node_type(enode); 4623 pivots = ma_pivots(tmp, mt); 4624 mas->end = ma_data_end(tmp, mt, pivots, mas->max); 4625 if (unlikely(ma_dead_node(node))) 4626 return 1; 4627 4628 mas->node = enode; 4629 mas->min = min; 4630 return 0; 4631 4632 overflow: 4633 if (unlikely(ma_dead_node(node))) 4634 return 1; 4635 4636 mas->status = ma_overflow; 4637 return 0; 4638 } 4639 4640 /* 4641 * mas_next_slot() - Get the entry in the next slot 4642 * 4643 * @mas: The maple state 4644 * @max: The maximum starting range 4645 * @empty: Can be empty 4646 * 4647 * Return: The entry in the next slot which is possibly NULL 4648 */ 4649 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4650 { 4651 void __rcu **slots; 4652 unsigned long *pivots; 4653 unsigned long pivot; 4654 enum maple_type type; 4655 struct maple_node *node; 4656 unsigned long save_point = mas->last; 4657 void *entry; 4658 4659 retry: 4660 node = mas_mn(mas); 4661 type = mte_node_type(mas->node); 4662 pivots = ma_pivots(node, type); 4663 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4664 goto retry; 4665 4666 if (mas->max >= max) { 4667 if (likely(mas->offset < mas->end)) 4668 pivot = pivots[mas->offset]; 4669 else 4670 pivot = mas->max; 4671 4672 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4673 goto retry; 4674 4675 if (pivot >= max) { /* Was at the limit, next will extend beyond */ 4676 mas->status = ma_overflow; 4677 return NULL; 4678 } 4679 } 4680 4681 if (likely(mas->offset < mas->end)) { 4682 mas->index = pivots[mas->offset] + 1; 4683 again: 4684 mas->offset++; 4685 if (likely(mas->offset < mas->end)) 4686 mas->last = pivots[mas->offset]; 4687 else 4688 mas->last = mas->max; 4689 } else { 4690 if (mas->last >= max) { 4691 mas->status = ma_overflow; 4692 return NULL; 4693 } 4694 4695 if (mas_next_node(mas, node, max)) { 4696 mas_rewalk(mas, save_point); 4697 goto retry; 4698 } 4699 4700 if (WARN_ON_ONCE(mas_is_overflow(mas))) 4701 return NULL; 4702 4703 mas->offset = 0; 4704 mas->index = mas->min; 4705 node = mas_mn(mas); 4706 
type = mte_node_type(mas->node); 4707 pivots = ma_pivots(node, type); 4708 mas->last = pivots[0]; 4709 } 4710 4711 slots = ma_slots(node, type); 4712 entry = mt_slot(mas->tree, slots, mas->offset); 4713 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4714 goto retry; 4715 4716 if (entry) 4717 return entry; 4718 4719 4720 if (!empty) { 4721 if (mas->last >= max) { 4722 mas->status = ma_overflow; 4723 return NULL; 4724 } 4725 4726 mas->index = mas->last + 1; 4727 goto again; 4728 } 4729 4730 return entry; 4731 } 4732 4733 /* 4734 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4735 * highest gap address of a given size in a given node and descend. 4736 * @mas: The maple state 4737 * @size: The needed size. 4738 * 4739 * Return: True if found in a leaf, false otherwise. 4740 * 4741 */ 4742 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4743 unsigned long *gap_min, unsigned long *gap_max) 4744 { 4745 enum maple_type type = mte_node_type(mas->node); 4746 struct maple_node *node = mas_mn(mas); 4747 unsigned long *pivots, *gaps; 4748 void __rcu **slots; 4749 unsigned long gap = 0; 4750 unsigned long max, min; 4751 unsigned char offset; 4752 4753 if (unlikely(mas_is_err(mas))) 4754 return true; 4755 4756 if (ma_is_dense(type)) { 4757 /* dense nodes. */ 4758 mas->offset = (unsigned char)(mas->index - mas->min); 4759 return true; 4760 } 4761 4762 pivots = ma_pivots(node, type); 4763 slots = ma_slots(node, type); 4764 gaps = ma_gaps(node, type); 4765 offset = mas->offset; 4766 min = mas_safe_min(mas, pivots, offset); 4767 /* Skip out of bounds. */ 4768 while (mas->last < min) 4769 min = mas_safe_min(mas, pivots, --offset); 4770 4771 max = mas_safe_pivot(mas, pivots, offset, type); 4772 while (mas->index <= max) { 4773 gap = 0; 4774 if (gaps) 4775 gap = gaps[offset]; 4776 else if (!mas_slot(mas, slots, offset)) 4777 gap = max - min + 1; 4778 4779 if (gap) { 4780 if ((size <= gap) && (size <= mas->last - min + 1)) 4781 break; 4782 4783 if (!gaps) { 4784 /* Skip the next slot, it cannot be a gap. */ 4785 if (offset < 2) 4786 goto ascend; 4787 4788 offset -= 2; 4789 max = pivots[offset]; 4790 min = mas_safe_min(mas, pivots, offset); 4791 continue; 4792 } 4793 } 4794 4795 if (!offset) 4796 goto ascend; 4797 4798 offset--; 4799 max = min - 1; 4800 min = mas_safe_min(mas, pivots, offset); 4801 } 4802 4803 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4804 goto no_space; 4805 4806 if (unlikely(ma_is_leaf(type))) { 4807 mas->offset = offset; 4808 *gap_min = min; 4809 *gap_max = min + gap - 1; 4810 return true; 4811 } 4812 4813 /* descend, only happens under lock. 
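	 * The gap accounting above has already established that the child at
	 * this offset can satisfy the request, so tighten min/max and keep
	 * walking down.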
*/ 4814 mas->node = mas_slot(mas, slots, offset); 4815 mas->min = min; 4816 mas->max = max; 4817 mas->offset = mas_data_end(mas); 4818 return false; 4819 4820 ascend: 4821 if (!mte_is_root(mas->node)) 4822 return false; 4823 4824 no_space: 4825 mas_set_err(mas, -EBUSY); 4826 return false; 4827 } 4828 4829 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4830 { 4831 enum maple_type type = mte_node_type(mas->node); 4832 unsigned long pivot, min, gap = 0; 4833 unsigned char offset, data_end; 4834 unsigned long *gaps, *pivots; 4835 void __rcu **slots; 4836 struct maple_node *node; 4837 bool found = false; 4838 4839 if (ma_is_dense(type)) { 4840 mas->offset = (unsigned char)(mas->index - mas->min); 4841 return true; 4842 } 4843 4844 node = mas_mn(mas); 4845 pivots = ma_pivots(node, type); 4846 slots = ma_slots(node, type); 4847 gaps = ma_gaps(node, type); 4848 offset = mas->offset; 4849 min = mas_safe_min(mas, pivots, offset); 4850 data_end = ma_data_end(node, type, pivots, mas->max); 4851 for (; offset <= data_end; offset++) { 4852 pivot = mas_safe_pivot(mas, pivots, offset, type); 4853 4854 /* Not within lower bounds */ 4855 if (mas->index > pivot) 4856 goto next_slot; 4857 4858 if (gaps) 4859 gap = gaps[offset]; 4860 else if (!mas_slot(mas, slots, offset)) 4861 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4862 else 4863 goto next_slot; 4864 4865 if (gap >= size) { 4866 if (ma_is_leaf(type)) { 4867 found = true; 4868 break; 4869 } 4870 4871 mas->node = mas_slot(mas, slots, offset); 4872 mas->min = min; 4873 mas->max = pivot; 4874 offset = 0; 4875 break; 4876 } 4877 next_slot: 4878 min = pivot + 1; 4879 if (mas->last <= pivot) { 4880 mas_set_err(mas, -EBUSY); 4881 return true; 4882 } 4883 } 4884 4885 mas->offset = offset; 4886 return found; 4887 } 4888 4889 /** 4890 * mas_walk() - Search for @mas->index in the tree. 4891 * @mas: The maple state. 4892 * 4893 * mas->index and mas->last will be set to the range if there is a value. If 4894 * mas->status is ma_none, reset to ma_start 4895 * 4896 * Return: the entry at the location or %NULL. 4897 */ 4898 void *mas_walk(struct ma_state *mas) 4899 { 4900 void *entry; 4901 4902 if (!mas_is_active(mas) || !mas_is_start(mas)) 4903 mas->status = ma_start; 4904 retry: 4905 entry = mas_state_walk(mas); 4906 if (mas_is_start(mas)) { 4907 goto retry; 4908 } else if (mas_is_none(mas)) { 4909 mas->index = 0; 4910 mas->last = ULONG_MAX; 4911 } else if (mas_is_ptr(mas)) { 4912 if (!mas->index) { 4913 mas->last = 0; 4914 return entry; 4915 } 4916 4917 mas->index = 1; 4918 mas->last = ULONG_MAX; 4919 mas->status = ma_none; 4920 return NULL; 4921 } 4922 4923 return entry; 4924 } 4925 EXPORT_SYMBOL_GPL(mas_walk); 4926 4927 static inline bool mas_rewind_node(struct ma_state *mas) 4928 { 4929 unsigned char slot; 4930 4931 do { 4932 if (mte_is_root(mas->node)) { 4933 slot = mas->offset; 4934 if (!slot) 4935 return false; 4936 } else { 4937 mas_ascend(mas); 4938 slot = mas->offset; 4939 } 4940 } while (!slot); 4941 4942 mas->offset = --slot; 4943 return true; 4944 } 4945 4946 /* 4947 * mas_skip_node() - Internal function. Skip over a node. 4948 * @mas: The maple state. 4949 * 4950 * Return: true if there is another node, false otherwise. 
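 *
 * Used by mas_awalk() when the same node is reached twice in a row, i.e.
 * the subtree below the current offset has already been searched.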
4951 */ 4952 static inline bool mas_skip_node(struct ma_state *mas) 4953 { 4954 if (mas_is_err(mas)) 4955 return false; 4956 4957 do { 4958 if (mte_is_root(mas->node)) { 4959 if (mas->offset >= mas_data_end(mas)) { 4960 mas_set_err(mas, -EBUSY); 4961 return false; 4962 } 4963 } else { 4964 mas_ascend(mas); 4965 } 4966 } while (mas->offset >= mas_data_end(mas)); 4967 4968 mas->offset++; 4969 return true; 4970 } 4971 4972 /* 4973 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4974 * @size 4975 * @mas: The maple state 4976 * @size: The size of the gap required 4977 * 4978 * Search between @mas->index and @mas->last for a gap of @size. 4979 */ 4980 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4981 { 4982 struct maple_enode *last = NULL; 4983 4984 /* 4985 * There are 4 options: 4986 * go to child (descend) 4987 * go back to parent (ascend) 4988 * no gap found. (return, error == -EBUSY) 4989 * found the gap. (return) 4990 */ 4991 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4992 if (last == mas->node) 4993 mas_skip_node(mas); 4994 else 4995 last = mas->node; 4996 } 4997 } 4998 4999 /* 5000 * mas_sparse_area() - Internal function. Return upper or lower limit when 5001 * searching for a gap in an empty tree. 5002 * @mas: The maple state 5003 * @min: the minimum range 5004 * @max: The maximum range 5005 * @size: The size of the gap 5006 * @fwd: Searching forward or back 5007 */ 5008 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 5009 unsigned long max, unsigned long size, bool fwd) 5010 { 5011 if (!unlikely(mas_is_none(mas)) && min == 0) { 5012 min++; 5013 /* 5014 * At this time, min is increased, we need to recheck whether 5015 * the size is satisfied. 5016 */ 5017 if (min > max || max - min + 1 < size) 5018 return -EBUSY; 5019 } 5020 /* mas_is_ptr */ 5021 5022 if (fwd) { 5023 mas->index = min; 5024 mas->last = min + size - 1; 5025 } else { 5026 mas->last = max; 5027 mas->index = max - size + 1; 5028 } 5029 return 0; 5030 } 5031 5032 /* 5033 * mas_empty_area() - Get the lowest address within the range that is 5034 * sufficient for the size requested. 
5035 * @mas: The maple state 5036 * @min: The lowest value of the range 5037 * @max: The highest value of the range 5038 * @size: The size needed 5039 */ 5040 int mas_empty_area(struct ma_state *mas, unsigned long min, 5041 unsigned long max, unsigned long size) 5042 { 5043 unsigned char offset; 5044 unsigned long *pivots; 5045 enum maple_type mt; 5046 struct maple_node *node; 5047 5048 if (min > max) 5049 return -EINVAL; 5050 5051 if (size == 0 || max - min < size - 1) 5052 return -EINVAL; 5053 5054 if (mas_is_start(mas)) 5055 mas_start(mas); 5056 else if (mas->offset >= 2) 5057 mas->offset -= 2; 5058 else if (!mas_skip_node(mas)) 5059 return -EBUSY; 5060 5061 /* Empty set */ 5062 if (mas_is_none(mas) || mas_is_ptr(mas)) 5063 return mas_sparse_area(mas, min, max, size, true); 5064 5065 /* The start of the window can only be within these values */ 5066 mas->index = min; 5067 mas->last = max; 5068 mas_awalk(mas, size); 5069 5070 if (unlikely(mas_is_err(mas))) 5071 return xa_err(mas->node); 5072 5073 offset = mas->offset; 5074 node = mas_mn(mas); 5075 mt = mte_node_type(mas->node); 5076 pivots = ma_pivots(node, mt); 5077 min = mas_safe_min(mas, pivots, offset); 5078 if (mas->index < min) 5079 mas->index = min; 5080 mas->last = mas->index + size - 1; 5081 mas->end = ma_data_end(node, mt, pivots, mas->max); 5082 return 0; 5083 } 5084 EXPORT_SYMBOL_GPL(mas_empty_area); 5085 5086 /* 5087 * mas_empty_area_rev() - Get the highest address within the range that is 5088 * sufficient for the size requested. 5089 * @mas: The maple state 5090 * @min: The lowest value of the range 5091 * @max: The highest value of the range 5092 * @size: The size needed 5093 */ 5094 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 5095 unsigned long max, unsigned long size) 5096 { 5097 struct maple_enode *last = mas->node; 5098 5099 if (min > max) 5100 return -EINVAL; 5101 5102 if (size == 0 || max - min < size - 1) 5103 return -EINVAL; 5104 5105 if (mas_is_start(mas)) 5106 mas_start(mas); 5107 else if ((mas->offset < 2) && (!mas_rewind_node(mas))) 5108 return -EBUSY; 5109 5110 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 5111 return mas_sparse_area(mas, min, max, size, false); 5112 else if (mas->offset >= 2) 5113 mas->offset -= 2; 5114 else 5115 mas->offset = mas_data_end(mas); 5116 5117 5118 /* The start of the window can only be within these values. */ 5119 mas->index = min; 5120 mas->last = max; 5121 5122 while (!mas_rev_awalk(mas, size, &min, &max)) { 5123 if (last == mas->node) { 5124 if (!mas_rewind_node(mas)) 5125 return -EBUSY; 5126 } else { 5127 last = mas->node; 5128 } 5129 } 5130 5131 if (mas_is_err(mas)) 5132 return xa_err(mas->node); 5133 5134 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 5135 return -EBUSY; 5136 5137 /* Trim the upper limit to the max. */ 5138 if (max < mas->last) 5139 mas->last = max; 5140 5141 mas->index = mas->last - size + 1; 5142 mas->end = mas_data_end(mas); 5143 return 0; 5144 } 5145 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 5146 5147 /* 5148 * mte_dead_leaves() - Mark all leaves of a node as dead. 5149 * @enode: the encoded node 5150 * @mt: the maple tree 5151 * @slots: Pointer to the slot array 5152 * 5153 * Must hold the write lock. 5154 * 5155 * Return: The number of leaves marked as dead. 
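 *
 * The node type of each child is stashed in its struct maple_node so that
 * the RCU free walker can still size the slot array after the encoded
 * pointer in the parent has been replaced with the raw node pointer.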
5156 */ 5157 static inline 5158 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 5159 void __rcu **slots) 5160 { 5161 struct maple_node *node; 5162 enum maple_type type; 5163 void *entry; 5164 int offset; 5165 5166 for (offset = 0; offset < mt_slot_count(enode); offset++) { 5167 entry = mt_slot(mt, slots, offset); 5168 type = mte_node_type(entry); 5169 node = mte_to_node(entry); 5170 /* Use both node and type to catch LE & BE metadata */ 5171 if (!node || !type) 5172 break; 5173 5174 mte_set_node_dead(entry); 5175 node->type = type; 5176 rcu_assign_pointer(slots[offset], node); 5177 } 5178 5179 return offset; 5180 } 5181 5182 /** 5183 * mte_dead_walk() - Walk down a dead tree to just before the leaves 5184 * @enode: The maple encoded node 5185 * @offset: The starting offset 5186 * 5187 * Note: This can only be used from the RCU callback context. 5188 */ 5189 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 5190 { 5191 struct maple_node *node, *next; 5192 void __rcu **slots = NULL; 5193 5194 next = mte_to_node(*enode); 5195 do { 5196 *enode = ma_enode_ptr(next); 5197 node = mte_to_node(*enode); 5198 slots = ma_slots(node, node->type); 5199 next = rcu_dereference_protected(slots[offset], 5200 lock_is_held(&rcu_callback_map)); 5201 offset = 0; 5202 } while (!ma_is_leaf(next->type)); 5203 5204 return slots; 5205 } 5206 5207 /** 5208 * mt_free_walk() - Walk & free a tree in the RCU callback context 5209 * @head: The RCU head that's within the node. 5210 * 5211 * Note: This can only be used from the RCU callback context. 5212 */ 5213 static void mt_free_walk(struct rcu_head *head) 5214 { 5215 void __rcu **slots; 5216 struct maple_node *node, *start; 5217 struct maple_enode *enode; 5218 unsigned char offset; 5219 enum maple_type type; 5220 5221 node = container_of(head, struct maple_node, rcu); 5222 5223 if (ma_is_leaf(node->type)) 5224 goto free_leaf; 5225 5226 start = node; 5227 enode = mt_mk_node(node, node->type); 5228 slots = mte_dead_walk(&enode, 0); 5229 node = mte_to_node(enode); 5230 do { 5231 mt_free_bulk(node->slot_len, slots); 5232 offset = node->parent_slot + 1; 5233 enode = node->piv_parent; 5234 if (mte_to_node(enode) == node) 5235 goto free_leaf; 5236 5237 type = mte_node_type(enode); 5238 slots = ma_slots(mte_to_node(enode), type); 5239 if ((offset < mt_slots[type]) && 5240 rcu_dereference_protected(slots[offset], 5241 lock_is_held(&rcu_callback_map))) 5242 slots = mte_dead_walk(&enode, offset); 5243 node = mte_to_node(enode); 5244 } while ((node != start) || (node->slot_len < offset)); 5245 5246 slots = ma_slots(node, node->type); 5247 mt_free_bulk(node->slot_len, slots); 5248 5249 free_leaf: 5250 mt_free_rcu(&node->rcu); 5251 } 5252 5253 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 5254 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 5255 { 5256 struct maple_node *node; 5257 struct maple_enode *next = *enode; 5258 void __rcu **slots = NULL; 5259 enum maple_type type; 5260 unsigned char next_offset = 0; 5261 5262 do { 5263 *enode = next; 5264 node = mte_to_node(*enode); 5265 type = mte_node_type(*enode); 5266 slots = ma_slots(node, type); 5267 next = mt_slot_locked(mt, slots, next_offset); 5268 if ((mte_dead_node(next))) 5269 next = mt_slot_locked(mt, slots, ++next_offset); 5270 5271 mte_set_node_dead(*enode); 5272 node->type = type; 5273 node->piv_parent = prev; 5274 node->parent_slot = offset; 5275 offset = next_offset; 5276 next_offset = 0; 5277 prev = 
*enode; 5278 } while (!mte_is_leaf(next)); 5279 5280 return slots; 5281 } 5282 5283 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 5284 bool free) 5285 { 5286 void __rcu **slots; 5287 struct maple_node *node = mte_to_node(enode); 5288 struct maple_enode *start; 5289 5290 if (mte_is_leaf(enode)) { 5291 node->type = mte_node_type(enode); 5292 goto free_leaf; 5293 } 5294 5295 start = enode; 5296 slots = mte_destroy_descend(&enode, mt, start, 0); 5297 node = mte_to_node(enode); // Updated in the above call. 5298 do { 5299 enum maple_type type; 5300 unsigned char offset; 5301 struct maple_enode *parent, *tmp; 5302 5303 node->slot_len = mte_dead_leaves(enode, mt, slots); 5304 if (free) 5305 mt_free_bulk(node->slot_len, slots); 5306 offset = node->parent_slot + 1; 5307 enode = node->piv_parent; 5308 if (mte_to_node(enode) == node) 5309 goto free_leaf; 5310 5311 type = mte_node_type(enode); 5312 slots = ma_slots(mte_to_node(enode), type); 5313 if (offset >= mt_slots[type]) 5314 goto next; 5315 5316 tmp = mt_slot_locked(mt, slots, offset); 5317 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5318 parent = enode; 5319 enode = tmp; 5320 slots = mte_destroy_descend(&enode, mt, parent, offset); 5321 } 5322 next: 5323 node = mte_to_node(enode); 5324 } while (start != enode); 5325 5326 node = mte_to_node(enode); 5327 node->slot_len = mte_dead_leaves(enode, mt, slots); 5328 if (free) 5329 mt_free_bulk(node->slot_len, slots); 5330 5331 free_leaf: 5332 if (free) 5333 mt_free_rcu(&node->rcu); 5334 else 5335 mt_clear_meta(mt, node, node->type); 5336 } 5337 5338 /* 5339 * mte_destroy_walk() - Free a tree or sub-tree. 5340 * @enode: the encoded maple node (maple_enode) to start 5341 * @mt: the tree to free - needed for node types. 5342 * 5343 * Must hold the write lock. 5344 */ 5345 static inline void mte_destroy_walk(struct maple_enode *enode, 5346 struct maple_tree *mt) 5347 { 5348 struct maple_node *node = mte_to_node(enode); 5349 5350 if (mt_in_rcu(mt)) { 5351 mt_destroy_walk(enode, mt, false); 5352 call_rcu(&node->rcu, mt_free_walk); 5353 } else { 5354 mt_destroy_walk(enode, mt, true); 5355 } 5356 } 5357 /* Interface */ 5358 5359 /** 5360 * mas_store() - Store an @entry. 5361 * @mas: The maple state. 5362 * @entry: The entry to store. 5363 * 5364 * The @mas->index and @mas->last is used to set the range for the @entry. 5365 * 5366 * Return: the first entry between mas->index and mas->last or %NULL. 5367 */ 5368 void *mas_store(struct ma_state *mas, void *entry) 5369 { 5370 int request; 5371 MA_WR_STATE(wr_mas, mas, entry); 5372 5373 trace_ma_write(__func__, mas, 0, entry); 5374 #ifdef CONFIG_DEBUG_MAPLE_TREE 5375 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5376 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, 5377 entry); 5378 5379 if (mas->index > mas->last) { 5380 mas_set_err(mas, -EINVAL); 5381 return NULL; 5382 } 5383 5384 #endif 5385 5386 /* 5387 * Storing is the same operation as insert with the added caveat that it 5388 * can overwrite entries. Although this seems simple enough, one may 5389 * want to examine what happens if a single store operation was to 5390 * overwrite multiple entries within a self-balancing B-Tree. 
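	 * As an illustrative example, storing one entry over 5-100 in a tree
	 * holding 0-9, 10-50, 51-88 and 89-200 truncates the first and last
	 * of those ranges and removes the two in the middle in a single
	 * write, which is why the store type is worked out up front before
	 * any node is modified.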
5391 */ 5392 mas_wr_prealloc_setup(&wr_mas); 5393 mas->store_type = mas_wr_store_type(&wr_mas); 5394 if (mas->mas_flags & MA_STATE_PREALLOC) { 5395 mas_wr_store_entry(&wr_mas); 5396 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5397 return wr_mas.content; 5398 } 5399 5400 request = mas_prealloc_calc(mas, entry); 5401 if (!request) 5402 goto store; 5403 5404 mas_node_count(mas, request); 5405 if (mas_is_err(mas)) 5406 return NULL; 5407 5408 store: 5409 mas_wr_store_entry(&wr_mas); 5410 mas_destroy(mas); 5411 return wr_mas.content; 5412 } 5413 EXPORT_SYMBOL_GPL(mas_store); 5414 5415 /** 5416 * mas_store_gfp() - Store a value into the tree. 5417 * @mas: The maple state 5418 * @entry: The entry to store 5419 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5420 * 5421 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5422 * be allocated. 5423 */ 5424 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5425 { 5426 unsigned long index = mas->index; 5427 unsigned long last = mas->last; 5428 MA_WR_STATE(wr_mas, mas, entry); 5429 int ret = 0; 5430 5431 retry: 5432 mas_wr_preallocate(&wr_mas, entry); 5433 if (unlikely(mas_nomem(mas, gfp))) { 5434 if (!entry) 5435 __mas_set_range(mas, index, last); 5436 goto retry; 5437 } 5438 5439 if (mas_is_err(mas)) { 5440 ret = xa_err(mas->node); 5441 goto out; 5442 } 5443 5444 mas_wr_store_entry(&wr_mas); 5445 out: 5446 mas_destroy(mas); 5447 return ret; 5448 } 5449 EXPORT_SYMBOL_GPL(mas_store_gfp); 5450 5451 /** 5452 * mas_store_prealloc() - Store a value into the tree using memory 5453 * preallocated in the maple state. 5454 * @mas: The maple state 5455 * @entry: The entry to store. 5456 */ 5457 void mas_store_prealloc(struct ma_state *mas, void *entry) 5458 { 5459 MA_WR_STATE(wr_mas, mas, entry); 5460 5461 if (mas->store_type == wr_store_root) { 5462 mas_wr_prealloc_setup(&wr_mas); 5463 goto store; 5464 } 5465 5466 mas_wr_walk_descend(&wr_mas); 5467 if (mas->store_type != wr_spanning_store) { 5468 /* set wr_mas->content to current slot */ 5469 wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset); 5470 mas_wr_end_piv(&wr_mas); 5471 } 5472 5473 store: 5474 trace_ma_write(__func__, mas, 0, entry); 5475 mas_wr_store_entry(&wr_mas); 5476 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5477 mas_destroy(mas); 5478 } 5479 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5480 5481 /** 5482 * mas_preallocate() - Preallocate enough nodes for a store operation 5483 * @mas: The maple state 5484 * @entry: The entry that will be stored 5485 * @gfp: The GFP_FLAGS to use for allocations. 5486 * 5487 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5488 */ 5489 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5490 { 5491 MA_WR_STATE(wr_mas, mas, entry); 5492 int ret = 0; 5493 int request; 5494 5495 mas_wr_prealloc_setup(&wr_mas); 5496 mas->store_type = mas_wr_store_type(&wr_mas); 5497 request = mas_prealloc_calc(mas, entry); 5498 if (!request) 5499 return ret; 5500 5501 mas_node_count_gfp(mas, request, gfp); 5502 if (mas_is_err(mas)) { 5503 mas_set_alloc_req(mas, 0); 5504 ret = xa_err(mas->node); 5505 mas_destroy(mas); 5506 mas_reset(mas); 5507 return ret; 5508 } 5509 5510 mas->mas_flags |= MA_STATE_PREALLOC; 5511 return ret; 5512 } 5513 EXPORT_SYMBOL_GPL(mas_preallocate); 5514 5515 /* 5516 * mas_destroy() - destroy a maple state. 5517 * @mas: The maple state 5518 * 5519 * Upon completion, check the left-most node and rebalance against the node to 5520 * the right if necessary. 
Frees any allocated nodes associated with this maple 5521 * state. 5522 */ 5523 void mas_destroy(struct ma_state *mas) 5524 { 5525 struct maple_alloc *node; 5526 unsigned long total; 5527 5528 /* 5529 * When using mas_for_each() to insert an expected number of elements, 5530 * it is possible that the number inserted is less than the expected 5531 * number. To fix an invalid final node, a check is performed here to 5532 * rebalance the previous node with the final node. 5533 */ 5534 if (mas->mas_flags & MA_STATE_REBALANCE) { 5535 unsigned char end; 5536 if (mas_is_err(mas)) 5537 mas_reset(mas); 5538 mas_start(mas); 5539 mtree_range_walk(mas); 5540 end = mas->end + 1; 5541 if (end < mt_min_slot_count(mas->node) - 1) 5542 mas_destroy_rebalance(mas, end); 5543 5544 mas->mas_flags &= ~MA_STATE_REBALANCE; 5545 } 5546 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); 5547 5548 total = mas_allocated(mas); 5549 while (total) { 5550 node = mas->alloc; 5551 mas->alloc = node->slot[0]; 5552 if (node->node_count > 1) { 5553 size_t count = node->node_count - 1; 5554 5555 mt_free_bulk(count, (void __rcu **)&node->slot[1]); 5556 total -= count; 5557 } 5558 mt_free_one(ma_mnode_ptr(node)); 5559 total--; 5560 } 5561 5562 mas->alloc = NULL; 5563 } 5564 EXPORT_SYMBOL_GPL(mas_destroy); 5565 5566 /* 5567 * mas_expected_entries() - Set the expected number of entries that will be inserted. 5568 * @mas: The maple state 5569 * @nr_entries: The number of expected entries. 5570 * 5571 * This will attempt to pre-allocate enough nodes to store the expected number 5572 * of entries. The allocations will occur using the bulk allocator interface 5573 * for speed. Please call mas_destroy() on the @mas after inserting the entries 5574 * to ensure any unused nodes are freed. 5575 * 5576 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5577 */ 5578 int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) 5579 { 5580 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; 5581 struct maple_enode *enode = mas->node; 5582 int nr_nodes; 5583 int ret; 5584 5585 /* 5586 * Sometimes it is necessary to duplicate a tree to a new tree, such as 5587 * forking a process and duplicating the VMAs from one tree to a new 5588 * tree. When such a situation arises, it is known that the new tree is 5589 * not going to be used until the entire tree is populated. For 5590 * performance reasons, it is best to use a bulk load with RCU disabled. 5591 * This allows for optimistic splitting that favours the left and reuse 5592 * of nodes during the operation. 5593 */ 5594 5595 /* Optimize splitting for bulk insert in-order */ 5596 mas->mas_flags |= MA_STATE_BULK; 5597 5598 /* 5599 * Avoid overflow, assume a gap between each entry and a trailing null. 5600 * If this is wrong, it just means allocation can happen during 5601 * insertion of entries. 
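	 * As a rough worked example, assuming the usual 64-bit slot counts
	 * (MAPLE_RANGE64_SLOTS == 16, MAPLE_ARANGE64_SLOTS == 10), an
	 * nr_entries of 100 on an allocation tree becomes 201 assumed slots:
	 * 15 leaves plus 2 parents plus 3 working nodes, i.e. 20
	 * preallocations.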
5602 */ 5603 nr_nodes = max(nr_entries, nr_entries * 2 + 1); 5604 if (!mt_is_alloc(mas->tree)) 5605 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; 5606 5607 /* Leaves; reduce slots to keep space for expansion */ 5608 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); 5609 /* Internal nodes */ 5610 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); 5611 /* Add working room for split (2 nodes) + new parents */ 5612 mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); 5613 5614 /* Detect if allocations run out */ 5615 mas->mas_flags |= MA_STATE_PREALLOC; 5616 5617 if (!mas_is_err(mas)) 5618 return 0; 5619 5620 ret = xa_err(mas->node); 5621 mas->node = enode; 5622 mas_destroy(mas); 5623 return ret; 5624 5625 } 5626 EXPORT_SYMBOL_GPL(mas_expected_entries); 5627 5628 static bool mas_next_setup(struct ma_state *mas, unsigned long max, 5629 void **entry) 5630 { 5631 bool was_none = mas_is_none(mas); 5632 5633 if (unlikely(mas->last >= max)) { 5634 mas->status = ma_overflow; 5635 return true; 5636 } 5637 5638 switch (mas->status) { 5639 case ma_active: 5640 return false; 5641 case ma_none: 5642 fallthrough; 5643 case ma_pause: 5644 mas->status = ma_start; 5645 fallthrough; 5646 case ma_start: 5647 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5648 break; 5649 case ma_overflow: 5650 /* Overflowed before, but the max changed */ 5651 mas->status = ma_active; 5652 break; 5653 case ma_underflow: 5654 /* The user expects the mas to be one before where it is */ 5655 mas->status = ma_active; 5656 *entry = mas_walk(mas); 5657 if (*entry) 5658 return true; 5659 break; 5660 case ma_root: 5661 break; 5662 case ma_error: 5663 return true; 5664 } 5665 5666 if (likely(mas_is_active(mas))) /* Fast path */ 5667 return false; 5668 5669 if (mas_is_ptr(mas)) { 5670 *entry = NULL; 5671 if (was_none && mas->index == 0) { 5672 mas->index = mas->last = 0; 5673 return true; 5674 } 5675 mas->index = 1; 5676 mas->last = ULONG_MAX; 5677 mas->status = ma_none; 5678 return true; 5679 } 5680 5681 if (mas_is_none(mas)) 5682 return true; 5683 5684 return false; 5685 } 5686 5687 /** 5688 * mas_next() - Get the next entry. 5689 * @mas: The maple state 5690 * @max: The maximum index to check. 5691 * 5692 * Returns the next entry after @mas->index. 5693 * Must hold rcu_read_lock or the write lock. 5694 * Can return the zero entry. 5695 * 5696 * Return: The next entry or %NULL 5697 */ 5698 void *mas_next(struct ma_state *mas, unsigned long max) 5699 { 5700 void *entry = NULL; 5701 5702 if (mas_next_setup(mas, max, &entry)) 5703 return entry; 5704 5705 /* Retries on dead nodes handled by mas_next_slot */ 5706 return mas_next_slot(mas, max, false); 5707 } 5708 EXPORT_SYMBOL_GPL(mas_next); 5709 5710 /** 5711 * mas_next_range() - Advance the maple state to the next range 5712 * @mas: The maple state 5713 * @max: The maximum index to check. 5714 * 5715 * Sets @mas->index and @mas->last to the range. 5716 * Must hold rcu_read_lock or the write lock. 5717 * Can return the zero entry. 
5718 * 5719 * Return: The next entry or %NULL 5720 */ 5721 void *mas_next_range(struct ma_state *mas, unsigned long max) 5722 { 5723 void *entry = NULL; 5724 5725 if (mas_next_setup(mas, max, &entry)) 5726 return entry; 5727 5728 /* Retries on dead nodes handled by mas_next_slot */ 5729 return mas_next_slot(mas, max, true); 5730 } 5731 EXPORT_SYMBOL_GPL(mas_next_range); 5732 5733 /** 5734 * mt_next() - get the next value in the maple tree 5735 * @mt: The maple tree 5736 * @index: The start index 5737 * @max: The maximum index to check 5738 * 5739 * Takes RCU read lock internally to protect the search, which does not 5740 * protect the returned pointer after dropping RCU read lock. 5741 * See also: Documentation/core-api/maple_tree.rst 5742 * 5743 * Return: The entry higher than @index or %NULL if nothing is found. 5744 */ 5745 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5746 { 5747 void *entry = NULL; 5748 MA_STATE(mas, mt, index, index); 5749 5750 rcu_read_lock(); 5751 entry = mas_next(&mas, max); 5752 rcu_read_unlock(); 5753 return entry; 5754 } 5755 EXPORT_SYMBOL_GPL(mt_next); 5756 5757 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) 5758 { 5759 if (unlikely(mas->index <= min)) { 5760 mas->status = ma_underflow; 5761 return true; 5762 } 5763 5764 switch (mas->status) { 5765 case ma_active: 5766 return false; 5767 case ma_start: 5768 break; 5769 case ma_none: 5770 fallthrough; 5771 case ma_pause: 5772 mas->status = ma_start; 5773 break; 5774 case ma_underflow: 5775 /* underflowed before but the min changed */ 5776 mas->status = ma_active; 5777 break; 5778 case ma_overflow: 5779 /* User expects mas to be one after where it is */ 5780 mas->status = ma_active; 5781 *entry = mas_walk(mas); 5782 if (*entry) 5783 return true; 5784 break; 5785 case ma_root: 5786 break; 5787 case ma_error: 5788 return true; 5789 } 5790 5791 if (mas_is_start(mas)) 5792 mas_walk(mas); 5793 5794 if (unlikely(mas_is_ptr(mas))) { 5795 if (!mas->index) { 5796 mas->status = ma_none; 5797 return true; 5798 } 5799 mas->index = mas->last = 0; 5800 *entry = mas_root(mas); 5801 return true; 5802 } 5803 5804 if (mas_is_none(mas)) { 5805 if (mas->index) { 5806 /* Walked to out-of-range pointer? */ 5807 mas->index = mas->last = 0; 5808 mas->status = ma_root; 5809 *entry = mas_root(mas); 5810 return true; 5811 } 5812 return true; 5813 } 5814 5815 return false; 5816 } 5817 5818 /** 5819 * mas_prev() - Get the previous entry 5820 * @mas: The maple state 5821 * @min: The minimum value to check. 5822 * 5823 * Must hold rcu_read_lock or the write lock. 5824 * Will reset mas to ma_start if the status is ma_none. Will stop on not 5825 * searchable nodes. 5826 * 5827 * Return: the previous value or %NULL. 5828 */ 5829 void *mas_prev(struct ma_state *mas, unsigned long min) 5830 { 5831 void *entry = NULL; 5832 5833 if (mas_prev_setup(mas, min, &entry)) 5834 return entry; 5835 5836 return mas_prev_slot(mas, min, false); 5837 } 5838 EXPORT_SYMBOL_GPL(mas_prev); 5839 5840 /** 5841 * mas_prev_range() - Advance to the previous range 5842 * @mas: The maple state 5843 * @min: The minimum value to check. 5844 * 5845 * Sets @mas->index and @mas->last to the range. 5846 * Must hold rcu_read_lock or the write lock. 5847 * Will reset mas to ma_start if the node is ma_none. Will stop on not 5848 * searchable nodes. 5849 * 5850 * Return: the previous value or %NULL. 
5851 */ 5852 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5853 { 5854 void *entry = NULL; 5855 5856 if (mas_prev_setup(mas, min, &entry)) 5857 return entry; 5858 5859 return mas_prev_slot(mas, min, true); 5860 } 5861 EXPORT_SYMBOL_GPL(mas_prev_range); 5862 5863 /** 5864 * mt_prev() - get the previous value in the maple tree 5865 * @mt: The maple tree 5866 * @index: The start index 5867 * @min: The minimum index to check 5868 * 5869 * Takes RCU read lock internally to protect the search, which does not 5870 * protect the returned pointer after dropping RCU read lock. 5871 * See also: Documentation/core-api/maple_tree.rst 5872 * 5873 * Return: The entry before @index or %NULL if nothing is found. 5874 */ 5875 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5876 { 5877 void *entry = NULL; 5878 MA_STATE(mas, mt, index, index); 5879 5880 rcu_read_lock(); 5881 entry = mas_prev(&mas, min); 5882 rcu_read_unlock(); 5883 return entry; 5884 } 5885 EXPORT_SYMBOL_GPL(mt_prev); 5886 5887 /** 5888 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5889 * @mas: The maple state to pause 5890 * 5891 * Some users need to pause a walk and drop the lock they're holding in 5892 * order to yield to a higher priority thread or carry out an operation 5893 * on an entry. Those users should call this function before they drop 5894 * the lock. It resets the @mas to be suitable for the next iteration 5895 * of the loop after the user has reacquired the lock. If most entries 5896 * found during a walk require you to call mas_pause(), the mt_for_each() 5897 * iterator may be more appropriate. 5898 * 5899 */ 5900 void mas_pause(struct ma_state *mas) 5901 { 5902 mas->status = ma_pause; 5903 mas->node = NULL; 5904 } 5905 EXPORT_SYMBOL_GPL(mas_pause); 5906 5907 /** 5908 * mas_find_setup() - Internal function to set up mas_find*(). 5909 * @mas: The maple state 5910 * @max: The maximum index 5911 * @entry: Pointer to the entry 5912 * 5913 * Returns: True if entry is the answer, false otherwise. 
5914 */ 5915 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) 5916 { 5917 switch (mas->status) { 5918 case ma_active: 5919 if (mas->last < max) 5920 return false; 5921 return true; 5922 case ma_start: 5923 break; 5924 case ma_pause: 5925 if (unlikely(mas->last >= max)) 5926 return true; 5927 5928 mas->index = ++mas->last; 5929 mas->status = ma_start; 5930 break; 5931 case ma_none: 5932 if (unlikely(mas->last >= max)) 5933 return true; 5934 5935 mas->index = mas->last; 5936 mas->status = ma_start; 5937 break; 5938 case ma_underflow: 5939 /* mas is pointing at entry before unable to go lower */ 5940 if (unlikely(mas->index >= max)) { 5941 mas->status = ma_overflow; 5942 return true; 5943 } 5944 5945 mas->status = ma_active; 5946 *entry = mas_walk(mas); 5947 if (*entry) 5948 return true; 5949 break; 5950 case ma_overflow: 5951 if (unlikely(mas->last >= max)) 5952 return true; 5953 5954 mas->status = ma_active; 5955 *entry = mas_walk(mas); 5956 if (*entry) 5957 return true; 5958 break; 5959 case ma_root: 5960 break; 5961 case ma_error: 5962 return true; 5963 } 5964 5965 if (mas_is_start(mas)) { 5966 /* First run or continue */ 5967 if (mas->index > max) 5968 return true; 5969 5970 *entry = mas_walk(mas); 5971 if (*entry) 5972 return true; 5973 5974 } 5975 5976 if (unlikely(mas_is_ptr(mas))) 5977 goto ptr_out_of_range; 5978 5979 if (unlikely(mas_is_none(mas))) 5980 return true; 5981 5982 if (mas->index == max) 5983 return true; 5984 5985 return false; 5986 5987 ptr_out_of_range: 5988 mas->status = ma_none; 5989 mas->index = 1; 5990 mas->last = ULONG_MAX; 5991 return true; 5992 } 5993 5994 /** 5995 * mas_find() - On the first call, find the entry at or after mas->index up to 5996 * %max. Otherwise, find the entry after mas->index. 5997 * @mas: The maple state 5998 * @max: The maximum value to check. 5999 * 6000 * Must hold rcu_read_lock or the write lock. 6001 * If an entry exists, last and index are updated accordingly. 6002 * May set @mas->status to ma_overflow. 6003 * 6004 * Return: The entry or %NULL. 6005 */ 6006 void *mas_find(struct ma_state *mas, unsigned long max) 6007 { 6008 void *entry = NULL; 6009 6010 if (mas_find_setup(mas, max, &entry)) 6011 return entry; 6012 6013 /* Retries on dead nodes handled by mas_next_slot */ 6014 entry = mas_next_slot(mas, max, false); 6015 /* Ignore overflow */ 6016 mas->status = ma_active; 6017 return entry; 6018 } 6019 EXPORT_SYMBOL_GPL(mas_find); 6020 6021 /** 6022 * mas_find_range() - On the first call, find the entry at or after 6023 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 6024 * @mas: The maple state 6025 * @max: The maximum value to check. 6026 * 6027 * Must hold rcu_read_lock or the write lock. 6028 * If an entry exists, last and index are updated accordingly. 6029 * May set @mas->status to ma_overflow. 6030 * 6031 * Return: The entry or %NULL. 6032 */ 6033 void *mas_find_range(struct ma_state *mas, unsigned long max) 6034 { 6035 void *entry = NULL; 6036 6037 if (mas_find_setup(mas, max, &entry)) 6038 return entry; 6039 6040 /* Retries on dead nodes handled by mas_next_slot */ 6041 return mas_next_slot(mas, max, true); 6042 } 6043 EXPORT_SYMBOL_GPL(mas_find_range); 6044 6045 /** 6046 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 6047 * @mas: The maple state 6048 * @min: The minimum index 6049 * @entry: Pointer to the entry 6050 * 6051 * Returns: True if entry is the answer, false otherwise. 
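 *
 * This mirrors mas_find_setup() for the reverse direction. A sketch of the
 * usual caller loop (illustrative only; "mt" is a caller-defined, populated
 * tree):
 *
 *	MA_STATE(mas, &mt, ULONG_MAX, ULONG_MAX);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_find_rev(&mas, 0)) != NULL)
 *		pr_info("%lx-%lx -> %p\n", mas.index, mas.last, entry);
 *	rcu_read_unlock();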
6052 */ 6053 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 6054 void **entry) 6055 { 6056 6057 switch (mas->status) { 6058 case ma_active: 6059 goto active; 6060 case ma_start: 6061 break; 6062 case ma_pause: 6063 if (unlikely(mas->index <= min)) { 6064 mas->status = ma_underflow; 6065 return true; 6066 } 6067 mas->last = --mas->index; 6068 mas->status = ma_start; 6069 break; 6070 case ma_none: 6071 if (mas->index <= min) 6072 goto none; 6073 6074 mas->last = mas->index; 6075 mas->status = ma_start; 6076 break; 6077 case ma_overflow: /* user expects the mas to be one after where it is */ 6078 if (unlikely(mas->index <= min)) { 6079 mas->status = ma_underflow; 6080 return true; 6081 } 6082 6083 mas->status = ma_active; 6084 break; 6085 case ma_underflow: /* user expects the mas to be one before where it is */ 6086 if (unlikely(mas->index <= min)) 6087 return true; 6088 6089 mas->status = ma_active; 6090 break; 6091 case ma_root: 6092 break; 6093 case ma_error: 6094 return true; 6095 } 6096 6097 if (mas_is_start(mas)) { 6098 /* First run or continue */ 6099 if (mas->index < min) 6100 return true; 6101 6102 *entry = mas_walk(mas); 6103 if (*entry) 6104 return true; 6105 } 6106 6107 if (unlikely(mas_is_ptr(mas))) 6108 goto none; 6109 6110 if (unlikely(mas_is_none(mas))) { 6111 /* 6112 * Walked to the location, and there was nothing so the previous 6113 * location is 0. 6114 */ 6115 mas->last = mas->index = 0; 6116 mas->status = ma_root; 6117 *entry = mas_root(mas); 6118 return true; 6119 } 6120 6121 active: 6122 if (mas->index < min) 6123 return true; 6124 6125 return false; 6126 6127 none: 6128 mas->status = ma_none; 6129 return true; 6130 } 6131 6132 /** 6133 * mas_find_rev: On the first call, find the first non-null entry at or below 6134 * mas->index down to %min. Otherwise find the first non-null entry below 6135 * mas->index down to %min. 6136 * @mas: The maple state 6137 * @min: The minimum value to check. 6138 * 6139 * Must hold rcu_read_lock or the write lock. 6140 * If an entry exists, last and index are updated accordingly. 6141 * May set @mas->status to ma_underflow. 6142 * 6143 * Return: The entry or %NULL. 6144 */ 6145 void *mas_find_rev(struct ma_state *mas, unsigned long min) 6146 { 6147 void *entry = NULL; 6148 6149 if (mas_find_rev_setup(mas, min, &entry)) 6150 return entry; 6151 6152 /* Retries on dead nodes handled by mas_prev_slot */ 6153 return mas_prev_slot(mas, min, false); 6154 6155 } 6156 EXPORT_SYMBOL_GPL(mas_find_rev); 6157 6158 /** 6159 * mas_find_range_rev: On the first call, find the first non-null entry at or 6160 * below mas->index down to %min. Otherwise advance to the previous slot after 6161 * mas->index down to %min. 6162 * @mas: The maple state 6163 * @min: The minimum value to check. 6164 * 6165 * Must hold rcu_read_lock or the write lock. 6166 * If an entry exists, last and index are updated accordingly. 6167 * May set @mas->status to ma_underflow. 6168 * 6169 * Return: The entry or %NULL. 6170 */ 6171 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 6172 { 6173 void *entry = NULL; 6174 6175 if (mas_find_rev_setup(mas, min, &entry)) 6176 return entry; 6177 6178 /* Retries on dead nodes handled by mas_prev_slot */ 6179 return mas_prev_slot(mas, min, true); 6180 } 6181 EXPORT_SYMBOL_GPL(mas_find_range_rev); 6182 6183 /** 6184 * mas_erase() - Find the range in which index resides and erase the entire 6185 * range. 6186 * @mas: The maple state 6187 * 6188 * Must hold the write lock. 
6189 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 6190 * erases that range. 6191 * 6192 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 6193 */ 6194 void *mas_erase(struct ma_state *mas) 6195 { 6196 void *entry; 6197 unsigned long index = mas->index; 6198 MA_WR_STATE(wr_mas, mas, NULL); 6199 6200 if (!mas_is_active(mas) || !mas_is_start(mas)) 6201 mas->status = ma_start; 6202 6203 write_retry: 6204 entry = mas_state_walk(mas); 6205 if (!entry) 6206 return NULL; 6207 6208 /* Must reset to ensure spanning writes of last slot are detected */ 6209 mas_reset(mas); 6210 mas_wr_preallocate(&wr_mas, NULL); 6211 if (mas_nomem(mas, GFP_KERNEL)) { 6212 /* in case the range of entry changed when unlocked */ 6213 mas->index = mas->last = index; 6214 goto write_retry; 6215 } 6216 6217 if (mas_is_err(mas)) 6218 goto out; 6219 6220 mas_wr_store_entry(&wr_mas); 6221 out: 6222 mas_destroy(mas); 6223 return entry; 6224 } 6225 EXPORT_SYMBOL_GPL(mas_erase); 6226 6227 /** 6228 * mas_nomem() - Check if there was an error allocating and do the allocation 6229 * if necessary If there are allocations, then free them. 6230 * @mas: The maple state 6231 * @gfp: The GFP_FLAGS to use for allocations 6232 * Return: true on allocation, false otherwise. 6233 */ 6234 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 6235 __must_hold(mas->tree->ma_lock) 6236 { 6237 if (likely(mas->node != MA_ERROR(-ENOMEM))) 6238 return false; 6239 6240 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 6241 mtree_unlock(mas->tree); 6242 mas_alloc_nodes(mas, gfp); 6243 mtree_lock(mas->tree); 6244 } else { 6245 mas_alloc_nodes(mas, gfp); 6246 } 6247 6248 if (!mas_allocated(mas)) 6249 return false; 6250 6251 mas->status = ma_start; 6252 return true; 6253 } 6254 6255 void __init maple_tree_init(void) 6256 { 6257 maple_node_cache = kmem_cache_create("maple_node", 6258 sizeof(struct maple_node), sizeof(struct maple_node), 6259 SLAB_PANIC, NULL); 6260 } 6261 6262 /** 6263 * mtree_load() - Load a value stored in a maple tree 6264 * @mt: The maple tree 6265 * @index: The index to load 6266 * 6267 * Return: the entry or %NULL 6268 */ 6269 void *mtree_load(struct maple_tree *mt, unsigned long index) 6270 { 6271 MA_STATE(mas, mt, index, index); 6272 void *entry; 6273 6274 trace_ma_read(__func__, &mas); 6275 rcu_read_lock(); 6276 retry: 6277 entry = mas_start(&mas); 6278 if (unlikely(mas_is_none(&mas))) 6279 goto unlock; 6280 6281 if (unlikely(mas_is_ptr(&mas))) { 6282 if (index) 6283 entry = NULL; 6284 6285 goto unlock; 6286 } 6287 6288 entry = mtree_lookup_walk(&mas); 6289 if (!entry && unlikely(mas_is_start(&mas))) 6290 goto retry; 6291 unlock: 6292 rcu_read_unlock(); 6293 if (xa_is_zero(entry)) 6294 return NULL; 6295 6296 return entry; 6297 } 6298 EXPORT_SYMBOL(mtree_load); 6299 6300 /** 6301 * mtree_store_range() - Store an entry at a given range. 6302 * @mt: The maple tree 6303 * @index: The start of the range 6304 * @last: The end of the range 6305 * @entry: The entry to store 6306 * @gfp: The GFP_FLAGS to use for allocations 6307 * 6308 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6309 * be allocated. 
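 *
 * A minimal usage sketch (illustrative only; the stored value is an arbitrary
 * example payload and error handling is trimmed):
 *
 *	DEFINE_MTREE(mt);
 *
 *	mtree_store_range(&mt, 10, 19, xa_mk_value(0xa), GFP_KERNEL);
 *	WARN_ON(mtree_load(&mt, 15) != xa_mk_value(0xa));
 *	mtree_destroy(&mt);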
6310 */ 6311 int mtree_store_range(struct maple_tree *mt, unsigned long index, 6312 unsigned long last, void *entry, gfp_t gfp) 6313 { 6314 MA_STATE(mas, mt, index, last); 6315 int ret = 0; 6316 6317 trace_ma_write(__func__, &mas, 0, entry); 6318 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6319 return -EINVAL; 6320 6321 if (index > last) 6322 return -EINVAL; 6323 6324 mtree_lock(mt); 6325 ret = mas_store_gfp(&mas, entry, gfp); 6326 mtree_unlock(mt); 6327 6328 return ret; 6329 } 6330 EXPORT_SYMBOL(mtree_store_range); 6331 6332 /** 6333 * mtree_store() - Store an entry at a given index. 6334 * @mt: The maple tree 6335 * @index: The index to store the value 6336 * @entry: The entry to store 6337 * @gfp: The GFP_FLAGS to use for allocations 6338 * 6339 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6340 * be allocated. 6341 */ 6342 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 6343 gfp_t gfp) 6344 { 6345 return mtree_store_range(mt, index, index, entry, gfp); 6346 } 6347 EXPORT_SYMBOL(mtree_store); 6348 6349 /** 6350 * mtree_insert_range() - Insert an entry at a given range if there is no value. 6351 * @mt: The maple tree 6352 * @first: The start of the range 6353 * @last: The end of the range 6354 * @entry: The entry to store 6355 * @gfp: The GFP_FLAGS to use for allocations. 6356 * 6357 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid 6358 * request, -ENOMEM if memory could not be allocated. 6359 */ 6360 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 6361 unsigned long last, void *entry, gfp_t gfp) 6362 { 6363 MA_STATE(ms, mt, first, last); 6364 int ret = 0; 6365 6366 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6367 return -EINVAL; 6368 6369 if (first > last) 6370 return -EINVAL; 6371 6372 mtree_lock(mt); 6373 retry: 6374 mas_insert(&ms, entry); 6375 if (mas_nomem(&ms, gfp)) 6376 goto retry; 6377 6378 mtree_unlock(mt); 6379 if (mas_is_err(&ms)) 6380 ret = xa_err(ms.node); 6381 6382 mas_destroy(&ms); 6383 return ret; 6384 } 6385 EXPORT_SYMBOL(mtree_insert_range); 6386 6387 /** 6388 * mtree_insert() - Insert an entry at a given index if there is no value. 6389 * @mt: The maple tree 6390 * @index: The index to store the value 6391 * @entry: The entry to store 6392 * @gfp: The GFP_FLAGS to use for allocations. 6393 * 6394 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid 6395 * request, -ENOMEM if memory could not be allocated. 6396 */ 6397 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6398 gfp_t gfp) 6399 { 6400 return mtree_insert_range(mt, index, index, entry, gfp); 6401 } 6402 EXPORT_SYMBOL(mtree_insert); 6403 6404 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6405 void *entry, unsigned long size, unsigned long min, 6406 unsigned long max, gfp_t gfp) 6407 { 6408 int ret = 0; 6409 6410 MA_STATE(mas, mt, 0, 0); 6411 if (!mt_is_alloc(mt)) 6412 return -EINVAL; 6413 6414 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6415 return -EINVAL; 6416 6417 mtree_lock(mt); 6418 retry: 6419 ret = mas_empty_area(&mas, min, max, size); 6420 if (ret) 6421 goto unlock; 6422 6423 mas_insert(&mas, entry); 6424 /* 6425 * mas_nomem() may release the lock, causing the allocated area 6426 * to be unavailable, so try to allocate a free area again.
6427 */ 6428 if (mas_nomem(&mas, gfp)) 6429 goto retry; 6430 6431 if (mas_is_err(&mas)) 6432 ret = xa_err(mas.node); 6433 else 6434 *startp = mas.index; 6435 6436 unlock: 6437 mtree_unlock(mt); 6438 mas_destroy(&mas); 6439 return ret; 6440 } 6441 EXPORT_SYMBOL(mtree_alloc_range); 6442 6443 /** 6444 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree. 6445 * @mt: The maple tree. 6446 * @startp: Pointer to ID. 6447 * @range_lo: Lower bound of range to search. 6448 * @range_hi: Upper bound of range to search. 6449 * @entry: The entry to store. 6450 * @next: Pointer to next ID to allocate. 6451 * @gfp: The GFP_FLAGS to use for allocations. 6452 * 6453 * Finds an empty entry in @mt after @next, stores the new index into 6454 * the @id pointer, stores the entry at that index, then updates @next. 6455 * 6456 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag. 6457 * 6458 * Context: Any context. Takes and releases the mt.lock. May sleep if 6459 * the @gfp flags permit. 6460 * 6461 * Return: 0 if the allocation succeeded without wrapping, 1 if the 6462 * allocation succeeded after wrapping, -ENOMEM if memory could not be 6463 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no 6464 * free entries. 6465 */ 6466 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, 6467 void *entry, unsigned long range_lo, unsigned long range_hi, 6468 unsigned long *next, gfp_t gfp) 6469 { 6470 int ret; 6471 6472 MA_STATE(mas, mt, 0, 0); 6473 6474 if (!mt_is_alloc(mt)) 6475 return -EINVAL; 6476 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6477 return -EINVAL; 6478 mtree_lock(mt); 6479 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, 6480 next, gfp); 6481 mtree_unlock(mt); 6482 return ret; 6483 } 6484 EXPORT_SYMBOL(mtree_alloc_cyclic); 6485 6486 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6487 void *entry, unsigned long size, unsigned long min, 6488 unsigned long max, gfp_t gfp) 6489 { 6490 int ret = 0; 6491 6492 MA_STATE(mas, mt, 0, 0); 6493 if (!mt_is_alloc(mt)) 6494 return -EINVAL; 6495 6496 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6497 return -EINVAL; 6498 6499 mtree_lock(mt); 6500 retry: 6501 ret = mas_empty_area_rev(&mas, min, max, size); 6502 if (ret) 6503 goto unlock; 6504 6505 mas_insert(&mas, entry); 6506 /* 6507 * mas_nomem() may release the lock, causing the allocated area 6508 * to be unavailable, so try to allocate a free area again. 6509 */ 6510 if (mas_nomem(&mas, gfp)) 6511 goto retry; 6512 6513 if (mas_is_err(&mas)) 6514 ret = xa_err(mas.node); 6515 else 6516 *startp = mas.index; 6517 6518 unlock: 6519 mtree_unlock(mt); 6520 mas_destroy(&mas); 6521 return ret; 6522 } 6523 EXPORT_SYMBOL(mtree_alloc_rrange); 6524 6525 /** 6526 * mtree_erase() - Find an index and erase the entire range. 6527 * @mt: The maple tree 6528 * @index: The index to erase 6529 * 6530 * Erasing is the same as a walk to an entry then a store of a NULL to that 6531 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6532 * 6533 * Return: The entry stored at the @index or %NULL 6534 */ 6535 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6536 { 6537 void *entry = NULL; 6538 6539 MA_STATE(mas, mt, index, index); 6540 trace_ma_op(__func__, &mas); 6541 6542 mtree_lock(mt); 6543 entry = mas_erase(&mas); 6544 mtree_unlock(mt); 6545 6546 return entry; 6547 } 6548 EXPORT_SYMBOL(mtree_erase); 6549 6550 /* 6551 * mas_dup_free() - Free an incomplete duplication of a tree. 
6552 * @mas: The maple state of an incomplete tree. 6553 * 6554 * The parameter @mas->node passed in indicates that the allocation failed on 6555 * this node. This function frees all nodes starting from @mas->node in the 6556 * reverse order of mas_dup_build(). There is no need to hold the source tree 6557 * lock at this time. 6558 */ 6559 static void mas_dup_free(struct ma_state *mas) 6560 { 6561 struct maple_node *node; 6562 enum maple_type type; 6563 void __rcu **slots; 6564 unsigned char count, i; 6565 6566 /* Maybe the first node allocation failed. */ 6567 if (mas_is_none(mas)) 6568 return; 6569 6570 while (!mte_is_root(mas->node)) { 6571 mas_ascend(mas); 6572 if (mas->offset) { 6573 mas->offset--; 6574 do { 6575 mas_descend(mas); 6576 mas->offset = mas_data_end(mas); 6577 } while (!mte_is_leaf(mas->node)); 6578 6579 mas_ascend(mas); 6580 } 6581 6582 node = mte_to_node(mas->node); 6583 type = mte_node_type(mas->node); 6584 slots = ma_slots(node, type); 6585 count = mas_data_end(mas) + 1; 6586 for (i = 0; i < count; i++) 6587 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; 6588 mt_free_bulk(count, slots); 6589 } 6590 6591 node = mte_to_node(mas->node); 6592 mt_free_one(node); 6593 } 6594 6595 /* 6596 * mas_copy_node() - Copy a maple node and replace the parent. 6597 * @mas: The maple state of source tree. 6598 * @new_mas: The maple state of new tree. 6599 * @parent: The parent of the new node. 6600 * 6601 * Copy @mas->node to @new_mas->node, set @parent to be the parent of 6602 * @new_mas->node. The copy itself cannot fail; child node allocation is handled separately by mas_dup_alloc(). 6603 */ 6604 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, 6605 struct maple_pnode *parent) 6606 { 6607 struct maple_node *node = mte_to_node(mas->node); 6608 struct maple_node *new_node = mte_to_node(new_mas->node); 6609 unsigned long val; 6610 6611 /* Copy the node completely. */ 6612 memcpy(new_node, node, sizeof(struct maple_node)); 6613 /* Update the parent node pointer. */ 6614 val = (unsigned long)node->parent & MAPLE_NODE_MASK; 6615 new_node->parent = ma_parent_ptr(val | (unsigned long)parent); 6616 } 6617 6618 /* 6619 * mas_dup_alloc() - Allocate child nodes for a maple node. 6620 * @mas: The maple state of source tree. 6621 * @new_mas: The maple state of new tree. 6622 * @gfp: The GFP_FLAGS to use for allocations. 6623 * 6624 * This function allocates child nodes for @new_mas->node during the duplication 6625 * process. If memory allocation fails, @mas is set to -ENOMEM. 6626 */ 6627 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, 6628 gfp_t gfp) 6629 { 6630 struct maple_node *node = mte_to_node(mas->node); 6631 struct maple_node *new_node = mte_to_node(new_mas->node); 6632 enum maple_type type; 6633 unsigned char request, count, i; 6634 void __rcu **slots; 6635 void __rcu **new_slots; 6636 unsigned long val; 6637 6638 /* Allocate memory for child nodes. */ 6639 type = mte_node_type(mas->node); 6640 new_slots = ma_slots(new_node, type); 6641 request = mas_data_end(mas) + 1; 6642 count = mt_alloc_bulk(gfp, request, (void **)new_slots); 6643 if (unlikely(count < request)) { 6644 memset(new_slots, 0, request * sizeof(void *)); 6645 mas_set_err(mas, -ENOMEM); 6646 return; 6647 } 6648 6649 /* Restore node type information in slots.
*/ 6650 slots = ma_slots(node, type); 6651 for (i = 0; i < count; i++) { 6652 val = (unsigned long)mt_slot_locked(mas->tree, slots, i); 6653 val &= MAPLE_NODE_MASK; 6654 ((unsigned long *)new_slots)[i] |= val; 6655 } 6656 } 6657 6658 /* 6659 * mas_dup_build() - Build a new maple tree from a source tree 6660 * @mas: The maple state of source tree, need to be in MAS_START state. 6661 * @new_mas: The maple state of new tree, need to be in MAS_START state. 6662 * @gfp: The GFP_FLAGS to use for allocations. 6663 * 6664 * This function builds a new tree in DFS preorder. If the memory allocation 6665 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the 6666 * last node. mas_dup_free() will free the incomplete duplication of a tree. 6667 * 6668 * Note that the attributes of the two trees need to be exactly the same, and the 6669 * new tree needs to be empty, otherwise -EINVAL will be set in @mas. 6670 */ 6671 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, 6672 gfp_t gfp) 6673 { 6674 struct maple_node *node; 6675 struct maple_pnode *parent = NULL; 6676 struct maple_enode *root; 6677 enum maple_type type; 6678 6679 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || 6680 unlikely(!mtree_empty(new_mas->tree))) { 6681 mas_set_err(mas, -EINVAL); 6682 return; 6683 } 6684 6685 root = mas_start(mas); 6686 if (mas_is_ptr(mas) || mas_is_none(mas)) 6687 goto set_new_tree; 6688 6689 node = mt_alloc_one(gfp); 6690 if (!node) { 6691 new_mas->status = ma_none; 6692 mas_set_err(mas, -ENOMEM); 6693 return; 6694 } 6695 6696 type = mte_node_type(mas->node); 6697 root = mt_mk_node(node, type); 6698 new_mas->node = root; 6699 new_mas->min = 0; 6700 new_mas->max = ULONG_MAX; 6701 root = mte_mk_root(root); 6702 while (1) { 6703 mas_copy_node(mas, new_mas, parent); 6704 if (!mte_is_leaf(mas->node)) { 6705 /* Only allocate child nodes for non-leaf nodes. */ 6706 mas_dup_alloc(mas, new_mas, gfp); 6707 if (unlikely(mas_is_err(mas))) 6708 return; 6709 } else { 6710 /* 6711 * This is the last leaf node and duplication is 6712 * completed. 6713 */ 6714 if (mas->max == ULONG_MAX) 6715 goto done; 6716 6717 /* This is not the last leaf node and needs to go up. */ 6718 do { 6719 mas_ascend(mas); 6720 mas_ascend(new_mas); 6721 } while (mas->offset == mas_data_end(mas)); 6722 6723 /* Move to the next subtree. */ 6724 mas->offset++; 6725 new_mas->offset++; 6726 } 6727 6728 mas_descend(mas); 6729 parent = ma_parent_ptr(mte_to_node(new_mas->node)); 6730 mas_descend(new_mas); 6731 mas->offset = 0; 6732 new_mas->offset = 0; 6733 } 6734 done: 6735 /* Specially handle the parent of the root node. */ 6736 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); 6737 set_new_tree: 6738 /* Make them the same height */ 6739 new_mas->tree->ma_flags = mas->tree->ma_flags; 6740 rcu_assign_pointer(new_mas->tree->ma_root, root); 6741 } 6742 6743 /** 6744 * __mt_dup(): Duplicate an entire maple tree 6745 * @mt: The source maple tree 6746 * @new: The new maple tree 6747 * @gfp: The GFP_FLAGS to use for allocations 6748 * 6749 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6750 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6751 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6752 * source node except for all the addresses stored in it. It will be faster than 6753 * traversing all elements in the source tree and inserting them one by one into 6754 * the new tree. 
6755 * The user needs to ensure that the attributes of the source tree and the new 6756 * tree are the same, and the new tree needs to be an empty tree, otherwise 6757 * -EINVAL will be returned. 6758 * Note that the user needs to manually lock the source tree and the new tree. 6759 * 6760 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6761 * the attributes of the two trees are different or the new tree is not an empty 6762 * tree. 6763 */ 6764 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6765 { 6766 int ret = 0; 6767 MA_STATE(mas, mt, 0, 0); 6768 MA_STATE(new_mas, new, 0, 0); 6769 6770 mas_dup_build(&mas, &new_mas, gfp); 6771 if (unlikely(mas_is_err(&mas))) { 6772 ret = xa_err(mas.node); 6773 if (ret == -ENOMEM) 6774 mas_dup_free(&new_mas); 6775 } 6776 6777 return ret; 6778 } 6779 EXPORT_SYMBOL(__mt_dup); 6780 6781 /** 6782 * mtree_dup(): Duplicate an entire maple tree 6783 * @mt: The source maple tree 6784 * @new: The new maple tree 6785 * @gfp: The GFP_FLAGS to use for allocations 6786 * 6787 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6788 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6789 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6790 * source node except for all the addresses stored in it. It will be faster than 6791 * traversing all elements in the source tree and inserting them one by one into 6792 * the new tree. 6793 * The user needs to ensure that the attributes of the source tree and the new 6794 * tree are the same, and the new tree needs to be an empty tree, otherwise 6795 * -EINVAL will be returned. 6796 * 6797 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6798 * the attributes of the two trees are different or the new tree is not an empty 6799 * tree. 6800 */ 6801 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6802 { 6803 int ret = 0; 6804 MA_STATE(mas, mt, 0, 0); 6805 MA_STATE(new_mas, new, 0, 0); 6806 6807 mas_lock(&new_mas); 6808 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); 6809 mas_dup_build(&mas, &new_mas, gfp); 6810 mas_unlock(&mas); 6811 if (unlikely(mas_is_err(&mas))) { 6812 ret = xa_err(mas.node); 6813 if (ret == -ENOMEM) 6814 mas_dup_free(&new_mas); 6815 } 6816 6817 mas_unlock(&new_mas); 6818 return ret; 6819 } 6820 EXPORT_SYMBOL(mtree_dup); 6821 6822 /** 6823 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6824 * @mt: The maple tree 6825 * 6826 * Note: Does not handle locking. 6827 */ 6828 void __mt_destroy(struct maple_tree *mt) 6829 { 6830 void *root = mt_root_locked(mt); 6831 6832 rcu_assign_pointer(mt->ma_root, NULL); 6833 if (xa_is_node(root)) 6834 mte_destroy_walk(root, mt); 6835 6836 mt->ma_flags = mt_attr(mt); 6837 } 6838 EXPORT_SYMBOL_GPL(__mt_destroy); 6839 6840 /** 6841 * mtree_destroy() - Destroy a maple tree 6842 * @mt: The maple tree 6843 * 6844 * Frees all resources used by the tree. Handles locking. 6845 */ 6846 void mtree_destroy(struct maple_tree *mt) 6847 { 6848 mtree_lock(mt); 6849 __mt_destroy(mt); 6850 mtree_unlock(mt); 6851 } 6852 EXPORT_SYMBOL(mtree_destroy); 6853 6854 /** 6855 * mt_find() - Search from the start up until an entry is found. 
6856 * @mt: The maple tree 6857 * @index: Pointer which contains the start location of the search 6858 * @max: The maximum value of the search range 6859 * 6860 * Takes RCU read lock internally to protect the search, which does not 6861 * protect the returned pointer after dropping RCU read lock. 6862 * See also: Documentation/core-api/maple_tree.rst 6863 * 6864 * If an entry is found, @index is updated to point to the next possible 6865 * entry, independent of whether the found entry occupies a single index 6866 * or a range of indices. 6867 * 6868 * Return: The entry at or after the @index or %NULL 6869 */ 6870 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6871 { 6872 MA_STATE(mas, mt, *index, *index); 6873 void *entry; 6874 #ifdef CONFIG_DEBUG_MAPLE_TREE 6875 unsigned long copy = *index; 6876 #endif 6877 6878 trace_ma_read(__func__, &mas); 6879 6880 if ((*index) > max) 6881 return NULL; 6882 6883 rcu_read_lock(); 6884 retry: 6885 entry = mas_state_walk(&mas); 6886 if (mas_is_start(&mas)) 6887 goto retry; 6888 6889 if (unlikely(xa_is_zero(entry))) 6890 entry = NULL; 6891 6892 if (entry) 6893 goto unlock; 6894 6895 while (mas_is_active(&mas) && (mas.last < max)) { 6896 entry = mas_next_slot(&mas, max, false); 6897 if (likely(entry && !xa_is_zero(entry))) 6898 break; 6899 } 6900 6901 if (unlikely(xa_is_zero(entry))) 6902 entry = NULL; 6903 unlock: 6904 rcu_read_unlock(); 6905 if (likely(entry)) { 6906 *index = mas.last + 1; 6907 #ifdef CONFIG_DEBUG_MAPLE_TREE 6908 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6909 pr_err("index not increased! %lx <= %lx\n", 6910 *index, copy); 6911 #endif 6912 } 6913 6914 return entry; 6915 } 6916 EXPORT_SYMBOL(mt_find); 6917 6918 /** 6919 * mt_find_after() - Search from the start up until an entry is found. 6920 * @mt: The maple tree 6921 * @index: Pointer which contains the start location of the search 6922 * @max: The maximum value to check 6923 * 6924 * Same as mt_find() except that it checks @index for 0 before 6925 * searching. If @index == 0, the search is aborted. This covers a wrap 6926 * around of @index to 0 in an iterator loop.
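 *
 * mt_find() and mt_find_after() are the building blocks of the mt_for_each()
 * iterator; an open-coded sketch (illustrative only; "mt" is a caller-defined,
 * populated tree):
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	for (entry = mt_find(&mt, &index, ULONG_MAX); entry;
 *	     entry = mt_find_after(&mt, &index, ULONG_MAX))
 *		pr_info("%p (next search starts at %lx)\n", entry, index);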
6927 * 6928 * Return: The entry at or after the @index or %NULL 6929 */ 6930 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6931 unsigned long max) 6932 { 6933 if (!(*index)) 6934 return NULL; 6935 6936 return mt_find(mt, index, max); 6937 } 6938 EXPORT_SYMBOL(mt_find_after); 6939 6940 #ifdef CONFIG_DEBUG_MAPLE_TREE 6941 atomic_t maple_tree_tests_run; 6942 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6943 atomic_t maple_tree_tests_passed; 6944 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6945 6946 #ifndef __KERNEL__ 6947 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6948 void mt_set_non_kernel(unsigned int val) 6949 { 6950 kmem_cache_set_non_kernel(maple_node_cache, val); 6951 } 6952 6953 extern void kmem_cache_set_callback(struct kmem_cache *cachep, 6954 void (*callback)(void *)); 6955 void mt_set_callback(void (*callback)(void *)) 6956 { 6957 kmem_cache_set_callback(maple_node_cache, callback); 6958 } 6959 6960 extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); 6961 void mt_set_private(void *private) 6962 { 6963 kmem_cache_set_private(maple_node_cache, private); 6964 } 6965 6966 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6967 unsigned long mt_get_alloc_size(void) 6968 { 6969 return kmem_cache_get_alloc(maple_node_cache); 6970 } 6971 6972 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6973 void mt_zero_nr_tallocated(void) 6974 { 6975 kmem_cache_zero_nr_tallocated(maple_node_cache); 6976 } 6977 6978 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6979 unsigned int mt_nr_tallocated(void) 6980 { 6981 return kmem_cache_nr_tallocated(maple_node_cache); 6982 } 6983 6984 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6985 unsigned int mt_nr_allocated(void) 6986 { 6987 return kmem_cache_nr_allocated(maple_node_cache); 6988 } 6989 6990 void mt_cache_shrink(void) 6991 { 6992 } 6993 #else 6994 /* 6995 * mt_cache_shrink() - For testing, don't use this. 6996 * 6997 * Certain testcases can trigger an OOM when combined with other memory 6998 * debugging configuration options. This function is used to reduce the 6999 * possibility of an out of memory even due to kmem_cache objects remaining 7000 * around for longer than usual. 7001 */ 7002 void mt_cache_shrink(void) 7003 { 7004 kmem_cache_shrink(maple_node_cache); 7005 7006 } 7007 EXPORT_SYMBOL_GPL(mt_cache_shrink); 7008 7009 #endif /* not defined __KERNEL__ */ 7010 /* 7011 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 7012 * @mas: The maple state 7013 * @offset: The offset into the slot array to fetch. 7014 * 7015 * Return: The entry stored at @offset. 
7016 */ 7017 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 7018 unsigned char offset) 7019 { 7020 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 7021 offset); 7022 } 7023 7024 /* Depth first search, post-order */ 7025 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 7026 { 7027 7028 struct maple_enode *p, *mn = mas->node; 7029 unsigned long p_min, p_max; 7030 7031 mas_next_node(mas, mas_mn(mas), max); 7032 if (!mas_is_overflow(mas)) 7033 return; 7034 7035 if (mte_is_root(mn)) 7036 return; 7037 7038 mas->node = mn; 7039 mas_ascend(mas); 7040 do { 7041 p = mas->node; 7042 p_min = mas->min; 7043 p_max = mas->max; 7044 mas_prev_node(mas, 0); 7045 } while (!mas_is_underflow(mas)); 7046 7047 mas->node = p; 7048 mas->max = p_max; 7049 mas->min = p_min; 7050 } 7051 7052 /* Tree validations */ 7053 static void mt_dump_node(const struct maple_tree *mt, void *entry, 7054 unsigned long min, unsigned long max, unsigned int depth, 7055 enum mt_dump_format format); 7056 static void mt_dump_range(unsigned long min, unsigned long max, 7057 unsigned int depth, enum mt_dump_format format) 7058 { 7059 static const char spaces[] = " "; 7060 7061 switch(format) { 7062 case mt_dump_hex: 7063 if (min == max) 7064 pr_info("%.*s%lx: ", depth * 2, spaces, min); 7065 else 7066 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 7067 break; 7068 case mt_dump_dec: 7069 if (min == max) 7070 pr_info("%.*s%lu: ", depth * 2, spaces, min); 7071 else 7072 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 7073 } 7074 } 7075 7076 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 7077 unsigned int depth, enum mt_dump_format format) 7078 { 7079 mt_dump_range(min, max, depth, format); 7080 7081 if (xa_is_value(entry)) 7082 pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), 7083 xa_to_value(entry), entry); 7084 else if (xa_is_zero(entry)) 7085 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 7086 else if (mt_is_reserved(entry)) 7087 pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); 7088 else 7089 pr_cont(PTR_FMT "\n", entry); 7090 } 7091 7092 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 7093 unsigned long min, unsigned long max, unsigned int depth, 7094 enum mt_dump_format format) 7095 { 7096 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 7097 bool leaf = mte_is_leaf(entry); 7098 unsigned long first = min; 7099 int i; 7100 7101 pr_cont(" contents: "); 7102 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 7103 switch(format) { 7104 case mt_dump_hex: 7105 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 7106 break; 7107 case mt_dump_dec: 7108 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 7109 } 7110 } 7111 pr_cont(PTR_FMT "\n", node->slot[i]); 7112 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 7113 unsigned long last = max; 7114 7115 if (i < (MAPLE_RANGE64_SLOTS - 1)) 7116 last = node->pivot[i]; 7117 else if (!node->slot[i] && max != mt_node_max(entry)) 7118 break; 7119 if (last == 0 && i > 0) 7120 break; 7121 if (leaf) 7122 mt_dump_entry(mt_slot(mt, node->slot, i), 7123 first, last, depth + 1, format); 7124 else if (node->slot[i]) 7125 mt_dump_node(mt, mt_slot(mt, node->slot, i), 7126 first, last, depth + 1, format); 7127 7128 if (last == max) 7129 break; 7130 if (last > max) { 7131 switch(format) { 7132 case mt_dump_hex: 7133 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 7134 node, last, max, i); 7135 break; 7136 case mt_dump_dec: 7137 pr_err("node 
" PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 7138 node, last, max, i); 7139 } 7140 } 7141 first = last + 1; 7142 } 7143 } 7144 7145 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 7146 unsigned long min, unsigned long max, unsigned int depth, 7147 enum mt_dump_format format) 7148 { 7149 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 7150 unsigned long first = min; 7151 int i; 7152 7153 pr_cont(" contents: "); 7154 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 7155 switch (format) { 7156 case mt_dump_hex: 7157 pr_cont("%lx ", node->gap[i]); 7158 break; 7159 case mt_dump_dec: 7160 pr_cont("%lu ", node->gap[i]); 7161 } 7162 } 7163 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 7164 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 7165 switch (format) { 7166 case mt_dump_hex: 7167 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 7168 break; 7169 case mt_dump_dec: 7170 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 7171 } 7172 } 7173 pr_cont(PTR_FMT "\n", node->slot[i]); 7174 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 7175 unsigned long last = max; 7176 7177 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 7178 last = node->pivot[i]; 7179 else if (!node->slot[i]) 7180 break; 7181 if (last == 0 && i > 0) 7182 break; 7183 if (node->slot[i]) 7184 mt_dump_node(mt, mt_slot(mt, node->slot, i), 7185 first, last, depth + 1, format); 7186 7187 if (last == max) 7188 break; 7189 if (last > max) { 7190 switch(format) { 7191 case mt_dump_hex: 7192 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 7193 node, last, max, i); 7194 break; 7195 case mt_dump_dec: 7196 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 7197 node, last, max, i); 7198 } 7199 } 7200 first = last + 1; 7201 } 7202 } 7203 7204 static void mt_dump_node(const struct maple_tree *mt, void *entry, 7205 unsigned long min, unsigned long max, unsigned int depth, 7206 enum mt_dump_format format) 7207 { 7208 struct maple_node *node = mte_to_node(entry); 7209 unsigned int type = mte_node_type(entry); 7210 unsigned int i; 7211 7212 mt_dump_range(min, max, depth, format); 7213 7214 pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, 7215 depth, type, node ? node->parent : NULL); 7216 switch (type) { 7217 case maple_dense: 7218 pr_cont("\n"); 7219 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 7220 if (min + i > max) 7221 pr_cont("OUT OF RANGE: "); 7222 mt_dump_entry(mt_slot(mt, node->slot, i), 7223 min + i, min + i, depth, format); 7224 } 7225 break; 7226 case maple_leaf_64: 7227 case maple_range_64: 7228 mt_dump_range64(mt, entry, min, max, depth, format); 7229 break; 7230 case maple_arange_64: 7231 mt_dump_arange64(mt, entry, min, max, depth, format); 7232 break; 7233 7234 default: 7235 pr_cont(" UNKNOWN TYPE\n"); 7236 } 7237 } 7238 7239 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 7240 { 7241 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 7242 7243 pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", 7244 mt, mt->ma_flags, mt_height(mt), entry); 7245 if (xa_is_node(entry)) 7246 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 7247 else if (entry) 7248 mt_dump_entry(entry, 0, 0, 0, format); 7249 else 7250 pr_info("(empty)\n"); 7251 } 7252 EXPORT_SYMBOL_GPL(mt_dump); 7253 7254 /* 7255 * Calculate the maximum gap in a node and check if that's what is reported in 7256 * the parent (unless root). 
7257 */ 7258 static void mas_validate_gaps(struct ma_state *mas) 7259 { 7260 struct maple_enode *mte = mas->node; 7261 struct maple_node *p_mn, *node = mte_to_node(mte); 7262 enum maple_type mt = mte_node_type(mas->node); 7263 unsigned long gap = 0, max_gap = 0; 7264 unsigned long p_end, p_start = mas->min; 7265 unsigned char p_slot, offset; 7266 unsigned long *gaps = NULL; 7267 unsigned long *pivots = ma_pivots(node, mt); 7268 unsigned int i; 7269 7270 if (ma_is_dense(mt)) { 7271 for (i = 0; i < mt_slot_count(mte); i++) { 7272 if (mas_get_slot(mas, i)) { 7273 if (gap > max_gap) 7274 max_gap = gap; 7275 gap = 0; 7276 continue; 7277 } 7278 gap++; 7279 } 7280 goto counted; 7281 } 7282 7283 gaps = ma_gaps(node, mt); 7284 for (i = 0; i < mt_slot_count(mte); i++) { 7285 p_end = mas_safe_pivot(mas, pivots, i, mt); 7286 7287 if (!gaps) { 7288 if (!mas_get_slot(mas, i)) 7289 gap = p_end - p_start + 1; 7290 } else { 7291 void *entry = mas_get_slot(mas, i); 7292 7293 gap = gaps[i]; 7294 MT_BUG_ON(mas->tree, !entry); 7295 7296 if (gap > p_end - p_start + 1) { 7297 pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", 7298 mas_mn(mas), i, gap, p_end, p_start, 7299 p_end - p_start + 1); 7300 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 7301 } 7302 } 7303 7304 if (gap > max_gap) 7305 max_gap = gap; 7306 7307 p_start = p_end + 1; 7308 if (p_end >= mas->max) 7309 break; 7310 } 7311 7312 counted: 7313 if (mt == maple_arange_64) { 7314 MT_BUG_ON(mas->tree, !gaps); 7315 offset = ma_meta_gap(node); 7316 if (offset > i) { 7317 pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); 7318 MT_BUG_ON(mas->tree, 1); 7319 } 7320 7321 if (gaps[offset] != max_gap) { 7322 pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", 7323 node, offset, max_gap); 7324 MT_BUG_ON(mas->tree, 1); 7325 } 7326 7327 for (i++ ; i < mt_slot_count(mte); i++) { 7328 if (gaps[i] != 0) { 7329 pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", 7330 node, i); 7331 MT_BUG_ON(mas->tree, 1); 7332 } 7333 } 7334 } 7335 7336 if (mte_is_root(mte)) 7337 return; 7338 7339 p_slot = mte_parent_slot(mas->node); 7340 p_mn = mte_parent(mte); 7341 MT_BUG_ON(mas->tree, max_gap > mas->max); 7342 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 7343 pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); 7344 mt_dump(mas->tree, mt_dump_hex); 7345 MT_BUG_ON(mas->tree, 1); 7346 } 7347 } 7348 7349 static void mas_validate_parent_slot(struct ma_state *mas) 7350 { 7351 struct maple_node *parent; 7352 struct maple_enode *node; 7353 enum maple_type p_type; 7354 unsigned char p_slot; 7355 void __rcu **slots; 7356 int i; 7357 7358 if (mte_is_root(mas->node)) 7359 return; 7360 7361 p_slot = mte_parent_slot(mas->node); 7362 p_type = mas_parent_type(mas, mas->node); 7363 parent = mte_parent(mas->node); 7364 slots = ma_slots(parent, p_type); 7365 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 7366 7367 /* Check prev/next parent slot for duplicate node entry */ 7368 7369 for (i = 0; i < mt_slots[p_type]; i++) { 7370 node = mas_slot(mas, slots, i); 7371 if (i == p_slot) { 7372 if (node != mas->node) 7373 pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", 7374 parent, i, mas_mn(mas)); 7375 MT_BUG_ON(mas->tree, node != mas->node); 7376 } else if (node == mas->node) { 7377 pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", 7378 mas_mn(mas), parent, i, p_slot); 7379 MT_BUG_ON(mas->tree, node == mas->node); 7380 } 7381 } 7382 } 7383 7384 static void mas_validate_child_slot(struct ma_state *mas) 7385 { 7386 
enum maple_type type = mte_node_type(mas->node); 7387 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7388 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 7389 struct maple_enode *child; 7390 unsigned char i; 7391 7392 if (mte_is_leaf(mas->node)) 7393 return; 7394 7395 for (i = 0; i < mt_slots[type]; i++) { 7396 child = mas_slot(mas, slots, i); 7397 7398 if (!child) { 7399 pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", 7400 mas_mn(mas), i); 7401 MT_BUG_ON(mas->tree, 1); 7402 } 7403 7404 if (mte_parent_slot(child) != i) { 7405 pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", 7406 mas_mn(mas), i, mte_to_node(child), 7407 mte_parent_slot(child)); 7408 MT_BUG_ON(mas->tree, 1); 7409 } 7410 7411 if (mte_parent(child) != mte_to_node(mas->node)) { 7412 pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", 7413 mte_to_node(child), mte_parent(child), 7414 mte_to_node(mas->node)); 7415 MT_BUG_ON(mas->tree, 1); 7416 } 7417 7418 if (i < mt_pivots[type] && pivots[i] == mas->max) 7419 break; 7420 } 7421 } 7422 7423 /* 7424 * Validate all pivots are within mas->min and mas->max, check metadata ends 7425 * where the maximum ends and ensure there is no slots or pivots set outside of 7426 * the end of the data. 7427 */ 7428 static void mas_validate_limits(struct ma_state *mas) 7429 { 7430 int i; 7431 unsigned long prev_piv = 0; 7432 enum maple_type type = mte_node_type(mas->node); 7433 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7434 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7435 7436 for (i = 0; i < mt_slots[type]; i++) { 7437 unsigned long piv; 7438 7439 piv = mas_safe_pivot(mas, pivots, i, type); 7440 7441 if (!piv && (i != 0)) { 7442 pr_err("Missing node limit pivot at " PTR_FMT "[%u]", 7443 mas_mn(mas), i); 7444 MAS_WARN_ON(mas, 1); 7445 } 7446 7447 if (prev_piv > piv) { 7448 pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", 7449 mas_mn(mas), i, piv, prev_piv); 7450 MAS_WARN_ON(mas, piv < prev_piv); 7451 } 7452 7453 if (piv < mas->min) { 7454 pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, 7455 piv, mas->min); 7456 MAS_WARN_ON(mas, piv < mas->min); 7457 } 7458 if (piv > mas->max) { 7459 pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, 7460 piv, mas->max); 7461 MAS_WARN_ON(mas, piv > mas->max); 7462 } 7463 prev_piv = piv; 7464 if (piv == mas->max) 7465 break; 7466 } 7467 7468 if (mas_data_end(mas) != i) { 7469 pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", 7470 mas_mn(mas), mas_data_end(mas), i); 7471 MT_BUG_ON(mas->tree, 1); 7472 } 7473 7474 for (i += 1; i < mt_slots[type]; i++) { 7475 void *entry = mas_slot(mas, slots, i); 7476 7477 if (entry && (i != mt_slots[type] - 1)) { 7478 pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", 7479 mas_mn(mas), i, entry); 7480 MT_BUG_ON(mas->tree, entry != NULL); 7481 } 7482 7483 if (i < mt_pivots[type]) { 7484 unsigned long piv = pivots[i]; 7485 7486 if (!piv) 7487 continue; 7488 7489 pr_err(PTR_FMT "[%u] should not have piv %lu\n", 7490 mas_mn(mas), i, piv); 7491 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7492 } 7493 } 7494 } 7495 7496 static void mt_validate_nulls(struct maple_tree *mt) 7497 { 7498 void *entry, *last = (void *)1; 7499 unsigned char offset = 0; 7500 void __rcu **slots; 7501 MA_STATE(mas, mt, 0, 0); 7502 7503 mas_start(&mas); 7504 if (mas_is_none(&mas) || (mas_is_ptr(&mas))) 7505 return; 7506 7507 while (!mte_is_leaf(mas.node)) 7508 mas_descend(&mas); 7509 7510 slots = ma_slots(mte_to_node(mas.node), 
mte_node_type(mas.node)); 7511 do { 7512 entry = mas_slot(&mas, slots, offset); 7513 if (!last && !entry) { 7514 pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", 7515 mas_mn(&mas), offset); 7516 } 7517 MT_BUG_ON(mt, !last && !entry); 7518 last = entry; 7519 if (offset == mas_data_end(&mas)) { 7520 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7521 if (mas_is_overflow(&mas)) 7522 return; 7523 offset = 0; 7524 slots = ma_slots(mte_to_node(mas.node), 7525 mte_node_type(mas.node)); 7526 } else { 7527 offset++; 7528 } 7529 7530 } while (!mas_is_overflow(&mas)); 7531 } 7532 7533 /* 7534 * validate a maple tree by checking: 7535 * 1. The limits (pivots are within mas->min to mas->max) 7536 * 2. The gap is correctly set in the parents 7537 */ 7538 void mt_validate(struct maple_tree *mt) 7539 __must_hold(mas->tree->ma_lock) 7540 { 7541 unsigned char end; 7542 7543 MA_STATE(mas, mt, 0, 0); 7544 mas_start(&mas); 7545 if (!mas_is_active(&mas)) 7546 return; 7547 7548 while (!mte_is_leaf(mas.node)) 7549 mas_descend(&mas); 7550 7551 while (!mas_is_overflow(&mas)) { 7552 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7553 end = mas_data_end(&mas); 7554 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7555 (!mte_is_root(mas.node)))) { 7556 pr_err("Invalid size %u of " PTR_FMT "\n", 7557 end, mas_mn(&mas)); 7558 } 7559 7560 mas_validate_parent_slot(&mas); 7561 mas_validate_limits(&mas); 7562 mas_validate_child_slot(&mas); 7563 if (mt_is_alloc(mt)) 7564 mas_validate_gaps(&mas); 7565 mas_dfs_postorder(&mas, ULONG_MAX); 7566 } 7567 mt_validate_nulls(mt); 7568 } 7569 EXPORT_SYMBOL_GPL(mt_validate); 7570 7571 void mas_dump(const struct ma_state *mas) 7572 { 7573 pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", 7574 mas->tree, mas->node); 7575 switch (mas->status) { 7576 case ma_active: 7577 pr_err("(ma_active)"); 7578 break; 7579 case ma_none: 7580 pr_err("(ma_none)"); 7581 break; 7582 case ma_root: 7583 pr_err("(ma_root)"); 7584 break; 7585 case ma_start: 7586 pr_err("(ma_start) "); 7587 break; 7588 case ma_pause: 7589 pr_err("(ma_pause) "); 7590 break; 7591 case ma_overflow: 7592 pr_err("(ma_overflow) "); 7593 break; 7594 case ma_underflow: 7595 pr_err("(ma_underflow) "); 7596 break; 7597 case ma_error: 7598 pr_err("(ma_error) "); 7599 break; 7600 } 7601 7602 pr_err("Store Type: "); 7603 switch (mas->store_type) { 7604 case wr_invalid: 7605 pr_err("invalid store type\n"); 7606 break; 7607 case wr_new_root: 7608 pr_err("new_root\n"); 7609 break; 7610 case wr_store_root: 7611 pr_err("store_root\n"); 7612 break; 7613 case wr_exact_fit: 7614 pr_err("exact_fit\n"); 7615 break; 7616 case wr_split_store: 7617 pr_err("split_store\n"); 7618 break; 7619 case wr_slot_store: 7620 pr_err("slot_store\n"); 7621 break; 7622 case wr_append: 7623 pr_err("append\n"); 7624 break; 7625 case wr_node_store: 7626 pr_err("node_store\n"); 7627 break; 7628 case wr_spanning_store: 7629 pr_err("spanning_store\n"); 7630 break; 7631 case wr_rebalance: 7632 pr_err("rebalance\n"); 7633 break; 7634 } 7635 7636 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, 7637 mas->index, mas->last); 7638 pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n", 7639 mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); 7640 if (mas->index > mas->last) 7641 pr_err("Check index & last\n"); 7642 } 7643 EXPORT_SYMBOL_GPL(mas_dump); 7644 7645 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7646 { 7647 pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", 7648 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7649 
pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7650 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, 7651 wr_mas->end_piv); 7652 } 7653 EXPORT_SYMBOL_GPL(mas_wr_dump); 7654 7655 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7656