// SPDX-License-Identifier: GPL-2.0+
/*
 * Maple Tree implementation
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 * Copyright (c) 2023 ByteDance
 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
 */

/*
 * DOC: Interesting implementation details of the Maple Tree
 *
 * Each node type has a number of slots for entries and a number of slots for
 * pivots.  In the case of dense nodes, the pivots are implied by the position
 * and are simply the slot index + the minimum of the node.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges.  Pivots may appear in the
 * subtree with an entry attached to the value, whereas keys are unique to a
 * specific position of a B-tree.  Pivot values are inclusive of the slot with
 * the same index.
 *
 *
 * The following illustrates the layout of a range64 node's slots and pivots
 * (a worked example follows at the end of this comment).
 *
 *
 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
 *           │   │   │   │     │    │    │    │    └─ Implied maximum
 *           │   │   │   │     │    │    │    └─ Pivot 14
 *           │   │   │   │     │    │    └─ Pivot 13
 *           │   │   │   │     │    └─ Pivot 12
 *           │   │   │   │     └─ Pivot 11
 *           │   │   │   └─ Pivot 2
 *           │   │   └─ Pivot 1
 *           │   └─ Pivot 0
 *           └─ Implied minimum
 *
 * Slot contents:
 *  Internal (non-leaf) nodes contain pointers to other nodes.
 *  Leaf nodes contain entries.
 *
 * The location of interest is often referred to as an offset.  All offsets
 * have a slot, but the last offset has an implied pivot from the node above
 * (or ULONG_MAX for the root node).
 *
 * Ranges complicate certain write activities.  When modifying any of
 * the B-tree variants, it is known that one entry will either be added or
 * deleted.  When modifying the Maple Tree, one store operation may overwrite
 * the entire data set, or one half of the tree, or the middle half of the tree.
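 *
 * As a simplified, hypothetical illustration of the layout above (the indices
 * and entries are made up, not taken from a real dump): a leaf covering the
 * range 0-100 that holds entry A for 0-10, entry B for 11-20 and nothing for
 * 21-100 could look like this:
 *
 *  Slots  -> |  A |  B | NULL | ...
 *  Pivots -> | 10 | 20 |  ... |            (implied maximum = 100)
 *
 * Slot 0 covers 0-10 (implied minimum to pivot 0), slot 1 covers 11-20
 * (pivot 0 + 1 to pivot 1) and slot 2 covers 21-100.  A single store of, say,
 * 5-95 would overwrite all three of these slots at once, which is why one
 * Maple Tree write may replace many entries.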
 *
 */


#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

#define MA_ROOT_PARENT 1

/*
 * Maple state flags
 * * MA_STATE_BULK		- Bulk insert mode
 * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
 * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
 */
#define MA_STATE_BULK		1
#define MA_STATE_REBALANCE	2
#define MA_STATE_PREALLOC	4

#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
#define ma_mnode_ptr(x) ((struct maple_node *)(x))
#define ma_enode_ptr(x) ((struct maple_enode *)(x))
static struct kmem_cache *maple_node_cache;

#ifdef CONFIG_DEBUG_MAPLE_TREE
static const unsigned long mt_max[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= ULONG_MAX,
	[maple_range_64]	= ULONG_MAX,
	[maple_arange_64]	= ULONG_MAX,
};
#define mt_node_max(x) mt_max[mte_node_type(x)]
#endif

static const unsigned char mt_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
};
#define mt_slot_count(x) mt_slots[mte_node_type(x)]

static const unsigned char mt_pivots[] = {
	[maple_dense]		= 0,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
};
#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]

static const unsigned char mt_min_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
};
#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]

#define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
#define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)

struct maple_big_node {
	struct maple_pnode *parent;
	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
	union {
		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
		struct {
			unsigned long padding[MAPLE_BIG_NODE_GAPS];
			unsigned long gap[MAPLE_BIG_NODE_GAPS];
		};
	};
	unsigned char b_end;
	enum maple_type type;
};

/*
 * The maple_subtree_state is used to build a tree to replace a segment of an
 * existing tree in a more atomic way.  Any walkers of the older tree will hit a
 * dead node and restart on updates.
 */
struct maple_subtree_state {
	struct ma_state *orig_l;	/* Original left side of subtree */
	struct ma_state *orig_r;	/* Original right side of subtree */
	struct ma_state *l;		/* New left side of subtree */
	struct ma_state *m;		/* New middle of subtree (rare) */
	struct ma_state *r;		/* New right side of subtree */
	struct ma_topiary *free;	/* nodes to be freed */
	struct ma_topiary *destroy;	/* Nodes to be destroyed (walked and freed) */
	struct maple_big_node *bn;
};

#ifdef CONFIG_KASAN_STACK
/* Prevent mas_wr_bnode() from exceeding the stack frame limit */
#define noinline_for_kasan noinline_for_stack
#else
#define noinline_for_kasan inline
#endif

/* Functions */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
{
	return kmem_cache_alloc(maple_node_cache, gfp);
}

static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
{
	return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
}

static inline void mt_free_one(struct maple_node *node)
{
	kmem_cache_free(maple_node_cache, node);
}

static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
}

static void mt_free_rcu(struct rcu_head *head)
{
	struct maple_node *node = container_of(head, struct maple_node, rcu);

	kmem_cache_free(maple_node_cache, node);
}

/*
 * ma_free_rcu() - Use rcu callback to free a maple node
 * @node: The node to free
 *
 * The maple tree uses the parent pointer to indicate this node is no longer in
 * use and will be freed.
 */
static void ma_free_rcu(struct maple_node *node)
{
	WARN_ON(node->parent != ma_parent_ptr(node));
	call_rcu(&node->rcu, mt_free_rcu);
}

static void mas_set_height(struct ma_state *mas)
{
	unsigned int new_flags = mas->tree->ma_flags;

	new_flags &= ~MT_FLAGS_HEIGHT_MASK;
	MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX);
	new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
	mas->tree->ma_flags = new_flags;
}

static unsigned int mas_mt_height(struct ma_state *mas)
{
	return mt_height(mas->tree);
}

static inline unsigned int mt_attr(struct maple_tree *mt)
{
	return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
}

static __always_inline enum maple_type mte_node_type(
		const struct maple_enode *entry)
{
	return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
		MAPLE_NODE_TYPE_MASK;
}

static __always_inline bool ma_is_dense(const enum maple_type type)
{
	return type < maple_leaf_64;
}

static __always_inline bool ma_is_leaf(const enum maple_type type)
{
	return type < maple_range_64;
}

static __always_inline bool mte_is_leaf(const struct maple_enode *entry)
{
	return ma_is_leaf(mte_node_type(entry));
}

/*
 * We also reserve values with the bottom two bits set to '10' which are
 * below 4096
 */
static __always_inline bool mt_is_reserved(const void *entry)
{
	return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
		xa_is_internal(entry);
}

static __always_inline void mas_set_err(struct ma_state *mas, long err)
{
	mas->node = MA_ERROR(err);
	mas->status = ma_error;
}

static __always_inline bool mas_is_ptr(const struct ma_state *mas)
{
	return mas->status == ma_root;
}


static __always_inline bool mas_is_start(const struct ma_state *mas)
{
	return mas->status == ma_start;
}

static __always_inline bool mas_is_none(const struct ma_state *mas)
{
	return mas->status == ma_none;
}

static __always_inline bool mas_is_paused(const struct ma_state *mas)
{
	return mas->status == ma_pause;
}

static __always_inline bool mas_is_overflow(struct ma_state *mas)
{
	return mas->status == ma_overflow;
}

static inline bool mas_is_underflow(struct ma_state *mas)
{
	return mas->status == ma_underflow;
}

static __always_inline struct maple_node *mte_to_node(
		const struct maple_enode *entry)
{
	return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
}

/*
 * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
 * @entry: The maple encoded node
 *
 * Return: a maple topiary pointer
 */
static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
{
	return (struct maple_topiary *)
		((unsigned long)entry & ~MAPLE_NODE_MASK);
}

/*
 * mas_mn() - Get the maple state node.
 * @mas: The maple state
 *
 * Return: the maple node (not encoded - bare pointer).
 */
static inline struct maple_node *mas_mn(const struct ma_state *mas)
{
	return mte_to_node(mas->node);
}

/*
 * mte_set_node_dead() - Set a maple encoded node as dead.
 * @mn: The maple encoded node.
 */
static inline void mte_set_node_dead(struct maple_enode *mn)
{
	mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
	smp_wmb(); /* Needed for RCU */
}

/* Bit 1 indicates the root is a node */
#define MAPLE_ROOT_NODE			0x02
/* maple_type stored bit 3-6 */
#define MAPLE_ENODE_TYPE_SHIFT		0x03
/* Bit 2 means a NULL somewhere below */
#define MAPLE_ENODE_NULL		0x04

static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
					     enum maple_type type)
{
	return (void *)((unsigned long)node |
			(type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
}

static inline void *mte_mk_root(const struct maple_enode *node)
{
	return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
}

static inline void *mte_safe_root(const struct maple_enode *node)
{
	return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
}

static inline void *mte_set_full(const struct maple_enode *node)
{
	return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
}

static inline void *mte_clear_full(const struct maple_enode *node)
{
	return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
}

static inline bool mte_has_null(const struct maple_enode *node)
{
	return (unsigned long)node & MAPLE_ENODE_NULL;
}

static __always_inline bool ma_is_root(struct maple_node *node)
{
	return ((unsigned long)node->parent & MA_ROOT_PARENT);
}

static __always_inline bool mte_is_root(const struct maple_enode *node)
{
	return ma_is_root(mte_to_node(node));
}

static inline bool mas_is_root_limits(const struct ma_state *mas)
{
	return !mas->min && mas->max == ULONG_MAX;
}

static __always_inline bool mt_is_alloc(struct maple_tree *mt)
{
	return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
}

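/*
 * A rough, hypothetical illustration of the encoding above (the address is
 * made up): a 256-byte aligned struct maple_node at 0x...c0de00 encoded as a
 * maple_range_64 (enum value 2) becomes
 *
 *	mt_mk_node(node, maple_range_64)
 *		== 0x...c0de00 | (2 << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL
 *		== 0x...c0de14
 *
 * mte_node_type() recovers the type from bits 3-6 and mte_to_node() masks off
 * the low MAPLE_NODE_MASK bits to recover the bare node pointer.
 */
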
/*
 * The Parent Pointer
 * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
 * When storing 32 or 64 bit values, the offset can fit into 5 bits.  The 16
 * bit values need an extra bit to store the offset.  This extra bit comes from
 * a reuse of the last bit in the node type.  This is possible by using bit 1 to
 * indicate if bit 2 is part of the type or the slot.
 *
 * Node types:
 *  0x??1 = Root
 *  0x?00 = 16 bit nodes
 *  0x010 = 32 bit nodes
 *  0x110 = 64 bit nodes
 *
 * Slot size and alignment
 *  0b??1 : Root
 *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
 *  0b010 : 32 bit values, type in 0-2, slot in 3-7
 *  0b110 : 64 bit values, type in 0-2, slot in 3-7
 */

#define MAPLE_PARENT_ROOT		0x01

#define MAPLE_PARENT_SLOT_SHIFT		0x03
#define MAPLE_PARENT_SLOT_MASK		0xF8

#define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
#define MAPLE_PARENT_16B_SLOT_MASK	0xFC

#define MAPLE_PARENT_RANGE64		0x06
#define MAPLE_PARENT_RANGE32		0x04
#define MAPLE_PARENT_NOT_RANGE16	0x02

/*
 * mte_parent_shift() - Get the parent shift for the slot storage.
 * @parent: The parent pointer cast as an unsigned long
 * Return: The shift into that pointer to the start of the slot
 */
static inline unsigned long mte_parent_shift(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_SHIFT;

	return MAPLE_PARENT_16B_SLOT_SHIFT;
}

/*
 * mte_parent_slot_mask() - Get the slot mask for the parent.
 * @parent: The parent pointer cast as an unsigned long.
 * Return: The slot mask for that parent.
 */
static inline unsigned long mte_parent_slot_mask(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_MASK;

	return MAPLE_PARENT_16B_SLOT_MASK;
}

/*
 * mas_parent_type() - Return the maple_type of the parent from the stored
 * parent type.
 * @mas: The maple state
 * @enode: The maple_enode to extract the parent's enum
 * Return: The node->parent maple_type
 */
static inline
enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
{
	unsigned long p_type;

	p_type = (unsigned long)mte_to_node(enode)->parent;
	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
		return 0;

	p_type &= MAPLE_NODE_MASK;
	p_type &= ~mte_parent_slot_mask(p_type);
	switch (p_type) {
	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
		if (mt_is_alloc(mas->tree))
			return maple_arange_64;
		return maple_range_64;
	}

	return 0;
}

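/*
 * A rough, hypothetical example of the encoding performed by mas_set_parent()
 * below (the parent address is made up): a child kept in slot 4 of a
 * maple_range_64 parent at 0x...be00 stores
 *
 *	node->parent == 0x...be00 | (4 << MAPLE_PARENT_SLOT_SHIFT)
 *				  | MAPLE_PARENT_RANGE64
 *		     == 0x...be26
 *
 * mte_parent() masks off the low bits to recover 0x...be00, and
 * mte_parent_slot() extracts the slot (4) using the shift and mask helpers
 * above.
 */
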
/*
 * mas_set_parent() - Set the parent node and encode the slot
 * @enode: The encoded maple node.
 * @parent: The encoded maple node that is the parent of @enode.
 * @slot: The slot that @enode resides in @parent.
 *
 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
 * parent type.
 */
static inline
void mas_set_parent(struct ma_state *mas, struct maple_enode *enode,
		    const struct maple_enode *parent, unsigned char slot)
{
	unsigned long val = (unsigned long)parent;
	unsigned long shift;
	unsigned long type;
	enum maple_type p_type = mte_node_type(parent);

	MAS_BUG_ON(mas, p_type == maple_dense);
	MAS_BUG_ON(mas, p_type == maple_leaf_64);

	switch (p_type) {
	case maple_range_64:
	case maple_arange_64:
		shift = MAPLE_PARENT_SLOT_SHIFT;
		type = MAPLE_PARENT_RANGE64;
		break;
	default:
	case maple_dense:
	case maple_leaf_64:
		shift = type = 0;
		break;
	}

	val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
	val |= (slot << shift) | type;
	mte_to_node(enode)->parent = ma_parent_ptr(val);
}

/*
 * mte_parent_slot() - get the parent slot of @enode.
 * @enode: The encoded maple node.
 *
 * Return: The slot in the parent node where @enode resides.
 */
static __always_inline
unsigned int mte_parent_slot(const struct maple_enode *enode)
{
	unsigned long val = (unsigned long)mte_to_node(enode)->parent;

	if (unlikely(val & MA_ROOT_PARENT))
		return 0;

	/*
	 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
	 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
	 */
	return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
}

/*
 * mte_parent() - Get the parent of @enode.
 * @enode: The encoded maple node.
 *
 * Return: The parent maple node.
 */
static __always_inline
struct maple_node *mte_parent(const struct maple_enode *enode)
{
	return (void *)((unsigned long)
			(mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
}

/*
 * ma_dead_node() - check if the @node is dead.
 * @node: The maple node
 *
 * Return: true if dead, false otherwise.
 */
static __always_inline bool ma_dead_node(const struct maple_node *node)
{
	struct maple_node *parent;

	/* Do not reorder reads from the node prior to the parent check */
	smp_rmb();
	parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
	return (parent == node);
}

/*
 * mte_dead_node() - check if the @enode is dead.
 * @enode: The encoded maple node
 *
 * Return: true if dead, false otherwise.
 */
static __always_inline bool mte_dead_node(const struct maple_enode *enode)
{
	struct maple_node *parent, *node;

	node = mte_to_node(enode);
	/* Do not reorder reads from the node prior to the parent check */
	smp_rmb();
	parent = mte_parent(enode);
	return (parent == node);
}

/*
 * mas_allocated() - Get the number of nodes allocated in a maple state.
 * @mas: The maple state
 *
 * The ma_state alloc member is overloaded to hold a pointer to the first
 * allocated node or to the number of requested nodes to allocate.  If bit 0 is
 * set, then the alloc contains the number of requested nodes.  If there is an
 * allocated node, then the total allocated nodes is in that node.
 *
 * Return: The total number of nodes allocated
 */
static inline unsigned long mas_allocated(const struct ma_state *mas)
{
	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
		return 0;

	return mas->alloc->total;
}

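/*
 * A short, hypothetical illustration of the mas->alloc encoding described
 * above: with no nodes allocated, a request for three nodes is stored
 * directly in the pointer with bit 0 set:
 *
 *	mas->alloc == (struct maple_alloc *)((3 << 1) | 1), i.e. 0x7
 *
 * In that state mas_allocated() returns 0 and mas_alloc_req() returns 3.
 * Once a node is allocated, mas->alloc points at it and the totals and any
 * outstanding request live in that node instead.
 */
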
/*
 * mas_set_alloc_req() - Set the requested number of allocations.
 * @mas: the maple state
 * @count: the number of allocations.
 *
 * The requested number of allocations is either in the first allocated node,
 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
 * no allocated node.  Set the request either in the node or do the necessary
 * encoding to store in @mas->alloc directly.
 */
static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
{
	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
		if (!count)
			mas->alloc = NULL;
		else
			mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
		return;
	}

	mas->alloc->request_count = count;
}

/*
 * mas_alloc_req() - get the requested number of allocations.
 * @mas: The maple state
 *
 * The alloc count is either stored directly in @mas, or in
 * @mas->alloc->request_count if there is at least one node allocated.  Decode
 * the request count if it's stored directly in @mas->alloc.
 *
 * Return: The allocation request count.
 */
static inline unsigned int mas_alloc_req(const struct ma_state *mas)
{
	if ((unsigned long)mas->alloc & 0x1)
		return (unsigned long)(mas->alloc) >> 1;
	else if (mas->alloc)
		return mas->alloc->request_count;
	return 0;
}

/*
 * ma_pivots() - Get a pointer to the maple node pivots.
 * @node - the maple node
 * @type - the node type
 *
 * In the event of a dead node, this array may be %NULL
 *
 * Return: A pointer to the maple node pivots
 */
static inline unsigned long *ma_pivots(struct maple_node *node,
				       enum maple_type type)
{
	switch (type) {
	case maple_arange_64:
		return node->ma64.pivot;
	case maple_range_64:
	case maple_leaf_64:
		return node->mr64.pivot;
	case maple_dense:
		return NULL;
	}
	return NULL;
}

/*
 * ma_gaps() - Get a pointer to the maple node gaps.
 * @node - the maple node
 * @type - the node type
 *
 * Return: A pointer to the maple node gaps
 */
static inline unsigned long *ma_gaps(struct maple_node *node,
				     enum maple_type type)
{
	switch (type) {
	case maple_arange_64:
		return node->ma64.gap;
	case maple_range_64:
	case maple_leaf_64:
	case maple_dense:
		return NULL;
	}
	return NULL;
}

/*
 * mas_safe_pivot() - get the pivot at @piv or mas->max.
 * @mas: The maple state
 * @pivots: The pointer to the maple node pivots
 * @piv: The pivot to fetch
 * @type: The maple node type
 *
 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
 * otherwise.
 */
static __always_inline unsigned long
mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
	       unsigned char piv, enum maple_type type)
{
	if (piv >= mt_pivots[type])
		return mas->max;

	return pivots[piv];
}

/*
 * mas_safe_min() - Return the minimum for a given offset.
 * @mas: The maple state
 * @pivots: The pointer to the maple node pivots
 * @offset: The offset into the pivot array
 *
 * Return: The minimum range value that is contained in @offset.
 */
static inline unsigned long
mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
{
	if (likely(offset))
		return pivots[offset - 1] + 1;

	return mas->min;
}

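/*
 * Taken together, the two helpers above give the range covered by a slot: the
 * entry in slot i covers the indices mas_safe_min(mas, pivots, i) through
 * mas_safe_pivot(mas, pivots, i, type), inclusive.  A purely illustrative
 * sketch (mas_data_end() is defined further down in this file):
 *
 *	for (offset = 0; offset <= mas_data_end(mas); offset++) {
 *		min = mas_safe_min(mas, pivots, offset);
 *		max = mas_safe_pivot(mas, pivots, offset, type);
 *		pr_info("slot %u covers %lu-%lu\n", offset, min, max);
 *	}
 */
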
/*
 * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
 * @mn: The encoded maple node
 * @piv: The pivot offset
 * @val: The value of the pivot
 */
static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
				 unsigned long val)
{
	struct maple_node *node = mte_to_node(mn);
	enum maple_type type = mte_node_type(mn);

	BUG_ON(piv >= mt_pivots[type]);
	switch (type) {
	case maple_range_64:
	case maple_leaf_64:
		node->mr64.pivot[piv] = val;
		break;
	case maple_arange_64:
		node->ma64.pivot[piv] = val;
		break;
	case maple_dense:
		break;
	}

}

/*
 * ma_slots() - Get a pointer to the maple node slots.
 * @mn: The maple node
 * @mt: The maple node type
 *
 * Return: A pointer to the maple node slots
 */
static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
{
	switch (mt) {
	case maple_arange_64:
		return mn->ma64.slot;
	case maple_range_64:
	case maple_leaf_64:
		return mn->mr64.slot;
	case maple_dense:
		return mn->slot;
	}

	return NULL;
}

static inline bool mt_write_locked(const struct maple_tree *mt)
{
	return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
		lockdep_is_held(&mt->ma_lock);
}

static __always_inline bool mt_locked(const struct maple_tree *mt)
{
	return mt_external_lock(mt) ? mt_lock_is_held(mt) :
		lockdep_is_held(&mt->ma_lock);
}

static __always_inline void *mt_slot(const struct maple_tree *mt,
		void __rcu **slots, unsigned char offset)
{
	return rcu_dereference_check(slots[offset], mt_locked(mt));
}

static __always_inline void *mt_slot_locked(struct maple_tree *mt,
		void __rcu **slots, unsigned char offset)
{
	return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
}

/*
 * mas_slot_locked() - Get the slot value when holding the maple tree lock.
 * @mas: The maple state
 * @slots: The pointer to the slots
 * @offset: The offset into the slots array to fetch
 *
 * Return: The entry stored in @slots at the @offset.
 */
static __always_inline void *mas_slot_locked(struct ma_state *mas,
		void __rcu **slots, unsigned char offset)
{
	return mt_slot_locked(mas->tree, slots, offset);
}

/*
 * mas_slot() - Get the slot value when not holding the maple tree lock.
 * @mas: The maple state
 * @slots: The pointer to the slots
 * @offset: The offset into the slots array to fetch
 *
 * Return: The entry stored in @slots at the @offset
 */
static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
		unsigned char offset)
{
	return mt_slot(mas->tree, slots, offset);
}

/*
 * mas_root() - Get the maple tree root.
 * @mas: The maple state.
 *
 * Return: The pointer to the root of the tree
 */
static __always_inline void *mas_root(struct ma_state *mas)
{
	return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
}

static inline void *mt_root_locked(struct maple_tree *mt)
{
	return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
}

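/*
 * The rcu_dereference_check()/rcu_dereference_protected() pairs above encode
 * the locking rules: readers must hold either the RCU read lock or the tree
 * lock, writers must hold the tree lock.  A minimal reader-side sketch using
 * the public API (the tree and index are hypothetical):
 *
 *	MA_STATE(mas, &my_tree, index, index);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = mas_walk(&mas);
 *	rcu_read_unlock();
 */
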
/*
 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
 * @mas: The maple state.
 *
 * Return: The pointer to the root of the tree
 */
static inline void *mas_root_locked(struct ma_state *mas)
{
	return mt_root_locked(mas->tree);
}

static inline struct maple_metadata *ma_meta(struct maple_node *mn,
					     enum maple_type mt)
{
	switch (mt) {
	case maple_arange_64:
		return &mn->ma64.meta;
	default:
		return &mn->mr64.meta;
	}
}

/*
 * ma_set_meta() - Set the metadata information of a node.
 * @mn: The maple node
 * @mt: The maple node type
 * @offset: The offset of the highest sub-gap in this node.
 * @end: The end of the data in this node.
 */
static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
			       unsigned char offset, unsigned char end)
{
	struct maple_metadata *meta = ma_meta(mn, mt);

	meta->gap = offset;
	meta->end = end;
}

/*
 * mt_clear_meta() - clear the metadata information of a node, if it exists
 * @mt: The maple tree
 * @mn: The maple node
 * @type: The maple node type
 */
static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
				 enum maple_type type)
{
	struct maple_metadata *meta;
	unsigned long *pivots;
	void __rcu **slots;
	void *next;

	switch (type) {
	case maple_range_64:
		pivots = mn->mr64.pivot;
		if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
			slots = mn->mr64.slot;
			next = mt_slot_locked(mt, slots,
					      MAPLE_RANGE64_SLOTS - 1);
			if (unlikely((mte_to_node(next) &&
				      mte_node_type(next))))
				return; /* no metadata, could be node */
		}
		fallthrough;
	case maple_arange_64:
		meta = ma_meta(mn, type);
		break;
	default:
		return;
	}

	meta->gap = 0;
	meta->end = 0;
}

/*
 * ma_meta_end() - Get the data end of a node from the metadata
 * @mn: The maple node
 * @mt: The maple node type
 */
static inline unsigned char ma_meta_end(struct maple_node *mn,
					enum maple_type mt)
{
	struct maple_metadata *meta = ma_meta(mn, mt);

	return meta->end;
}

/*
 * ma_meta_gap() - Get the largest gap location of a node from the metadata
 * @mn: The maple node
 */
static inline unsigned char ma_meta_gap(struct maple_node *mn)
{
	return mn->ma64.meta.gap;
}

/*
 * ma_set_meta_gap() - Set the largest gap location in a node's metadata
 * @mn: The maple node
 * @mt: The maple node type
 * @offset: The location of the largest gap.
 */
static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
				   unsigned char offset)
{

	struct maple_metadata *meta = ma_meta(mn, mt);

	meta->gap = offset;
}

/*
 * mat_add() - Add a @dead_enode to the ma_topiary list of dead nodes.
 * @mat - the ma_topiary, a linked list of dead nodes.
 * @dead_enode - the node to be marked as dead and added to the tail of the list
 *
 * Add the @dead_enode to the linked list in @mat.
 */
static inline void mat_add(struct ma_topiary *mat,
			   struct maple_enode *dead_enode)
{
	mte_set_node_dead(dead_enode);
	mte_to_mat(dead_enode)->next = NULL;
	if (!mat->tail) {
		mat->tail = mat->head = dead_enode;
		return;
	}

	mte_to_mat(mat->tail)->next = dead_enode;
	mat->tail = dead_enode;
}

static void mt_free_walk(struct rcu_head *head);
static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
			    bool free);
/*
 * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
 * @mas - the maple state
 * @mat - the ma_topiary linked list of dead nodes to free.
 *
 * Destroy walk a dead list.
 */
static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
{
	struct maple_enode *next;
	struct maple_node *node;
	bool in_rcu = mt_in_rcu(mas->tree);

	while (mat->head) {
		next = mte_to_mat(mat->head)->next;
		node = mte_to_node(mat->head);
		mt_destroy_walk(mat->head, mas->tree, !in_rcu);
		if (in_rcu)
			call_rcu(&node->rcu, mt_free_walk);
		mat->head = next;
	}
}

/*
 * mas_descend() - Descend into the slot stored in the ma_state.
 * @mas - the maple state.
 *
 * Note: Not RCU safe, only use in write side or debug code.
 */
static inline void mas_descend(struct ma_state *mas)
{
	enum maple_type type;
	unsigned long *pivots;
	struct maple_node *node;
	void __rcu **slots;

	node = mas_mn(mas);
	type = mte_node_type(mas->node);
	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);

	if (mas->offset)
		mas->min = pivots[mas->offset - 1] + 1;
	mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
	mas->node = mas_slot(mas, slots, mas->offset);
}

/*
 * mte_set_gap() - Set a maple node gap.
 * @mn: The encoded maple node
 * @gap: The offset of the gap to set
 * @val: The gap value
 */
static inline void mte_set_gap(const struct maple_enode *mn,
			       unsigned char gap, unsigned long val)
{
	switch (mte_node_type(mn)) {
	default:
		break;
	case maple_arange_64:
		mte_to_node(mn)->ma64.gap[gap] = val;
		break;
	}
}

/*
 * mas_ascend() - Walk up a level of the tree.
 * @mas: The maple state
 *
 * Sets the @mas->max and @mas->min to the correct values when walking up.  This
 * may cause several levels of walking up to find the correct min and max.
 * May find a dead node which will cause a premature return.
 * Return: 1 on dead node, 0 otherwise
 */
static int mas_ascend(struct ma_state *mas)
{
	struct maple_enode *p_enode; /* parent enode. */
	struct maple_enode *a_enode; /* ancestor enode. */
	struct maple_node *a_node; /* ancestor node. */
	struct maple_node *p_node; /* parent node. */
	unsigned char a_slot;
	enum maple_type a_type;
	unsigned long min, max;
	unsigned long *pivots;
	bool set_max = false, set_min = false;

	a_node = mas_mn(mas);
	if (ma_is_root(a_node)) {
		mas->offset = 0;
		return 0;
	}

	p_node = mte_parent(mas->node);
	if (unlikely(a_node == p_node))
		return 1;

	a_type = mas_parent_type(mas, mas->node);
	mas->offset = mte_parent_slot(mas->node);
	a_enode = mt_mk_node(p_node, a_type);

	/* Check to make sure all parent information is still accurate */
	if (p_node != mte_parent(mas->node))
		return 1;

	mas->node = a_enode;

	if (mte_is_root(a_enode)) {
		mas->max = ULONG_MAX;
		mas->min = 0;
		return 0;
	}

	min = 0;
	max = ULONG_MAX;
	if (!mas->offset) {
		min = mas->min;
		set_min = true;
	}

	if (mas->max == ULONG_MAX)
		set_max = true;

	do {
		p_enode = a_enode;
		a_type = mas_parent_type(mas, p_enode);
		a_node = mte_parent(p_enode);
		a_slot = mte_parent_slot(p_enode);
		a_enode = mt_mk_node(a_node, a_type);
		pivots = ma_pivots(a_node, a_type);

		if (unlikely(ma_dead_node(a_node)))
			return 1;

		if (!set_min && a_slot) {
			set_min = true;
			min = pivots[a_slot - 1] + 1;
		}

		if (!set_max && a_slot < mt_pivots[a_type]) {
			set_max = true;
			max = pivots[a_slot];
		}

		if (unlikely(ma_dead_node(a_node)))
			return 1;

		if (unlikely(ma_is_root(a_node)))
			break;

	} while (!set_min || !set_max);

	mas->max = max;
	mas->min = min;
	return 0;
}

/*
 * mas_pop_node() - Get a previously allocated maple node from the maple state.
 * @mas: The maple state
 *
 * Return: A pointer to a maple node.
 */
static inline struct maple_node *mas_pop_node(struct ma_state *mas)
{
	struct maple_alloc *ret, *node = mas->alloc;
	unsigned long total = mas_allocated(mas);
	unsigned int req = mas_alloc_req(mas);

	/* nothing or a request pending. */
	if (WARN_ON(!total))
		return NULL;

	if (total == 1) {
		/* single allocation in this ma_state */
		mas->alloc = NULL;
		ret = node;
		goto single_node;
	}

	if (node->node_count == 1) {
		/* Single allocation in this node. */
		mas->alloc = node->slot[0];
		mas->alloc->total = node->total - 1;
		ret = node;
		goto new_head;
	}
	node->total--;
	ret = node->slot[--node->node_count];
	node->slot[node->node_count] = NULL;

single_node:
new_head:
	if (req) {
		req++;
		mas_set_alloc_req(mas, req);
	}

	memset(ret, 0, sizeof(*ret));
	return (struct maple_node *)ret;
}

/*
 * mas_push_node() - Push a node back on the maple state allocation.
 * @mas: The maple state
 * @used: The used maple node
 *
 * Stores the maple node back into @mas->alloc for reuse.  Updates allocated and
 * requested node count as necessary.
 */
static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
{
	struct maple_alloc *reuse = (struct maple_alloc *)used;
	struct maple_alloc *head = mas->alloc;
	unsigned long count;
	unsigned int requested = mas_alloc_req(mas);

	count = mas_allocated(mas);

	reuse->request_count = 0;
	reuse->node_count = 0;
	if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
		head->slot[head->node_count++] = reuse;
		head->total++;
		goto done;
	}

	reuse->total = 1;
	if ((head) && !((unsigned long)head & 0x1)) {
		reuse->slot[0] = head;
		reuse->node_count = 1;
		reuse->total += head->total;
	}

	mas->alloc = reuse;
done:
	if (requested > 1)
		mas_set_alloc_req(mas, requested - 1);
}

/*
 * mas_alloc_nodes() - Allocate nodes into a maple state
 * @mas: The maple state
 * @gfp: The GFP Flags
 */
static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
{
	struct maple_alloc *node;
	unsigned long allocated = mas_allocated(mas);
	unsigned int requested = mas_alloc_req(mas);
	unsigned int count;
	void **slots = NULL;
	unsigned int max_req = 0;

	if (!requested)
		return;

	mas_set_alloc_req(mas, 0);
	if (mas->mas_flags & MA_STATE_PREALLOC) {
		if (allocated)
			return;
		WARN_ON(!allocated);
	}

	if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
		node = (struct maple_alloc *)mt_alloc_one(gfp);
		if (!node)
			goto nomem_one;

		if (allocated) {
			node->slot[0] = mas->alloc;
			node->node_count = 1;
		} else {
			node->node_count = 0;
		}

		mas->alloc = node;
		node->total = ++allocated;
		requested--;
	}

	node = mas->alloc;
	node->request_count = 0;
	while (requested) {
		max_req = MAPLE_ALLOC_SLOTS - node->node_count;
		slots = (void **)&node->slot[node->node_count];
		max_req = min(requested, max_req);
		count = mt_alloc_bulk(gfp, max_req, slots);
		if (!count)
			goto nomem_bulk;

		if (node->node_count == 0) {
			node->slot[0]->node_count = 0;
			node->slot[0]->request_count = 0;
		}

		node->node_count += count;
		allocated += count;
		node = node->slot[0];
		requested -= count;
	}
	mas->alloc->total = allocated;
	return;

nomem_bulk:
	/* Clean up potential freed allocations on bulk failure */
	memset(slots, 0, max_req * sizeof(unsigned long));
nomem_one:
	mas_set_alloc_req(mas, requested);
	if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
		mas->alloc->total = allocated;
	mas_set_err(mas, -ENOMEM);
}

/*
 * mas_free() - Free an encoded maple node
 * @mas: The maple state
 * @used: The encoded maple node to free.
 *
 * Uses rcu free if necessary, pushes @used back on the maple state allocations
 * otherwise.
 */
static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
{
	struct maple_node *tmp = mte_to_node(used);

	if (mt_in_rcu(mas->tree))
		ma_free_rcu(tmp);
	else
		mas_push_node(mas, tmp);
}

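/*
 * The allocation helpers above back preallocation: a writer can allocate the
 * worst-case number of nodes up front so that the store itself does not fail
 * on memory.  A rough sketch of the external pattern, with locking and error
 * handling elided (the tree, range and entry names are hypothetical):
 *
 *	MA_STATE(mas, &my_tree, first, last);
 *
 *	if (mas_preallocate(&mas, entry, GFP_KERNEL))
 *		return -ENOMEM;
 *	mas_store_prealloc(&mas, entry);	// consumes the preallocated nodes
 */
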
/*
 * mas_node_count_gfp() - Check if enough nodes are allocated and request more
 * if there are not enough nodes.
 * @mas: The maple state
 * @count: The number of nodes needed
 * @gfp: the gfp flags
 */
static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
{
	unsigned long allocated = mas_allocated(mas);

	if (allocated < count) {
		mas_set_alloc_req(mas, count - allocated);
		mas_alloc_nodes(mas, gfp);
	}
}

/*
 * mas_node_count() - Check if enough nodes are allocated and request more if
 * there are not enough nodes.
 * @mas: The maple state
 * @count: The number of nodes needed
 *
 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
 */
static void mas_node_count(struct ma_state *mas, int count)
{
	return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
}

/*
 * mas_start() - Sets up maple state for operations.
 * @mas: The maple state.
 *
 * If mas->status == ma_start, then set the min, max and depth to
 * defaults.
 *
 * Return:
 * - If mas->node is an error or not ma_start, return NULL.
 * - If it's an empty tree: NULL & mas->status == ma_none
 * - If it's a single entry: The entry & mas->status == ma_root
 * - If it's a tree: NULL & mas->node == safe root node
 */
static inline struct maple_enode *mas_start(struct ma_state *mas)
{
	if (likely(mas_is_start(mas))) {
		struct maple_enode *root;

		mas->min = 0;
		mas->max = ULONG_MAX;

retry:
		mas->depth = 0;
		root = mas_root(mas);
		/* Tree with nodes */
		if (likely(xa_is_node(root))) {
			mas->depth = 1;
			mas->status = ma_active;
			mas->node = mte_safe_root(root);
			mas->offset = 0;
			if (mte_dead_node(mas->node))
				goto retry;

			return NULL;
		}

		/* empty tree */
		if (unlikely(!root)) {
			mas->node = NULL;
			mas->status = ma_none;
			mas->offset = MAPLE_NODE_SLOTS;
			return NULL;
		}

		/* Single entry tree */
		mas->status = ma_root;
		mas->offset = MAPLE_NODE_SLOTS;

		/* Single entry tree. */
		if (mas->index > 0)
			return NULL;

		return root;
	}

	return NULL;
}

/*
 * ma_data_end() - Find the end of the data in a node.
 * @node: The maple node
 * @type: The maple node type
 * @pivots: The array of pivots in the node
 * @max: The maximum value in the node
 *
 * Uses metadata to find the end of the data when possible.
 * Return: The zero indexed last slot with data (may be null).
 */
static __always_inline unsigned char ma_data_end(struct maple_node *node,
		enum maple_type type, unsigned long *pivots, unsigned long max)
{
	unsigned char offset;

	if (!pivots)
		return 0;

	if (type == maple_arange_64)
		return ma_meta_end(node, type);

	offset = mt_pivots[type] - 1;
	if (likely(!pivots[offset]))
		return ma_meta_end(node, type);

	if (likely(pivots[offset] == max))
		return offset;

	return mt_pivots[type];
}

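/*
 * For illustration, the three tree shapes mas_start() distinguishes, driven
 * through the public API (tree and pointer names hypothetical):
 *
 *	DEFINE_MTREE(mt);
 *
 *	// Empty tree: ma_root is NULL, walks end with status ma_none.
 *	mtree_store(&mt, 0, ptr, GFP_KERNEL);
 *	// Single entry tree: the entry for index 0 is kept in ma_root itself,
 *	// mas_start() returns it with status ma_root.
 *	mtree_store(&mt, 5, ptr, GFP_KERNEL);
 *	// Tree with nodes: ma_root is now an encoded node (xa_is_node()),
 *	// and walks descend from the safe root with status ma_active.
 */
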
/*
 * mas_data_end() - Find the end of the data (slot).
 * @mas: the maple state
 *
 * This method is optimized to check the metadata of a node if the node type
 * supports data end metadata.
 *
 * Return: The zero indexed last slot with data (may be null).
 */
static inline unsigned char mas_data_end(struct ma_state *mas)
{
	enum maple_type type;
	struct maple_node *node;
	unsigned char offset;
	unsigned long *pivots;

	type = mte_node_type(mas->node);
	node = mas_mn(mas);
	if (type == maple_arange_64)
		return ma_meta_end(node, type);

	pivots = ma_pivots(node, type);
	if (unlikely(ma_dead_node(node)))
		return 0;

	offset = mt_pivots[type] - 1;
	if (likely(!pivots[offset]))
		return ma_meta_end(node, type);

	if (likely(pivots[offset] == mas->max))
		return offset;

	return mt_pivots[type];
}

/*
 * mas_leaf_max_gap() - Returns the largest gap in a leaf node
 * @mas - the maple state
 *
 * Return: The maximum gap in the leaf.
 */
static unsigned long mas_leaf_max_gap(struct ma_state *mas)
{
	enum maple_type mt;
	unsigned long pstart, gap, max_gap;
	struct maple_node *mn;
	unsigned long *pivots;
	void __rcu **slots;
	unsigned char i;
	unsigned char max_piv;

	mt = mte_node_type(mas->node);
	mn = mas_mn(mas);
	slots = ma_slots(mn, mt);
	max_gap = 0;
	if (unlikely(ma_is_dense(mt))) {
		gap = 0;
		for (i = 0; i < mt_slots[mt]; i++) {
			if (slots[i]) {
				if (gap > max_gap)
					max_gap = gap;
				gap = 0;
			} else {
				gap++;
			}
		}
		if (gap > max_gap)
			max_gap = gap;
		return max_gap;
	}

	/*
	 * Checking the first implied pivot optimizes the loop below, and slot 1
	 * may be skipped if there is a gap in slot 0.
	 */
	pivots = ma_pivots(mn, mt);
	if (likely(!slots[0])) {
		max_gap = pivots[0] - mas->min + 1;
		i = 2;
	} else {
		i = 1;
	}

	/* reduce max_piv as the special case is checked before the loop */
	max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
	/*
	 * Check end implied pivot which can only be a gap on the right most
	 * node.
	 */
	if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
		gap = ULONG_MAX - pivots[max_piv];
		if (gap > max_gap)
			max_gap = gap;
	}

	for (; i <= max_piv; i++) {
		/* data == no gap. */
		if (likely(slots[i]))
			continue;

		pstart = pivots[i - 1];
		gap = pivots[i] - pstart;
		if (gap > max_gap)
			max_gap = gap;

		/* There cannot be two gaps in a row. */
		i++;
	}
	return max_gap;
}

/*
 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
 * @node: The maple node
 * @gaps: The pointer to the gaps
 * @mt: The maple node type
 * @off: Pointer to store the offset location of the gap.
 *
 * Uses the metadata data end to scan backwards across set gaps.
 *
 * Return: The maximum gap value
 */
static inline unsigned long
ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
	   unsigned char *off)
{
	unsigned char offset, i;
	unsigned long max_gap = 0;

	i = offset = ma_meta_end(node, mt);
	do {
		if (gaps[i] > max_gap) {
			max_gap = gaps[i];
			offset = i;
		}
	} while (i--);

	*off = offset;
	return max_gap;
}

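/*
 * A small, made-up example of the leaf scan above: for a leaf with
 * mas->min = 1, mas->max = 100, pivots {10, 20, 50} and slots {A, NULL, B, C},
 * the only gap is at slot 1 and spans indices 11-20, so the result is
 * pivots[1] - pivots[0] = 10.  Had slot 0 been empty instead, its gap would be
 * pivots[0] - mas->min + 1, since the minimum is implied rather than stored.
 */
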
/*
 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
 * @mas: The maple state.
 *
 * Return: The gap value.
 */
static inline unsigned long mas_max_gap(struct ma_state *mas)
{
	unsigned long *gaps;
	unsigned char offset;
	enum maple_type mt;
	struct maple_node *node;

	mt = mte_node_type(mas->node);
	if (ma_is_leaf(mt))
		return mas_leaf_max_gap(mas);

	node = mas_mn(mas);
	MAS_BUG_ON(mas, mt != maple_arange_64);
	offset = ma_meta_gap(node);
	gaps = ma_gaps(node, mt);
	return gaps[offset];
}

/*
 * mas_parent_gap() - Set the parent gap and any gaps above, as needed
 * @mas: The maple state
 * @offset: The gap offset in the parent to set
 * @new: The new gap value.
 *
 * Set the parent gap then continue to set the gap upwards, using the metadata
 * of the parent to see if it is necessary to check the node above.
 */
static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
		unsigned long new)
{
	unsigned long meta_gap = 0;
	struct maple_node *pnode;
	struct maple_enode *penode;
	unsigned long *pgaps;
	unsigned char meta_offset;
	enum maple_type pmt;

	pnode = mte_parent(mas->node);
	pmt = mas_parent_type(mas, mas->node);
	penode = mt_mk_node(pnode, pmt);
	pgaps = ma_gaps(pnode, pmt);

ascend:
	MAS_BUG_ON(mas, pmt != maple_arange_64);
	meta_offset = ma_meta_gap(pnode);
	meta_gap = pgaps[meta_offset];

	pgaps[offset] = new;

	if (meta_gap == new)
		return;

	if (offset != meta_offset) {
		if (meta_gap > new)
			return;

		ma_set_meta_gap(pnode, pmt, offset);
	} else if (new < meta_gap) {
		new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
		ma_set_meta_gap(pnode, pmt, meta_offset);
	}

	if (ma_is_root(pnode))
		return;

	/* Go to the parent node. */
	pnode = mte_parent(penode);
	pmt = mas_parent_type(mas, penode);
	pgaps = ma_gaps(pnode, pmt);
	offset = mte_parent_slot(penode);
	penode = mt_mk_node(pnode, pmt);
	goto ascend;
}

/*
 * mas_update_gap() - Update a node's gaps and propagate up if necessary.
 * @mas - the maple state.
 */
static inline void mas_update_gap(struct ma_state *mas)
{
	unsigned char pslot;
	unsigned long p_gap;
	unsigned long max_gap;

	if (!mt_is_alloc(mas->tree))
		return;

	if (mte_is_root(mas->node))
		return;

	max_gap = mas_max_gap(mas);

	pslot = mte_parent_slot(mas->node);
	p_gap = ma_gaps(mte_parent(mas->node),
			mas_parent_type(mas, mas->node))[pslot];

	if (p_gap != max_gap)
		mas_parent_gap(mas, pslot, max_gap);
}

/*
 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
 * @parent with the slot encoded.
 * @mas - the maple state (for the tree)
 * @parent - the maple encoded node containing the children.
 */
static inline void mas_adopt_children(struct ma_state *mas,
		struct maple_enode *parent)
{
	enum maple_type type = mte_node_type(parent);
	struct maple_node *node = mte_to_node(parent);
	void __rcu **slots = ma_slots(node, type);
	unsigned long *pivots = ma_pivots(node, type);
	struct maple_enode *child;
	unsigned char offset;

	offset = ma_data_end(node, type, pivots, mas->max);
	do {
		child = mas_slot_locked(mas, slots, offset);
		mas_set_parent(mas, child, parent, offset);
	} while (offset--);
}

/*
 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old
 * node as dead.
 * @mas - the maple state with the new node
 * @old_enode - The old maple encoded node to replace.
 */
static inline void mas_put_in_tree(struct ma_state *mas,
		struct maple_enode *old_enode)
	__must_hold(mas->tree->ma_lock)
{
	unsigned char offset;
	void __rcu **slots;

	if (mte_is_root(mas->node)) {
		mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
		rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
		mas_set_height(mas);
	} else {

		offset = mte_parent_slot(mas->node);
		slots = ma_slots(mte_parent(mas->node),
				 mas_parent_type(mas, mas->node));
		rcu_assign_pointer(slots[offset], mas->node);
	}

	mte_set_node_dead(old_enode);
}

/*
 * mas_replace_node() - Replace a node by putting it in the tree, marking it
 * dead, and freeing it.  Uses the parent encoding to locate the maple node in
 * the tree.
 * @mas - the ma_state with @mas->node pointing to the new node.
 * @old_enode - The old maple encoded node.
 */
static inline void mas_replace_node(struct ma_state *mas,
		struct maple_enode *old_enode)
	__must_hold(mas->tree->ma_lock)
{
	mas_put_in_tree(mas, old_enode);
	mas_free(mas, old_enode);
}

/*
 * mas_find_child() - Find a child who has the parent @mas->node.
 * @mas: the maple state with the parent.
 * @child: the maple state to store the child.
 */
static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
	__must_hold(mas->tree->ma_lock)
{
	enum maple_type mt;
	unsigned char offset;
	unsigned char end;
	unsigned long *pivots;
	struct maple_enode *entry;
	struct maple_node *node;
	void __rcu **slots;

	mt = mte_node_type(mas->node);
	node = mas_mn(mas);
	slots = ma_slots(node, mt);
	pivots = ma_pivots(node, mt);
	end = ma_data_end(node, mt, pivots, mas->max);
	for (offset = mas->offset; offset <= end; offset++) {
		entry = mas_slot_locked(mas, slots, offset);
		if (mte_parent(entry) == node) {
			*child = *mas;
			mas->offset = offset + 1;
			child->offset = offset;
			mas_descend(child);
			child->offset = 0;
			return true;
		}
	}
	return false;
}

/*
 * mab_shift_right() - Shift the data in mab right.  Note, does not clean out the
 * old data or set b_node->b_end.
 * @b_node: the maple_big_node
 * @shift: the shift count
 */
static inline void mab_shift_right(struct maple_big_node *b_node,
				   unsigned char shift)
{
	unsigned long size = b_node->b_end * sizeof(unsigned long);

	memmove(b_node->pivot + shift, b_node->pivot, size);
	memmove(b_node->slot + shift, b_node->slot, size);
	if (b_node->type == maple_arange_64)
		memmove(b_node->gap + shift, b_node->gap, size);
}

/*
 * mab_middle_node() - Check if a middle node is needed (unlikely)
 * @b_node: the maple_big_node that contains the data.
 * @split: the potential split location
 * @slot_count: the size that can be stored in a single node being considered.
 *
 * Return: true if a middle node is required.
 */
static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
				   unsigned char slot_count)
{
	unsigned char size = b_node->b_end;

	if (size >= 2 * slot_count)
		return true;

	if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
		return true;

	return false;
}

/*
 * mab_no_null_split() - ensure the split doesn't fall on a NULL
 * @b_node: the maple_big_node with the data
 * @split: the suggested split location
 * @slot_count: the number of slots in the node being considered.
 *
 * Return: the split location.
 */
static inline int mab_no_null_split(struct maple_big_node *b_node,
				    unsigned char split, unsigned char slot_count)
{
	if (!b_node->slot[split]) {
		/*
		 * If the split is less than the max slot && the right side will
		 * still be sufficient, then increment the split on NULL.
		 */
		if ((split < slot_count - 1) &&
		    (b_node->b_end - split) > (mt_min_slots[b_node->type]))
			split++;
		else
			split--;
	}
	return split;
}

/*
 * mab_calc_split() - Calculate the split location and if there needs to be two
 * splits.
 * @mas: The maple state
 * @bn: The maple_big_node with the data
 * @mid_split: The second split, if required.  0 otherwise.
 * @min: The lower limit of the data in @bn
 *
 * Return: The first split location.  The middle split is set in @mid_split.
 */
static inline int mab_calc_split(struct ma_state *mas,
	 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
{
	unsigned char b_end = bn->b_end;
	int split = b_end / 2; /* Assume equal split. */
	unsigned char slot_min, slot_count = mt_slots[bn->type];

	/*
	 * To support gap tracking, all NULL entries are kept together and a node
	 * cannot end on a NULL entry, with the exception of the left-most leaf.  The
	 * limitation means that the split of a node must be checked for this
	 * condition and be able to put more data in one direction or the other.
	 */
	if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
		*mid_split = 0;
		split = b_end - mt_min_slots[bn->type];

		if (!ma_is_leaf(bn->type))
			return split;

		mas->mas_flags |= MA_STATE_REBALANCE;
		if (!bn->slot[split])
			split--;
		return split;
	}

	/*
	 * Although extremely rare, it is possible to enter what is known as the
	 * 3-way split scenario.  The 3-way split comes about by means of a store
	 * of a range that overwrites the end and beginning of two full nodes.
	 * The result is a set of entries that cannot be stored in 2 nodes.
	 * Sometimes, these two nodes can also be located in different parent
	 * nodes which are also full.  This can carry upwards all the way to the
	 * root in the worst case.
	 */
	if (unlikely(mab_middle_node(bn, split, slot_count))) {
		split = b_end / 3;
		*mid_split = split * 2;
	} else {
		slot_min = mt_min_slots[bn->type];

		*mid_split = 0;
		/*
		 * Avoid having a range less than the slot count unless it
		 * causes one node to be deficient.
		 * NOTE: mt_min_slots is 1 based, b_end and split are zero based.
		 */
		while ((split < slot_count - 1) &&
		       ((bn->pivot[split] - min) < slot_count - 1) &&
		       (b_end - split > slot_min))
			split++;
	}

	/* Avoid ending a node on a NULL entry */
	split = mab_no_null_split(bn, split, slot_count);

	if (unlikely(*mid_split))
		*mid_split = mab_no_null_split(bn, *mid_split, slot_count);

	return split;
}

/*
 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
 * and set @b_node->b_end to the next free slot.
 * @mas: The maple state
 * @mas_start: The starting slot to copy
 * @mas_end: The end slot to copy (inclusively)
 * @b_node: The maple_big_node to place the data
 * @mab_start: The starting location in maple_big_node to store the data.
 */
static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
			unsigned char mas_end, struct maple_big_node *b_node,
			unsigned char mab_start)
{
	enum maple_type mt;
	struct maple_node *node;
	void __rcu **slots;
	unsigned long *pivots, *gaps;
	int i = mas_start, j = mab_start;
	unsigned char piv_end;

	node = mas_mn(mas);
	mt = mte_node_type(mas->node);
	pivots = ma_pivots(node, mt);
	if (!i) {
		b_node->pivot[j] = pivots[i++];
		if (unlikely(i > mas_end))
			goto complete;
		j++;
	}

	piv_end = min(mas_end, mt_pivots[mt]);
	for (; i < piv_end; i++, j++) {
		b_node->pivot[j] = pivots[i];
		if (unlikely(!b_node->pivot[j]))
			break;

		if (unlikely(mas->max == b_node->pivot[j]))
			goto complete;
	}

	if (likely(i <= mas_end))
		b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);

complete:
	b_node->b_end = ++j;
	j -= mab_start;
	slots = ma_slots(node, mt);
	memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
	if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
		gaps = ma_gaps(node, mt);
		memcpy(b_node->gap + mab_start, gaps + mas_start,
		       sizeof(unsigned long) * j);
	}
}

/*
 * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
 * @node: The maple node
 * @mt: The maple type
 * @end: The node end
 */
static inline void mas_leaf_set_meta(struct maple_node *node,
		enum maple_type mt, unsigned char end)
{
	if (end < mt_slots[mt] - 1)
		ma_set_meta(node, mt, 0, end);
}

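/*
 * A rough illustration of the split math above, assuming maple_range_64 nodes
 * with 16 slots: once the big node holds b_end >= 32 entries (or 31 with a
 * NULL landing on the split), mab_middle_node() reports that two nodes cannot
 * hold the data, so mab_calc_split() falls back to thirds: split = b_end / 3
 * and mid_split = 2 * split, producing three new nodes.  Otherwise a single
 * split near b_end / 2 is nudged forward or backward by mab_no_null_split()
 * so that neither resulting node ends on a NULL entry.
 */
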
1984 */ 1985 static inline void mab_mas_cp(struct maple_big_node *b_node, 1986 unsigned char mab_start, unsigned char mab_end, 1987 struct ma_state *mas, bool new_max) 1988 { 1989 int i, j = 0; 1990 enum maple_type mt = mte_node_type(mas->node); 1991 struct maple_node *node = mte_to_node(mas->node); 1992 void __rcu **slots = ma_slots(node, mt); 1993 unsigned long *pivots = ma_pivots(node, mt); 1994 unsigned long *gaps = NULL; 1995 unsigned char end; 1996 1997 if (mab_end - mab_start > mt_pivots[mt]) 1998 mab_end--; 1999 2000 if (!pivots[mt_pivots[mt] - 1]) 2001 slots[mt_pivots[mt]] = NULL; 2002 2003 i = mab_start; 2004 do { 2005 pivots[j++] = b_node->pivot[i++]; 2006 } while (i <= mab_end && likely(b_node->pivot[i])); 2007 2008 memcpy(slots, b_node->slot + mab_start, 2009 sizeof(void *) * (i - mab_start)); 2010 2011 if (new_max) 2012 mas->max = b_node->pivot[i - 1]; 2013 2014 end = j - 1; 2015 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 2016 unsigned long max_gap = 0; 2017 unsigned char offset = 0; 2018 2019 gaps = ma_gaps(node, mt); 2020 do { 2021 gaps[--j] = b_node->gap[--i]; 2022 if (gaps[j] > max_gap) { 2023 offset = j; 2024 max_gap = gaps[j]; 2025 } 2026 } while (j); 2027 2028 ma_set_meta(node, mt, offset, end); 2029 } else { 2030 mas_leaf_set_meta(node, mt, end); 2031 } 2032 } 2033 2034 /* 2035 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 2036 * @mas: The maple state 2037 * @end: The maple node end 2038 * @mt: The maple node type 2039 */ 2040 static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, 2041 enum maple_type mt) 2042 { 2043 if (!(mas->mas_flags & MA_STATE_BULK)) 2044 return; 2045 2046 if (mte_is_root(mas->node)) 2047 return; 2048 2049 if (end > mt_min_slots[mt]) { 2050 mas->mas_flags &= ~MA_STATE_REBALANCE; 2051 return; 2052 } 2053 } 2054 2055 /* 2056 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2057 * data from a maple encoded node. 2058 * @wr_mas: the maple write state 2059 * @b_node: the maple_big_node to fill with data 2060 * @offset_end: the offset to end copying 2061 * 2062 * Return: The actual end of the data stored in @b_node 2063 */ 2064 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 2065 struct maple_big_node *b_node, unsigned char offset_end) 2066 { 2067 unsigned char slot; 2068 unsigned char b_end; 2069 /* Possible underflow of piv will wrap back to 0 before use. */ 2070 unsigned long piv; 2071 struct ma_state *mas = wr_mas->mas; 2072 2073 b_node->type = wr_mas->type; 2074 b_end = 0; 2075 slot = mas->offset; 2076 if (slot) { 2077 /* Copy start data up to insert. */ 2078 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 2079 b_end = b_node->b_end; 2080 piv = b_node->pivot[b_end - 1]; 2081 } else 2082 piv = mas->min - 1; 2083 2084 if (piv + 1 < mas->index) { 2085 /* Handle range starting after old range */ 2086 b_node->slot[b_end] = wr_mas->content; 2087 if (!wr_mas->content) 2088 b_node->gap[b_end] = mas->index - 1 - piv; 2089 b_node->pivot[b_end++] = mas->index - 1; 2090 } 2091 2092 /* Store the new entry. */ 2093 mas->offset = b_end; 2094 b_node->slot[b_end] = wr_mas->entry; 2095 b_node->pivot[b_end] = mas->last; 2096 2097 /* Appended. 
*/ 2098 if (mas->last >= mas->max) 2099 goto b_end; 2100 2101 /* Handle new range ending before old range ends */ 2102 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2103 if (piv > mas->last) { 2104 if (piv == ULONG_MAX) 2105 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); 2106 2107 if (offset_end != slot) 2108 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2109 offset_end); 2110 2111 b_node->slot[++b_end] = wr_mas->content; 2112 if (!wr_mas->content) 2113 b_node->gap[b_end] = piv - mas->last + 1; 2114 b_node->pivot[b_end] = piv; 2115 } 2116 2117 slot = offset_end + 1; 2118 if (slot > mas->end) 2119 goto b_end; 2120 2121 /* Copy end data to the end of the node. */ 2122 mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end); 2123 b_node->b_end--; 2124 return; 2125 2126 b_end: 2127 b_node->b_end = b_end; 2128 } 2129 2130 /* 2131 * mas_prev_sibling() - Find the previous node with the same parent. 2132 * @mas: the maple state 2133 * 2134 * Return: True if there is a previous sibling, false otherwise. 2135 */ 2136 static inline bool mas_prev_sibling(struct ma_state *mas) 2137 { 2138 unsigned int p_slot = mte_parent_slot(mas->node); 2139 2140 if (mte_is_root(mas->node)) 2141 return false; 2142 2143 if (!p_slot) 2144 return false; 2145 2146 mas_ascend(mas); 2147 mas->offset = p_slot - 1; 2148 mas_descend(mas); 2149 return true; 2150 } 2151 2152 /* 2153 * mas_next_sibling() - Find the next node with the same parent. 2154 * @mas: the maple state 2155 * 2156 * Return: true if there is a next sibling, false otherwise. 2157 */ 2158 static inline bool mas_next_sibling(struct ma_state *mas) 2159 { 2160 MA_STATE(parent, mas->tree, mas->index, mas->last); 2161 2162 if (mte_is_root(mas->node)) 2163 return false; 2164 2165 parent = *mas; 2166 mas_ascend(&parent); 2167 parent.offset = mte_parent_slot(mas->node) + 1; 2168 if (parent.offset > mas_data_end(&parent)) 2169 return false; 2170 2171 *mas = parent; 2172 mas_descend(mas); 2173 return true; 2174 } 2175 2176 /* 2177 * mte_node_or_none() - Set the enode and state. 2178 * @enode: The encoded maple node. 2179 * 2180 * Set the node to the enode and the status. 2181 */ 2182 static inline void mas_node_or_none(struct ma_state *mas, 2183 struct maple_enode *enode) 2184 { 2185 if (enode) { 2186 mas->node = enode; 2187 mas->status = ma_active; 2188 } else { 2189 mas->node = NULL; 2190 mas->status = ma_none; 2191 } 2192 } 2193 2194 /* 2195 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 2196 * @wr_mas: The maple write state 2197 * 2198 * Uses mas_slot_locked() and does not need to worry about dead nodes. 2199 */ 2200 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 2201 { 2202 struct ma_state *mas = wr_mas->mas; 2203 unsigned char count, offset; 2204 2205 if (unlikely(ma_is_dense(wr_mas->type))) { 2206 wr_mas->r_max = wr_mas->r_min = mas->index; 2207 mas->offset = mas->index = mas->min; 2208 return; 2209 } 2210 2211 wr_mas->node = mas_mn(wr_mas->mas); 2212 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2213 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, 2214 wr_mas->pivots, mas->max); 2215 offset = mas->offset; 2216 2217 while (offset < count && mas->index > wr_mas->pivots[offset]) 2218 offset++; 2219 2220 wr_mas->r_max = offset < count ? 
wr_mas->pivots[offset] : mas->max; 2221 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2222 wr_mas->offset_end = mas->offset = offset; 2223 } 2224 2225 /* 2226 * mast_rebalance_next() - Rebalance against the next node 2227 * @mast: The maple subtree state 2228 * @old_r: The encoded maple node to the right (next node). 2229 */ 2230 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2231 { 2232 unsigned char b_end = mast->bn->b_end; 2233 2234 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2235 mast->bn, b_end); 2236 mast->orig_r->last = mast->orig_r->max; 2237 } 2238 2239 /* 2240 * mast_rebalance_prev() - Rebalance against the previous node 2241 * @mast: The maple subtree state 2242 * @old_l: The encoded maple node to the left (previous node) 2243 */ 2244 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2245 { 2246 unsigned char end = mas_data_end(mast->orig_l) + 1; 2247 unsigned char b_end = mast->bn->b_end; 2248 2249 mab_shift_right(mast->bn, end); 2250 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2251 mast->l->min = mast->orig_l->min; 2252 mast->orig_l->index = mast->orig_l->min; 2253 mast->bn->b_end = end + b_end; 2254 mast->l->offset += end; 2255 } 2256 2257 /* 2258 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2259 * the node to the right. Checking the nodes to the right then the left at each 2260 * level upwards until root is reached. 2261 * Data is copied into the @mast->bn. 2262 * @mast: The maple_subtree_state. 2263 */ 2264 static inline 2265 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2266 { 2267 struct ma_state r_tmp = *mast->orig_r; 2268 struct ma_state l_tmp = *mast->orig_l; 2269 unsigned char depth = 0; 2270 2271 r_tmp = *mast->orig_r; 2272 l_tmp = *mast->orig_l; 2273 do { 2274 mas_ascend(mast->orig_r); 2275 mas_ascend(mast->orig_l); 2276 depth++; 2277 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2278 mast->orig_r->offset++; 2279 do { 2280 mas_descend(mast->orig_r); 2281 mast->orig_r->offset = 0; 2282 } while (--depth); 2283 2284 mast_rebalance_next(mast); 2285 *mast->orig_l = l_tmp; 2286 return true; 2287 } else if (mast->orig_l->offset != 0) { 2288 mast->orig_l->offset--; 2289 do { 2290 mas_descend(mast->orig_l); 2291 mast->orig_l->offset = 2292 mas_data_end(mast->orig_l); 2293 } while (--depth); 2294 2295 mast_rebalance_prev(mast); 2296 *mast->orig_r = r_tmp; 2297 return true; 2298 } 2299 } while (!mte_is_root(mast->orig_r->node)); 2300 2301 *mast->orig_r = r_tmp; 2302 *mast->orig_l = l_tmp; 2303 return false; 2304 } 2305 2306 /* 2307 * mast_ascend() - Ascend the original left and right maple states. 2308 * @mast: the maple subtree state. 2309 * 2310 * Ascend the original left and right sides. Set the offsets to point to the 2311 * data already in the new tree (@mast->l and @mast->r). 
2312 */ 2313 static inline void mast_ascend(struct maple_subtree_state *mast) 2314 { 2315 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2316 mas_ascend(mast->orig_l); 2317 mas_ascend(mast->orig_r); 2318 2319 mast->orig_r->offset = 0; 2320 mast->orig_r->index = mast->r->max; 2321 /* last should be larger than or equal to index */ 2322 if (mast->orig_r->last < mast->orig_r->index) 2323 mast->orig_r->last = mast->orig_r->index; 2324 2325 wr_mas.type = mte_node_type(mast->orig_r->node); 2326 mas_wr_node_walk(&wr_mas); 2327 /* Set up the left side of things */ 2328 mast->orig_l->offset = 0; 2329 mast->orig_l->index = mast->l->min; 2330 wr_mas.mas = mast->orig_l; 2331 wr_mas.type = mte_node_type(mast->orig_l->node); 2332 mas_wr_node_walk(&wr_mas); 2333 2334 mast->bn->type = wr_mas.type; 2335 } 2336 2337 /* 2338 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2339 * @mas: the maple state with the allocations. 2340 * @b_node: the maple_big_node with the type encoding. 2341 * 2342 * Use the node type from the maple_big_node to allocate a new node from the 2343 * ma_state. This function exists mainly for code readability. 2344 * 2345 * Return: A new maple encoded node 2346 */ 2347 static inline struct maple_enode 2348 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2349 { 2350 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2351 } 2352 2353 /* 2354 * mas_mab_to_node() - Set up right and middle nodes 2355 * 2356 * @mas: the maple state that contains the allocations. 2357 * @b_node: the node which contains the data. 2358 * @left: The pointer which will have the left node 2359 * @right: The pointer which may have the right node 2360 * @middle: the pointer which may have the middle node (rare) 2361 * @mid_split: the split location for the middle node 2362 * 2363 * Return: the split of left. 2364 */ 2365 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2366 struct maple_big_node *b_node, struct maple_enode **left, 2367 struct maple_enode **right, struct maple_enode **middle, 2368 unsigned char *mid_split, unsigned long min) 2369 { 2370 unsigned char split = 0; 2371 unsigned char slot_count = mt_slots[b_node->type]; 2372 2373 *left = mas_new_ma_node(mas, b_node); 2374 *right = NULL; 2375 *middle = NULL; 2376 *mid_split = 0; 2377 2378 if (b_node->b_end < slot_count) { 2379 split = b_node->b_end; 2380 } else { 2381 split = mab_calc_split(mas, b_node, mid_split, min); 2382 *right = mas_new_ma_node(mas, b_node); 2383 } 2384 2385 if (*mid_split) 2386 *middle = mas_new_ma_node(mas, b_node); 2387 2388 return split; 2389 2390 } 2391 2392 /* 2393 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2394 * pointer. 2395 * @b_node - the big node to add the entry 2396 * @mas - the maple state to get the pivot (mas->max) 2397 * @entry - the entry to add, if NULL nothing happens. 2398 */ 2399 static inline void mab_set_b_end(struct maple_big_node *b_node, 2400 struct ma_state *mas, 2401 void *entry) 2402 { 2403 if (!entry) 2404 return; 2405 2406 b_node->slot[b_node->b_end] = entry; 2407 if (mt_is_alloc(mas->tree)) 2408 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2409 b_node->pivot[b_node->b_end++] = mas->max; 2410 } 2411 2412 /* 2413 * mas_set_split_parent() - combine_then_separate helper function. 
Sets the parent
2414 * of @mas->node to either @left or @right, depending on @slot and @split
2415 *
2416 * @mas: the maple state with the node that needs a parent
2417 * @left: possible parent 1
2418 * @right: possible parent 2
2419 * @slot: the slot in which @mas->node was placed
2420 * @split: the split location between @left and @right
2421 */
2422 static inline void mas_set_split_parent(struct ma_state *mas,
2423 struct maple_enode *left,
2424 struct maple_enode *right,
2425 unsigned char *slot, unsigned char split)
2426 {
2427 if (mas_is_none(mas))
2428 return;
2429
2430 if ((*slot) <= split)
2431 mas_set_parent(mas, mas->node, left, *slot);
2432 else if (right)
2433 mas_set_parent(mas, mas->node, right, (*slot) - split - 1);
2434
2435 (*slot)++;
2436 }
2437
2438 /*
2439 * mte_mid_split_check() - Check if the next node passes the mid-split
2440 * @l: Pointer to the left encoded maple node.
2441 * @r: Pointer to the current right encoded maple node.
2442 * @right: The encoded maple node to switch to once @slot passes @mid_split.
2443 * @slot: The offset
2444 * @split: The split location.
2445 * @mid_split: The middle split.
2446 */
2447 static inline void mte_mid_split_check(struct maple_enode **l,
2448 struct maple_enode **r,
2449 struct maple_enode *right,
2450 unsigned char slot,
2451 unsigned char *split,
2452 unsigned char mid_split)
2453 {
2454 if (*r == right)
2455 return;
2456
2457 if (slot < mid_split)
2458 return;
2459
2460 *l = *r;
2461 *r = right;
2462 *split = mid_split;
2463 }
2464
2465 /*
2466 * mast_set_split_parents() - Helper function to set three nodes' parents. Slot
2467 * is taken from @mast->l.
2468 * @mast: the maple subtree state
2469 * @left: the left node
* @middle: the middle node
2470 * @right: the right node
2471 * @split: the split location
* @mid_split: the middle split location
2472 */
2473 static inline void mast_set_split_parents(struct maple_subtree_state *mast,
2474 struct maple_enode *left,
2475 struct maple_enode *middle,
2476 struct maple_enode *right,
2477 unsigned char split,
2478 unsigned char mid_split)
2479 {
2480 unsigned char slot;
2481 struct maple_enode *l = left;
2482 struct maple_enode *r = right;
2483
2484 if (mas_is_none(mast->l))
2485 return;
2486
2487 if (middle)
2488 r = middle;
2489
2490 slot = mast->l->offset;
2491
2492 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2493 mas_set_split_parent(mast->l, l, r, &slot, split);
2494
2495 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2496 mas_set_split_parent(mast->m, l, r, &slot, split);
2497
2498 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2499 mas_set_split_parent(mast->r, l, r, &slot, split);
2500 }
2501
2502 /*
2503 * mas_topiary_node() - Dispose of a single node
2504 * @mas: The maple state for pushing nodes
2505 * @tmp_mas: The maple state holding the node being disposed of
2506 * @in_rcu: If the tree is in rcu mode
2507 *
2508 * The node will either be RCU freed or pushed back on the maple state.
2509 */
2510 static inline void mas_topiary_node(struct ma_state *mas,
2511 struct ma_state *tmp_mas, bool in_rcu)
2512 {
2513 struct maple_node *tmp;
2514 struct maple_enode *enode;
2515
2516 if (mas_is_none(tmp_mas))
2517 return;
2518
2519 enode = tmp_mas->node;
2520 tmp = mte_to_node(enode);
2521 mte_set_node_dead(enode);
2522 if (in_rcu)
2523 ma_free_rcu(tmp);
2524 else
2525 mas_push_node(mas, tmp);
2526 }
2527
2528 /*
2529 * mas_topiary_replace() - Replace the data with new data, then repair the
2530 * parent links within the new tree. Iterate over the dead sub-tree, collect
2531 * the dead subtrees and topiary the nodes that are no longer of use.
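 *
 * Editor's illustration (not part of the kernel sources): the dead-node
 * marking done here is what allows lockless readers to keep walking under
 * rcu_read_lock() while a writer replaces part of the tree.  A minimal
 * read-side sketch using only the public API; the helper name
 * lockless_lookup() is hypothetical:
 *
 *	static void *lockless_lookup(struct maple_tree *mt, unsigned long index)
 *	{
 *		void *entry;
 *
 *		MA_STATE(mas, mt, index, index);
 *
 *		rcu_read_lock();
 *		entry = mas_walk(&mas);
 *		rcu_read_unlock();
 *		return entry;
 *	}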
2532 * 2533 * The new tree will have up to three children with the correct parent. Keep 2534 * track of the new entries as they need to be followed to find the next level 2535 * of new entries. 2536 * 2537 * The old tree will have up to three children with the old parent. Keep track 2538 * of the old entries as they may have more nodes below replaced. Nodes within 2539 * [index, last] are dead subtrees, others need to be freed and followed. 2540 * 2541 * @mas: The maple state pointing at the new data 2542 * @old_enode: The maple encoded node being replaced 2543 * 2544 */ 2545 static inline void mas_topiary_replace(struct ma_state *mas, 2546 struct maple_enode *old_enode) 2547 { 2548 struct ma_state tmp[3], tmp_next[3]; 2549 MA_TOPIARY(subtrees, mas->tree); 2550 bool in_rcu; 2551 int i, n; 2552 2553 /* Place data in tree & then mark node as old */ 2554 mas_put_in_tree(mas, old_enode); 2555 2556 /* Update the parent pointers in the tree */ 2557 tmp[0] = *mas; 2558 tmp[0].offset = 0; 2559 tmp[1].status = ma_none; 2560 tmp[2].status = ma_none; 2561 while (!mte_is_leaf(tmp[0].node)) { 2562 n = 0; 2563 for (i = 0; i < 3; i++) { 2564 if (mas_is_none(&tmp[i])) 2565 continue; 2566 2567 while (n < 3) { 2568 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2569 break; 2570 n++; 2571 } 2572 2573 mas_adopt_children(&tmp[i], tmp[i].node); 2574 } 2575 2576 if (MAS_WARN_ON(mas, n == 0)) 2577 break; 2578 2579 while (n < 3) 2580 tmp_next[n++].status = ma_none; 2581 2582 for (i = 0; i < 3; i++) 2583 tmp[i] = tmp_next[i]; 2584 } 2585 2586 /* Collect the old nodes that need to be discarded */ 2587 if (mte_is_leaf(old_enode)) 2588 return mas_free(mas, old_enode); 2589 2590 tmp[0] = *mas; 2591 tmp[0].offset = 0; 2592 tmp[0].node = old_enode; 2593 tmp[1].status = ma_none; 2594 tmp[2].status = ma_none; 2595 in_rcu = mt_in_rcu(mas->tree); 2596 do { 2597 n = 0; 2598 for (i = 0; i < 3; i++) { 2599 if (mas_is_none(&tmp[i])) 2600 continue; 2601 2602 while (n < 3) { 2603 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2604 break; 2605 2606 if ((tmp_next[n].min >= tmp_next->index) && 2607 (tmp_next[n].max <= tmp_next->last)) { 2608 mat_add(&subtrees, tmp_next[n].node); 2609 tmp_next[n].status = ma_none; 2610 } else { 2611 n++; 2612 } 2613 } 2614 } 2615 2616 if (MAS_WARN_ON(mas, n == 0)) 2617 break; 2618 2619 while (n < 3) 2620 tmp_next[n++].status = ma_none; 2621 2622 for (i = 0; i < 3; i++) { 2623 mas_topiary_node(mas, &tmp[i], in_rcu); 2624 tmp[i] = tmp_next[i]; 2625 } 2626 } while (!mte_is_leaf(tmp[0].node)); 2627 2628 for (i = 0; i < 3; i++) 2629 mas_topiary_node(mas, &tmp[i], in_rcu); 2630 2631 mas_mat_destroy(mas, &subtrees); 2632 } 2633 2634 /* 2635 * mas_wmb_replace() - Write memory barrier and replace 2636 * @mas: The maple state 2637 * @old: The old maple encoded node that is being replaced. 2638 * 2639 * Updates gap as necessary. 2640 */ 2641 static inline void mas_wmb_replace(struct ma_state *mas, 2642 struct maple_enode *old_enode) 2643 { 2644 /* Insert the new data in the tree */ 2645 mas_topiary_replace(mas, old_enode); 2646 2647 if (mte_is_leaf(mas->node)) 2648 return; 2649 2650 mas_update_gap(mas); 2651 } 2652 2653 /* 2654 * mast_cp_to_nodes() - Copy data out to nodes. 2655 * @mast: The maple subtree state 2656 * @left: The left encoded maple node 2657 * @middle: The middle encoded maple node 2658 * @right: The right encoded maple node 2659 * @split: The location to split between left and (middle ? middle : right) 2660 * @mid_split: The location to split between middle and right. 
2661 */ 2662 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2663 struct maple_enode *left, struct maple_enode *middle, 2664 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2665 { 2666 bool new_lmax = true; 2667 2668 mas_node_or_none(mast->l, left); 2669 mas_node_or_none(mast->m, middle); 2670 mas_node_or_none(mast->r, right); 2671 2672 mast->l->min = mast->orig_l->min; 2673 if (split == mast->bn->b_end) { 2674 mast->l->max = mast->orig_r->max; 2675 new_lmax = false; 2676 } 2677 2678 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2679 2680 if (middle) { 2681 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2682 mast->m->min = mast->bn->pivot[split] + 1; 2683 split = mid_split; 2684 } 2685 2686 mast->r->max = mast->orig_r->max; 2687 if (right) { 2688 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2689 mast->r->min = mast->bn->pivot[split] + 1; 2690 } 2691 } 2692 2693 /* 2694 * mast_combine_cp_left - Copy in the original left side of the tree into the 2695 * combined data set in the maple subtree state big node. 2696 * @mast: The maple subtree state 2697 */ 2698 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2699 { 2700 unsigned char l_slot = mast->orig_l->offset; 2701 2702 if (!l_slot) 2703 return; 2704 2705 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2706 } 2707 2708 /* 2709 * mast_combine_cp_right: Copy in the original right side of the tree into the 2710 * combined data set in the maple subtree state big node. 2711 * @mast: The maple subtree state 2712 */ 2713 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2714 { 2715 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2716 return; 2717 2718 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2719 mt_slot_count(mast->orig_r->node), mast->bn, 2720 mast->bn->b_end); 2721 mast->orig_r->last = mast->orig_r->max; 2722 } 2723 2724 /* 2725 * mast_sufficient: Check if the maple subtree state has enough data in the big 2726 * node to create at least one sufficient node 2727 * @mast: the maple subtree state 2728 */ 2729 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2730 { 2731 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2732 return true; 2733 2734 return false; 2735 } 2736 2737 /* 2738 * mast_overflow: Check if there is too much data in the subtree state for a 2739 * single node. 
2740 * @mast: The maple subtree state 2741 */ 2742 static inline bool mast_overflow(struct maple_subtree_state *mast) 2743 { 2744 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) 2745 return true; 2746 2747 return false; 2748 } 2749 2750 static inline void *mtree_range_walk(struct ma_state *mas) 2751 { 2752 unsigned long *pivots; 2753 unsigned char offset; 2754 struct maple_node *node; 2755 struct maple_enode *next, *last; 2756 enum maple_type type; 2757 void __rcu **slots; 2758 unsigned char end; 2759 unsigned long max, min; 2760 unsigned long prev_max, prev_min; 2761 2762 next = mas->node; 2763 min = mas->min; 2764 max = mas->max; 2765 do { 2766 last = next; 2767 node = mte_to_node(next); 2768 type = mte_node_type(next); 2769 pivots = ma_pivots(node, type); 2770 end = ma_data_end(node, type, pivots, max); 2771 prev_min = min; 2772 prev_max = max; 2773 if (pivots[0] >= mas->index) { 2774 offset = 0; 2775 max = pivots[0]; 2776 goto next; 2777 } 2778 2779 offset = 1; 2780 while (offset < end) { 2781 if (pivots[offset] >= mas->index) { 2782 max = pivots[offset]; 2783 break; 2784 } 2785 offset++; 2786 } 2787 2788 min = pivots[offset - 1] + 1; 2789 next: 2790 slots = ma_slots(node, type); 2791 next = mt_slot(mas->tree, slots, offset); 2792 if (unlikely(ma_dead_node(node))) 2793 goto dead_node; 2794 } while (!ma_is_leaf(type)); 2795 2796 mas->end = end; 2797 mas->offset = offset; 2798 mas->index = min; 2799 mas->last = max; 2800 mas->min = prev_min; 2801 mas->max = prev_max; 2802 mas->node = last; 2803 return (void *)next; 2804 2805 dead_node: 2806 mas_reset(mas); 2807 return NULL; 2808 } 2809 2810 /* 2811 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2812 * @mas: The starting maple state 2813 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2814 * @count: The estimated count of iterations needed. 2815 * 2816 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2817 * is hit. First @b_node is split into two entries which are inserted into the 2818 * next iteration of the loop. @b_node is returned populated with the final 2819 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2820 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2821 * to account of what has been copied into the new sub-tree. The update of 2822 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2823 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2824 * the new sub-tree in case the sub-tree becomes the full tree. 2825 * 2826 * Return: the number of elements in b_node during the last loop. 2827 */ 2828 static int mas_spanning_rebalance(struct ma_state *mas, 2829 struct maple_subtree_state *mast, unsigned char count) 2830 { 2831 unsigned char split, mid_split; 2832 unsigned char slot = 0; 2833 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2834 struct maple_enode *old_enode; 2835 2836 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2837 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2838 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2839 2840 /* 2841 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2842 * Rebalancing is done by use of the ``struct maple_topiary``. 2843 */ 2844 mast->l = &l_mas; 2845 mast->m = &m_mas; 2846 mast->r = &r_mas; 2847 l_mas.status = r_mas.status = m_mas.status = ma_none; 2848 2849 /* Check if this is not root and has sufficient data. 
*/ 2850 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2851 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2852 mast_spanning_rebalance(mast); 2853 2854 l_mas.depth = 0; 2855 2856 /* 2857 * Each level of the tree is examined and balanced, pushing data to the left or 2858 * right, or rebalancing against left or right nodes is employed to avoid 2859 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2860 * the tree is created, there may be a mix of new and old nodes. The old nodes 2861 * will have the incorrect parent pointers and currently be in two trees: the 2862 * original tree and the partially new tree. To remedy the parent pointers in 2863 * the old tree, the new data is swapped into the active tree and a walk down 2864 * the tree is performed and the parent pointers are updated. 2865 * See mas_topiary_replace() for more information. 2866 */ 2867 while (count--) { 2868 mast->bn->b_end--; 2869 mast->bn->type = mte_node_type(mast->orig_l->node); 2870 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2871 &mid_split, mast->orig_l->min); 2872 mast_set_split_parents(mast, left, middle, right, split, 2873 mid_split); 2874 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2875 2876 /* 2877 * Copy data from next level in the tree to mast->bn from next 2878 * iteration 2879 */ 2880 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2881 mast->bn->type = mte_node_type(left); 2882 l_mas.depth++; 2883 2884 /* Root already stored in l->node. */ 2885 if (mas_is_root_limits(mast->l)) 2886 goto new_root; 2887 2888 mast_ascend(mast); 2889 mast_combine_cp_left(mast); 2890 l_mas.offset = mast->bn->b_end; 2891 mab_set_b_end(mast->bn, &l_mas, left); 2892 mab_set_b_end(mast->bn, &m_mas, middle); 2893 mab_set_b_end(mast->bn, &r_mas, right); 2894 2895 /* Copy anything necessary out of the right node. */ 2896 mast_combine_cp_right(mast); 2897 mast->orig_l->last = mast->orig_l->max; 2898 2899 if (mast_sufficient(mast)) 2900 continue; 2901 2902 if (mast_overflow(mast)) 2903 continue; 2904 2905 /* May be a new root stored in mast->bn */ 2906 if (mas_is_root_limits(mast->orig_l)) 2907 break; 2908 2909 mast_spanning_rebalance(mast); 2910 2911 /* rebalancing from other nodes may require another loop. */ 2912 if (!count) 2913 count++; 2914 } 2915 2916 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2917 mte_node_type(mast->orig_l->node)); 2918 l_mas.depth++; 2919 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2920 mas_set_parent(mas, left, l_mas.node, slot); 2921 if (middle) 2922 mas_set_parent(mas, middle, l_mas.node, ++slot); 2923 2924 if (right) 2925 mas_set_parent(mas, right, l_mas.node, ++slot); 2926 2927 if (mas_is_root_limits(mast->l)) { 2928 new_root: 2929 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2930 while (!mte_is_root(mast->orig_l->node)) 2931 mast_ascend(mast); 2932 } else { 2933 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2934 } 2935 2936 old_enode = mast->orig_l->node; 2937 mas->depth = l_mas.depth; 2938 mas->node = l_mas.node; 2939 mas->min = l_mas.min; 2940 mas->max = l_mas.max; 2941 mas->offset = l_mas.offset; 2942 mas_wmb_replace(mas, old_enode); 2943 mtree_range_walk(mas); 2944 return mast->bn->b_end; 2945 } 2946 2947 /* 2948 * mas_rebalance() - Rebalance a given node. 2949 * @mas: The maple state 2950 * @b_node: The big maple node. 2951 * 2952 * Rebalance two nodes into a single node or two new nodes that are sufficient. 
2953 * Continue upwards until tree is sufficient. 2954 * 2955 * Return: the number of elements in b_node during the last loop. 2956 */ 2957 static inline int mas_rebalance(struct ma_state *mas, 2958 struct maple_big_node *b_node) 2959 { 2960 char empty_count = mas_mt_height(mas); 2961 struct maple_subtree_state mast; 2962 unsigned char shift, b_end = ++b_node->b_end; 2963 2964 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2965 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2966 2967 trace_ma_op(__func__, mas); 2968 2969 /* 2970 * Rebalancing occurs if a node is insufficient. Data is rebalanced 2971 * against the node to the right if it exists, otherwise the node to the 2972 * left of this node is rebalanced against this node. If rebalancing 2973 * causes just one node to be produced instead of two, then the parent 2974 * is also examined and rebalanced if it is insufficient. Every level 2975 * tries to combine the data in the same way. If one node contains the 2976 * entire range of the tree, then that node is used as a new root node. 2977 */ 2978 mas_node_count(mas, empty_count * 2 - 1); 2979 if (mas_is_err(mas)) 2980 return 0; 2981 2982 mast.orig_l = &l_mas; 2983 mast.orig_r = &r_mas; 2984 mast.bn = b_node; 2985 mast.bn->type = mte_node_type(mas->node); 2986 2987 l_mas = r_mas = *mas; 2988 2989 if (mas_next_sibling(&r_mas)) { 2990 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 2991 r_mas.last = r_mas.index = r_mas.max; 2992 } else { 2993 mas_prev_sibling(&l_mas); 2994 shift = mas_data_end(&l_mas) + 1; 2995 mab_shift_right(b_node, shift); 2996 mas->offset += shift; 2997 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 2998 b_node->b_end = shift + b_end; 2999 l_mas.index = l_mas.last = l_mas.min; 3000 } 3001 3002 return mas_spanning_rebalance(mas, &mast, empty_count); 3003 } 3004 3005 /* 3006 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple 3007 * state. 3008 * @mas: The maple state 3009 * @end: The end of the left-most node. 3010 * 3011 * During a mass-insert event (such as forking), it may be necessary to 3012 * rebalance the left-most node when it is not sufficient. 3013 */ 3014 static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) 3015 { 3016 enum maple_type mt = mte_node_type(mas->node); 3017 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; 3018 struct maple_enode *eparent, *old_eparent; 3019 unsigned char offset, tmp, split = mt_slots[mt] / 2; 3020 void __rcu **l_slots, **slots; 3021 unsigned long *l_pivs, *pivs, gap; 3022 bool in_rcu = mt_in_rcu(mas->tree); 3023 3024 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3025 3026 l_mas = *mas; 3027 mas_prev_sibling(&l_mas); 3028 3029 /* set up node. */ 3030 if (in_rcu) { 3031 /* Allocate for both left and right as well as parent. 
*/ 3032 mas_node_count(mas, 3); 3033 if (mas_is_err(mas)) 3034 return; 3035 3036 newnode = mas_pop_node(mas); 3037 } else { 3038 newnode = &reuse; 3039 } 3040 3041 node = mas_mn(mas); 3042 newnode->parent = node->parent; 3043 slots = ma_slots(newnode, mt); 3044 pivs = ma_pivots(newnode, mt); 3045 left = mas_mn(&l_mas); 3046 l_slots = ma_slots(left, mt); 3047 l_pivs = ma_pivots(left, mt); 3048 if (!l_slots[split]) 3049 split++; 3050 tmp = mas_data_end(&l_mas) - split; 3051 3052 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); 3053 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); 3054 pivs[tmp] = l_mas.max; 3055 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); 3056 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); 3057 3058 l_mas.max = l_pivs[split]; 3059 mas->min = l_mas.max + 1; 3060 old_eparent = mt_mk_node(mte_parent(l_mas.node), 3061 mas_parent_type(&l_mas, l_mas.node)); 3062 tmp += end; 3063 if (!in_rcu) { 3064 unsigned char max_p = mt_pivots[mt]; 3065 unsigned char max_s = mt_slots[mt]; 3066 3067 if (tmp < max_p) 3068 memset(pivs + tmp, 0, 3069 sizeof(unsigned long) * (max_p - tmp)); 3070 3071 if (tmp < mt_slots[mt]) 3072 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3073 3074 memcpy(node, newnode, sizeof(struct maple_node)); 3075 ma_set_meta(node, mt, 0, tmp - 1); 3076 mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), 3077 l_pivs[split]); 3078 3079 /* Remove data from l_pivs. */ 3080 tmp = split + 1; 3081 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); 3082 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3083 ma_set_meta(left, mt, 0, split); 3084 eparent = old_eparent; 3085 3086 goto done; 3087 } 3088 3089 /* RCU requires replacing both l_mas, mas, and parent. */ 3090 mas->node = mt_mk_node(newnode, mt); 3091 ma_set_meta(newnode, mt, 0, tmp); 3092 3093 new_left = mas_pop_node(mas); 3094 new_left->parent = left->parent; 3095 mt = mte_node_type(l_mas.node); 3096 slots = ma_slots(new_left, mt); 3097 pivs = ma_pivots(new_left, mt); 3098 memcpy(slots, l_slots, sizeof(void *) * split); 3099 memcpy(pivs, l_pivs, sizeof(unsigned long) * split); 3100 ma_set_meta(new_left, mt, 0, split); 3101 l_mas.node = mt_mk_node(new_left, mt); 3102 3103 /* replace parent. */ 3104 offset = mte_parent_slot(mas->node); 3105 mt = mas_parent_type(&l_mas, l_mas.node); 3106 parent = mas_pop_node(mas); 3107 slots = ma_slots(parent, mt); 3108 pivs = ma_pivots(parent, mt); 3109 memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); 3110 rcu_assign_pointer(slots[offset], mas->node); 3111 rcu_assign_pointer(slots[offset - 1], l_mas.node); 3112 pivs[offset - 1] = l_mas.max; 3113 eparent = mt_mk_node(parent, mt); 3114 done: 3115 gap = mas_leaf_max_gap(mas); 3116 mte_set_gap(eparent, mte_parent_slot(mas->node), gap); 3117 gap = mas_leaf_max_gap(&l_mas); 3118 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); 3119 mas_ascend(mas); 3120 3121 if (in_rcu) { 3122 mas_replace_node(mas, old_eparent); 3123 mas_adopt_children(mas, mas->node); 3124 } 3125 3126 mas_update_gap(mas); 3127 } 3128 3129 /* 3130 * mas_split_final_node() - Split the final node in a subtree operation. 3131 * @mast: the maple subtree state 3132 * @mas: The maple state 3133 * @height: The height of the tree in case it's a new root. 
3134 */ 3135 static inline void mas_split_final_node(struct maple_subtree_state *mast, 3136 struct ma_state *mas, int height) 3137 { 3138 struct maple_enode *ancestor; 3139 3140 if (mte_is_root(mas->node)) { 3141 if (mt_is_alloc(mas->tree)) 3142 mast->bn->type = maple_arange_64; 3143 else 3144 mast->bn->type = maple_range_64; 3145 mas->depth = height; 3146 } 3147 /* 3148 * Only a single node is used here, could be root. 3149 * The Big_node data should just fit in a single node. 3150 */ 3151 ancestor = mas_new_ma_node(mas, mast->bn); 3152 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 3153 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 3154 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 3155 3156 mast->l->node = ancestor; 3157 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 3158 mas->offset = mast->bn->b_end - 1; 3159 } 3160 3161 /* 3162 * mast_fill_bnode() - Copy data into the big node in the subtree state 3163 * @mast: The maple subtree state 3164 * @mas: the maple state 3165 * @skip: The number of entries to skip for new nodes insertion. 3166 */ 3167 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 3168 struct ma_state *mas, 3169 unsigned char skip) 3170 { 3171 bool cp = true; 3172 unsigned char split; 3173 3174 memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); 3175 memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); 3176 memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); 3177 mast->bn->b_end = 0; 3178 3179 if (mte_is_root(mas->node)) { 3180 cp = false; 3181 } else { 3182 mas_ascend(mas); 3183 mas->offset = mte_parent_slot(mas->node); 3184 } 3185 3186 if (cp && mast->l->offset) 3187 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 3188 3189 split = mast->bn->b_end; 3190 mab_set_b_end(mast->bn, mast->l, mast->l->node); 3191 mast->r->offset = mast->bn->b_end; 3192 mab_set_b_end(mast->bn, mast->r, mast->r->node); 3193 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 3194 cp = false; 3195 3196 if (cp) 3197 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 3198 mast->bn, mast->bn->b_end); 3199 3200 mast->bn->b_end--; 3201 mast->bn->type = mte_node_type(mas->node); 3202 } 3203 3204 /* 3205 * mast_split_data() - Split the data in the subtree state big node into regular 3206 * nodes. 3207 * @mast: The maple subtree state 3208 * @mas: The maple state 3209 * @split: The location to split the big node 3210 */ 3211 static inline void mast_split_data(struct maple_subtree_state *mast, 3212 struct ma_state *mas, unsigned char split) 3213 { 3214 unsigned char p_slot; 3215 3216 mab_mas_cp(mast->bn, 0, split, mast->l, true); 3217 mte_set_pivot(mast->r->node, 0, mast->r->max); 3218 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 3219 mast->l->offset = mte_parent_slot(mas->node); 3220 mast->l->max = mast->bn->pivot[split]; 3221 mast->r->min = mast->l->max + 1; 3222 if (mte_is_leaf(mas->node)) 3223 return; 3224 3225 p_slot = mast->orig_l->offset; 3226 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 3227 &p_slot, split); 3228 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 3229 &p_slot, split); 3230 } 3231 3232 /* 3233 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 3234 * data to the right or left node if there is room. 
3235 * @mas: The maple state 3236 * @height: The current height of the maple state 3237 * @mast: The maple subtree state 3238 * @left: Push left or not. 3239 * 3240 * Keeping the height of the tree low means faster lookups. 3241 * 3242 * Return: True if pushed, false otherwise. 3243 */ 3244 static inline bool mas_push_data(struct ma_state *mas, int height, 3245 struct maple_subtree_state *mast, bool left) 3246 { 3247 unsigned char slot_total = mast->bn->b_end; 3248 unsigned char end, space, split; 3249 3250 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 3251 tmp_mas = *mas; 3252 tmp_mas.depth = mast->l->depth; 3253 3254 if (left && !mas_prev_sibling(&tmp_mas)) 3255 return false; 3256 else if (!left && !mas_next_sibling(&tmp_mas)) 3257 return false; 3258 3259 end = mas_data_end(&tmp_mas); 3260 slot_total += end; 3261 space = 2 * mt_slot_count(mas->node) - 2; 3262 /* -2 instead of -1 to ensure there isn't a triple split */ 3263 if (ma_is_leaf(mast->bn->type)) 3264 space--; 3265 3266 if (mas->max == ULONG_MAX) 3267 space--; 3268 3269 if (slot_total >= space) 3270 return false; 3271 3272 /* Get the data; Fill mast->bn */ 3273 mast->bn->b_end++; 3274 if (left) { 3275 mab_shift_right(mast->bn, end + 1); 3276 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 3277 mast->bn->b_end = slot_total + 1; 3278 } else { 3279 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 3280 } 3281 3282 /* Configure mast for splitting of mast->bn */ 3283 split = mt_slots[mast->bn->type] - 2; 3284 if (left) { 3285 /* Switch mas to prev node */ 3286 *mas = tmp_mas; 3287 /* Start using mast->l for the left side. */ 3288 tmp_mas.node = mast->l->node; 3289 *mast->l = tmp_mas; 3290 } else { 3291 tmp_mas.node = mast->r->node; 3292 *mast->r = tmp_mas; 3293 split = slot_total - split; 3294 } 3295 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 3296 /* Update parent slot for split calculation. */ 3297 if (left) 3298 mast->orig_l->offset += end + 1; 3299 3300 mast_split_data(mast, mas, split); 3301 mast_fill_bnode(mast, mas, 2); 3302 mas_split_final_node(mast, mas, height + 1); 3303 return true; 3304 } 3305 3306 /* 3307 * mas_split() - Split data that is too big for one node into two. 3308 * @mas: The maple state 3309 * @b_node: The maple big node 3310 * Return: 1 on success, 0 on failure. 3311 */ 3312 static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) 3313 { 3314 struct maple_subtree_state mast; 3315 int height = 0; 3316 unsigned char mid_split, split = 0; 3317 struct maple_enode *old; 3318 3319 /* 3320 * Splitting is handled differently from any other B-tree; the Maple 3321 * Tree splits upwards. Splitting up means that the split operation 3322 * occurs when the walk of the tree hits the leaves and not on the way 3323 * down. The reason for splitting up is that it is impossible to know 3324 * how much space will be needed until the leaf is (or leaves are) 3325 * reached. Since overwriting data is allowed and a range could 3326 * overwrite more than one range or result in changing one entry into 3 3327 * entries, it is impossible to know if a split is required until the 3328 * data is examined. 3329 * 3330 * Splitting is a balancing act between keeping allocations to a minimum 3331 * and avoiding a 'jitter' event where a tree is expanded to make room 3332 * for an entry followed by a contraction when the entry is removed. To 3333 * accomplish the balance, there are empty slots remaining in both left 3334 * and right nodes after a split. 
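 *
 * Editor's illustration (not part of the kernel sources): a caller never
 * requests a split directly; it happens underneath an ordinary store once a
 * leaf fills up.  A sketch using only the public API, where the tree name
 * example_mt and the helper fill_tree() are made up and xa_mk_value() is used
 * purely as an example entry encoding:
 *
 *	DEFINE_MTREE(example_mt);
 *
 *	static int fill_tree(void)
 *	{
 *		unsigned long i;
 *		int ret;
 *
 *		for (i = 0; i < 1000; i++) {
 *			ret = mtree_store_range(&example_mt, i * 10, i * 10 + 9,
 *						xa_mk_value(i), GFP_KERNEL);
 *			if (ret)
 *				return ret;
 *		}
 *		return 0;
 *	}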
3335 */ 3336 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3337 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3338 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3339 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3340 3341 trace_ma_op(__func__, mas); 3342 mas->depth = mas_mt_height(mas); 3343 /* Allocation failures will happen early. */ 3344 mas_node_count(mas, 1 + mas->depth * 2); 3345 if (mas_is_err(mas)) 3346 return 0; 3347 3348 mast.l = &l_mas; 3349 mast.r = &r_mas; 3350 mast.orig_l = &prev_l_mas; 3351 mast.orig_r = &prev_r_mas; 3352 mast.bn = b_node; 3353 3354 while (height++ <= mas->depth) { 3355 if (mt_slots[b_node->type] > b_node->b_end) { 3356 mas_split_final_node(&mast, mas, height); 3357 break; 3358 } 3359 3360 l_mas = r_mas = *mas; 3361 l_mas.node = mas_new_ma_node(mas, b_node); 3362 r_mas.node = mas_new_ma_node(mas, b_node); 3363 /* 3364 * Another way that 'jitter' is avoided is to terminate a split up early if the 3365 * left or right node has space to spare. This is referred to as "pushing left" 3366 * or "pushing right" and is similar to the B* tree, except the nodes left or 3367 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3368 * is a significant savings. 3369 */ 3370 /* Try to push left. */ 3371 if (mas_push_data(mas, height, &mast, true)) 3372 break; 3373 /* Try to push right. */ 3374 if (mas_push_data(mas, height, &mast, false)) 3375 break; 3376 3377 split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); 3378 mast_split_data(&mast, mas, split); 3379 /* 3380 * Usually correct, mab_mas_cp in the above call overwrites 3381 * r->max. 3382 */ 3383 mast.r->max = mas->max; 3384 mast_fill_bnode(&mast, mas, 1); 3385 prev_l_mas = *mast.l; 3386 prev_r_mas = *mast.r; 3387 } 3388 3389 /* Set the original node as dead */ 3390 old = mas->node; 3391 mas->node = l_mas.node; 3392 mas_wmb_replace(mas, old); 3393 mtree_range_walk(mas); 3394 return 1; 3395 } 3396 3397 /* 3398 * mas_reuse_node() - Reuse the node to store the data. 3399 * @wr_mas: The maple write state 3400 * @bn: The maple big node 3401 * @end: The end of the data. 3402 * 3403 * Will always return false in RCU mode. 3404 * 3405 * Return: True if node was reused, false otherwise. 3406 */ 3407 static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, 3408 struct maple_big_node *bn, unsigned char end) 3409 { 3410 /* Need to be rcu safe. */ 3411 if (mt_in_rcu(wr_mas->mas->tree)) 3412 return false; 3413 3414 if (end > bn->b_end) { 3415 int clear = mt_slots[wr_mas->type] - bn->b_end; 3416 3417 memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--); 3418 memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear); 3419 } 3420 mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false); 3421 return true; 3422 } 3423 3424 /* 3425 * mas_commit_b_node() - Commit the big node into the tree. 3426 * @wr_mas: The maple write state 3427 * @b_node: The maple big node 3428 * @end: The end of the data. 
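 *
 * Editor's illustration (not part of the kernel sources): writes that modify
 * existing ranges typically arrive here through the mas_wr_* path.  A
 * caller-level sketch of such a write using the advanced API; store_range()
 * is a hypothetical helper and mas_store_gfp() handles its own node
 * allocation:
 *
 *	static int store_range(struct maple_tree *mt, unsigned long first,
 *			       unsigned long last, void *entry)
 *	{
 *		int ret;
 *
 *		MA_STATE(mas, mt, first, last);
 *
 *		mtree_lock(mt);
 *		ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
 *		mtree_unlock(mt);
 *		return ret;
 *	}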
3429 */ 3430 static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, 3431 struct maple_big_node *b_node, unsigned char end) 3432 { 3433 struct maple_node *node; 3434 struct maple_enode *old_enode; 3435 unsigned char b_end = b_node->b_end; 3436 enum maple_type b_type = b_node->type; 3437 3438 old_enode = wr_mas->mas->node; 3439 if ((b_end < mt_min_slots[b_type]) && 3440 (!mte_is_root(old_enode)) && 3441 (mas_mt_height(wr_mas->mas) > 1)) 3442 return mas_rebalance(wr_mas->mas, b_node); 3443 3444 if (b_end >= mt_slots[b_type]) 3445 return mas_split(wr_mas->mas, b_node); 3446 3447 if (mas_reuse_node(wr_mas, b_node, end)) 3448 goto reuse_node; 3449 3450 mas_node_count(wr_mas->mas, 1); 3451 if (mas_is_err(wr_mas->mas)) 3452 return 0; 3453 3454 node = mas_pop_node(wr_mas->mas); 3455 node->parent = mas_mn(wr_mas->mas)->parent; 3456 wr_mas->mas->node = mt_mk_node(node, b_type); 3457 mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false); 3458 mas_replace_node(wr_mas->mas, old_enode); 3459 reuse_node: 3460 mas_update_gap(wr_mas->mas); 3461 wr_mas->mas->end = b_end; 3462 return 1; 3463 } 3464 3465 /* 3466 * mas_root_expand() - Expand a root to a node 3467 * @mas: The maple state 3468 * @entry: The entry to store into the tree 3469 */ 3470 static inline int mas_root_expand(struct ma_state *mas, void *entry) 3471 { 3472 void *contents = mas_root_locked(mas); 3473 enum maple_type type = maple_leaf_64; 3474 struct maple_node *node; 3475 void __rcu **slots; 3476 unsigned long *pivots; 3477 int slot = 0; 3478 3479 mas_node_count(mas, 1); 3480 if (unlikely(mas_is_err(mas))) 3481 return 0; 3482 3483 node = mas_pop_node(mas); 3484 pivots = ma_pivots(node, type); 3485 slots = ma_slots(node, type); 3486 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3487 mas->node = mt_mk_node(node, type); 3488 mas->status = ma_active; 3489 3490 if (mas->index) { 3491 if (contents) { 3492 rcu_assign_pointer(slots[slot], contents); 3493 if (likely(mas->index > 1)) 3494 slot++; 3495 } 3496 pivots[slot++] = mas->index - 1; 3497 } 3498 3499 rcu_assign_pointer(slots[slot], entry); 3500 mas->offset = slot; 3501 pivots[slot] = mas->last; 3502 if (mas->last != ULONG_MAX) 3503 pivots[++slot] = ULONG_MAX; 3504 3505 mas->depth = 1; 3506 mas_set_height(mas); 3507 ma_set_meta(node, maple_leaf_64, 0, slot); 3508 /* swap the new root into the tree */ 3509 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3510 return slot; 3511 } 3512 3513 static inline void mas_store_root(struct ma_state *mas, void *entry) 3514 { 3515 if (likely((mas->last != 0) || (mas->index != 0))) 3516 mas_root_expand(mas, entry); 3517 else if (((unsigned long) (entry) & 3) == 2) 3518 mas_root_expand(mas, entry); 3519 else { 3520 rcu_assign_pointer(mas->tree->ma_root, entry); 3521 mas->status = ma_start; 3522 } 3523 } 3524 3525 /* 3526 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3527 * spans the node. 3528 * @mas: The maple state 3529 * @piv: The pivot value being written 3530 * @type: The maple node type 3531 * @entry: The data to write 3532 * 3533 * Spanning writes are writes that start in one node and end in another OR if 3534 * the write of a %NULL will cause the node to end with a %NULL. 3535 * 3536 * Return: True if this is a spanning write, false otherwise. 
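 *
 * Editor's illustration (not part of the kernel sources): once a tree holds
 * many separate ranges, a single wide overwrite such as the one below may
 * start in one leaf and end in another, so it is classified here as a
 * spanning write and handled by mas_wr_spanning_store().  Only the public
 * API is assumed; the tree is presumed to already hold entries between
 * index 0 and 9999, and mt and ret belong to a hypothetical caller:
 *
 *	ret = mtree_store_range(mt, 100, 8999, xa_mk_value(1), GFP_KERNEL);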
3537 */ 3538 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3539 { 3540 unsigned long max = wr_mas->r_max; 3541 unsigned long last = wr_mas->mas->last; 3542 enum maple_type type = wr_mas->type; 3543 void *entry = wr_mas->entry; 3544 3545 /* Contained in this pivot, fast path */ 3546 if (last < max) 3547 return false; 3548 3549 if (ma_is_leaf(type)) { 3550 max = wr_mas->mas->max; 3551 if (last < max) 3552 return false; 3553 } 3554 3555 if (last == max) { 3556 /* 3557 * The last entry of leaf node cannot be NULL unless it is the 3558 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 3559 */ 3560 if (entry || last == ULONG_MAX) 3561 return false; 3562 } 3563 3564 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3565 return true; 3566 } 3567 3568 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3569 { 3570 wr_mas->type = mte_node_type(wr_mas->mas->node); 3571 mas_wr_node_walk(wr_mas); 3572 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3573 } 3574 3575 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3576 { 3577 wr_mas->mas->max = wr_mas->r_max; 3578 wr_mas->mas->min = wr_mas->r_min; 3579 wr_mas->mas->node = wr_mas->content; 3580 wr_mas->mas->offset = 0; 3581 wr_mas->mas->depth++; 3582 } 3583 /* 3584 * mas_wr_walk() - Walk the tree for a write. 3585 * @wr_mas: The maple write state 3586 * 3587 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3588 * 3589 * Return: True if it's contained in a node, false on spanning write. 3590 */ 3591 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3592 { 3593 struct ma_state *mas = wr_mas->mas; 3594 3595 while (true) { 3596 mas_wr_walk_descend(wr_mas); 3597 if (unlikely(mas_is_span_wr(wr_mas))) 3598 return false; 3599 3600 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3601 mas->offset); 3602 if (ma_is_leaf(wr_mas->type)) 3603 return true; 3604 3605 mas_wr_walk_traverse(wr_mas); 3606 } 3607 3608 return true; 3609 } 3610 3611 static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) 3612 { 3613 struct ma_state *mas = wr_mas->mas; 3614 3615 while (true) { 3616 mas_wr_walk_descend(wr_mas); 3617 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3618 mas->offset); 3619 if (ma_is_leaf(wr_mas->type)) 3620 return true; 3621 mas_wr_walk_traverse(wr_mas); 3622 3623 } 3624 return true; 3625 } 3626 /* 3627 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 
3628 * @l_wr_mas: The left maple write state
3629 * @r_wr_mas: The right maple write state
3630 */
3631 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
3632 struct ma_wr_state *r_wr_mas)
3633 {
3634 struct ma_state *r_mas = r_wr_mas->mas;
3635 struct ma_state *l_mas = l_wr_mas->mas;
3636 unsigned char l_slot;
3637
3638 l_slot = l_mas->offset;
3639 if (!l_wr_mas->content)
3640 l_mas->index = l_wr_mas->r_min;
3641
3642 if ((l_mas->index == l_wr_mas->r_min) &&
3643 (l_slot &&
3644 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
3645 if (l_slot > 1)
3646 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
3647 else
3648 l_mas->index = l_mas->min;
3649
3650 l_mas->offset = l_slot - 1;
3651 }
3652
3653 if (!r_wr_mas->content) {
3654 if (r_mas->last < r_wr_mas->r_max)
3655 r_mas->last = r_wr_mas->r_max;
3656 r_mas->offset++;
3657 } else if ((r_mas->last == r_wr_mas->r_max) &&
3658 (r_mas->last < r_mas->max) &&
3659 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
3660 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
3661 r_mas->offset + 1, r_wr_mas->type);
3662 r_mas->offset++;
3663 }
3664 }
3665
3666 static inline void *mas_state_walk(struct ma_state *mas)
3667 {
3668 void *entry;
3669
3670 entry = mas_start(mas);
3671 if (mas_is_none(mas))
3672 return NULL;
3673
3674 if (mas_is_ptr(mas))
3675 return entry;
3676
3677 return mtree_range_walk(mas);
3678 }
3679
3680 /*
3681 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
3682 * to date.
3683 *
3684 * @mas: The maple state.
3685 *
3686 * Note: Leaves mas in an undesirable state.
3687 * Return: The entry for @mas->index or %NULL on dead node.
3688 */
3689 static inline void *mtree_lookup_walk(struct ma_state *mas)
3690 {
3691 unsigned long *pivots;
3692 unsigned char offset;
3693 struct maple_node *node;
3694 struct maple_enode *next;
3695 enum maple_type type;
3696 void __rcu **slots;
3697 unsigned char end;
3698
3699 next = mas->node;
3700 do {
3701 node = mte_to_node(next);
3702 type = mte_node_type(next);
3703 pivots = ma_pivots(node, type);
3704 end = mt_pivots[type];
3705 offset = 0;
3706 do {
3707 if (pivots[offset] >= mas->index)
3708 break;
3709 } while (++offset < end);
3710
3711 slots = ma_slots(node, type);
3712 next = mt_slot(mas->tree, slots, offset);
3713 if (unlikely(ma_dead_node(node)))
3714 goto dead_node;
3715 } while (!ma_is_leaf(type));
3716
3717 return (void *)next;
3718
3719 dead_node:
3720 mas_reset(mas);
3721 return NULL;
3722 }
3723
3724 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
3725 /*
3726 * mas_new_root() - Create a new root node that only contains the entry passed
3727 * in.
3728 * @mas: The maple state
3729 * @entry: The entry to store.
3730 *
3731 * Only valid when the index == 0 and the last == ULONG_MAX
3732 *
3733 * Return: 0 on error, 1 on success.
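 *
 * Editor's illustration (not part of the kernel sources): callers reach this
 * path by writing the full range, e.g. collapsing the tree to one entry or
 * emptying it.  Only the public API is assumed, with mt being the caller's
 * tree:
 *
 *	Store a single entry spanning every index:
 *
 *		mtree_store_range(mt, 0, ULONG_MAX, xa_mk_value(1), GFP_KERNEL);
 *
 *	Storing NULL over the full range empties the tree again:
 *
 *		mtree_store_range(mt, 0, ULONG_MAX, NULL, GFP_KERNEL);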
3734 */ 3735 static inline int mas_new_root(struct ma_state *mas, void *entry) 3736 { 3737 struct maple_enode *root = mas_root_locked(mas); 3738 enum maple_type type = maple_leaf_64; 3739 struct maple_node *node; 3740 void __rcu **slots; 3741 unsigned long *pivots; 3742 3743 if (!entry && !mas->index && mas->last == ULONG_MAX) { 3744 mas->depth = 0; 3745 mas_set_height(mas); 3746 rcu_assign_pointer(mas->tree->ma_root, entry); 3747 mas->status = ma_start; 3748 goto done; 3749 } 3750 3751 mas_node_count(mas, 1); 3752 if (mas_is_err(mas)) 3753 return 0; 3754 3755 node = mas_pop_node(mas); 3756 pivots = ma_pivots(node, type); 3757 slots = ma_slots(node, type); 3758 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3759 mas->node = mt_mk_node(node, type); 3760 mas->status = ma_active; 3761 rcu_assign_pointer(slots[0], entry); 3762 pivots[0] = mas->last; 3763 mas->depth = 1; 3764 mas_set_height(mas); 3765 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3766 3767 done: 3768 if (xa_is_node(root)) 3769 mte_destroy_walk(root, mas->tree); 3770 3771 return 1; 3772 } 3773 /* 3774 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3775 * and new nodes where necessary, then place the sub-tree in the actual tree. 3776 * Note that mas is expected to point to the node which caused the store to 3777 * span. 3778 * @wr_mas: The maple write state 3779 * 3780 * Return: 0 on error, positive on success. 3781 */ 3782 static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3783 { 3784 struct maple_subtree_state mast; 3785 struct maple_big_node b_node; 3786 struct ma_state *mas; 3787 unsigned char height; 3788 3789 /* Left and Right side of spanning store */ 3790 MA_STATE(l_mas, NULL, 0, 0); 3791 MA_STATE(r_mas, NULL, 0, 0); 3792 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3793 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3794 3795 /* 3796 * A store operation that spans multiple nodes is called a spanning 3797 * store and is handled early in the store call stack by the function 3798 * mas_is_span_wr(). When a spanning store is identified, the maple 3799 * state is duplicated. The first maple state walks the left tree path 3800 * to ``index``, the duplicate walks the right tree path to ``last``. 3801 * The data in the two nodes are combined into a single node, two nodes, 3802 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3803 * written to the last entry of a node is considered a spanning store as 3804 * a rebalance is required for the operation to complete and an overflow 3805 * of data may happen. 3806 */ 3807 mas = wr_mas->mas; 3808 trace_ma_op(__func__, mas); 3809 3810 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3811 return mas_new_root(mas, wr_mas->entry); 3812 /* 3813 * Node rebalancing may occur due to this store, so there may be three new 3814 * entries per level plus a new root. 3815 */ 3816 height = mas_mt_height(mas); 3817 mas_node_count(mas, 1 + height * 3); 3818 if (mas_is_err(mas)) 3819 return 0; 3820 3821 /* 3822 * Set up right side. Need to get to the next offset after the spanning 3823 * store to ensure it's not NULL and to combine both the next node and 3824 * the node with the start together. 3825 */ 3826 r_mas = *mas; 3827 /* Avoid overflow, walk to next slot in the tree. */ 3828 if (r_mas.last + 1) 3829 r_mas.last++; 3830 3831 r_mas.index = r_mas.last; 3832 mas_wr_walk_index(&r_wr_mas); 3833 r_mas.last = r_mas.index = mas->last; 3834 3835 /* Set up left side. 
*/ 3836 l_mas = *mas; 3837 mas_wr_walk_index(&l_wr_mas); 3838 3839 if (!wr_mas->entry) { 3840 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3841 mas->offset = l_mas.offset; 3842 mas->index = l_mas.index; 3843 mas->last = l_mas.last = r_mas.last; 3844 } 3845 3846 /* expanding NULLs may make this cover the entire range */ 3847 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3848 mas_set_range(mas, 0, ULONG_MAX); 3849 return mas_new_root(mas, wr_mas->entry); 3850 } 3851 3852 memset(&b_node, 0, sizeof(struct maple_big_node)); 3853 /* Copy l_mas and store the value in b_node. */ 3854 mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); 3855 /* Copy r_mas into b_node. */ 3856 if (r_mas.offset <= r_mas.end) 3857 mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, 3858 &b_node, b_node.b_end + 1); 3859 else 3860 b_node.b_end++; 3861 3862 /* Stop spanning searches by searching for just index. */ 3863 l_mas.index = l_mas.last = mas->index; 3864 3865 mast.bn = &b_node; 3866 mast.orig_l = &l_mas; 3867 mast.orig_r = &r_mas; 3868 /* Combine l_mas and r_mas and split them up evenly again. */ 3869 return mas_spanning_rebalance(mas, &mast, height + 1); 3870 } 3871 3872 /* 3873 * mas_wr_node_store() - Attempt to store the value in a node 3874 * @wr_mas: The maple write state 3875 * 3876 * Attempts to reuse the node, but may allocate. 3877 * 3878 * Return: True if stored, false otherwise 3879 */ 3880 static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, 3881 unsigned char new_end) 3882 { 3883 struct ma_state *mas = wr_mas->mas; 3884 void __rcu **dst_slots; 3885 unsigned long *dst_pivots; 3886 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3887 struct maple_node reuse, *newnode; 3888 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3889 bool in_rcu = mt_in_rcu(mas->tree); 3890 3891 /* Check if there is enough data. The room is enough. */ 3892 if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) && 3893 !(mas->mas_flags & MA_STATE_BULK)) 3894 return false; 3895 3896 if (mas->last == wr_mas->end_piv) 3897 offset_end++; /* don't copy this offset */ 3898 else if (unlikely(wr_mas->r_max == ULONG_MAX)) 3899 mas_bulk_rebalance(mas, mas->end, wr_mas->type); 3900 3901 /* set up node. */ 3902 if (in_rcu) { 3903 mas_node_count(mas, 1); 3904 if (mas_is_err(mas)) 3905 return false; 3906 3907 newnode = mas_pop_node(mas); 3908 } else { 3909 memset(&reuse, 0, sizeof(struct maple_node)); 3910 newnode = &reuse; 3911 } 3912 3913 newnode->parent = mas_mn(mas)->parent; 3914 dst_pivots = ma_pivots(newnode, wr_mas->type); 3915 dst_slots = ma_slots(newnode, wr_mas->type); 3916 /* Copy from start to insert point */ 3917 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3918 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3919 3920 /* Handle insert of new range starting after old range */ 3921 if (wr_mas->r_min < mas->index) { 3922 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3923 dst_pivots[mas->offset++] = mas->index - 1; 3924 } 3925 3926 /* Store the new entry and range end. */ 3927 if (mas->offset < node_pivots) 3928 dst_pivots[mas->offset] = mas->last; 3929 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3930 3931 /* 3932 * this range wrote to the end of the node or it overwrote the rest of 3933 * the data 3934 */ 3935 if (offset_end > mas->end) 3936 goto done; 3937 3938 dst_offset = mas->offset + 1; 3939 /* Copy to the end of node if necessary. 
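 * The untouched tail of the source node follows the new entry; copy_size
 * counts slots, so one fewer pivot is copied because the last pivot is
 * either implied or rewritten below when new_end < node_pivots.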
*/ 3940 copy_size = mas->end - offset_end + 1; 3941 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3942 sizeof(void *) * copy_size); 3943 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3944 sizeof(unsigned long) * (copy_size - 1)); 3945 3946 if (new_end < node_pivots) 3947 dst_pivots[new_end] = mas->max; 3948 3949 done: 3950 mas_leaf_set_meta(newnode, maple_leaf_64, new_end); 3951 if (in_rcu) { 3952 struct maple_enode *old_enode = mas->node; 3953 3954 mas->node = mt_mk_node(newnode, wr_mas->type); 3955 mas_replace_node(mas, old_enode); 3956 } else { 3957 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3958 } 3959 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3960 mas_update_gap(mas); 3961 mas->end = new_end; 3962 return true; 3963 } 3964 3965 /* 3966 * mas_wr_slot_store: Attempt to store a value in a slot. 3967 * @wr_mas: the maple write state 3968 * 3969 * Return: True if stored, false otherwise 3970 */ 3971 static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) 3972 { 3973 struct ma_state *mas = wr_mas->mas; 3974 unsigned char offset = mas->offset; 3975 void __rcu **slots = wr_mas->slots; 3976 bool gap = false; 3977 3978 gap |= !mt_slot_locked(mas->tree, slots, offset); 3979 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 3980 3981 if (wr_mas->offset_end - offset == 1) { 3982 if (mas->index == wr_mas->r_min) { 3983 /* Overwriting the range and a part of the next one */ 3984 rcu_assign_pointer(slots[offset], wr_mas->entry); 3985 wr_mas->pivots[offset] = mas->last; 3986 } else { 3987 /* Overwriting a part of the range and the next one */ 3988 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3989 wr_mas->pivots[offset] = mas->index - 1; 3990 mas->offset++; /* Keep mas accurate. */ 3991 } 3992 } else if (!mt_in_rcu(mas->tree)) { 3993 /* 3994 * Expand the range, only partially overwriting the previous and 3995 * next ranges 3996 */ 3997 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 3998 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3999 wr_mas->pivots[offset] = mas->index - 1; 4000 wr_mas->pivots[offset + 1] = mas->last; 4001 mas->offset++; /* Keep mas accurate. */ 4002 } else { 4003 return false; 4004 } 4005 4006 trace_ma_write(__func__, mas, 0, wr_mas->entry); 4007 /* 4008 * Only update gap when the new entry is empty or there is an empty 4009 * entry in the original two ranges. 
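 * The gap flag set above records whether any of the slots read under the
 * lock held NULL.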
4010 */ 4011 if (!wr_mas->entry || gap) 4012 mas_update_gap(mas); 4013 4014 return true; 4015 } 4016 4017 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 4018 { 4019 struct ma_state *mas = wr_mas->mas; 4020 4021 if (!wr_mas->slots[wr_mas->offset_end]) { 4022 /* If this one is null, the next and prev are not */ 4023 mas->last = wr_mas->end_piv; 4024 } else { 4025 /* Check next slot(s) if we are overwriting the end */ 4026 if ((mas->last == wr_mas->end_piv) && 4027 (mas->end != wr_mas->offset_end) && 4028 !wr_mas->slots[wr_mas->offset_end + 1]) { 4029 wr_mas->offset_end++; 4030 if (wr_mas->offset_end == mas->end) 4031 mas->last = mas->max; 4032 else 4033 mas->last = wr_mas->pivots[wr_mas->offset_end]; 4034 wr_mas->end_piv = mas->last; 4035 } 4036 } 4037 4038 if (!wr_mas->content) { 4039 /* If this one is null, the next and prev are not */ 4040 mas->index = wr_mas->r_min; 4041 } else { 4042 /* Check prev slot if we are overwriting the start */ 4043 if (mas->index == wr_mas->r_min && mas->offset && 4044 !wr_mas->slots[mas->offset - 1]) { 4045 mas->offset--; 4046 wr_mas->r_min = mas->index = 4047 mas_safe_min(mas, wr_mas->pivots, mas->offset); 4048 wr_mas->r_max = wr_mas->pivots[mas->offset]; 4049 } 4050 } 4051 } 4052 4053 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 4054 { 4055 while ((wr_mas->offset_end < wr_mas->mas->end) && 4056 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 4057 wr_mas->offset_end++; 4058 4059 if (wr_mas->offset_end < wr_mas->mas->end) 4060 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 4061 else 4062 wr_mas->end_piv = wr_mas->mas->max; 4063 4064 if (!wr_mas->entry) 4065 mas_wr_extend_null(wr_mas); 4066 } 4067 4068 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 4069 { 4070 struct ma_state *mas = wr_mas->mas; 4071 unsigned char new_end = mas->end + 2; 4072 4073 new_end -= wr_mas->offset_end - mas->offset; 4074 if (wr_mas->r_min == mas->index) 4075 new_end--; 4076 4077 if (wr_mas->end_piv == mas->last) 4078 new_end--; 4079 4080 return new_end; 4081 } 4082 4083 /* 4084 * mas_wr_append: Attempt to append 4085 * @wr_mas: the maple write state 4086 * @new_end: The end of the node after the modification 4087 * 4088 * This is currently unsafe in rcu mode since the end of the node may be cached 4089 * by readers while the node contents may be updated which could result in 4090 * inaccurate information. 4091 * 4092 * Return: True if appended, false otherwise 4093 */ 4094 static inline bool mas_wr_append(struct ma_wr_state *wr_mas, 4095 unsigned char new_end) 4096 { 4097 struct ma_state *mas; 4098 void __rcu **slots; 4099 unsigned char end; 4100 4101 mas = wr_mas->mas; 4102 if (mt_in_rcu(mas->tree)) 4103 return false; 4104 4105 end = mas->end; 4106 if (mas->offset != end) 4107 return false; 4108 4109 if (new_end < mt_pivots[wr_mas->type]) { 4110 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 4111 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 4112 } 4113 4114 slots = wr_mas->slots; 4115 if (new_end == end + 1) { 4116 if (mas->last == wr_mas->r_max) { 4117 /* Append to end of range */ 4118 rcu_assign_pointer(slots[new_end], wr_mas->entry); 4119 wr_mas->pivots[end] = mas->index - 1; 4120 mas->offset = new_end; 4121 } else { 4122 /* Append to start of range */ 4123 rcu_assign_pointer(slots[new_end], wr_mas->content); 4124 wr_mas->pivots[end] = mas->last; 4125 rcu_assign_pointer(slots[end], wr_mas->entry); 4126 } 4127 } else { 4128 /* Append to the range without touching any boundaries. 
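 * The new range lands strictly inside the final range, so the old entry
 * keeps both the head (r_min to index - 1) and the tail (last + 1 to r_max)
 * while @entry takes the middle slot; this is the new_end == end + 2 case.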
*/ 4129 rcu_assign_pointer(slots[new_end], wr_mas->content); 4130 wr_mas->pivots[end + 1] = mas->last; 4131 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 4132 wr_mas->pivots[end] = mas->index - 1; 4133 mas->offset = end + 1; 4134 } 4135 4136 if (!wr_mas->content || !wr_mas->entry) 4137 mas_update_gap(mas); 4138 4139 mas->end = new_end; 4140 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 4141 return true; 4142 } 4143 4144 /* 4145 * mas_wr_bnode() - Slow path for a modification. 4146 * @wr_mas: The write maple state 4147 * 4148 * This is where split, rebalance end up. 4149 */ 4150 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 4151 { 4152 struct maple_big_node b_node; 4153 4154 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 4155 memset(&b_node, 0, sizeof(struct maple_big_node)); 4156 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 4157 mas_commit_b_node(wr_mas, &b_node, wr_mas->mas->end); 4158 } 4159 4160 static inline void mas_wr_modify(struct ma_wr_state *wr_mas) 4161 { 4162 struct ma_state *mas = wr_mas->mas; 4163 unsigned char new_end; 4164 4165 /* Direct replacement */ 4166 if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) { 4167 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 4168 if (!!wr_mas->entry ^ !!wr_mas->content) 4169 mas_update_gap(mas); 4170 return; 4171 } 4172 4173 /* 4174 * new_end exceeds the size of the maple node and cannot enter the fast 4175 * path. 4176 */ 4177 new_end = mas_wr_new_end(wr_mas); 4178 if (new_end >= mt_slots[wr_mas->type]) 4179 goto slow_path; 4180 4181 /* Attempt to append */ 4182 if (mas_wr_append(wr_mas, new_end)) 4183 return; 4184 4185 if (new_end == mas->end && mas_wr_slot_store(wr_mas)) 4186 return; 4187 4188 if (mas_wr_node_store(wr_mas, new_end)) 4189 return; 4190 4191 if (mas_is_err(mas)) 4192 return; 4193 4194 slow_path: 4195 mas_wr_bnode(wr_mas); 4196 } 4197 4198 /* 4199 * mas_wr_store_entry() - Internal call to store a value 4200 * @mas: The maple state 4201 * @entry: The entry to store. 4202 * 4203 * Return: The contents that was stored at the index. 4204 */ 4205 static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas) 4206 { 4207 struct ma_state *mas = wr_mas->mas; 4208 4209 wr_mas->content = mas_start(mas); 4210 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4211 mas_store_root(mas, wr_mas->entry); 4212 return wr_mas->content; 4213 } 4214 4215 if (unlikely(!mas_wr_walk(wr_mas))) { 4216 mas_wr_spanning_store(wr_mas); 4217 return wr_mas->content; 4218 } 4219 4220 /* At this point, we are at the leaf node that needs to be altered. */ 4221 mas_wr_end_piv(wr_mas); 4222 /* New root for a single pointer */ 4223 if (unlikely(!mas->index && mas->last == ULONG_MAX)) { 4224 mas_new_root(mas, wr_mas->entry); 4225 return wr_mas->content; 4226 } 4227 4228 mas_wr_modify(wr_mas); 4229 return wr_mas->content; 4230 } 4231 4232 /** 4233 * mas_insert() - Internal call to insert a value 4234 * @mas: The maple state 4235 * @entry: The entry to store 4236 * 4237 * Return: %NULL or the contents that already exists at the requested index 4238 * otherwise. The maple state needs to be checked for error conditions. 4239 */ 4240 static inline void *mas_insert(struct ma_state *mas, void *entry) 4241 { 4242 MA_WR_STATE(wr_mas, mas, entry); 4243 4244 /* 4245 * Inserting a new range inserts either 0, 1, or 2 pivots within the 4246 * tree. If the insert fits exactly into an existing gap with a value 4247 * of NULL, then the slot only needs to be written with the new value. 
4248 * If the range being inserted is adjacent to another range, then only a 4249 * single pivot needs to be inserted (as well as writing the entry). If 4250 * the new range is within a gap but does not touch any other ranges, 4251 * then two pivots need to be inserted: the start - 1, and the end. As 4252 * usual, the entry must be written. Most operations require a new node 4253 * to be allocated and replace an existing node to ensure RCU safety, 4254 * when in RCU mode. The exception to requiring a newly allocated node 4255 * is when inserting at the end of a node (appending). When done 4256 * carefully, appending can reuse the node in place. 4257 */ 4258 wr_mas.content = mas_start(mas); 4259 if (wr_mas.content) 4260 goto exists; 4261 4262 if (mas_is_none(mas) || mas_is_ptr(mas)) { 4263 mas_store_root(mas, entry); 4264 return NULL; 4265 } 4266 4267 /* spanning writes always overwrite something */ 4268 if (!mas_wr_walk(&wr_mas)) 4269 goto exists; 4270 4271 /* At this point, we are at the leaf node that needs to be altered. */ 4272 wr_mas.offset_end = mas->offset; 4273 wr_mas.end_piv = wr_mas.r_max; 4274 4275 if (wr_mas.content || (mas->last > wr_mas.r_max)) 4276 goto exists; 4277 4278 if (!entry) 4279 return NULL; 4280 4281 mas_wr_modify(&wr_mas); 4282 return wr_mas.content; 4283 4284 exists: 4285 mas_set_err(mas, -EEXIST); 4286 return wr_mas.content; 4287 4288 } 4289 4290 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4291 { 4292 retry: 4293 mas_set(mas, index); 4294 mas_state_walk(mas); 4295 if (mas_is_start(mas)) 4296 goto retry; 4297 } 4298 4299 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, 4300 struct maple_node *node, const unsigned long index) 4301 { 4302 if (unlikely(ma_dead_node(node))) { 4303 mas_rewalk(mas, index); 4304 return true; 4305 } 4306 return false; 4307 } 4308 4309 /* 4310 * mas_prev_node() - Find the prev non-null entry at the same level in the 4311 * tree. The prev value will be mas->node[mas->offset] or the status will be 4312 * ma_none. 4313 * @mas: The maple state 4314 * @min: The lower limit to search 4315 * 4316 * The prev node value will be mas->node[mas->offset] or the status will be 4317 * ma_none. 4318 * Return: 1 if the node is dead, 0 otherwise. 4319 */ 4320 static int mas_prev_node(struct ma_state *mas, unsigned long min) 4321 { 4322 enum maple_type mt; 4323 int offset, level; 4324 void __rcu **slots; 4325 struct maple_node *node; 4326 unsigned long *pivots; 4327 unsigned long max; 4328 4329 node = mas_mn(mas); 4330 if (!mas->min) 4331 goto no_entry; 4332 4333 max = mas->min - 1; 4334 if (max < min) 4335 goto no_entry; 4336 4337 level = 0; 4338 do { 4339 if (ma_is_root(node)) 4340 goto no_entry; 4341 4342 /* Walk up. 
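 * Climb until an offset other than the first slot of its parent is found,
 * counting the levels so the descent below can follow the right-most path
 * of the previous sibling's subtree.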
*/ 4343 if (unlikely(mas_ascend(mas))) 4344 return 1; 4345 offset = mas->offset; 4346 level++; 4347 node = mas_mn(mas); 4348 } while (!offset); 4349 4350 offset--; 4351 mt = mte_node_type(mas->node); 4352 while (level > 1) { 4353 level--; 4354 slots = ma_slots(node, mt); 4355 mas->node = mas_slot(mas, slots, offset); 4356 if (unlikely(ma_dead_node(node))) 4357 return 1; 4358 4359 mt = mte_node_type(mas->node); 4360 node = mas_mn(mas); 4361 pivots = ma_pivots(node, mt); 4362 offset = ma_data_end(node, mt, pivots, max); 4363 if (unlikely(ma_dead_node(node))) 4364 return 1; 4365 } 4366 4367 slots = ma_slots(node, mt); 4368 mas->node = mas_slot(mas, slots, offset); 4369 pivots = ma_pivots(node, mt); 4370 if (unlikely(ma_dead_node(node))) 4371 return 1; 4372 4373 if (likely(offset)) 4374 mas->min = pivots[offset - 1] + 1; 4375 mas->max = max; 4376 mas->offset = mas_data_end(mas); 4377 if (unlikely(mte_dead_node(mas->node))) 4378 return 1; 4379 4380 mas->end = mas->offset; 4381 return 0; 4382 4383 no_entry: 4384 if (unlikely(ma_dead_node(node))) 4385 return 1; 4386 4387 mas->status = ma_underflow; 4388 return 0; 4389 } 4390 4391 /* 4392 * mas_prev_slot() - Get the entry in the previous slot 4393 * 4394 * @mas: The maple state 4395 * @max: The minimum starting range 4396 * @empty: Can be empty 4397 * @set_underflow: Set the @mas->node to underflow state on limit. 4398 * 4399 * Return: The entry in the previous slot which is possibly NULL 4400 */ 4401 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4402 { 4403 void *entry; 4404 void __rcu **slots; 4405 unsigned long pivot; 4406 enum maple_type type; 4407 unsigned long *pivots; 4408 struct maple_node *node; 4409 unsigned long save_point = mas->index; 4410 4411 retry: 4412 node = mas_mn(mas); 4413 type = mte_node_type(mas->node); 4414 pivots = ma_pivots(node, type); 4415 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4416 goto retry; 4417 4418 if (mas->min <= min) { 4419 pivot = mas_safe_min(mas, pivots, mas->offset); 4420 4421 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4422 goto retry; 4423 4424 if (pivot <= min) 4425 goto underflow; 4426 } 4427 4428 again: 4429 if (likely(mas->offset)) { 4430 mas->offset--; 4431 mas->last = mas->index - 1; 4432 mas->index = mas_safe_min(mas, pivots, mas->offset); 4433 } else { 4434 if (mas->index <= min) 4435 goto underflow; 4436 4437 if (mas_prev_node(mas, min)) { 4438 mas_rewalk(mas, save_point); 4439 goto retry; 4440 } 4441 4442 if (WARN_ON_ONCE(mas_is_underflow(mas))) 4443 return NULL; 4444 4445 mas->last = mas->max; 4446 node = mas_mn(mas); 4447 type = mte_node_type(mas->node); 4448 pivots = ma_pivots(node, type); 4449 mas->index = pivots[mas->offset - 1] + 1; 4450 } 4451 4452 slots = ma_slots(node, type); 4453 entry = mas_slot(mas, slots, mas->offset); 4454 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4455 goto retry; 4456 4457 4458 if (likely(entry)) 4459 return entry; 4460 4461 if (!empty) { 4462 if (mas->index <= min) { 4463 mas->status = ma_underflow; 4464 return NULL; 4465 } 4466 4467 goto again; 4468 } 4469 4470 return entry; 4471 4472 underflow: 4473 mas->status = ma_underflow; 4474 return NULL; 4475 } 4476 4477 /* 4478 * mas_next_node() - Get the next node at the same level in the tree. 4479 * @mas: The maple state 4480 * @max: The maximum pivot value to check. 4481 * 4482 * The next value will be mas->node[mas->offset] or the status will have 4483 * overflowed. 4484 * Return: 1 on dead node, 0 otherwise. 
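 *
 * @node is the decoded node behind @mas->node; callers such as
 * mas_next_slot() already have it and pass it in to avoid another decode.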
4485 */ 4486 static int mas_next_node(struct ma_state *mas, struct maple_node *node, 4487 unsigned long max) 4488 { 4489 unsigned long min; 4490 unsigned long *pivots; 4491 struct maple_enode *enode; 4492 struct maple_node *tmp; 4493 int level = 0; 4494 unsigned char node_end; 4495 enum maple_type mt; 4496 void __rcu **slots; 4497 4498 if (mas->max >= max) 4499 goto overflow; 4500 4501 min = mas->max + 1; 4502 level = 0; 4503 do { 4504 if (ma_is_root(node)) 4505 goto overflow; 4506 4507 /* Walk up. */ 4508 if (unlikely(mas_ascend(mas))) 4509 return 1; 4510 4511 level++; 4512 node = mas_mn(mas); 4513 mt = mte_node_type(mas->node); 4514 pivots = ma_pivots(node, mt); 4515 node_end = ma_data_end(node, mt, pivots, mas->max); 4516 if (unlikely(ma_dead_node(node))) 4517 return 1; 4518 4519 } while (unlikely(mas->offset == node_end)); 4520 4521 slots = ma_slots(node, mt); 4522 mas->offset++; 4523 enode = mas_slot(mas, slots, mas->offset); 4524 if (unlikely(ma_dead_node(node))) 4525 return 1; 4526 4527 if (level > 1) 4528 mas->offset = 0; 4529 4530 while (unlikely(level > 1)) { 4531 level--; 4532 mas->node = enode; 4533 node = mas_mn(mas); 4534 mt = mte_node_type(mas->node); 4535 slots = ma_slots(node, mt); 4536 enode = mas_slot(mas, slots, 0); 4537 if (unlikely(ma_dead_node(node))) 4538 return 1; 4539 } 4540 4541 if (!mas->offset) 4542 pivots = ma_pivots(node, mt); 4543 4544 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4545 tmp = mte_to_node(enode); 4546 mt = mte_node_type(enode); 4547 pivots = ma_pivots(tmp, mt); 4548 mas->end = ma_data_end(tmp, mt, pivots, mas->max); 4549 if (unlikely(ma_dead_node(node))) 4550 return 1; 4551 4552 mas->node = enode; 4553 mas->min = min; 4554 return 0; 4555 4556 overflow: 4557 if (unlikely(ma_dead_node(node))) 4558 return 1; 4559 4560 mas->status = ma_overflow; 4561 return 0; 4562 } 4563 4564 /* 4565 * mas_next_slot() - Get the entry in the next slot 4566 * 4567 * @mas: The maple state 4568 * @max: The maximum starting range 4569 * @empty: Can be empty 4570 * @set_overflow: Should @mas->node be set to overflow when the limit is 4571 * reached. 
4572 * 4573 * Return: The entry in the next slot which is possibly NULL 4574 */ 4575 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4576 { 4577 void __rcu **slots; 4578 unsigned long *pivots; 4579 unsigned long pivot; 4580 enum maple_type type; 4581 struct maple_node *node; 4582 unsigned long save_point = mas->last; 4583 void *entry; 4584 4585 retry: 4586 node = mas_mn(mas); 4587 type = mte_node_type(mas->node); 4588 pivots = ma_pivots(node, type); 4589 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4590 goto retry; 4591 4592 if (mas->max >= max) { 4593 if (likely(mas->offset < mas->end)) 4594 pivot = pivots[mas->offset]; 4595 else 4596 pivot = mas->max; 4597 4598 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4599 goto retry; 4600 4601 if (pivot >= max) { /* Was at the limit, next will extend beyond */ 4602 mas->status = ma_overflow; 4603 return NULL; 4604 } 4605 } 4606 4607 if (likely(mas->offset < mas->end)) { 4608 mas->index = pivots[mas->offset] + 1; 4609 again: 4610 mas->offset++; 4611 if (likely(mas->offset < mas->end)) 4612 mas->last = pivots[mas->offset]; 4613 else 4614 mas->last = mas->max; 4615 } else { 4616 if (mas->last >= max) { 4617 mas->status = ma_overflow; 4618 return NULL; 4619 } 4620 4621 if (mas_next_node(mas, node, max)) { 4622 mas_rewalk(mas, save_point); 4623 goto retry; 4624 } 4625 4626 if (WARN_ON_ONCE(mas_is_overflow(mas))) 4627 return NULL; 4628 4629 mas->offset = 0; 4630 mas->index = mas->min; 4631 node = mas_mn(mas); 4632 type = mte_node_type(mas->node); 4633 pivots = ma_pivots(node, type); 4634 mas->last = pivots[0]; 4635 } 4636 4637 slots = ma_slots(node, type); 4638 entry = mt_slot(mas->tree, slots, mas->offset); 4639 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4640 goto retry; 4641 4642 if (entry) 4643 return entry; 4644 4645 4646 if (!empty) { 4647 if (mas->last >= max) { 4648 mas->status = ma_overflow; 4649 return NULL; 4650 } 4651 4652 mas->index = mas->last + 1; 4653 goto again; 4654 } 4655 4656 return entry; 4657 } 4658 4659 /* 4660 * mas_next_entry() - Internal function to get the next entry. 4661 * @mas: The maple state 4662 * @limit: The maximum range start. 4663 * 4664 * Set the @mas->node to the next entry and the range_start to 4665 * the beginning value for the entry. Does not check beyond @limit. 4666 * Sets @mas->index and @mas->last to the range, Does not update @mas->index and 4667 * @mas->last on overflow. 4668 * Restarts on dead nodes. 4669 * 4670 * Return: the next entry or %NULL. 4671 */ 4672 static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) 4673 { 4674 if (mas->last >= limit) { 4675 mas->status = ma_overflow; 4676 return NULL; 4677 } 4678 4679 return mas_next_slot(mas, limit, false); 4680 } 4681 4682 /* 4683 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4684 * highest gap address of a given size in a given node and descend. 4685 * @mas: The maple state 4686 * @size: The needed size. 4687 * 4688 * Return: True if found in a leaf, false otherwise. 
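 * When a leaf gap is found, @gap_min and @gap_max are set to the bounds of
 * the discovered gap and mas->offset points at the matching slot.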
4689 * 4690 */ 4691 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4692 unsigned long *gap_min, unsigned long *gap_max) 4693 { 4694 enum maple_type type = mte_node_type(mas->node); 4695 struct maple_node *node = mas_mn(mas); 4696 unsigned long *pivots, *gaps; 4697 void __rcu **slots; 4698 unsigned long gap = 0; 4699 unsigned long max, min; 4700 unsigned char offset; 4701 4702 if (unlikely(mas_is_err(mas))) 4703 return true; 4704 4705 if (ma_is_dense(type)) { 4706 /* dense nodes. */ 4707 mas->offset = (unsigned char)(mas->index - mas->min); 4708 return true; 4709 } 4710 4711 pivots = ma_pivots(node, type); 4712 slots = ma_slots(node, type); 4713 gaps = ma_gaps(node, type); 4714 offset = mas->offset; 4715 min = mas_safe_min(mas, pivots, offset); 4716 /* Skip out of bounds. */ 4717 while (mas->last < min) 4718 min = mas_safe_min(mas, pivots, --offset); 4719 4720 max = mas_safe_pivot(mas, pivots, offset, type); 4721 while (mas->index <= max) { 4722 gap = 0; 4723 if (gaps) 4724 gap = gaps[offset]; 4725 else if (!mas_slot(mas, slots, offset)) 4726 gap = max - min + 1; 4727 4728 if (gap) { 4729 if ((size <= gap) && (size <= mas->last - min + 1)) 4730 break; 4731 4732 if (!gaps) { 4733 /* Skip the next slot, it cannot be a gap. */ 4734 if (offset < 2) 4735 goto ascend; 4736 4737 offset -= 2; 4738 max = pivots[offset]; 4739 min = mas_safe_min(mas, pivots, offset); 4740 continue; 4741 } 4742 } 4743 4744 if (!offset) 4745 goto ascend; 4746 4747 offset--; 4748 max = min - 1; 4749 min = mas_safe_min(mas, pivots, offset); 4750 } 4751 4752 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4753 goto no_space; 4754 4755 if (unlikely(ma_is_leaf(type))) { 4756 mas->offset = offset; 4757 *gap_min = min; 4758 *gap_max = min + gap - 1; 4759 return true; 4760 } 4761 4762 /* descend, only happens under lock. 
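 * The child at @offset is known to hold a large enough gap, so narrow
 * min/max to that child's range and continue from its last slot.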
*/ 4763 mas->node = mas_slot(mas, slots, offset); 4764 mas->min = min; 4765 mas->max = max; 4766 mas->offset = mas_data_end(mas); 4767 return false; 4768 4769 ascend: 4770 if (!mte_is_root(mas->node)) 4771 return false; 4772 4773 no_space: 4774 mas_set_err(mas, -EBUSY); 4775 return false; 4776 } 4777 4778 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4779 { 4780 enum maple_type type = mte_node_type(mas->node); 4781 unsigned long pivot, min, gap = 0; 4782 unsigned char offset, data_end; 4783 unsigned long *gaps, *pivots; 4784 void __rcu **slots; 4785 struct maple_node *node; 4786 bool found = false; 4787 4788 if (ma_is_dense(type)) { 4789 mas->offset = (unsigned char)(mas->index - mas->min); 4790 return true; 4791 } 4792 4793 node = mas_mn(mas); 4794 pivots = ma_pivots(node, type); 4795 slots = ma_slots(node, type); 4796 gaps = ma_gaps(node, type); 4797 offset = mas->offset; 4798 min = mas_safe_min(mas, pivots, offset); 4799 data_end = ma_data_end(node, type, pivots, mas->max); 4800 for (; offset <= data_end; offset++) { 4801 pivot = mas_safe_pivot(mas, pivots, offset, type); 4802 4803 /* Not within lower bounds */ 4804 if (mas->index > pivot) 4805 goto next_slot; 4806 4807 if (gaps) 4808 gap = gaps[offset]; 4809 else if (!mas_slot(mas, slots, offset)) 4810 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4811 else 4812 goto next_slot; 4813 4814 if (gap >= size) { 4815 if (ma_is_leaf(type)) { 4816 found = true; 4817 goto done; 4818 } 4819 if (mas->index <= pivot) { 4820 mas->node = mas_slot(mas, slots, offset); 4821 mas->min = min; 4822 mas->max = pivot; 4823 offset = 0; 4824 break; 4825 } 4826 } 4827 next_slot: 4828 min = pivot + 1; 4829 if (mas->last <= pivot) { 4830 mas_set_err(mas, -EBUSY); 4831 return true; 4832 } 4833 } 4834 4835 if (mte_is_root(mas->node)) 4836 found = true; 4837 done: 4838 mas->offset = offset; 4839 return found; 4840 } 4841 4842 /** 4843 * mas_walk() - Search for @mas->index in the tree. 4844 * @mas: The maple state. 4845 * 4846 * mas->index and mas->last will be set to the range if there is a value. If 4847 * mas->status is ma_none, reset to ma_start 4848 * 4849 * Return: the entry at the location or %NULL. 4850 */ 4851 void *mas_walk(struct ma_state *mas) 4852 { 4853 void *entry; 4854 4855 if (!mas_is_active(mas) || !mas_is_start(mas)) 4856 mas->status = ma_start; 4857 retry: 4858 entry = mas_state_walk(mas); 4859 if (mas_is_start(mas)) { 4860 goto retry; 4861 } else if (mas_is_none(mas)) { 4862 mas->index = 0; 4863 mas->last = ULONG_MAX; 4864 } else if (mas_is_ptr(mas)) { 4865 if (!mas->index) { 4866 mas->last = 0; 4867 return entry; 4868 } 4869 4870 mas->index = 1; 4871 mas->last = ULONG_MAX; 4872 mas->status = ma_none; 4873 return NULL; 4874 } 4875 4876 return entry; 4877 } 4878 EXPORT_SYMBOL_GPL(mas_walk); 4879 4880 static inline bool mas_rewind_node(struct ma_state *mas) 4881 { 4882 unsigned char slot; 4883 4884 do { 4885 if (mte_is_root(mas->node)) { 4886 slot = mas->offset; 4887 if (!slot) 4888 return false; 4889 } else { 4890 mas_ascend(mas); 4891 slot = mas->offset; 4892 } 4893 } while (!slot); 4894 4895 mas->offset = --slot; 4896 return true; 4897 } 4898 4899 /* 4900 * mas_skip_node() - Internal function. Skip over a node. 4901 * @mas: The maple state. 4902 * 4903 * Return: true if there is another node, false otherwise. 
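 *
 * Used by the forward allocation walk to abandon the current subtree and
 * step to the next sibling slot; -EBUSY is set once the root itself has no
 * slots left to try.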
4904 */ 4905 static inline bool mas_skip_node(struct ma_state *mas) 4906 { 4907 if (mas_is_err(mas)) 4908 return false; 4909 4910 do { 4911 if (mte_is_root(mas->node)) { 4912 if (mas->offset >= mas_data_end(mas)) { 4913 mas_set_err(mas, -EBUSY); 4914 return false; 4915 } 4916 } else { 4917 mas_ascend(mas); 4918 } 4919 } while (mas->offset >= mas_data_end(mas)); 4920 4921 mas->offset++; 4922 return true; 4923 } 4924 4925 /* 4926 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4927 * @size 4928 * @mas: The maple state 4929 * @size: The size of the gap required 4930 * 4931 * Search between @mas->index and @mas->last for a gap of @size. 4932 */ 4933 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4934 { 4935 struct maple_enode *last = NULL; 4936 4937 /* 4938 * There are 4 options: 4939 * go to child (descend) 4940 * go back to parent (ascend) 4941 * no gap found. (return, slot == MAPLE_NODE_SLOTS) 4942 * found the gap. (return, slot != MAPLE_NODE_SLOTS) 4943 */ 4944 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4945 if (last == mas->node) 4946 mas_skip_node(mas); 4947 else 4948 last = mas->node; 4949 } 4950 } 4951 4952 /* 4953 * mas_sparse_area() - Internal function. Return upper or lower limit when 4954 * searching for a gap in an empty tree. 4955 * @mas: The maple state 4956 * @min: the minimum range 4957 * @max: The maximum range 4958 * @size: The size of the gap 4959 * @fwd: Searching forward or back 4960 */ 4961 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 4962 unsigned long max, unsigned long size, bool fwd) 4963 { 4964 if (!unlikely(mas_is_none(mas)) && min == 0) { 4965 min++; 4966 /* 4967 * At this time, min is increased, we need to recheck whether 4968 * the size is satisfied. 4969 */ 4970 if (min > max || max - min + 1 < size) 4971 return -EBUSY; 4972 } 4973 /* mas_is_ptr */ 4974 4975 if (fwd) { 4976 mas->index = min; 4977 mas->last = min + size - 1; 4978 } else { 4979 mas->last = max; 4980 mas->index = max - size + 1; 4981 } 4982 return 0; 4983 } 4984 4985 /* 4986 * mas_empty_area() - Get the lowest address within the range that is 4987 * sufficient for the size requested. 
4988 * @mas: The maple state 4989 * @min: The lowest value of the range 4990 * @max: The highest value of the range 4991 * @size: The size needed 4992 */ 4993 int mas_empty_area(struct ma_state *mas, unsigned long min, 4994 unsigned long max, unsigned long size) 4995 { 4996 unsigned char offset; 4997 unsigned long *pivots; 4998 enum maple_type mt; 4999 struct maple_node *node; 5000 5001 if (min > max) 5002 return -EINVAL; 5003 5004 if (size == 0 || max - min < size - 1) 5005 return -EINVAL; 5006 5007 if (mas_is_start(mas)) 5008 mas_start(mas); 5009 else if (mas->offset >= 2) 5010 mas->offset -= 2; 5011 else if (!mas_skip_node(mas)) 5012 return -EBUSY; 5013 5014 /* Empty set */ 5015 if (mas_is_none(mas) || mas_is_ptr(mas)) 5016 return mas_sparse_area(mas, min, max, size, true); 5017 5018 /* The start of the window can only be within these values */ 5019 mas->index = min; 5020 mas->last = max; 5021 mas_awalk(mas, size); 5022 5023 if (unlikely(mas_is_err(mas))) 5024 return xa_err(mas->node); 5025 5026 offset = mas->offset; 5027 if (unlikely(offset == MAPLE_NODE_SLOTS)) 5028 return -EBUSY; 5029 5030 node = mas_mn(mas); 5031 mt = mte_node_type(mas->node); 5032 pivots = ma_pivots(node, mt); 5033 min = mas_safe_min(mas, pivots, offset); 5034 if (mas->index < min) 5035 mas->index = min; 5036 mas->last = mas->index + size - 1; 5037 mas->end = ma_data_end(node, mt, pivots, mas->max); 5038 return 0; 5039 } 5040 EXPORT_SYMBOL_GPL(mas_empty_area); 5041 5042 /* 5043 * mas_empty_area_rev() - Get the highest address within the range that is 5044 * sufficient for the size requested. 5045 * @mas: The maple state 5046 * @min: The lowest value of the range 5047 * @max: The highest value of the range 5048 * @size: The size needed 5049 */ 5050 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 5051 unsigned long max, unsigned long size) 5052 { 5053 struct maple_enode *last = mas->node; 5054 5055 if (min > max) 5056 return -EINVAL; 5057 5058 if (size == 0 || max - min < size - 1) 5059 return -EINVAL; 5060 5061 if (mas_is_start(mas)) { 5062 mas_start(mas); 5063 mas->offset = mas_data_end(mas); 5064 } else if (mas->offset >= 2) { 5065 mas->offset -= 2; 5066 } else if (!mas_rewind_node(mas)) { 5067 return -EBUSY; 5068 } 5069 5070 /* Empty set. */ 5071 if (mas_is_none(mas) || mas_is_ptr(mas)) 5072 return mas_sparse_area(mas, min, max, size, false); 5073 5074 /* The start of the window can only be within these values. */ 5075 mas->index = min; 5076 mas->last = max; 5077 5078 while (!mas_rev_awalk(mas, size, &min, &max)) { 5079 if (last == mas->node) { 5080 if (!mas_rewind_node(mas)) 5081 return -EBUSY; 5082 } else { 5083 last = mas->node; 5084 } 5085 } 5086 5087 if (mas_is_err(mas)) 5088 return xa_err(mas->node); 5089 5090 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 5091 return -EBUSY; 5092 5093 /* Trim the upper limit to the max. */ 5094 if (max < mas->last) 5095 mas->last = max; 5096 5097 mas->index = mas->last - size + 1; 5098 mas->end = mas_data_end(mas); 5099 return 0; 5100 } 5101 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 5102 5103 /* 5104 * mte_dead_leaves() - Mark all leaves of a node as dead. 5105 * @mas: The maple state 5106 * @slots: Pointer to the slot array 5107 * @type: The maple node type 5108 * 5109 * Must hold the write lock. 5110 * 5111 * Return: The number of leaves marked as dead. 
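 *
 * Note: the walk stops at the first empty slot. Each encoded child is
 * replaced with its decoded node pointer and the node type is cached in
 * node->type so the RCU free walk can still interpret the slots.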
5112 */ 5113 static inline 5114 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 5115 void __rcu **slots) 5116 { 5117 struct maple_node *node; 5118 enum maple_type type; 5119 void *entry; 5120 int offset; 5121 5122 for (offset = 0; offset < mt_slot_count(enode); offset++) { 5123 entry = mt_slot(mt, slots, offset); 5124 type = mte_node_type(entry); 5125 node = mte_to_node(entry); 5126 /* Use both node and type to catch LE & BE metadata */ 5127 if (!node || !type) 5128 break; 5129 5130 mte_set_node_dead(entry); 5131 node->type = type; 5132 rcu_assign_pointer(slots[offset], node); 5133 } 5134 5135 return offset; 5136 } 5137 5138 /** 5139 * mte_dead_walk() - Walk down a dead tree to just before the leaves 5140 * @enode: The maple encoded node 5141 * @offset: The starting offset 5142 * 5143 * Note: This can only be used from the RCU callback context. 5144 */ 5145 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 5146 { 5147 struct maple_node *node, *next; 5148 void __rcu **slots = NULL; 5149 5150 next = mte_to_node(*enode); 5151 do { 5152 *enode = ma_enode_ptr(next); 5153 node = mte_to_node(*enode); 5154 slots = ma_slots(node, node->type); 5155 next = rcu_dereference_protected(slots[offset], 5156 lock_is_held(&rcu_callback_map)); 5157 offset = 0; 5158 } while (!ma_is_leaf(next->type)); 5159 5160 return slots; 5161 } 5162 5163 /** 5164 * mt_free_walk() - Walk & free a tree in the RCU callback context 5165 * @head: The RCU head that's within the node. 5166 * 5167 * Note: This can only be used from the RCU callback context. 5168 */ 5169 static void mt_free_walk(struct rcu_head *head) 5170 { 5171 void __rcu **slots; 5172 struct maple_node *node, *start; 5173 struct maple_enode *enode; 5174 unsigned char offset; 5175 enum maple_type type; 5176 5177 node = container_of(head, struct maple_node, rcu); 5178 5179 if (ma_is_leaf(node->type)) 5180 goto free_leaf; 5181 5182 start = node; 5183 enode = mt_mk_node(node, node->type); 5184 slots = mte_dead_walk(&enode, 0); 5185 node = mte_to_node(enode); 5186 do { 5187 mt_free_bulk(node->slot_len, slots); 5188 offset = node->parent_slot + 1; 5189 enode = node->piv_parent; 5190 if (mte_to_node(enode) == node) 5191 goto free_leaf; 5192 5193 type = mte_node_type(enode); 5194 slots = ma_slots(mte_to_node(enode), type); 5195 if ((offset < mt_slots[type]) && 5196 rcu_dereference_protected(slots[offset], 5197 lock_is_held(&rcu_callback_map))) 5198 slots = mte_dead_walk(&enode, offset); 5199 node = mte_to_node(enode); 5200 } while ((node != start) || (node->slot_len < offset)); 5201 5202 slots = ma_slots(node, node->type); 5203 mt_free_bulk(node->slot_len, slots); 5204 5205 free_leaf: 5206 mt_free_rcu(&node->rcu); 5207 } 5208 5209 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 5210 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 5211 { 5212 struct maple_node *node; 5213 struct maple_enode *next = *enode; 5214 void __rcu **slots = NULL; 5215 enum maple_type type; 5216 unsigned char next_offset = 0; 5217 5218 do { 5219 *enode = next; 5220 node = mte_to_node(*enode); 5221 type = mte_node_type(*enode); 5222 slots = ma_slots(node, type); 5223 next = mt_slot_locked(mt, slots, next_offset); 5224 if ((mte_dead_node(next))) 5225 next = mt_slot_locked(mt, slots, ++next_offset); 5226 5227 mte_set_node_dead(*enode); 5228 node->type = type; 5229 node->piv_parent = prev; 5230 node->parent_slot = offset; 5231 offset = next_offset; 5232 next_offset = 0; 5233 prev = 
*enode; 5234 } while (!mte_is_leaf(next)); 5235 5236 return slots; 5237 } 5238 5239 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 5240 bool free) 5241 { 5242 void __rcu **slots; 5243 struct maple_node *node = mte_to_node(enode); 5244 struct maple_enode *start; 5245 5246 if (mte_is_leaf(enode)) { 5247 node->type = mte_node_type(enode); 5248 goto free_leaf; 5249 } 5250 5251 start = enode; 5252 slots = mte_destroy_descend(&enode, mt, start, 0); 5253 node = mte_to_node(enode); // Updated in the above call. 5254 do { 5255 enum maple_type type; 5256 unsigned char offset; 5257 struct maple_enode *parent, *tmp; 5258 5259 node->slot_len = mte_dead_leaves(enode, mt, slots); 5260 if (free) 5261 mt_free_bulk(node->slot_len, slots); 5262 offset = node->parent_slot + 1; 5263 enode = node->piv_parent; 5264 if (mte_to_node(enode) == node) 5265 goto free_leaf; 5266 5267 type = mte_node_type(enode); 5268 slots = ma_slots(mte_to_node(enode), type); 5269 if (offset >= mt_slots[type]) 5270 goto next; 5271 5272 tmp = mt_slot_locked(mt, slots, offset); 5273 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5274 parent = enode; 5275 enode = tmp; 5276 slots = mte_destroy_descend(&enode, mt, parent, offset); 5277 } 5278 next: 5279 node = mte_to_node(enode); 5280 } while (start != enode); 5281 5282 node = mte_to_node(enode); 5283 node->slot_len = mte_dead_leaves(enode, mt, slots); 5284 if (free) 5285 mt_free_bulk(node->slot_len, slots); 5286 5287 free_leaf: 5288 if (free) 5289 mt_free_rcu(&node->rcu); 5290 else 5291 mt_clear_meta(mt, node, node->type); 5292 } 5293 5294 /* 5295 * mte_destroy_walk() - Free a tree or sub-tree. 5296 * @enode: the encoded maple node (maple_enode) to start 5297 * @mt: the tree to free - needed for node types. 5298 * 5299 * Must hold the write lock. 5300 */ 5301 static inline void mte_destroy_walk(struct maple_enode *enode, 5302 struct maple_tree *mt) 5303 { 5304 struct maple_node *node = mte_to_node(enode); 5305 5306 if (mt_in_rcu(mt)) { 5307 mt_destroy_walk(enode, mt, false); 5308 call_rcu(&node->rcu, mt_free_walk); 5309 } else { 5310 mt_destroy_walk(enode, mt, true); 5311 } 5312 } 5313 5314 static void mas_wr_store_setup(struct ma_wr_state *wr_mas) 5315 { 5316 if (!mas_is_active(wr_mas->mas)) { 5317 if (mas_is_start(wr_mas->mas)) 5318 return; 5319 5320 if (unlikely(mas_is_paused(wr_mas->mas))) 5321 goto reset; 5322 5323 if (unlikely(mas_is_none(wr_mas->mas))) 5324 goto reset; 5325 5326 if (unlikely(mas_is_overflow(wr_mas->mas))) 5327 goto reset; 5328 5329 if (unlikely(mas_is_underflow(wr_mas->mas))) 5330 goto reset; 5331 } 5332 5333 /* 5334 * A less strict version of mas_is_span_wr() where we allow spanning 5335 * writes within this node. This is to stop partial walks in 5336 * mas_prealloc() from being reset. 5337 */ 5338 if (wr_mas->mas->last > wr_mas->mas->max) 5339 goto reset; 5340 5341 if (wr_mas->entry) 5342 return; 5343 5344 if (mte_is_leaf(wr_mas->mas->node) && 5345 wr_mas->mas->last == wr_mas->mas->max) 5346 goto reset; 5347 5348 return; 5349 5350 reset: 5351 mas_reset(wr_mas->mas); 5352 } 5353 5354 /* Interface */ 5355 5356 /** 5357 * mas_store() - Store an @entry. 5358 * @mas: The maple state. 5359 * @entry: The entry to store. 5360 * 5361 * The @mas->index and @mas->last is used to set the range for the @entry. 5362 * Note: The @mas should have pre-allocated entries to ensure there is memory to 5363 * store the entry. Please see mas_expected_entries()/mas_destroy() for more details. 
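 *
 * A minimal usage sketch under the write lock (illustrative only; "tree",
 * "entry" and "old" are assumed to exist in the caller, with nodes
 * preallocated as noted above):
 *
 *	MA_STATE(mas, &tree, 5, 15);
 *
 *	mas_lock(&mas);
 *	old = mas_store(&mas, entry);
 *	mas_unlock(&mas);
 *
 * Afterwards the range 5-15 maps to entry and "old" holds the first entry
 * that previously occupied that range.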
5364 * 5365 * Return: the first entry between mas->index and mas->last or %NULL. 5366 */ 5367 void *mas_store(struct ma_state *mas, void *entry) 5368 { 5369 MA_WR_STATE(wr_mas, mas, entry); 5370 5371 trace_ma_write(__func__, mas, 0, entry); 5372 #ifdef CONFIG_DEBUG_MAPLE_TREE 5373 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5374 pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); 5375 5376 if (mas->index > mas->last) { 5377 mas_set_err(mas, -EINVAL); 5378 return NULL; 5379 } 5380 5381 #endif 5382 5383 /* 5384 * Storing is the same operation as insert with the added caveat that it 5385 * can overwrite entries. Although this seems simple enough, one may 5386 * want to examine what happens if a single store operation was to 5387 * overwrite multiple entries within a self-balancing B-Tree. 5388 */ 5389 mas_wr_store_setup(&wr_mas); 5390 mas_wr_store_entry(&wr_mas); 5391 return wr_mas.content; 5392 } 5393 EXPORT_SYMBOL_GPL(mas_store); 5394 5395 /** 5396 * mas_store_gfp() - Store a value into the tree. 5397 * @mas: The maple state 5398 * @entry: The entry to store 5399 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5400 * 5401 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5402 * be allocated. 5403 */ 5404 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5405 { 5406 MA_WR_STATE(wr_mas, mas, entry); 5407 5408 mas_wr_store_setup(&wr_mas); 5409 trace_ma_write(__func__, mas, 0, entry); 5410 retry: 5411 mas_wr_store_entry(&wr_mas); 5412 if (unlikely(mas_nomem(mas, gfp))) 5413 goto retry; 5414 5415 if (unlikely(mas_is_err(mas))) 5416 return xa_err(mas->node); 5417 5418 return 0; 5419 } 5420 EXPORT_SYMBOL_GPL(mas_store_gfp); 5421 5422 /** 5423 * mas_store_prealloc() - Store a value into the tree using memory 5424 * preallocated in the maple state. 5425 * @mas: The maple state 5426 * @entry: The entry to store. 5427 */ 5428 void mas_store_prealloc(struct ma_state *mas, void *entry) 5429 { 5430 MA_WR_STATE(wr_mas, mas, entry); 5431 5432 mas_wr_store_setup(&wr_mas); 5433 trace_ma_write(__func__, mas, 0, entry); 5434 mas_wr_store_entry(&wr_mas); 5435 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5436 mas_destroy(mas); 5437 } 5438 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5439 5440 /** 5441 * mas_preallocate() - Preallocate enough nodes for a store operation 5442 * @mas: The maple state 5443 * @entry: The entry that will be stored 5444 * @gfp: The GFP_FLAGS to use for allocations. 5445 * 5446 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5447 */ 5448 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5449 { 5450 MA_WR_STATE(wr_mas, mas, entry); 5451 unsigned char node_size; 5452 int request = 1; 5453 int ret; 5454 5455 5456 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 5457 goto ask_now; 5458 5459 mas_wr_store_setup(&wr_mas); 5460 wr_mas.content = mas_start(mas); 5461 /* Root expand */ 5462 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 5463 goto ask_now; 5464 5465 if (unlikely(!mas_wr_walk(&wr_mas))) { 5466 /* Spanning store, use worst case for now */ 5467 request = 1 + mas_mt_height(mas) * 3; 5468 goto ask_now; 5469 } 5470 5471 /* At this point, we are at the leaf node that needs to be altered. */ 5472 /* Exact fit, no nodes needed. 
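 * An exact overwrite of an existing range is a direct slot replacement,
 * which never allocates, even in RCU mode.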
*/ 5473 if (wr_mas.r_min == mas->index && wr_mas.r_max == mas->last) 5474 return 0; 5475 5476 mas_wr_end_piv(&wr_mas); 5477 node_size = mas_wr_new_end(&wr_mas); 5478 5479 /* Slot store, does not require additional nodes */ 5480 if (node_size == mas->end) { 5481 /* reuse node */ 5482 if (!mt_in_rcu(mas->tree)) 5483 return 0; 5484 /* shifting boundary */ 5485 if (wr_mas.offset_end - mas->offset == 1) 5486 return 0; 5487 } 5488 5489 if (node_size >= mt_slots[wr_mas.type]) { 5490 /* Split, worst case for now. */ 5491 request = 1 + mas_mt_height(mas) * 2; 5492 goto ask_now; 5493 } 5494 5495 /* New root needs a singe node */ 5496 if (unlikely(mte_is_root(mas->node))) 5497 goto ask_now; 5498 5499 /* Potential spanning rebalance collapsing a node, use worst-case */ 5500 if (node_size - 1 <= mt_min_slots[wr_mas.type]) 5501 request = mas_mt_height(mas) * 2 - 1; 5502 5503 /* node store, slot store needs one node */ 5504 ask_now: 5505 mas_node_count_gfp(mas, request, gfp); 5506 mas->mas_flags |= MA_STATE_PREALLOC; 5507 if (likely(!mas_is_err(mas))) 5508 return 0; 5509 5510 mas_set_alloc_req(mas, 0); 5511 ret = xa_err(mas->node); 5512 mas_reset(mas); 5513 mas_destroy(mas); 5514 mas_reset(mas); 5515 return ret; 5516 } 5517 EXPORT_SYMBOL_GPL(mas_preallocate); 5518 5519 /* 5520 * mas_destroy() - destroy a maple state. 5521 * @mas: The maple state 5522 * 5523 * Upon completion, check the left-most node and rebalance against the node to 5524 * the right if necessary. Frees any allocated nodes associated with this maple 5525 * state. 5526 */ 5527 void mas_destroy(struct ma_state *mas) 5528 { 5529 struct maple_alloc *node; 5530 unsigned long total; 5531 5532 /* 5533 * When using mas_for_each() to insert an expected number of elements, 5534 * it is possible that the number inserted is less than the expected 5535 * number. To fix an invalid final node, a check is performed here to 5536 * rebalance the previous node with the final node. 5537 */ 5538 if (mas->mas_flags & MA_STATE_REBALANCE) { 5539 unsigned char end; 5540 5541 mas_start(mas); 5542 mtree_range_walk(mas); 5543 end = mas->end + 1; 5544 if (end < mt_min_slot_count(mas->node) - 1) 5545 mas_destroy_rebalance(mas, end); 5546 5547 mas->mas_flags &= ~MA_STATE_REBALANCE; 5548 } 5549 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); 5550 5551 total = mas_allocated(mas); 5552 while (total) { 5553 node = mas->alloc; 5554 mas->alloc = node->slot[0]; 5555 if (node->node_count > 1) { 5556 size_t count = node->node_count - 1; 5557 5558 mt_free_bulk(count, (void __rcu **)&node->slot[1]); 5559 total -= count; 5560 } 5561 mt_free_one(ma_mnode_ptr(node)); 5562 total--; 5563 } 5564 5565 mas->alloc = NULL; 5566 } 5567 EXPORT_SYMBOL_GPL(mas_destroy); 5568 5569 /* 5570 * mas_expected_entries() - Set the expected number of entries that will be inserted. 5571 * @mas: The maple state 5572 * @nr_entries: The number of expected entries. 5573 * 5574 * This will attempt to pre-allocate enough nodes to store the expected number 5575 * of entries. The allocations will occur using the bulk allocator interface 5576 * for speed. Please call mas_destroy() on the @mas after inserting the entries 5577 * to ensure any unused nodes are freed. 5578 * 5579 * Return: 0 on success, -ENOMEM if memory could not be allocated. 
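 *
 * A usage sketch (illustrative only; new_tree, nr, first[], last[] and
 * ptrs[] are caller-side data, not part of this API):
 *
 *	MA_STATE(mas, &new_tree, 0, 0);
 *	unsigned long i;
 *	int ret;
 *
 *	mas_lock(&mas);
 *	ret = mas_expected_entries(&mas, nr);
 *	for (i = 0; !ret && i < nr; i++) {
 *		mas_set_range(&mas, first[i], last[i]);
 *		mas_store(&mas, ptrs[i]);
 *	}
 *	mas_destroy(&mas);
 *	mas_unlock(&mas);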
5580 */ 5581 int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) 5582 { 5583 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; 5584 struct maple_enode *enode = mas->node; 5585 int nr_nodes; 5586 int ret; 5587 5588 /* 5589 * Sometimes it is necessary to duplicate a tree to a new tree, such as 5590 * forking a process and duplicating the VMAs from one tree to a new 5591 * tree. When such a situation arises, it is known that the new tree is 5592 * not going to be used until the entire tree is populated. For 5593 * performance reasons, it is best to use a bulk load with RCU disabled. 5594 * This allows for optimistic splitting that favours the left and reuse 5595 * of nodes during the operation. 5596 */ 5597 5598 /* Optimize splitting for bulk insert in-order */ 5599 mas->mas_flags |= MA_STATE_BULK; 5600 5601 /* 5602 * Avoid overflow, assume a gap between each entry and a trailing null. 5603 * If this is wrong, it just means allocation can happen during 5604 * insertion of entries. 5605 */ 5606 nr_nodes = max(nr_entries, nr_entries * 2 + 1); 5607 if (!mt_is_alloc(mas->tree)) 5608 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; 5609 5610 /* Leaves; reduce slots to keep space for expansion */ 5611 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); 5612 /* Internal nodes */ 5613 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); 5614 /* Add working room for split (2 nodes) + new parents */ 5615 mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); 5616 5617 /* Detect if allocations run out */ 5618 mas->mas_flags |= MA_STATE_PREALLOC; 5619 5620 if (!mas_is_err(mas)) 5621 return 0; 5622 5623 ret = xa_err(mas->node); 5624 mas->node = enode; 5625 mas_destroy(mas); 5626 return ret; 5627 5628 } 5629 EXPORT_SYMBOL_GPL(mas_expected_entries); 5630 5631 static bool mas_next_setup(struct ma_state *mas, unsigned long max, 5632 void **entry) 5633 { 5634 bool was_none = mas_is_none(mas); 5635 5636 if (unlikely(mas->last >= max)) { 5637 mas->status = ma_overflow; 5638 return true; 5639 } 5640 5641 switch (mas->status) { 5642 case ma_active: 5643 return false; 5644 case ma_none: 5645 fallthrough; 5646 case ma_pause: 5647 mas->status = ma_start; 5648 fallthrough; 5649 case ma_start: 5650 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5651 break; 5652 case ma_overflow: 5653 /* Overflowed before, but the max changed */ 5654 mas->status = ma_active; 5655 break; 5656 case ma_underflow: 5657 /* The user expects the mas to be one before where it is */ 5658 mas->status = ma_active; 5659 *entry = mas_walk(mas); 5660 if (*entry) 5661 return true; 5662 break; 5663 case ma_root: 5664 break; 5665 case ma_error: 5666 return true; 5667 } 5668 5669 if (likely(mas_is_active(mas))) /* Fast path */ 5670 return false; 5671 5672 if (mas_is_ptr(mas)) { 5673 *entry = NULL; 5674 if (was_none && mas->index == 0) { 5675 mas->index = mas->last = 0; 5676 return true; 5677 } 5678 mas->index = 1; 5679 mas->last = ULONG_MAX; 5680 mas->status = ma_none; 5681 return true; 5682 } 5683 5684 if (mas_is_none(mas)) 5685 return true; 5686 5687 return false; 5688 } 5689 5690 /** 5691 * mas_next() - Get the next entry. 5692 * @mas: The maple state 5693 * @max: The maximum index to check. 5694 * 5695 * Returns the next entry after @mas->index. 5696 * Must hold rcu_read_lock or the write lock. 5697 * Can return the zero entry. 
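 *
 * A common iteration pattern (illustrative only; "process()" stands in for
 * caller code and "mas" is an already initialized maple state):
 *
 *	rcu_read_lock();
 *	entry = mas_find(&mas, ULONG_MAX);
 *	while (entry) {
 *		process(entry);
 *		entry = mas_next(&mas, ULONG_MAX);
 *	}
 *	rcu_read_unlock();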
5698 * 5699 * Return: The next entry or %NULL 5700 */ 5701 void *mas_next(struct ma_state *mas, unsigned long max) 5702 { 5703 void *entry = NULL; 5704 5705 if (mas_next_setup(mas, max, &entry)) 5706 return entry; 5707 5708 /* Retries on dead nodes handled by mas_next_slot */ 5709 return mas_next_slot(mas, max, false); 5710 } 5711 EXPORT_SYMBOL_GPL(mas_next); 5712 5713 /** 5714 * mas_next_range() - Advance the maple state to the next range 5715 * @mas: The maple state 5716 * @max: The maximum index to check. 5717 * 5718 * Sets @mas->index and @mas->last to the range. 5719 * Must hold rcu_read_lock or the write lock. 5720 * Can return the zero entry. 5721 * 5722 * Return: The next entry or %NULL 5723 */ 5724 void *mas_next_range(struct ma_state *mas, unsigned long max) 5725 { 5726 void *entry = NULL; 5727 5728 if (mas_next_setup(mas, max, &entry)) 5729 return entry; 5730 5731 /* Retries on dead nodes handled by mas_next_slot */ 5732 return mas_next_slot(mas, max, true); 5733 } 5734 EXPORT_SYMBOL_GPL(mas_next_range); 5735 5736 /** 5737 * mt_next() - get the next value in the maple tree 5738 * @mt: The maple tree 5739 * @index: The start index 5740 * @max: The maximum index to check 5741 * 5742 * Takes RCU read lock internally to protect the search, which does not 5743 * protect the returned pointer after dropping RCU read lock. 5744 * See also: Documentation/core-api/maple_tree.rst 5745 * 5746 * Return: The entry higher than @index or %NULL if nothing is found. 5747 */ 5748 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5749 { 5750 void *entry = NULL; 5751 MA_STATE(mas, mt, index, index); 5752 5753 rcu_read_lock(); 5754 entry = mas_next(&mas, max); 5755 rcu_read_unlock(); 5756 return entry; 5757 } 5758 EXPORT_SYMBOL_GPL(mt_next); 5759 5760 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) 5761 { 5762 if (unlikely(mas->index <= min)) { 5763 mas->status = ma_underflow; 5764 return true; 5765 } 5766 5767 switch (mas->status) { 5768 case ma_active: 5769 return false; 5770 case ma_start: 5771 break; 5772 case ma_none: 5773 fallthrough; 5774 case ma_pause: 5775 mas->status = ma_start; 5776 break; 5777 case ma_underflow: 5778 /* underflowed before but the min changed */ 5779 mas->status = ma_active; 5780 break; 5781 case ma_overflow: 5782 /* User expects mas to be one after where it is */ 5783 mas->status = ma_active; 5784 *entry = mas_walk(mas); 5785 if (*entry) 5786 return true; 5787 break; 5788 case ma_root: 5789 break; 5790 case ma_error: 5791 return true; 5792 } 5793 5794 if (mas_is_start(mas)) 5795 mas_walk(mas); 5796 5797 if (unlikely(mas_is_ptr(mas))) { 5798 if (!mas->index) { 5799 mas->status = ma_none; 5800 return true; 5801 } 5802 mas->index = mas->last = 0; 5803 *entry = mas_root(mas); 5804 return true; 5805 } 5806 5807 if (mas_is_none(mas)) { 5808 if (mas->index) { 5809 /* Walked to out-of-range pointer? */ 5810 mas->index = mas->last = 0; 5811 mas->status = ma_root; 5812 *entry = mas_root(mas); 5813 return true; 5814 } 5815 return true; 5816 } 5817 5818 return false; 5819 } 5820 5821 /** 5822 * mas_prev() - Get the previous entry 5823 * @mas: The maple state 5824 * @min: The minimum value to check. 5825 * 5826 * Must hold rcu_read_lock or the write lock. 5827 * Will reset mas to ma_start if the status is ma_none. Will stop on not 5828 * searchable nodes. 5829 * 5830 * Return: the previous value or %NULL. 
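 *
 * Note: the entry at the current index is skipped; use mas_find_rev() when
 * the first call should also consider the entry at @mas->index.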
5831 */ 5832 void *mas_prev(struct ma_state *mas, unsigned long min) 5833 { 5834 void *entry = NULL; 5835 5836 if (mas_prev_setup(mas, min, &entry)) 5837 return entry; 5838 5839 return mas_prev_slot(mas, min, false); 5840 } 5841 EXPORT_SYMBOL_GPL(mas_prev); 5842 5843 /** 5844 * mas_prev_range() - Advance to the previous range 5845 * @mas: The maple state 5846 * @min: The minimum value to check. 5847 * 5848 * Sets @mas->index and @mas->last to the range. 5849 * Must hold rcu_read_lock or the write lock. 5850 * Will reset mas to ma_start if the node is ma_none. Will stop on not 5851 * searchable nodes. 5852 * 5853 * Return: the previous value or %NULL. 5854 */ 5855 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5856 { 5857 void *entry = NULL; 5858 5859 if (mas_prev_setup(mas, min, &entry)) 5860 return entry; 5861 5862 return mas_prev_slot(mas, min, true); 5863 } 5864 EXPORT_SYMBOL_GPL(mas_prev_range); 5865 5866 /** 5867 * mt_prev() - get the previous value in the maple tree 5868 * @mt: The maple tree 5869 * @index: The start index 5870 * @min: The minimum index to check 5871 * 5872 * Takes RCU read lock internally to protect the search, which does not 5873 * protect the returned pointer after dropping RCU read lock. 5874 * See also: Documentation/core-api/maple_tree.rst 5875 * 5876 * Return: The entry before @index or %NULL if nothing is found. 5877 */ 5878 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5879 { 5880 void *entry = NULL; 5881 MA_STATE(mas, mt, index, index); 5882 5883 rcu_read_lock(); 5884 entry = mas_prev(&mas, min); 5885 rcu_read_unlock(); 5886 return entry; 5887 } 5888 EXPORT_SYMBOL_GPL(mt_prev); 5889 5890 /** 5891 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5892 * @mas: The maple state to pause 5893 * 5894 * Some users need to pause a walk and drop the lock they're holding in 5895 * order to yield to a higher priority thread or carry out an operation 5896 * on an entry. Those users should call this function before they drop 5897 * the lock. It resets the @mas to be suitable for the next iteration 5898 * of the loop after the user has reacquired the lock. If most entries 5899 * found during a walk require you to call mas_pause(), the mt_for_each() 5900 * iterator may be more appropriate. 5901 * 5902 */ 5903 void mas_pause(struct ma_state *mas) 5904 { 5905 mas->status = ma_pause; 5906 mas->node = NULL; 5907 } 5908 EXPORT_SYMBOL_GPL(mas_pause); 5909 5910 /** 5911 * mas_find_setup() - Internal function to set up mas_find*(). 5912 * @mas: The maple state 5913 * @max: The maximum index 5914 * @entry: Pointer to the entry 5915 * 5916 * Returns: True if entry is the answer, false otherwise. 
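 * When false is returned, the state is ready for mas_next_slot() to finish
 * the job for mas_find() and mas_find_range().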
5917 */ 5918 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) 5919 { 5920 switch (mas->status) { 5921 case ma_active: 5922 if (mas->last < max) 5923 return false; 5924 return true; 5925 case ma_start: 5926 break; 5927 case ma_pause: 5928 if (unlikely(mas->last >= max)) 5929 return true; 5930 5931 mas->index = ++mas->last; 5932 mas->status = ma_start; 5933 break; 5934 case ma_none: 5935 if (unlikely(mas->last >= max)) 5936 return true; 5937 5938 mas->index = mas->last; 5939 mas->status = ma_start; 5940 break; 5941 case ma_underflow: 5942 /* mas is pointing at entry before unable to go lower */ 5943 if (unlikely(mas->index >= max)) { 5944 mas->status = ma_overflow; 5945 return true; 5946 } 5947 5948 mas->status = ma_active; 5949 *entry = mas_walk(mas); 5950 if (*entry) 5951 return true; 5952 break; 5953 case ma_overflow: 5954 if (unlikely(mas->last >= max)) 5955 return true; 5956 5957 mas->status = ma_active; 5958 *entry = mas_walk(mas); 5959 if (*entry) 5960 return true; 5961 break; 5962 case ma_root: 5963 break; 5964 case ma_error: 5965 return true; 5966 } 5967 5968 if (mas_is_start(mas)) { 5969 /* First run or continue */ 5970 if (mas->index > max) 5971 return true; 5972 5973 *entry = mas_walk(mas); 5974 if (*entry) 5975 return true; 5976 5977 } 5978 5979 if (unlikely(mas_is_ptr(mas))) 5980 goto ptr_out_of_range; 5981 5982 if (unlikely(mas_is_none(mas))) 5983 return true; 5984 5985 if (mas->index == max) 5986 return true; 5987 5988 return false; 5989 5990 ptr_out_of_range: 5991 mas->status = ma_none; 5992 mas->index = 1; 5993 mas->last = ULONG_MAX; 5994 return true; 5995 } 5996 5997 /** 5998 * mas_find() - On the first call, find the entry at or after mas->index up to 5999 * %max. Otherwise, find the entry after mas->index. 6000 * @mas: The maple state 6001 * @max: The maximum value to check. 6002 * 6003 * Must hold rcu_read_lock or the write lock. 6004 * If an entry exists, last and index are updated accordingly. 6005 * May set @mas->status to ma_overflow. 6006 * 6007 * Return: The entry or %NULL. 6008 */ 6009 void *mas_find(struct ma_state *mas, unsigned long max) 6010 { 6011 void *entry = NULL; 6012 6013 if (mas_find_setup(mas, max, &entry)) 6014 return entry; 6015 6016 /* Retries on dead nodes handled by mas_next_slot */ 6017 entry = mas_next_slot(mas, max, false); 6018 /* Ignore overflow */ 6019 mas->status = ma_active; 6020 return entry; 6021 } 6022 EXPORT_SYMBOL_GPL(mas_find); 6023 6024 /** 6025 * mas_find_range() - On the first call, find the entry at or after 6026 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 6027 * @mas: The maple state 6028 * @max: The maximum value to check. 6029 * 6030 * Must hold rcu_read_lock or the write lock. 6031 * If an entry exists, last and index are updated accordingly. 6032 * May set @mas->status to ma_overflow. 6033 * 6034 * Return: The entry or %NULL. 6035 */ 6036 void *mas_find_range(struct ma_state *mas, unsigned long max) 6037 { 6038 void *entry = NULL; 6039 6040 if (mas_find_setup(mas, max, &entry)) 6041 return entry; 6042 6043 /* Retries on dead nodes handled by mas_next_slot */ 6044 return mas_next_slot(mas, max, true); 6045 } 6046 EXPORT_SYMBOL_GPL(mas_find_range); 6047 6048 /** 6049 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 6050 * @mas: The maple state 6051 * @min: The minimum index 6052 * @entry: Pointer to the entry 6053 * 6054 * Returns: True if entry is the answer, false otherwise. 
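 *
 * An illustrative sketch of the callers this helper prepares for (the
 * tree and handle_entry() are hypothetical): a descending walk with
 * mas_find_rev() under the RCU read lock.
 *
 *	void *entry;
 *	MA_STATE(mas, &tree, ULONG_MAX, ULONG_MAX);
 *
 *	rcu_read_lock();
 *	while ((entry = mas_find_rev(&mas, 0)) != NULL)
 *		handle_entry(entry);
 *	rcu_read_unlock();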
6055 */ 6056 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 6057 void **entry) 6058 { 6059 6060 switch (mas->status) { 6061 case ma_active: 6062 goto active; 6063 case ma_start: 6064 break; 6065 case ma_pause: 6066 if (unlikely(mas->index <= min)) { 6067 mas->status = ma_underflow; 6068 return true; 6069 } 6070 mas->last = --mas->index; 6071 mas->status = ma_start; 6072 break; 6073 case ma_none: 6074 if (mas->index <= min) 6075 goto none; 6076 6077 mas->last = mas->index; 6078 mas->status = ma_start; 6079 break; 6080 case ma_overflow: /* user expects the mas to be one after where it is */ 6081 if (unlikely(mas->index <= min)) { 6082 mas->status = ma_underflow; 6083 return true; 6084 } 6085 6086 mas->status = ma_active; 6087 break; 6088 case ma_underflow: /* user expects the mas to be one before where it is */ 6089 if (unlikely(mas->index <= min)) 6090 return true; 6091 6092 mas->status = ma_active; 6093 break; 6094 case ma_root: 6095 break; 6096 case ma_error: 6097 return true; 6098 } 6099 6100 if (mas_is_start(mas)) { 6101 /* First run or continue */ 6102 if (mas->index < min) 6103 return true; 6104 6105 *entry = mas_walk(mas); 6106 if (*entry) 6107 return true; 6108 } 6109 6110 if (unlikely(mas_is_ptr(mas))) 6111 goto none; 6112 6113 if (unlikely(mas_is_none(mas))) { 6114 /* 6115 * Walked to the location, and there was nothing so the previous 6116 * location is 0. 6117 */ 6118 mas->last = mas->index = 0; 6119 mas->status = ma_root; 6120 *entry = mas_root(mas); 6121 return true; 6122 } 6123 6124 active: 6125 if (mas->index < min) 6126 return true; 6127 6128 return false; 6129 6130 none: 6131 mas->status = ma_none; 6132 return true; 6133 } 6134 6135 /** 6136 * mas_find_rev: On the first call, find the first non-null entry at or below 6137 * mas->index down to %min. Otherwise find the first non-null entry below 6138 * mas->index down to %min. 6139 * @mas: The maple state 6140 * @min: The minimum value to check. 6141 * 6142 * Must hold rcu_read_lock or the write lock. 6143 * If an entry exists, last and index are updated accordingly. 6144 * May set @mas->status to ma_underflow. 6145 * 6146 * Return: The entry or %NULL. 6147 */ 6148 void *mas_find_rev(struct ma_state *mas, unsigned long min) 6149 { 6150 void *entry = NULL; 6151 6152 if (mas_find_rev_setup(mas, min, &entry)) 6153 return entry; 6154 6155 /* Retries on dead nodes handled by mas_prev_slot */ 6156 return mas_prev_slot(mas, min, false); 6157 6158 } 6159 EXPORT_SYMBOL_GPL(mas_find_rev); 6160 6161 /** 6162 * mas_find_range_rev: On the first call, find the first non-null entry at or 6163 * below mas->index down to %min. Otherwise advance to the previous slot after 6164 * mas->index down to %min. 6165 * @mas: The maple state 6166 * @min: The minimum value to check. 6167 * 6168 * Must hold rcu_read_lock or the write lock. 6169 * If an entry exists, last and index are updated accordingly. 6170 * May set @mas->status to ma_underflow. 6171 * 6172 * Return: The entry or %NULL. 6173 */ 6174 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 6175 { 6176 void *entry = NULL; 6177 6178 if (mas_find_rev_setup(mas, min, &entry)) 6179 return entry; 6180 6181 /* Retries on dead nodes handled by mas_prev_slot */ 6182 return mas_prev_slot(mas, min, true); 6183 } 6184 EXPORT_SYMBOL_GPL(mas_find_range_rev); 6185 6186 /** 6187 * mas_erase() - Find the range in which index resides and erase the entire 6188 * range. 6189 * @mas: The maple state 6190 * 6191 * Must hold the write lock. 
6192 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 6193 * erases that range. 6194 * 6195 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 6196 */ 6197 void *mas_erase(struct ma_state *mas) 6198 { 6199 void *entry; 6200 MA_WR_STATE(wr_mas, mas, NULL); 6201 6202 if (!mas_is_active(mas) || !mas_is_start(mas)) 6203 mas->status = ma_start; 6204 6205 /* Retry unnecessary when holding the write lock. */ 6206 entry = mas_state_walk(mas); 6207 if (!entry) 6208 return NULL; 6209 6210 write_retry: 6211 /* Must reset to ensure spanning writes of last slot are detected */ 6212 mas_reset(mas); 6213 mas_wr_store_setup(&wr_mas); 6214 mas_wr_store_entry(&wr_mas); 6215 if (mas_nomem(mas, GFP_KERNEL)) 6216 goto write_retry; 6217 6218 return entry; 6219 } 6220 EXPORT_SYMBOL_GPL(mas_erase); 6221 6222 /** 6223 * mas_nomem() - Check if there was an error allocating and do the allocation 6224 * if necessary If there are allocations, then free them. 6225 * @mas: The maple state 6226 * @gfp: The GFP_FLAGS to use for allocations 6227 * Return: true on allocation, false otherwise. 6228 */ 6229 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 6230 __must_hold(mas->tree->ma_lock) 6231 { 6232 if (likely(mas->node != MA_ERROR(-ENOMEM))) { 6233 mas_destroy(mas); 6234 return false; 6235 } 6236 6237 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 6238 mtree_unlock(mas->tree); 6239 mas_alloc_nodes(mas, gfp); 6240 mtree_lock(mas->tree); 6241 } else { 6242 mas_alloc_nodes(mas, gfp); 6243 } 6244 6245 if (!mas_allocated(mas)) 6246 return false; 6247 6248 mas->status = ma_start; 6249 return true; 6250 } 6251 6252 void __init maple_tree_init(void) 6253 { 6254 maple_node_cache = kmem_cache_create("maple_node", 6255 sizeof(struct maple_node), sizeof(struct maple_node), 6256 SLAB_PANIC, NULL); 6257 } 6258 6259 /** 6260 * mtree_load() - Load a value stored in a maple tree 6261 * @mt: The maple tree 6262 * @index: The index to load 6263 * 6264 * Return: the entry or %NULL 6265 */ 6266 void *mtree_load(struct maple_tree *mt, unsigned long index) 6267 { 6268 MA_STATE(mas, mt, index, index); 6269 void *entry; 6270 6271 trace_ma_read(__func__, &mas); 6272 rcu_read_lock(); 6273 retry: 6274 entry = mas_start(&mas); 6275 if (unlikely(mas_is_none(&mas))) 6276 goto unlock; 6277 6278 if (unlikely(mas_is_ptr(&mas))) { 6279 if (index) 6280 entry = NULL; 6281 6282 goto unlock; 6283 } 6284 6285 entry = mtree_lookup_walk(&mas); 6286 if (!entry && unlikely(mas_is_start(&mas))) 6287 goto retry; 6288 unlock: 6289 rcu_read_unlock(); 6290 if (xa_is_zero(entry)) 6291 return NULL; 6292 6293 return entry; 6294 } 6295 EXPORT_SYMBOL(mtree_load); 6296 6297 /** 6298 * mtree_store_range() - Store an entry at a given range. 6299 * @mt: The maple tree 6300 * @index: The start of the range 6301 * @last: The end of the range 6302 * @entry: The entry to store 6303 * @gfp: The GFP_FLAGS to use for allocations 6304 * 6305 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6306 * be allocated. 
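 *
 * An illustrative sketch (the tree and the two pointers are hypothetical):
 * store one entry over [10, 20], then overwrite the middle of it. The
 * second store splits the first range, leaving [10, 14] and [18, 20]
 * pointing at ptr_a and [15, 17] pointing at ptr_b.
 *
 *	DEFINE_MTREE(tree);
 *	int err;
 *
 *	err = mtree_store_range(&tree, 10, 20, ptr_a, GFP_KERNEL);
 *	if (!err)
 *		err = mtree_store_range(&tree, 15, 17, ptr_b, GFP_KERNEL);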
6307 */ 6308 int mtree_store_range(struct maple_tree *mt, unsigned long index, 6309 unsigned long last, void *entry, gfp_t gfp) 6310 { 6311 MA_STATE(mas, mt, index, last); 6312 MA_WR_STATE(wr_mas, &mas, entry); 6313 6314 trace_ma_write(__func__, &mas, 0, entry); 6315 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6316 return -EINVAL; 6317 6318 if (index > last) 6319 return -EINVAL; 6320 6321 mtree_lock(mt); 6322 retry: 6323 mas_wr_store_entry(&wr_mas); 6324 if (mas_nomem(&mas, gfp)) 6325 goto retry; 6326 6327 mtree_unlock(mt); 6328 if (mas_is_err(&mas)) 6329 return xa_err(mas.node); 6330 6331 return 0; 6332 } 6333 EXPORT_SYMBOL(mtree_store_range); 6334 6335 /** 6336 * mtree_store() - Store an entry at a given index. 6337 * @mt: The maple tree 6338 * @index: The index to store the value 6339 * @entry: The entry to store 6340 * @gfp: The GFP_FLAGS to use for allocations 6341 * 6342 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6343 * be allocated. 6344 */ 6345 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 6346 gfp_t gfp) 6347 { 6348 return mtree_store_range(mt, index, index, entry, gfp); 6349 } 6350 EXPORT_SYMBOL(mtree_store); 6351 6352 /** 6353 * mtree_insert_range() - Insert an entry at a given range if there is no value. 6354 * @mt: The maple tree 6355 * @first: The start of the range 6356 * @last: The end of the range 6357 * @entry: The entry to store 6358 * @gfp: The GFP_FLAGS to use for allocations. 6359 * 6360 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid 6361 * request, -ENOMEM if memory could not be allocated. 6362 */ 6363 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 6364 unsigned long last, void *entry, gfp_t gfp) 6365 { 6366 MA_STATE(ms, mt, first, last); 6367 6368 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6369 return -EINVAL; 6370 6371 if (first > last) 6372 return -EINVAL; 6373 6374 mtree_lock(mt); 6375 retry: 6376 mas_insert(&ms, entry); 6377 if (mas_nomem(&ms, gfp)) 6378 goto retry; 6379 6380 mtree_unlock(mt); 6381 if (mas_is_err(&ms)) 6382 return xa_err(ms.node); 6383 6384 return 0; 6385 } 6386 EXPORT_SYMBOL(mtree_insert_range); 6387 6388 /** 6389 * mtree_insert() - Insert an entry at a given index if there is no value. 6390 * @mt: The maple tree 6391 * @index: The index to store the value 6392 * @entry: The entry to store 6393 * @gfp: The GFP_FLAGS to use for allocations. 6394 * 6395 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid 6396 * request, -ENOMEM if memory could not be allocated. 6397 */ 6398 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6399 gfp_t gfp) 6400 { 6401 return mtree_insert_range(mt, index, index, entry, gfp); 6402 } 6403 EXPORT_SYMBOL(mtree_insert); 6404 6405 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6406 void *entry, unsigned long size, unsigned long min, 6407 unsigned long max, gfp_t gfp) 6408 { 6409 int ret = 0; 6410 6411 MA_STATE(mas, mt, 0, 0); 6412 if (!mt_is_alloc(mt)) 6413 return -EINVAL; 6414 6415 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6416 return -EINVAL; 6417 6418 mtree_lock(mt); 6419 retry: 6420 ret = mas_empty_area(&mas, min, max, size); 6421 if (ret) 6422 goto unlock; 6423 6424 mas_insert(&mas, entry); 6425 /* 6426 * mas_nomem() may release the lock, causing the allocated area 6427 * to be unavailable, so try to allocate a free area again.
6428 */ 6429 if (mas_nomem(&mas, gfp)) 6430 goto retry; 6431 6432 if (mas_is_err(&mas)) 6433 ret = xa_err(mas.node); 6434 else 6435 *startp = mas.index; 6436 6437 unlock: 6438 mtree_unlock(mt); 6439 return ret; 6440 } 6441 EXPORT_SYMBOL(mtree_alloc_range); 6442 6443 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6444 void *entry, unsigned long size, unsigned long min, 6445 unsigned long max, gfp_t gfp) 6446 { 6447 int ret = 0; 6448 6449 MA_STATE(mas, mt, 0, 0); 6450 if (!mt_is_alloc(mt)) 6451 return -EINVAL; 6452 6453 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6454 return -EINVAL; 6455 6456 mtree_lock(mt); 6457 retry: 6458 ret = mas_empty_area_rev(&mas, min, max, size); 6459 if (ret) 6460 goto unlock; 6461 6462 mas_insert(&mas, entry); 6463 /* 6464 * mas_nomem() may release the lock, causing the allocated area 6465 * to be unavailable, so try to allocate a free area again. 6466 */ 6467 if (mas_nomem(&mas, gfp)) 6468 goto retry; 6469 6470 if (mas_is_err(&mas)) 6471 ret = xa_err(mas.node); 6472 else 6473 *startp = mas.index; 6474 6475 unlock: 6476 mtree_unlock(mt); 6477 return ret; 6478 } 6479 EXPORT_SYMBOL(mtree_alloc_rrange); 6480 6481 /** 6482 * mtree_erase() - Find an index and erase the entire range. 6483 * @mt: The maple tree 6484 * @index: The index to erase 6485 * 6486 * Erasing is the same as a walk to an entry then a store of a NULL to that 6487 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6488 * 6489 * Return: The entry stored at the @index or %NULL 6490 */ 6491 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6492 { 6493 void *entry = NULL; 6494 6495 MA_STATE(mas, mt, index, index); 6496 trace_ma_op(__func__, &mas); 6497 6498 mtree_lock(mt); 6499 entry = mas_erase(&mas); 6500 mtree_unlock(mt); 6501 6502 return entry; 6503 } 6504 EXPORT_SYMBOL(mtree_erase); 6505 6506 /* 6507 * mas_dup_free() - Free an incomplete duplication of a tree. 6508 * @mas: The maple state of an incomplete tree. 6509 * 6510 * The parameter @mas->node passed in indicates that the allocation failed on 6511 * this node. This function frees all nodes starting from @mas->node in the 6512 * reverse order of mas_dup_build(). There is no need to hold the source tree 6513 * lock at this time. 6514 */ 6515 static void mas_dup_free(struct ma_state *mas) 6516 { 6517 struct maple_node *node; 6518 enum maple_type type; 6519 void __rcu **slots; 6520 unsigned char count, i; 6521 6522 /* Maybe the first node allocation failed. */ 6523 if (mas_is_none(mas)) 6524 return; 6525 6526 while (!mte_is_root(mas->node)) { 6527 mas_ascend(mas); 6528 if (mas->offset) { 6529 mas->offset--; 6530 do { 6531 mas_descend(mas); 6532 mas->offset = mas_data_end(mas); 6533 } while (!mte_is_leaf(mas->node)); 6534 6535 mas_ascend(mas); 6536 } 6537 6538 node = mte_to_node(mas->node); 6539 type = mte_node_type(mas->node); 6540 slots = ma_slots(node, type); 6541 count = mas_data_end(mas) + 1; 6542 for (i = 0; i < count; i++) 6543 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; 6544 mt_free_bulk(count, slots); 6545 } 6546 6547 node = mte_to_node(mas->node); 6548 mt_free_one(node); 6549 } 6550 6551 /* 6552 * mas_copy_node() - Copy a maple node and replace the parent. 6553 * @mas: The maple state of source tree. 6554 * @new_mas: The maple state of new tree. 6555 * @parent: The parent of the new node. 6556 * 6557 * Copy @mas->node to @new_mas->node, set @parent to be the parent of 6558 * @new_mas->node.
6559 */ 6560 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, 6561 struct maple_pnode *parent) 6562 { 6563 struct maple_node *node = mte_to_node(mas->node); 6564 struct maple_node *new_node = mte_to_node(new_mas->node); 6565 unsigned long val; 6566 6567 /* Copy the node completely. */ 6568 memcpy(new_node, node, sizeof(struct maple_node)); 6569 /* Update the parent node pointer. */ 6570 val = (unsigned long)node->parent & MAPLE_NODE_MASK; 6571 new_node->parent = ma_parent_ptr(val | (unsigned long)parent); 6572 } 6573 6574 /* 6575 * mas_dup_alloc() - Allocate child nodes for a maple node. 6576 * @mas: The maple state of source tree. 6577 * @new_mas: The maple state of new tree. 6578 * @gfp: The GFP_FLAGS to use for allocations. 6579 * 6580 * This function allocates child nodes for @new_mas->node during the duplication 6581 * process. If memory allocation fails, @mas is set to -ENOMEM. 6582 */ 6583 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, 6584 gfp_t gfp) 6585 { 6586 struct maple_node *node = mte_to_node(mas->node); 6587 struct maple_node *new_node = mte_to_node(new_mas->node); 6588 enum maple_type type; 6589 unsigned char request, count, i; 6590 void __rcu **slots; 6591 void __rcu **new_slots; 6592 unsigned long val; 6593 6594 /* Allocate memory for child nodes. */ 6595 type = mte_node_type(mas->node); 6596 new_slots = ma_slots(new_node, type); 6597 request = mas_data_end(mas) + 1; 6598 count = mt_alloc_bulk(gfp, request, (void **)new_slots); 6599 if (unlikely(count < request)) { 6600 memset(new_slots, 0, request * sizeof(void *)); 6601 mas_set_err(mas, -ENOMEM); 6602 return; 6603 } 6604 6605 /* Restore node type information in slots. */ 6606 slots = ma_slots(node, type); 6607 for (i = 0; i < count; i++) { 6608 val = (unsigned long)mt_slot_locked(mas->tree, slots, i); 6609 val &= MAPLE_NODE_MASK; 6610 ((unsigned long *)new_slots)[i] |= val; 6611 } 6612 } 6613 6614 /* 6615 * mas_dup_build() - Build a new maple tree from a source tree 6616 * @mas: The maple state of source tree, need to be in MAS_START state. 6617 * @new_mas: The maple state of new tree, need to be in MAS_START state. 6618 * @gfp: The GFP_FLAGS to use for allocations. 6619 * 6620 * This function builds a new tree in DFS preorder. If the memory allocation 6621 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the 6622 * last node. mas_dup_free() will free the incomplete duplication of a tree. 6623 * 6624 * Note that the attributes of the two trees need to be exactly the same, and the 6625 * new tree needs to be empty, otherwise -EINVAL will be set in @mas. 
6626 */ 6627 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, 6628 gfp_t gfp) 6629 { 6630 struct maple_node *node; 6631 struct maple_pnode *parent = NULL; 6632 struct maple_enode *root; 6633 enum maple_type type; 6634 6635 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || 6636 unlikely(!mtree_empty(new_mas->tree))) { 6637 mas_set_err(mas, -EINVAL); 6638 return; 6639 } 6640 6641 root = mas_start(mas); 6642 if (mas_is_ptr(mas) || mas_is_none(mas)) 6643 goto set_new_tree; 6644 6645 node = mt_alloc_one(gfp); 6646 if (!node) { 6647 new_mas->status = ma_none; 6648 mas_set_err(mas, -ENOMEM); 6649 return; 6650 } 6651 6652 type = mte_node_type(mas->node); 6653 root = mt_mk_node(node, type); 6654 new_mas->node = root; 6655 new_mas->min = 0; 6656 new_mas->max = ULONG_MAX; 6657 root = mte_mk_root(root); 6658 while (1) { 6659 mas_copy_node(mas, new_mas, parent); 6660 if (!mte_is_leaf(mas->node)) { 6661 /* Only allocate child nodes for non-leaf nodes. */ 6662 mas_dup_alloc(mas, new_mas, gfp); 6663 if (unlikely(mas_is_err(mas))) 6664 return; 6665 } else { 6666 /* 6667 * This is the last leaf node and duplication is 6668 * completed. 6669 */ 6670 if (mas->max == ULONG_MAX) 6671 goto done; 6672 6673 /* This is not the last leaf node and needs to go up. */ 6674 do { 6675 mas_ascend(mas); 6676 mas_ascend(new_mas); 6677 } while (mas->offset == mas_data_end(mas)); 6678 6679 /* Move to the next subtree. */ 6680 mas->offset++; 6681 new_mas->offset++; 6682 } 6683 6684 mas_descend(mas); 6685 parent = ma_parent_ptr(mte_to_node(new_mas->node)); 6686 mas_descend(new_mas); 6687 mas->offset = 0; 6688 new_mas->offset = 0; 6689 } 6690 done: 6691 /* Specially handle the parent of the root node. */ 6692 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); 6693 set_new_tree: 6694 /* Make them the same height */ 6695 new_mas->tree->ma_flags = mas->tree->ma_flags; 6696 rcu_assign_pointer(new_mas->tree->ma_root, root); 6697 } 6698 6699 /** 6700 * __mt_dup(): Duplicate an entire maple tree 6701 * @mt: The source maple tree 6702 * @new: The new maple tree 6703 * @gfp: The GFP_FLAGS to use for allocations 6704 * 6705 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6706 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6707 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6708 * source node except for all the addresses stored in it. It will be faster than 6709 * traversing all elements in the source tree and inserting them one by one into 6710 * the new tree. 6711 * The user needs to ensure that the attributes of the source tree and the new 6712 * tree are the same, and the new tree needs to be an empty tree, otherwise 6713 * -EINVAL will be returned. 6714 * Note that the user needs to manually lock the source tree and the new tree. 6715 * 6716 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6717 * the attributes of the two trees are different or the new tree is not an empty 6718 * tree. 
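 *
 * An illustrative sketch of the expected calling pattern (error handling
 * and the entry fix-up step are only hinted at; locking both trees is the
 * caller's responsibility, see mtree_dup() below for one way to take the
 * two locks):
 *
 *	ret = __mt_dup(mt, new, GFP_KERNEL);
 *	if (!ret) {
 *		... walk @new and replace each duplicated entry with its
 *		    per-tree copy ...
 *	}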
6719 */ 6720 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6721 { 6722 int ret = 0; 6723 MA_STATE(mas, mt, 0, 0); 6724 MA_STATE(new_mas, new, 0, 0); 6725 6726 mas_dup_build(&mas, &new_mas, gfp); 6727 if (unlikely(mas_is_err(&mas))) { 6728 ret = xa_err(mas.node); 6729 if (ret == -ENOMEM) 6730 mas_dup_free(&new_mas); 6731 } 6732 6733 return ret; 6734 } 6735 EXPORT_SYMBOL(__mt_dup); 6736 6737 /** 6738 * mtree_dup(): Duplicate an entire maple tree 6739 * @mt: The source maple tree 6740 * @new: The new maple tree 6741 * @gfp: The GFP_FLAGS to use for allocations 6742 * 6743 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6744 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6745 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6746 * source node except for all the addresses stored in it. It will be faster than 6747 * traversing all elements in the source tree and inserting them one by one into 6748 * the new tree. 6749 * The user needs to ensure that the attributes of the source tree and the new 6750 * tree are the same, and the new tree needs to be an empty tree, otherwise 6751 * -EINVAL will be returned. 6752 * 6753 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6754 * the attributes of the two trees are different or the new tree is not an empty 6755 * tree. 6756 */ 6757 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6758 { 6759 int ret = 0; 6760 MA_STATE(mas, mt, 0, 0); 6761 MA_STATE(new_mas, new, 0, 0); 6762 6763 mas_lock(&new_mas); 6764 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); 6765 mas_dup_build(&mas, &new_mas, gfp); 6766 mas_unlock(&mas); 6767 if (unlikely(mas_is_err(&mas))) { 6768 ret = xa_err(mas.node); 6769 if (ret == -ENOMEM) 6770 mas_dup_free(&new_mas); 6771 } 6772 6773 mas_unlock(&new_mas); 6774 return ret; 6775 } 6776 EXPORT_SYMBOL(mtree_dup); 6777 6778 /** 6779 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6780 * @mt: The maple tree 6781 * 6782 * Note: Does not handle locking. 6783 */ 6784 void __mt_destroy(struct maple_tree *mt) 6785 { 6786 void *root = mt_root_locked(mt); 6787 6788 rcu_assign_pointer(mt->ma_root, NULL); 6789 if (xa_is_node(root)) 6790 mte_destroy_walk(root, mt); 6791 6792 mt->ma_flags = mt_attr(mt); 6793 } 6794 EXPORT_SYMBOL_GPL(__mt_destroy); 6795 6796 /** 6797 * mtree_destroy() - Destroy a maple tree 6798 * @mt: The maple tree 6799 * 6800 * Frees all resources used by the tree. Handles locking. 6801 */ 6802 void mtree_destroy(struct maple_tree *mt) 6803 { 6804 mtree_lock(mt); 6805 __mt_destroy(mt); 6806 mtree_unlock(mt); 6807 } 6808 EXPORT_SYMBOL(mtree_destroy); 6809 6810 /** 6811 * mt_find() - Search from the start up until an entry is found. 6812 * @mt: The maple tree 6813 * @index: Pointer which contains the start location of the search 6814 * @max: The maximum value of the search range 6815 * 6816 * Takes RCU read lock internally to protect the search, which does not 6817 * protect the returned pointer after dropping RCU read lock. 6818 * See also: Documentation/core-api/maple_tree.rst 6819 * 6820 * If an entry is found, @index is updated to point to the next possible 6821 * entry, independent of whether the found entry occupies a single index 6822 * or a range of indices.
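 *
 * An illustrative sketch (the tree and handle_entry() are hypothetical);
 * this is roughly the pattern the mt_for_each() iterator expands to:
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	entry = mt_find(&tree, &index, ULONG_MAX);
 *	while (entry) {
 *		handle_entry(entry);
 *		entry = mt_find_after(&tree, &index, ULONG_MAX);
 *	}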
6823 * 6824 * Return: The entry at or after the @index or %NULL 6825 */ 6826 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6827 { 6828 MA_STATE(mas, mt, *index, *index); 6829 void *entry; 6830 #ifdef CONFIG_DEBUG_MAPLE_TREE 6831 unsigned long copy = *index; 6832 #endif 6833 6834 trace_ma_read(__func__, &mas); 6835 6836 if ((*index) > max) 6837 return NULL; 6838 6839 rcu_read_lock(); 6840 retry: 6841 entry = mas_state_walk(&mas); 6842 if (mas_is_start(&mas)) 6843 goto retry; 6844 6845 if (unlikely(xa_is_zero(entry))) 6846 entry = NULL; 6847 6848 if (entry) 6849 goto unlock; 6850 6851 while (mas_is_active(&mas) && (mas.last < max)) { 6852 entry = mas_next_entry(&mas, max); 6853 if (likely(entry && !xa_is_zero(entry))) 6854 break; 6855 } 6856 6857 if (unlikely(xa_is_zero(entry))) 6858 entry = NULL; 6859 unlock: 6860 rcu_read_unlock(); 6861 if (likely(entry)) { 6862 *index = mas.last + 1; 6863 #ifdef CONFIG_DEBUG_MAPLE_TREE 6864 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6865 pr_err("index not increased! %lx <= %lx\n", 6866 *index, copy); 6867 #endif 6868 } 6869 6870 return entry; 6871 } 6872 EXPORT_SYMBOL(mt_find); 6873 6874 /** 6875 * mt_find_after() - Search from the start up until an entry is found. 6876 * @mt: The maple tree 6877 * @index: Pointer which contains the start location of the search 6878 * @max: The maximum value to check 6879 * 6880 * Same as mt_find() except that it checks @index for 0 before 6881 * searching. If @index == 0, the search is aborted. This covers a wrap 6882 * around of @index to 0 in an iterator loop. 6883 * 6884 * Return: The entry at or after the @index or %NULL 6885 */ 6886 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6887 unsigned long max) 6888 { 6889 if (!(*index)) 6890 return NULL; 6891 6892 return mt_find(mt, index, max); 6893 } 6894 EXPORT_SYMBOL(mt_find_after); 6895 6896 #ifdef CONFIG_DEBUG_MAPLE_TREE 6897 atomic_t maple_tree_tests_run; 6898 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6899 atomic_t maple_tree_tests_passed; 6900 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6901 6902 #ifndef __KERNEL__ 6903 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); 6904 void mt_set_non_kernel(unsigned int val) 6905 { 6906 kmem_cache_set_non_kernel(maple_node_cache, val); 6907 } 6908 6909 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); 6910 unsigned long mt_get_alloc_size(void) 6911 { 6912 return kmem_cache_get_alloc(maple_node_cache); 6913 } 6914 6915 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); 6916 void mt_zero_nr_tallocated(void) 6917 { 6918 kmem_cache_zero_nr_tallocated(maple_node_cache); 6919 } 6920 6921 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); 6922 unsigned int mt_nr_tallocated(void) 6923 { 6924 return kmem_cache_nr_tallocated(maple_node_cache); 6925 } 6926 6927 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); 6928 unsigned int mt_nr_allocated(void) 6929 { 6930 return kmem_cache_nr_allocated(maple_node_cache); 6931 } 6932 6933 void mt_cache_shrink(void) 6934 { 6935 } 6936 #else 6937 /* 6938 * mt_cache_shrink() - For testing, don't use this. 6939 * 6940 * Certain testcases can trigger an OOM when combined with other memory 6941 * debugging configuration options. This function is used to reduce the 6942 * possibility of an out of memory event due to kmem_cache objects remaining 6943 * around for longer than usual.
6944 */ 6945 void mt_cache_shrink(void) 6946 { 6947 kmem_cache_shrink(maple_node_cache); 6948 6949 } 6950 EXPORT_SYMBOL_GPL(mt_cache_shrink); 6951 6952 #endif /* not defined __KERNEL__ */ 6953 /* 6954 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 6955 * @mas: The maple state 6956 * @offset: The offset into the slot array to fetch. 6957 * 6958 * Return: The entry stored at @offset. 6959 */ 6960 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 6961 unsigned char offset) 6962 { 6963 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 6964 offset); 6965 } 6966 6967 /* Depth first search, post-order */ 6968 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 6969 { 6970 6971 struct maple_enode *p, *mn = mas->node; 6972 unsigned long p_min, p_max; 6973 6974 mas_next_node(mas, mas_mn(mas), max); 6975 if (!mas_is_overflow(mas)) 6976 return; 6977 6978 if (mte_is_root(mn)) 6979 return; 6980 6981 mas->node = mn; 6982 mas_ascend(mas); 6983 do { 6984 p = mas->node; 6985 p_min = mas->min; 6986 p_max = mas->max; 6987 mas_prev_node(mas, 0); 6988 } while (!mas_is_underflow(mas)); 6989 6990 mas->node = p; 6991 mas->max = p_max; 6992 mas->min = p_min; 6993 } 6994 6995 /* Tree validations */ 6996 static void mt_dump_node(const struct maple_tree *mt, void *entry, 6997 unsigned long min, unsigned long max, unsigned int depth, 6998 enum mt_dump_format format); 6999 static void mt_dump_range(unsigned long min, unsigned long max, 7000 unsigned int depth, enum mt_dump_format format) 7001 { 7002 static const char spaces[] = " "; 7003 7004 switch(format) { 7005 case mt_dump_hex: 7006 if (min == max) 7007 pr_info("%.*s%lx: ", depth * 2, spaces, min); 7008 else 7009 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 7010 break; 7011 case mt_dump_dec: 7012 if (min == max) 7013 pr_info("%.*s%lu: ", depth * 2, spaces, min); 7014 else 7015 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 7016 } 7017 } 7018 7019 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 7020 unsigned int depth, enum mt_dump_format format) 7021 { 7022 mt_dump_range(min, max, depth, format); 7023 7024 if (xa_is_value(entry)) 7025 pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), 7026 xa_to_value(entry), entry); 7027 else if (xa_is_zero(entry)) 7028 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 7029 else if (mt_is_reserved(entry)) 7030 pr_cont("UNKNOWN ENTRY (%p)\n", entry); 7031 else 7032 pr_cont("%p\n", entry); 7033 } 7034 7035 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 7036 unsigned long min, unsigned long max, unsigned int depth, 7037 enum mt_dump_format format) 7038 { 7039 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 7040 bool leaf = mte_is_leaf(entry); 7041 unsigned long first = min; 7042 int i; 7043 7044 pr_cont(" contents: "); 7045 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 7046 switch(format) { 7047 case mt_dump_hex: 7048 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 7049 break; 7050 case mt_dump_dec: 7051 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 7052 } 7053 } 7054 pr_cont("%p\n", node->slot[i]); 7055 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 7056 unsigned long last = max; 7057 7058 if (i < (MAPLE_RANGE64_SLOTS - 1)) 7059 last = node->pivot[i]; 7060 else if (!node->slot[i] && max != mt_node_max(entry)) 7061 break; 7062 if (last == 0 && i > 0) 7063 break; 7064 if (leaf) 7065 mt_dump_entry(mt_slot(mt, node->slot, i), 7066 first, last, depth + 1, format); 
7067 else if (node->slot[i]) 7068 mt_dump_node(mt, mt_slot(mt, node->slot, i), 7069 first, last, depth + 1, format); 7070 7071 if (last == max) 7072 break; 7073 if (last > max) { 7074 switch(format) { 7075 case mt_dump_hex: 7076 pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", 7077 node, last, max, i); 7078 break; 7079 case mt_dump_dec: 7080 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 7081 node, last, max, i); 7082 } 7083 } 7084 first = last + 1; 7085 } 7086 } 7087 7088 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 7089 unsigned long min, unsigned long max, unsigned int depth, 7090 enum mt_dump_format format) 7091 { 7092 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 7093 bool leaf = mte_is_leaf(entry); 7094 unsigned long first = min; 7095 int i; 7096 7097 pr_cont(" contents: "); 7098 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 7099 switch (format) { 7100 case mt_dump_hex: 7101 pr_cont("%lx ", node->gap[i]); 7102 break; 7103 case mt_dump_dec: 7104 pr_cont("%lu ", node->gap[i]); 7105 } 7106 } 7107 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 7108 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 7109 switch (format) { 7110 case mt_dump_hex: 7111 pr_cont("%p %lX ", node->slot[i], node->pivot[i]); 7112 break; 7113 case mt_dump_dec: 7114 pr_cont("%p %lu ", node->slot[i], node->pivot[i]); 7115 } 7116 } 7117 pr_cont("%p\n", node->slot[i]); 7118 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 7119 unsigned long last = max; 7120 7121 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 7122 last = node->pivot[i]; 7123 else if (!node->slot[i]) 7124 break; 7125 if (last == 0 && i > 0) 7126 break; 7127 if (leaf) 7128 mt_dump_entry(mt_slot(mt, node->slot, i), 7129 first, last, depth + 1, format); 7130 else if (node->slot[i]) 7131 mt_dump_node(mt, mt_slot(mt, node->slot, i), 7132 first, last, depth + 1, format); 7133 7134 if (last == max) 7135 break; 7136 if (last > max) { 7137 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", 7138 node, last, max, i); 7139 break; 7140 } 7141 first = last + 1; 7142 } 7143 } 7144 7145 static void mt_dump_node(const struct maple_tree *mt, void *entry, 7146 unsigned long min, unsigned long max, unsigned int depth, 7147 enum mt_dump_format format) 7148 { 7149 struct maple_node *node = mte_to_node(entry); 7150 unsigned int type = mte_node_type(entry); 7151 unsigned int i; 7152 7153 mt_dump_range(min, max, depth, format); 7154 7155 pr_cont("node %p depth %d type %d parent %p", node, depth, type, 7156 node ? 
node->parent : NULL); 7157 switch (type) { 7158 case maple_dense: 7159 pr_cont("\n"); 7160 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 7161 if (min + i > max) 7162 pr_cont("OUT OF RANGE: "); 7163 mt_dump_entry(mt_slot(mt, node->slot, i), 7164 min + i, min + i, depth, format); 7165 } 7166 break; 7167 case maple_leaf_64: 7168 case maple_range_64: 7169 mt_dump_range64(mt, entry, min, max, depth, format); 7170 break; 7171 case maple_arange_64: 7172 mt_dump_arange64(mt, entry, min, max, depth, format); 7173 break; 7174 7175 default: 7176 pr_cont(" UNKNOWN TYPE\n"); 7177 } 7178 } 7179 7180 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 7181 { 7182 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 7183 7184 pr_info("maple_tree(%p) flags %X, height %u root %p\n", 7185 mt, mt->ma_flags, mt_height(mt), entry); 7186 if (!xa_is_node(entry)) 7187 mt_dump_entry(entry, 0, 0, 0, format); 7188 else if (entry) 7189 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 7190 } 7191 EXPORT_SYMBOL_GPL(mt_dump); 7192 7193 /* 7194 * Calculate the maximum gap in a node and check if that's what is reported in 7195 * the parent (unless root). 7196 */ 7197 static void mas_validate_gaps(struct ma_state *mas) 7198 { 7199 struct maple_enode *mte = mas->node; 7200 struct maple_node *p_mn, *node = mte_to_node(mte); 7201 enum maple_type mt = mte_node_type(mas->node); 7202 unsigned long gap = 0, max_gap = 0; 7203 unsigned long p_end, p_start = mas->min; 7204 unsigned char p_slot, offset; 7205 unsigned long *gaps = NULL; 7206 unsigned long *pivots = ma_pivots(node, mt); 7207 unsigned int i; 7208 7209 if (ma_is_dense(mt)) { 7210 for (i = 0; i < mt_slot_count(mte); i++) { 7211 if (mas_get_slot(mas, i)) { 7212 if (gap > max_gap) 7213 max_gap = gap; 7214 gap = 0; 7215 continue; 7216 } 7217 gap++; 7218 } 7219 goto counted; 7220 } 7221 7222 gaps = ma_gaps(node, mt); 7223 for (i = 0; i < mt_slot_count(mte); i++) { 7224 p_end = mas_safe_pivot(mas, pivots, i, mt); 7225 7226 if (!gaps) { 7227 if (!mas_get_slot(mas, i)) 7228 gap = p_end - p_start + 1; 7229 } else { 7230 void *entry = mas_get_slot(mas, i); 7231 7232 gap = gaps[i]; 7233 MT_BUG_ON(mas->tree, !entry); 7234 7235 if (gap > p_end - p_start + 1) { 7236 pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", 7237 mas_mn(mas), i, gap, p_end, p_start, 7238 p_end - p_start + 1); 7239 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 7240 } 7241 } 7242 7243 if (gap > max_gap) 7244 max_gap = gap; 7245 7246 p_start = p_end + 1; 7247 if (p_end >= mas->max) 7248 break; 7249 } 7250 7251 counted: 7252 if (mt == maple_arange_64) { 7253 MT_BUG_ON(mas->tree, !gaps); 7254 offset = ma_meta_gap(node); 7255 if (offset > i) { 7256 pr_err("gap offset %p[%u] is invalid\n", node, offset); 7257 MT_BUG_ON(mas->tree, 1); 7258 } 7259 7260 if (gaps[offset] != max_gap) { 7261 pr_err("gap %p[%u] is not the largest gap %lu\n", 7262 node, offset, max_gap); 7263 MT_BUG_ON(mas->tree, 1); 7264 } 7265 7266 for (i++ ; i < mt_slot_count(mte); i++) { 7267 if (gaps[i] != 0) { 7268 pr_err("gap %p[%u] beyond node limit != 0\n", 7269 node, i); 7270 MT_BUG_ON(mas->tree, 1); 7271 } 7272 } 7273 } 7274 7275 if (mte_is_root(mte)) 7276 return; 7277 7278 p_slot = mte_parent_slot(mas->node); 7279 p_mn = mte_parent(mte); 7280 MT_BUG_ON(mas->tree, max_gap > mas->max); 7281 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { 7282 pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); 7283 mt_dump(mas->tree, mt_dump_hex); 7284 MT_BUG_ON(mas->tree, 1); 7285 } 7286 } 7287 7288 static void 
mas_validate_parent_slot(struct ma_state *mas) 7289 { 7290 struct maple_node *parent; 7291 struct maple_enode *node; 7292 enum maple_type p_type; 7293 unsigned char p_slot; 7294 void __rcu **slots; 7295 int i; 7296 7297 if (mte_is_root(mas->node)) 7298 return; 7299 7300 p_slot = mte_parent_slot(mas->node); 7301 p_type = mas_parent_type(mas, mas->node); 7302 parent = mte_parent(mas->node); 7303 slots = ma_slots(parent, p_type); 7304 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 7305 7306 /* Check prev/next parent slot for duplicate node entry */ 7307 7308 for (i = 0; i < mt_slots[p_type]; i++) { 7309 node = mas_slot(mas, slots, i); 7310 if (i == p_slot) { 7311 if (node != mas->node) 7312 pr_err("parent %p[%u] does not have %p\n", 7313 parent, i, mas_mn(mas)); 7314 MT_BUG_ON(mas->tree, node != mas->node); 7315 } else if (node == mas->node) { 7316 pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", 7317 mas_mn(mas), parent, i, p_slot); 7318 MT_BUG_ON(mas->tree, node == mas->node); 7319 } 7320 } 7321 } 7322 7323 static void mas_validate_child_slot(struct ma_state *mas) 7324 { 7325 enum maple_type type = mte_node_type(mas->node); 7326 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7327 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 7328 struct maple_enode *child; 7329 unsigned char i; 7330 7331 if (mte_is_leaf(mas->node)) 7332 return; 7333 7334 for (i = 0; i < mt_slots[type]; i++) { 7335 child = mas_slot(mas, slots, i); 7336 7337 if (!child) { 7338 pr_err("Non-leaf node lacks child at %p[%u]\n", 7339 mas_mn(mas), i); 7340 MT_BUG_ON(mas->tree, 1); 7341 } 7342 7343 if (mte_parent_slot(child) != i) { 7344 pr_err("Slot error at %p[%u]: child %p has pslot %u\n", 7345 mas_mn(mas), i, mte_to_node(child), 7346 mte_parent_slot(child)); 7347 MT_BUG_ON(mas->tree, 1); 7348 } 7349 7350 if (mte_parent(child) != mte_to_node(mas->node)) { 7351 pr_err("child %p has parent %p not %p\n", 7352 mte_to_node(child), mte_parent(child), 7353 mte_to_node(mas->node)); 7354 MT_BUG_ON(mas->tree, 1); 7355 } 7356 7357 if (i < mt_pivots[type] && pivots[i] == mas->max) 7358 break; 7359 } 7360 } 7361 7362 /* 7363 * Validate all pivots are within mas->min and mas->max, check metadata ends 7364 * where the maximum ends and ensure there is no slots or pivots set outside of 7365 * the end of the data. 
7366 */ 7367 static void mas_validate_limits(struct ma_state *mas) 7368 { 7369 int i; 7370 unsigned long prev_piv = 0; 7371 enum maple_type type = mte_node_type(mas->node); 7372 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7373 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7374 7375 for (i = 0; i < mt_slots[type]; i++) { 7376 unsigned long piv; 7377 7378 piv = mas_safe_pivot(mas, pivots, i, type); 7379 7380 if (!piv && (i != 0)) { 7381 pr_err("Missing node limit pivot at %p[%u]", 7382 mas_mn(mas), i); 7383 MAS_WARN_ON(mas, 1); 7384 } 7385 7386 if (prev_piv > piv) { 7387 pr_err("%p[%u] piv %lu < prev_piv %lu\n", 7388 mas_mn(mas), i, piv, prev_piv); 7389 MAS_WARN_ON(mas, piv < prev_piv); 7390 } 7391 7392 if (piv < mas->min) { 7393 pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, 7394 piv, mas->min); 7395 MAS_WARN_ON(mas, piv < mas->min); 7396 } 7397 if (piv > mas->max) { 7398 pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, 7399 piv, mas->max); 7400 MAS_WARN_ON(mas, piv > mas->max); 7401 } 7402 prev_piv = piv; 7403 if (piv == mas->max) 7404 break; 7405 } 7406 7407 if (mas_data_end(mas) != i) { 7408 pr_err("node%p: data_end %u != the last slot offset %u\n", 7409 mas_mn(mas), mas_data_end(mas), i); 7410 MT_BUG_ON(mas->tree, 1); 7411 } 7412 7413 for (i += 1; i < mt_slots[type]; i++) { 7414 void *entry = mas_slot(mas, slots, i); 7415 7416 if (entry && (i != mt_slots[type] - 1)) { 7417 pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), 7418 i, entry); 7419 MT_BUG_ON(mas->tree, entry != NULL); 7420 } 7421 7422 if (i < mt_pivots[type]) { 7423 unsigned long piv = pivots[i]; 7424 7425 if (!piv) 7426 continue; 7427 7428 pr_err("%p[%u] should not have piv %lu\n", 7429 mas_mn(mas), i, piv); 7430 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7431 } 7432 } 7433 } 7434 7435 static void mt_validate_nulls(struct maple_tree *mt) 7436 { 7437 void *entry, *last = (void *)1; 7438 unsigned char offset = 0; 7439 void __rcu **slots; 7440 MA_STATE(mas, mt, 0, 0); 7441 7442 mas_start(&mas); 7443 if (mas_is_none(&mas) || (mas_is_ptr(&mas))) 7444 return; 7445 7446 while (!mte_is_leaf(mas.node)) 7447 mas_descend(&mas); 7448 7449 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7450 do { 7451 entry = mas_slot(&mas, slots, offset); 7452 if (!last && !entry) { 7453 pr_err("Sequential nulls end at %p[%u]\n", 7454 mas_mn(&mas), offset); 7455 } 7456 MT_BUG_ON(mt, !last && !entry); 7457 last = entry; 7458 if (offset == mas_data_end(&mas)) { 7459 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7460 if (mas_is_overflow(&mas)) 7461 return; 7462 offset = 0; 7463 slots = ma_slots(mte_to_node(mas.node), 7464 mte_node_type(mas.node)); 7465 } else { 7466 offset++; 7467 } 7468 7469 } while (!mas_is_overflow(&mas)); 7470 } 7471 7472 /* 7473 * validate a maple tree by checking: 7474 * 1. The limits (pivots are within mas->min to mas->max) 7475 * 2. 
The gap is correctly set in the parents 7476 */ 7477 void mt_validate(struct maple_tree *mt) 7478 { 7479 unsigned char end; 7480 7481 MA_STATE(mas, mt, 0, 0); 7482 rcu_read_lock(); 7483 mas_start(&mas); 7484 if (!mas_is_active(&mas)) 7485 goto done; 7486 7487 while (!mte_is_leaf(mas.node)) 7488 mas_descend(&mas); 7489 7490 while (!mas_is_overflow(&mas)) { 7491 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7492 end = mas_data_end(&mas); 7493 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7494 (mas.max != ULONG_MAX))) { 7495 pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); 7496 } 7497 7498 mas_validate_parent_slot(&mas); 7499 mas_validate_limits(&mas); 7500 mas_validate_child_slot(&mas); 7501 if (mt_is_alloc(mt)) 7502 mas_validate_gaps(&mas); 7503 mas_dfs_postorder(&mas, ULONG_MAX); 7504 } 7505 mt_validate_nulls(mt); 7506 done: 7507 rcu_read_unlock(); 7508 7509 } 7510 EXPORT_SYMBOL_GPL(mt_validate); 7511 7512 void mas_dump(const struct ma_state *mas) 7513 { 7514 pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); 7515 switch (mas->status) { 7516 case ma_active: 7517 pr_err("(ma_active)"); 7518 break; 7519 case ma_none: 7520 pr_err("(ma_none)"); 7521 break; 7522 case ma_root: 7523 pr_err("(ma_root)"); 7524 break; 7525 case ma_start: 7526 pr_err("(ma_start) "); 7527 break; 7528 case ma_pause: 7529 pr_err("(ma_pause) "); 7530 break; 7531 case ma_overflow: 7532 pr_err("(ma_overflow) "); 7533 break; 7534 case ma_underflow: 7535 pr_err("(ma_underflow) "); 7536 break; 7537 case ma_error: 7538 pr_err("(ma_error) "); 7539 break; 7540 } 7541 7542 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, 7543 mas->index, mas->last); 7544 pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", 7545 mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); 7546 if (mas->index > mas->last) 7547 pr_err("Check index & last\n"); 7548 } 7549 EXPORT_SYMBOL_GPL(mas_dump); 7550 7551 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7552 { 7553 pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", 7554 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7555 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7556 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, 7557 wr_mas->end_piv); 7558 } 7559 EXPORT_SYMBOL_GPL(mas_wr_dump); 7560 7561 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7562