// SPDX-License-Identifier: GPL-2.0+
/*
 * Maple Tree implementation
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 * Copyright (c) 2023 ByteDance
 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
 */

/*
 * DOC: Interesting implementation details of the Maple Tree
 *
 * Each node type has a number of slots for entries and a number of slots for
 * pivots.  In the case of dense nodes, the pivots are implied by the position
 * and are simply the slot index + the minimum of the node.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges.  Pivots may appear in the
 * subtree with an entry attached to the value whereas keys are unique to a
 * specific position of a B-tree.  Pivot values are inclusive of the slot with
 * the same index.
 *
 *
 * The following illustrates the layout of a range64 node's slots and pivots.
 *
 *
 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
 *           │   │   │   │     │    │    │    │    └─ Implied maximum
 *           │   │   │   │     │    │    │    └─ Pivot 14
 *           │   │   │   │     │    │    └─ Pivot 13
 *           │   │   │   │     │    └─ Pivot 12
 *           │   │   │   │     └─ Pivot 11
 *           │   │   │   └─ Pivot 2
 *           │   │   └─ Pivot 1
 *           │   └─ Pivot 0
 *           └─ Implied minimum
 *
 * Slot contents:
 *  Internal (non-leaf) nodes contain pointers to other nodes.
 *  Leaf nodes contain entries.
 *
 * The location of interest is often referred to as an offset.  All offsets
 * have a slot, but the last offset has an implied pivot from the node above
 * (or UINT_MAX for the root node).
 *
 * Ranges complicate certain write activities.  When modifying any of
 * the B-tree variants, it is known that one entry will either be added or
 * deleted.  When modifying the Maple Tree, one store operation may overwrite
 * the entire data set, or one half of the tree, or the middle half of the tree.
 *
 */


#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

#define TP_FCT tracepoint_string(__func__)

/*
 * Kernel pointer hashing renders much of the maple tree dump useless as tagged
 * pointers get hashed to arbitrary values.
 *
 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
 * permissible to bypass this. Otherwise remain cautious and retain the hashing.
 *
 * Userland doesn't know about %px so also use %p there.
 */
#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE)
#define PTR_FMT "%px"
#else
#define PTR_FMT "%p"
#endif

/* Bit 0 of a parent pointer marks the root node. */
#define MA_ROOT_PARENT 1

/*
 * Maple state flags
 * * MA_STATE_PREALLOC	- Preallocated nodes, WARN_ON allocation
 */
#define MA_STATE_PREALLOC	1

#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
#define ma_mnode_ptr(x) ((struct maple_node *)(x))
#define ma_enode_ptr(x) ((struct maple_enode *)(x))
static struct kmem_cache *maple_node_cache;

#ifdef CONFIG_DEBUG_MAPLE_TREE
/* Per node type maximum representable pivot value (debug checks only). */
static const unsigned long mt_max[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= ULONG_MAX,
	[maple_range_64]	= ULONG_MAX,
	[maple_arange_64]	= ULONG_MAX,
	[maple_copy]		= ULONG_MAX,
};
#define mt_node_max(x) mt_max[mte_node_type(x)]
#endif

/* Number of entry slots per node type. */
static const unsigned char mt_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
	[maple_copy]		= 3,
};
#define mt_slot_count(x) mt_slots[mte_node_type(x)]

/* Number of stored pivots per node type (dense nodes have implied pivots). */
static const unsigned char mt_pivots[] = {
	[maple_dense]		= 0,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
	[maple_copy]		= 3,
};
#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]

/* Minimum occupancy per node type before a node is considered insufficient. */
static const unsigned char mt_min_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
	[maple_copy]		= 1,	/* Should never be used */
};
#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]

/* Functions */
/* Allocate a single maple node from the dedicated slab cache. */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
{
	return kmem_cache_alloc(maple_node_cache, gfp);
}

/* Bulk-free @size nodes back to the slab cache. */
static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
}

/* Return an unused prefilled sheaf of nodes to the slab cache. */
static void mt_return_sheaf(struct slab_sheaf *sheaf)
{
	kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf);
}

/* Get a sheaf prefilled with @count nodes. */
static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count)
{
	return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count);
}

/* Top up an existing sheaf so it holds at least @size nodes. */
static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf,
		unsigned int size)
{
	return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size);
}

/*
 * ma_free_rcu() - Use rcu callback to free a maple node
 * @node: The node to free
 *
 * The maple tree uses the parent pointer to indicate this node is no longer in
 * use and will be freed.
 */
static void ma_free_rcu(struct maple_node *node)
{
	/* A node being freed must already have been marked dead. */
	WARN_ON(node->parent != ma_parent_ptr(node));
	kfree_rcu(node, rcu);
}

/* Store @height in the height field of the tree flags. */
static void mt_set_height(struct maple_tree *mt, unsigned char height)
{
	unsigned int new_flags = mt->ma_flags;

	new_flags &= ~MT_FLAGS_HEIGHT_MASK;
	MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX);
	new_flags |= height << MT_FLAGS_HEIGHT_OFFSET;
	mt->ma_flags = new_flags;
}

/* Height of the tree referenced by the maple state. */
static unsigned int mas_mt_height(struct ma_state *mas)
{
	return mt_height(mas->tree);
}

/* Tree flags with the height field masked out. */
static inline unsigned int mt_attr(struct maple_tree *mt)
{
	return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
}

/* Extract the node type encoded in the low bits of an encoded node. */
static __always_inline enum maple_type mte_node_type(
		const struct maple_enode *entry)
{
	return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
		MAPLE_NODE_TYPE_MASK;
}

static __always_inline bool ma_is_dense(const enum maple_type type)
{
	return type < maple_leaf_64;
}

static __always_inline bool ma_is_leaf(const enum maple_type type)
{
	return type < maple_range_64;
}

static __always_inline bool mte_is_leaf(const struct maple_enode *entry)
{
	return ma_is_leaf(mte_node_type(entry));
}

/*
 * We also reserve values with the bottom two bits set to '10' which are
 * below 4096
 */
static __always_inline bool mt_is_reserved(const void *entry)
{
	return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
		xa_is_internal(entry);
}

/* Record error @err in the maple state. */
static __always_inline void mas_set_err(struct ma_state *mas, long err)
{
	mas->node = MA_ERROR(err);
	mas->status = ma_error;
}

static __always_inline bool mas_is_ptr(const struct ma_state *mas)
{
	return mas->status == ma_root;
}

static __always_inline bool mas_is_start(const struct ma_state *mas)
{
	return mas->status == ma_start;
}

static __always_inline bool mas_is_none(const struct ma_state *mas)
{
	return mas->status == ma_none;
}

static __always_inline bool mas_is_paused(const struct ma_state *mas)
{
	return mas->status == ma_pause;
}

static __always_inline bool mas_is_overflow(struct ma_state *mas)
{
	return mas->status == ma_overflow;
}

static inline bool mas_is_underflow(struct ma_state *mas)
{
	return mas->status == ma_underflow;
}

/* Strip the metadata bits to get the bare maple node pointer. */
static __always_inline struct maple_node *mte_to_node(
		const struct maple_enode *entry)
{
	return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
}

/*
 * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
 * @entry: The maple encoded node
 *
 * Return: a maple topiary pointer
 */
static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
{
	return (struct maple_topiary *)
		((unsigned long)entry & ~MAPLE_NODE_MASK);
}

/*
 * mas_mn() - Get the maple state node.
 * @mas: The maple state
 *
 * Return: the maple node (not encoded - bare pointer).
 */
static inline struct maple_node *mas_mn(const struct ma_state *mas)
{
	return mte_to_node(mas->node);
}

/*
 * mte_set_node_dead() - Set a maple encoded node as dead.
 * @mn: The maple encoded node.
 */
static inline void mte_set_node_dead(struct maple_enode *mn)
{
	/* A node whose parent pointer refers to itself is dead. */
	mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
	smp_wmb(); /* Needed for RCU */
}

/* Bit 1 indicates the root is a node */
#define MAPLE_ROOT_NODE			0x02
/* maple_type stored bit 3-6 */
#define MAPLE_ENODE_TYPE_SHIFT		0x03
/* Bit 2 means a NULL somewhere below */
#define MAPLE_ENODE_NULL		0x04

/* Encode @node and @type into a maple_enode pointer. */
static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
		enum maple_type type)
{
	return (void *)((unsigned long)node |
			(type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
}

static inline void ma_init_slot(void __rcu **slot, const struct maple_node *mn,
		const enum maple_type mt)
{
	/* WARNING: this is unsafe if the slot is exposed to readers. */
	RCU_INIT_POINTER(*slot, (void *)mt_mk_node(mn, mt));
}

static inline void *mte_mk_root(const struct maple_enode *node)
{
	return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
}

static inline void *mte_safe_root(const struct maple_enode *node)
{
	return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
}

static inline void __maybe_unused *mte_set_full(const struct maple_enode *node)
{
	return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
}

static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node)
{
	return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
}

static inline bool __maybe_unused mte_has_null(const struct maple_enode *node)
{
	return (unsigned long)node & MAPLE_ENODE_NULL;
}

static __always_inline bool ma_is_root(struct maple_node *node)
{
	return ((unsigned long)node->parent & MA_ROOT_PARENT);
}

static __always_inline bool mte_is_root(const struct maple_enode *node)
{
	return ma_is_root(mte_to_node(node));
}

/* True if the maple state spans the entire possible index range. */
static inline bool mas_is_root_limits(const struct ma_state *mas)
{
	return !mas->min && mas->max == ULONG_MAX;
}

static __always_inline bool mt_is_alloc(struct maple_tree *mt)
{
	return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
}

/*
 * The Parent Pointer
 * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
 * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16
 * bit values need an extra bit to store the offset. This extra bit comes from
 * a reuse of the last bit in the node type. This is possible by using bit 1 to
 * indicate if bit 2 is part of the type or the slot.
 *
 * Node types:
 *  0b??1 = Root
 *  0b?00 = 16 bit nodes
 *  0b010 = 32 bit nodes
 *  0b110 = 64 bit nodes
 *
 * Slot size and alignment
 *  0b??1 : Root
 *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
 *  0b010 : 32 bit values, type in 0-2, slot in 3-7
 *  0b110 : 64 bit values, type in 0-2, slot in 3-7
 */

#define MAPLE_PARENT_ROOT		0x01

#define MAPLE_PARENT_SLOT_SHIFT		0x03
#define MAPLE_PARENT_SLOT_MASK		0xF8

#define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
#define MAPLE_PARENT_16B_SLOT_MASK	0xFC

#define MAPLE_PARENT_RANGE64		0x06
#define MAPLE_PARENT_RANGE32		0x02
#define MAPLE_PARENT_NOT_RANGE16	0x02

/*
 * mte_parent_shift() - Get the parent shift for the slot storage.
 * @parent: The parent pointer cast as an unsigned long
 * Return: The shift into that pointer to the start of the slot
 */
static inline unsigned long mte_parent_shift(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_SHIFT;

	return MAPLE_PARENT_16B_SLOT_SHIFT;
}

/*
 * mte_parent_slot_mask() - Get the slot mask for the parent.
 * @parent: The parent pointer cast as an unsigned long.
 * Return: The slot mask for that parent.
 */
static inline unsigned long mte_parent_slot_mask(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_MASK;

	return MAPLE_PARENT_16B_SLOT_MASK;
}

/*
 * mas_parent_type() - Return the maple_type of the parent from the stored
 * parent type.
 * @mas: The maple state
 * @enode: The maple_enode to extract the parent's enum
 * Return: The node->parent maple_type
 */
static inline
enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
{
	unsigned long p_type;

	p_type = (unsigned long)mte_to_node(enode)->parent;
	/* The root parent encoding carries no type information. */
	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
		return 0;

	p_type &= MAPLE_NODE_MASK;
	p_type &= ~mte_parent_slot_mask(p_type);
	switch (p_type) {
	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
		if (mt_is_alloc(mas->tree))
			return maple_arange_64;
		return maple_range_64;
	}

	return 0;
}

/*
 * mas_set_parent() - Set the parent node and encode the slot
 * @mas: The maple state
 * @enode: The encoded maple node.
 * @parent: The encoded maple node that is the parent of @enode.
 * @slot: The slot that @enode resides in @parent.
 *
 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
 * parent type.
 */
static inline
void mas_set_parent(struct ma_state *mas, struct maple_enode *enode,
		const struct maple_enode *parent, unsigned char slot)
{
	unsigned long val = (unsigned long)parent;
	unsigned long shift;
	unsigned long type;
	enum maple_type p_type = mte_node_type(parent);

	MAS_BUG_ON(mas, p_type == maple_dense);
	MAS_BUG_ON(mas, p_type == maple_leaf_64);

	switch (p_type) {
	case maple_range_64:
	case maple_arange_64:
		shift = MAPLE_PARENT_SLOT_SHIFT;
		type = MAPLE_PARENT_RANGE64;
		break;
	default:
	case maple_dense:
	case maple_leaf_64:
		shift = type = 0;
		break;
	}

	val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
	val |= (slot << shift) | type;
	mte_to_node(enode)->parent = ma_parent_ptr(val);
}

/*
 * mte_parent_slot() - get the parent slot of @enode.
 * @enode: The encoded maple node.
 *
 * Return: The slot in the parent node where @enode resides.
 */
static __always_inline
unsigned int mte_parent_slot(const struct maple_enode *enode)
{
	unsigned long val = (unsigned long)mte_to_node(enode)->parent;

	if (unlikely(val & MA_ROOT_PARENT))
		return 0;

	/*
	 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
	 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
	 */
	return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
}

/*
 * mte_parent() - Get the parent of @enode.
 * @enode: The encoded maple node.
 *
 * Return: The parent maple node.
 */
static __always_inline
struct maple_node *mte_parent(const struct maple_enode *enode)
{
	return (void *)((unsigned long)
			(mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
}

/*
 * ma_dead_node() - check if the @node is dead.
 * @node: The maple node
 *
 * Return: true if dead, false otherwise.
 */
static __always_inline bool ma_dead_node(const struct maple_node *node)
{
	struct maple_node *parent;

	/* Do not reorder reads from the node prior to the parent check */
	smp_rmb();
	parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
	/* A dead node's parent pointer refers to itself. */
	return (parent == node);
}

/*
 * mte_dead_node() - check if the @enode is dead.
 * @enode: The encoded maple node
 *
 * Return: true if dead, false otherwise.
 */
static __always_inline bool mte_dead_node(const struct maple_enode *enode)
{
	struct maple_node *node;

	node = mte_to_node(enode);
	return ma_dead_node(node);
}

/*
 * ma_pivots() - Get a pointer to the maple node pivots.
 * @node: the maple node
 * @type: the node type
 *
 * In the event of a dead node, this array may be %NULL
 *
 * Return: A pointer to the maple node pivots
 */
static inline unsigned long *ma_pivots(struct maple_node *node,
		enum maple_type type)
{
	switch (type) {
	case maple_arange_64:
		return node->ma64.pivot;
	case maple_range_64:
	case maple_leaf_64:
		return node->mr64.pivot;
	case maple_copy:
		return node->cp.pivot;
	case maple_dense:
		return NULL;
	}
	return NULL;
}

/*
 * ma_gaps() - Get a pointer to the maple node gaps.
 * @node: the maple node
 * @type: the node type
 *
 * Return: A pointer to the maple node gaps
 */
static inline unsigned long *ma_gaps(struct maple_node *node,
		enum maple_type type)
{
	switch (type) {
	case maple_arange_64:
		return node->ma64.gap;
	case maple_copy:
		return node->cp.gap;
	case maple_range_64:
	case maple_leaf_64:
	case maple_dense:
		return NULL;
	}
	return NULL;
}

/*
 * mas_safe_pivot() - get the pivot at @piv or mas->max.
 * @mas: The maple state
 * @pivots: The pointer to the maple node pivots
 * @piv: The pivot to fetch
 * @type: The maple node type
 *
 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
 * otherwise.
 */
static __always_inline unsigned long
mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
		unsigned char piv, enum maple_type type)
{
	if (piv >= mt_pivots[type])
		return mas->max;

	return pivots[piv];
}

/*
 * mas_safe_min() - Return the minimum for a given offset.
 * @mas: The maple state
 * @pivots: The pointer to the maple node pivots
 * @offset: The offset into the pivot array
 *
 * Return: The minimum range value that is contained in @offset.
 */
static inline unsigned long
mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
{
	/* Offset 0 has no preceding pivot; its minimum is the node minimum. */
	if (likely(offset))
		return pivots[offset - 1] + 1;

	return mas->min;
}

/*
 * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
 * @mn: The encoded maple node
 * @piv: The pivot offset
 * @val: The value of the pivot
 */
static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
		unsigned long val)
{
	struct maple_node *node = mte_to_node(mn);
	enum maple_type type = mte_node_type(mn);

	BUG_ON(piv >= mt_pivots[type]);
	switch (type) {
	case maple_range_64:
	case maple_leaf_64:
		node->mr64.pivot[piv] = val;
		break;
	case maple_arange_64:
		node->ma64.pivot[piv] = val;
		break;
	case maple_copy:
	case maple_dense:
		break;
	}

}

/*
 * ma_slots() - Get a pointer to the maple node slots.
 * @mn: The maple node
 * @mt: The maple node type
 *
 * Return: A pointer to the maple node slots
 */
static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
{
	switch (mt) {
	case maple_arange_64:
		return mn->ma64.slot;
	case maple_range_64:
	case maple_leaf_64:
		return mn->mr64.slot;
	case maple_copy:
		return mn->cp.slot;
	case maple_dense:
		return mn->slot;
	}

	return NULL;
}

/* Lockdep check: is the tree's write lock (external or internal) held? */
static inline bool mt_write_locked(const struct maple_tree *mt)
{
	return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
		lockdep_is_held(&mt->ma_lock);
}

/* Lockdep check: is the tree locked (external or internal lock)? */
static __always_inline bool mt_locked(const struct maple_tree *mt)
{
	return mt_external_lock(mt) ? mt_lock_is_held(mt) :
		lockdep_is_held(&mt->ma_lock);
}

static __always_inline void *mt_slot(const struct maple_tree *mt,
		void __rcu **slots, unsigned char offset)
{
	return rcu_dereference_check(slots[offset], mt_locked(mt));
}

static __always_inline void *mt_slot_locked(struct maple_tree *mt,
		void __rcu **slots, unsigned char offset)
{
	return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
}
/*
 * mas_slot_locked() - Get the slot value when holding the maple tree lock.
 * @mas: The maple state
 * @slots: The pointer to the slots
 * @offset: The offset into the slots array to fetch
 *
 * Return: The entry stored in @slots at the @offset.
 */
static __always_inline void *mas_slot_locked(struct ma_state *mas,
		void __rcu **slots, unsigned char offset)
{
	return mt_slot_locked(mas->tree, slots, offset);
}

/*
 * mas_slot() - Get the slot value when not holding the maple tree lock.
 * @mas: The maple state
 * @slots: The pointer to the slots
 * @offset: The offset into the slots array to fetch
 *
 * Return: The entry stored in @slots at the @offset
 */
static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
		unsigned char offset)
{
	return mt_slot(mas->tree, slots, offset);
}

/*
 * mas_root() - Get the maple tree root.
 * @mas: The maple state.
 *
 * Return: The pointer to the root of the tree
 */
static __always_inline void *mas_root(struct ma_state *mas)
{
	return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
}

static inline void *mt_root_locked(struct maple_tree *mt)
{
	return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
}

/*
 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
 * @mas: The maple state.
 *
 * Return: The pointer to the root of the tree
 */
static inline void *mas_root_locked(struct ma_state *mas)
{
	return mt_root_locked(mas->tree);
}

/* Get the metadata area of a node; arange_64 has its own metadata area. */
static inline struct maple_metadata *ma_meta(struct maple_node *mn,
		enum maple_type mt)
{
	switch (mt) {
	case maple_arange_64:
		return &mn->ma64.meta;
	default:
		return &mn->mr64.meta;
	}
}

/*
 * ma_set_meta() - Set the metadata information of a node.
 * @mn: The maple node
 * @mt: The maple node type
 * @offset: The offset of the highest sub-gap in this node.
 * @end: The end of the data in this node.
 */
static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
		unsigned char offset, unsigned char end)
{
	struct maple_metadata *meta = ma_meta(mn, mt);

	meta->gap = offset;
	meta->end = end;
}

/*
 * mt_clear_meta() - clear the metadata information of a node, if it exists
 * @mt: The maple tree
 * @mn: The maple node
 * @type: The maple node type
 */
static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
		enum maple_type type)
{
	struct maple_metadata *meta;
	unsigned long *pivots;
	void __rcu **slots;
	void *next;

	switch (type) {
	case maple_range_64:
		pivots = mn->mr64.pivot;
		/*
		 * A range64 node only has metadata when the last slot is
		 * unused; otherwise the metadata area holds data.
		 */
		if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
			slots = mn->mr64.slot;
			next = mt_slot_locked(mt, slots,
					MAPLE_RANGE64_SLOTS - 1);
			if (unlikely((mte_to_node(next) &&
					mte_node_type(next))))
				return; /* no metadata, could be node */
		}
		fallthrough;
	case maple_arange_64:
		meta = ma_meta(mn, type);
		break;
	default:
		return;
	}

	meta->gap = 0;
	meta->end = 0;
}

/*
 * ma_meta_end() - Get the data end of a node from the metadata
 * @mn: The maple node
 * @mt: The maple node type
 */
static inline unsigned char ma_meta_end(struct maple_node *mn,
		enum maple_type mt)
{
	struct maple_metadata *meta = ma_meta(mn, mt);

	return meta->end;
}

/*
 * ma_meta_gap() - Get the largest gap location of a node from the metadata
 * @mn: The maple node
 */
static inline unsigned char ma_meta_gap(struct maple_node *mn)
{
	return mn->ma64.meta.gap;
}

/*
 * ma_set_meta_gap() - Set the largest gap location in a node's metadata
 * @mn: The maple node
 * @mt: The maple node type
 * @offset: The location of the largest gap.
 */
static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
		unsigned char offset)
{

	struct maple_metadata *meta = ma_meta(mn, mt);

	meta->gap = offset;
}

/*
 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes.
 * @mat: the ma_topiary, a linked list of dead nodes.
 * @dead_enode: the node to be marked as dead and added to the tail of the list
 *
 * Add the @dead_enode to the linked list in @mat.
 */
static inline void mat_add(struct ma_topiary *mat,
		struct maple_enode *dead_enode)
{
	mte_set_node_dead(dead_enode);
	mte_to_mat(dead_enode)->next = NULL;
	if (!mat->tail) {
		/* Empty list: the new node is both head and tail. */
		mat->tail = mat->head = dead_enode;
		return;
	}

	mte_to_mat(mat->tail)->next = dead_enode;
	mat->tail = dead_enode;
}

static void mt_free_walk(struct rcu_head *head);
static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
		bool free);
/*
 * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
 * @mas: the maple state
 * @mat: the ma_topiary linked list of dead nodes to free.
 *
 * Destroy walk a dead list.
 */
static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
{
	struct maple_enode *next;
	struct maple_node *node;
	bool in_rcu = mt_in_rcu(mas->tree);

	while (mat->head) {
		next = mte_to_mat(mat->head)->next;
		node = mte_to_node(mat->head);
		/* In RCU mode, defer the actual free to an RCU callback. */
		mt_destroy_walk(mat->head, mas->tree, !in_rcu);
		if (in_rcu)
			call_rcu(&node->rcu, mt_free_walk);
		mat->head = next;
	}
}
/*
 * mas_descend() - Descend into the slot stored in the ma_state.
 * @mas: the maple state.
 *
 * Note: Not RCU safe, only use in write side or debug code.
 */
static inline void mas_descend(struct ma_state *mas)
{
	enum maple_type type;
	unsigned long *pivots;
	struct maple_node *node;
	void __rcu **slots;

	node = mas_mn(mas);
	type = mte_node_type(mas->node);
	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);

	/* Narrow min/max to the child's range before stepping down. */
	if (mas->offset)
		mas->min = pivots[mas->offset - 1] + 1;
	mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
	mas->node = mas_slot(mas, slots, mas->offset);
}

/*
 * mas_ascend() - Walk up a level of the tree.
 * @mas: The maple state
 *
 * Sets the @mas->max and @mas->min for the parent node of mas->node.  This
 * may cause several levels of walking up to find the correct min and max.
 * May find a dead node which will cause a premature return.
 * Return: 1 on dead node, 0 otherwise
 */
static int mas_ascend(struct ma_state *mas)
{
	struct maple_enode *p_enode; /* parent enode. */
	struct maple_enode *a_enode; /* ancestor enode. */
	struct maple_node *a_node; /* ancestor node. */
	struct maple_node *p_node; /* parent node. */
	unsigned char a_slot;
	enum maple_type a_type;
	unsigned long min, max;
	unsigned long *pivots;
	bool set_max = false, set_min = false;

	a_node = mas_mn(mas);
	if (ma_is_root(a_node)) {
		mas->offset = 0;
		return 0;
	}

	/* A node whose parent is itself is dead. */
	p_node = mte_parent(mas->node);
	if (unlikely(a_node == p_node))
		return 1;

	a_type = mas_parent_type(mas, mas->node);
	mas->offset = mte_parent_slot(mas->node);
	a_enode = mt_mk_node(p_node, a_type);

	/* Check to make sure all parent information is still accurate */
	if (p_node != mte_parent(mas->node))
		return 1;

	mas->node = a_enode;

	if (mte_is_root(a_enode)) {
		mas->max = ULONG_MAX;
		mas->min = 0;
		return 0;
	}

	min = 0;
	max = ULONG_MAX;

	/*
	 * !mas->offset implies that parent node min == mas->min.
	 * mas->offset > 0 implies that we need to walk up to find the
	 * implied pivot min.
	 */
	if (!mas->offset) {
		min = mas->min;
		set_min = true;
	}

	if (mas->max == ULONG_MAX)
		set_max = true;

	/* Walk up until both the implied min and max have been found. */
	do {
		p_enode = a_enode;
		a_type = mas_parent_type(mas, p_enode);
		a_node = mte_parent(p_enode);
		a_slot = mte_parent_slot(p_enode);
		a_enode = mt_mk_node(a_node, a_type);
		pivots = ma_pivots(a_node, a_type);

		if (unlikely(ma_dead_node(a_node)))
			return 1;

		if (!set_min && a_slot) {
			set_min = true;
			min = pivots[a_slot - 1] + 1;
		}

		if (!set_max && a_slot < mt_pivots[a_type]) {
			set_max = true;
			max = pivots[a_slot];
		}

		/* Re-check for death after reading the pivots. */
		if (unlikely(ma_dead_node(a_node)))
			return 1;

		if (unlikely(ma_is_root(a_node)))
			break;

	} while (!set_min || !set_max);

	mas->max = max;
	mas->min = min;
	return 0;
}

/*
 * mas_pop_node() - Get a previously allocated maple node from the maple state.
 * @mas: The maple state
 *
 * Return: A pointer to a maple node.
1051 */ 1052 static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas) 1053 { 1054 struct maple_node *ret; 1055 1056 if (mas->alloc) { 1057 ret = mas->alloc; 1058 mas->alloc = NULL; 1059 goto out; 1060 } 1061 1062 if (WARN_ON_ONCE(!mas->sheaf)) 1063 return NULL; 1064 1065 ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf); 1066 1067 out: 1068 memset(ret, 0, sizeof(*ret)); 1069 return ret; 1070 } 1071 1072 /* 1073 * mas_alloc_nodes() - Allocate nodes into a maple state 1074 * @mas: The maple state 1075 * @gfp: The GFP Flags 1076 */ 1077 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1078 { 1079 if (!mas->node_request) 1080 return; 1081 1082 if (mas->node_request == 1) { 1083 if (mas->sheaf) 1084 goto use_sheaf; 1085 1086 if (mas->alloc) 1087 return; 1088 1089 mas->alloc = mt_alloc_one(gfp); 1090 if (!mas->alloc) 1091 goto error; 1092 1093 mas->node_request = 0; 1094 return; 1095 } 1096 1097 use_sheaf: 1098 if (unlikely(mas->alloc)) { 1099 kfree(mas->alloc); 1100 mas->alloc = NULL; 1101 } 1102 1103 if (mas->sheaf) { 1104 unsigned long refill; 1105 1106 refill = mas->node_request; 1107 if (kmem_cache_sheaf_size(mas->sheaf) >= refill) { 1108 mas->node_request = 0; 1109 return; 1110 } 1111 1112 if (mt_refill_sheaf(gfp, &mas->sheaf, refill)) 1113 goto error; 1114 1115 mas->node_request = 0; 1116 return; 1117 } 1118 1119 mas->sheaf = mt_get_sheaf(gfp, mas->node_request); 1120 if (likely(mas->sheaf)) { 1121 mas->node_request = 0; 1122 return; 1123 } 1124 1125 error: 1126 mas_set_err(mas, -ENOMEM); 1127 } 1128 1129 static inline void mas_empty_nodes(struct ma_state *mas) 1130 { 1131 mas->node_request = 0; 1132 if (mas->sheaf) { 1133 mt_return_sheaf(mas->sheaf); 1134 mas->sheaf = NULL; 1135 } 1136 1137 if (mas->alloc) { 1138 kfree(mas->alloc); 1139 mas->alloc = NULL; 1140 } 1141 } 1142 1143 /* 1144 * mas_free() - Free an encoded maple node 1145 * @mas: The maple state 1146 * @used: The encoded maple node to 
free.
 *
 * The node is unconditionally RCU freed; it is never pushed back onto the
 * maple state allocations.
 */
static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
{
	ma_free_rcu(mte_to_node(used));
}

/*
 * mas_start() - Sets up maple state for operations.
 * @mas: The maple state.
 *
 * If mas->status == ma_start, then set the min, max and depth to
 * defaults.
 *
 * Return:
 * - If mas->node is an error or not mas_start, return NULL.
 * - If it's an empty tree: NULL & mas->status == ma_none
 * - If it's a single entry: The entry & mas->status == ma_root
 * - If it's a tree: NULL & mas->status == ma_active
 */
static inline struct maple_enode *mas_start(struct ma_state *mas)
{
	if (likely(mas_is_start(mas))) {
		struct maple_enode *root;

		mas->min = 0;
		mas->max = ULONG_MAX;

retry:
		mas->depth = 0;
		root = mas_root(mas);
		/* Tree with nodes */
		if (likely(xa_is_node(root))) {
			mas->depth = 0;
			mas->status = ma_active;
			mas->node = mte_safe_root(root);
			mas->offset = 0;
			/* Root was replaced under us; reload it. */
			if (mte_dead_node(mas->node))
				goto retry;

			return NULL;
		}

		mas->node = NULL;
		/* empty tree */
		if (unlikely(!root)) {
			mas->status = ma_none;
			mas->offset = MAPLE_NODE_SLOTS;
			return NULL;
		}

		/* Single entry tree */
		mas->status = ma_root;
		mas->offset = MAPLE_NODE_SLOTS;

		/* The single entry only covers index 0. */
		if (mas->index > 0)
			return NULL;

		return root;
	}

	return NULL;
}

/*
 * ma_data_end() - Find the end of the data in a node.
 * @node: The maple node
 * @type: The maple node type
 * @pivots: The array of pivots in the node
 * @max: The maximum value in the node
 *
 * Uses metadata to find the end of the data when possible.
 * Return: The zero indexed last slot with data (may be null).
 */
static __always_inline unsigned char ma_data_end(struct maple_node *node,
		enum maple_type type, unsigned long *pivots, unsigned long max)
{
	unsigned char offset;

	if (!pivots)
		return 0;

	/* Allocation range nodes always carry metadata. */
	if (type == maple_arange_64)
		return ma_meta_end(node, type);

	/* A zero last pivot means the node is not full: metadata is valid. */
	offset = mt_pivots[type] - 1;
	if (likely(!pivots[offset]))
		return ma_meta_end(node, type);

	if (likely(pivots[offset] == max))
		return offset;

	return mt_pivots[type];
}

/*
 * mas_data_end() - Find the end of the data (slot).
 * @mas: the maple state
 *
 * This method is optimized to check the metadata of a node if the node type
 * supports data end metadata.
 *
 * Return: The zero indexed last slot with data (may be null).
 */
static inline unsigned char mas_data_end(struct ma_state *mas)
{
	enum maple_type type;
	struct maple_node *node;
	unsigned char offset;
	unsigned long *pivots;

	type = mte_node_type(mas->node);
	node = mas_mn(mas);
	if (type == maple_arange_64)
		return ma_meta_end(node, type);

	pivots = ma_pivots(node, type);
	if (unlikely(ma_dead_node(node)))
		return 0;

	offset = mt_pivots[type] - 1;
	if (likely(!pivots[offset]))
		return ma_meta_end(node, type);

	if (likely(pivots[offset] == mas->max))
		return offset;

	return mt_pivots[type];
}

/*
 * wr_mas_setup() - Populate the write state cache (node, type, pivots, slots
 * and the range limits of the current offset) from the maple state.
 * @wr_mas: The maple write state
 * @mas: The maple state
 */
static inline
void wr_mas_setup(struct ma_wr_state *wr_mas, struct ma_state *mas)
{
	wr_mas->node = mas_mn(mas);
	wr_mas->type = mte_node_type(mas->node);
	wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
	wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
	wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, mas->offset);
	wr_mas->r_max = mas_safe_pivot(mas, wr_mas->pivots, mas->offset,
				       wr_mas->type);
}

/*
 * wr_mas_ascend() - Ascend the maple state to the parent and refresh the
 * write state to match the new (parent) node.
 * @wr_mas: The maple write state
 */
static inline
void wr_mas_ascend(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;

	mas_ascend(mas);
	wr_mas_setup(wr_mas, mas);
	mas->end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots,
			       mas->max);
	/* Careful, this may be wrong.. */
	wr_mas->end_piv = wr_mas->r_max;
	wr_mas->offset_end = mas->offset;
}

/*
 * ma_leaf_max_gap() - Returns the largest gap in a leaf node.
 * @mn: The maple node
 * @mt: The maple node type
 * @min: The minimum of the node's range
 * @max: The maximum of the node's range
 * @pivots: The pivot array of the node
 * @slots: The slot array of the node
 *
 * Gaps are runs of NULL slots; the implied first and last pivots are handled
 * as special cases before the main scan.
 */
static inline unsigned long ma_leaf_max_gap(struct maple_node *mn,
		enum maple_type mt, unsigned long min, unsigned long max,
		unsigned long *pivots, void __rcu **slots)
{
	unsigned long pstart, gap, max_gap;
	unsigned char i;
	unsigned char max_piv;

	max_gap = 0;
	if (unlikely(ma_is_dense(mt))) {
		/* Dense nodes: a gap is a run of empty slots, one per index. */
		gap = 0;
		for (i = 0; i < mt_slots[mt]; i++) {
			if (slots[i]) {
				if (gap > max_gap)
					max_gap = gap;
				gap = 0;
			} else {
				gap++;
			}
		}
		if (gap > max_gap)
			max_gap = gap;
		return max_gap;
	}

	/*
	 * Check the first implied pivot optimizes the loop below and slot 1 may
	 * be skipped if there is a gap in slot 0.
	 */
	if (likely(!slots[0])) {
		max_gap = pivots[0] - min + 1;
		i = 2;
	} else {
		i = 1;
	}

	/* reduce max_piv as the special case is checked before the loop */
	max_piv = ma_data_end(mn, mt, pivots, max) - 1;
	/*
	 * Check end implied pivot which can only be a gap on the right most
	 * node.
	 */
	if (unlikely(max == ULONG_MAX) && !slots[max_piv + 1]) {
		gap = ULONG_MAX - pivots[max_piv];
		if (gap > max_gap)
			max_gap = gap;

		if (max_gap > pivots[max_piv] - min)
			return max_gap;
	}

	for (; i <= max_piv; i++) {
		/* data == no gap. */
		if (likely(slots[i]))
			continue;

		pstart = pivots[i - 1];
		gap = pivots[i] - pstart;
		if (gap > max_gap)
			max_gap = gap;

		/* There cannot be two gaps in a row. */
		i++;
	}
	return max_gap;
}

/*
 * mas_leaf_max_gap() - Returns the largest gap in a leaf node
 * @mas: the maple state
 *
 * Return: The maximum gap in the leaf.
 */
static inline unsigned long mas_leaf_max_gap(struct ma_state *mas)
{
	enum maple_type mt;
	struct maple_node *mn;
	unsigned long *pivots;
	void __rcu **slots;

	mn = mas_mn(mas);
	mt = mte_node_type(mas->node);
	slots = ma_slots(mn, mt);
	pivots = ma_pivots(mn, mt);

	return ma_leaf_max_gap(mn, mt, mas->min, mas->max, pivots, slots);
}

/*
 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
 * @node: The maple node
 * @gaps: The pointer to the gaps
 * @mt: The maple node type
 * @off: Pointer to store the offset location of the gap.
 *
 * Uses the metadata data end to scan backwards across set gaps.
 *
 * Return: The maximum gap value
 */
static inline unsigned long
ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
		unsigned char *off)
{
	unsigned char offset, i;
	unsigned long max_gap = 0;

	i = offset = ma_meta_end(node, mt);
	do {
		if (gaps[i] > max_gap) {
			max_gap = gaps[i];
			offset = i;
		}
	} while (i--);

	*off = offset;
	return max_gap;
}

/*
 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
 * @mas: The maple state.
 *
 * Return: The gap value.
 */
static inline unsigned long mas_max_gap(struct ma_state *mas)
{
	unsigned long *gaps;
	unsigned char offset;
	enum maple_type mt;
	struct maple_node *node;

	mt = mte_node_type(mas->node);
	if (ma_is_leaf(mt))
		return mas_leaf_max_gap(mas);

	/* Non-leaf: the metadata records the offset of the largest gap. */
	node = mas_mn(mas);
	MAS_BUG_ON(mas, mt != maple_arange_64);
	offset = ma_meta_gap(node);
	gaps = ma_gaps(node, mt);
	return gaps[offset];
}

/*
 * mas_parent_gap() - Set the parent gap and any gaps above, as needed
 * @mas: The maple state
 * @offset: The gap offset in the parent to set
 * @new: The new gap value.
 *
 * Set the parent gap then continue to set the gap upwards, using the metadata
 * of the parent to see if it is necessary to check the node above.
 */
static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
		unsigned long new)
{
	unsigned long meta_gap = 0;
	struct maple_node *pnode;
	struct maple_enode *penode;
	unsigned long *pgaps;
	unsigned char meta_offset;
	enum maple_type pmt;

	pnode = mte_parent(mas->node);
	pmt = mas_parent_type(mas, mas->node);
	penode = mt_mk_node(pnode, pmt);
	pgaps = ma_gaps(pnode, pmt);

ascend:
	MAS_BUG_ON(mas, pmt != maple_arange_64);
	meta_offset = ma_meta_gap(pnode);
	meta_gap = pgaps[meta_offset];

	pgaps[offset] = new;

	/* The recorded maximum is unchanged; nothing to propagate. */
	if (meta_gap == new)
		return;

	if (offset != meta_offset) {
		/* A different offset holds the max and it is still larger. */
		if (meta_gap > new)
			return;

		ma_set_meta_gap(pnode, pmt, offset);
	} else if (new < meta_gap) {
		/* The max shrank; rescan for the new largest gap. */
		new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
		ma_set_meta_gap(pnode, pmt, meta_offset);
	}

	if (ma_is_root(pnode))
		return;

	/* Go to the parent node. */
	pnode = mte_parent(penode);
	pmt = mas_parent_type(mas, penode);
	pgaps = ma_gaps(pnode, pmt);
	offset = mte_parent_slot(penode);
	penode = mt_mk_node(pnode, pmt);
	goto ascend;
}

/*
 * mas_update_gap() - Update a nodes gaps and propagate up if necessary.
 * @mas: the maple state.
 */
static inline void mas_update_gap(struct ma_state *mas)
{
	unsigned char pslot;
	unsigned long p_gap;
	unsigned long max_gap;

	/* Only allocation trees track gaps. */
	if (!mt_is_alloc(mas->tree))
		return;

	if (mte_is_root(mas->node))
		return;

	max_gap = mas_max_gap(mas);

	pslot = mte_parent_slot(mas->node);
	p_gap = ma_gaps(mte_parent(mas->node),
			mas_parent_type(mas, mas->node))[pslot];

	if (p_gap != max_gap)
		mas_parent_gap(mas, pslot, max_gap);
}

/*
 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
 * @parent with the slot encoded.
 * @mas: the maple state (for the tree)
 * @parent: the maple encoded node containing the children.
 */
static inline void mas_adopt_children(struct ma_state *mas,
		struct maple_enode *parent)
{
	enum maple_type type = mte_node_type(parent);
	struct maple_node *node = mte_to_node(parent);
	void __rcu **slots = ma_slots(node, type);
	unsigned long *pivots = ma_pivots(node, type);
	struct maple_enode *child;
	unsigned char offset;

	offset = ma_data_end(node, type, pivots, mas->max);
	do {
		child = mas_slot_locked(mas, slots, offset);
		mas_set_parent(mas, child, parent, offset);
	} while (offset--);
}

/*
 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old
 * node as dead.
 * @mas: the maple state with the new node
 * @old_enode: The old maple encoded node to replace.
 * @new_height: if we are inserting a root node, update the height of the tree
 *
 * NOTE(review): @new_height is 'char' here but 'unsigned char' in
 * mas_replace_node() - confirm whether the signed type is intentional.
 */
static inline void mas_put_in_tree(struct ma_state *mas,
		struct maple_enode *old_enode, char new_height)
	__must_hold(mas->tree->ma_lock)
{
	unsigned char offset;
	void __rcu **slots;

	if (mte_is_root(mas->node)) {
		/* Replacing the root: repoint ma_root and record the height. */
		mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
		rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
		mt_set_height(mas->tree, new_height);
	} else {

		/* Publish the new node in the parent's slot. */
		offset = mte_parent_slot(mas->node);
		slots = ma_slots(mte_parent(mas->node),
				 mas_parent_type(mas, mas->node));
		rcu_assign_pointer(slots[offset], mas->node);
	}

	mte_set_node_dead(old_enode);
}

/*
 * mas_replace_node() - Replace a node by putting it in the tree, marking it
 * dead, and freeing it.
 * the parent encoding to locate the maple node in the tree.
 * @mas: the ma_state with @mas->node pointing to the new node.
 * @old_enode: The old maple encoded node.
 * @new_height: The new height of the tree as a result of the operation
 */
static inline void mas_replace_node(struct ma_state *mas,
		struct maple_enode *old_enode, unsigned char new_height)
	__must_hold(mas->tree->ma_lock)
{
	mas_put_in_tree(mas, old_enode, new_height);
	mas_free(mas, old_enode);
}

/*
 * mas_find_child() - Find a child who has the parent @mas->node.
 * @mas: the maple state with the parent.
 * @child: the maple state to store the child.
 *
 * Searches from @mas->offset; on success @mas->offset is advanced past the
 * found child so repeated calls iterate the remaining children.
 *
 * Return: true if a child was found, false otherwise.
 */
static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
	__must_hold(mas->tree->ma_lock)
{
	enum maple_type mt;
	unsigned char offset;
	unsigned char end;
	unsigned long *pivots;
	struct maple_enode *entry;
	struct maple_node *node;
	void __rcu **slots;

	mt = mte_node_type(mas->node);
	node = mas_mn(mas);
	slots = ma_slots(node, mt);
	pivots = ma_pivots(node, mt);
	end = ma_data_end(node, mt, pivots, mas->max);
	for (offset = mas->offset; offset <= end; offset++) {
		entry = mas_slot_locked(mas, slots, offset);
		/* Only children whose parent pointer was updated qualify. */
		if (mte_parent(entry) == node) {
			*child = *mas;
			mas->offset = offset + 1;
			child->offset = offset;
			mas_descend(child);
			child->offset = 0;
			return true;
		}
	}
	return false;
}

/*
 * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
 * @node: The maple node
 * @mt: The maple type
 * @end: The node end
 */
static inline void mas_leaf_set_meta(struct maple_node *node,
		enum maple_type mt, unsigned char end)
{
	/* A full leaf has no room for metadata. */
	if (end < mt_slots[mt] - 1)
		ma_set_meta(node, mt, 0, end);
}

/*
 * mas_prev_sibling() - Find the previous node with the same parent.
 * @mas: the maple state
 *
 * Return: True if there is a previous sibling, false otherwise.
 */
static inline bool mas_prev_sibling(struct ma_state *mas)
{
	unsigned int p_slot = mte_parent_slot(mas->node);

	/* For root node, p_slot is set to 0 by mte_parent_slot(). */
	if (!p_slot)
		return false;

	mas_ascend(mas);
	mas->offset = p_slot - 1;
	mas_descend(mas);
	return true;
}

/*
 * mas_next_sibling() - Find the next node with the same parent.
 * @mas: the maple state
 *
 * Return: true if there is a next sibling, false otherwise.
 */
static inline bool mas_next_sibling(struct ma_state *mas)
{
	MA_STATE(parent, mas->tree, mas->index, mas->last);

	if (mte_is_root(mas->node))
		return false;

	/* Work on a copy so @mas is untouched when there is no sibling. */
	parent = *mas;
	mas_ascend(&parent);
	parent.offset = mte_parent_slot(mas->node) + 1;
	if (parent.offset > mas_data_end(&parent))
		return false;

	*mas = parent;
	mas_descend(mas);
	return true;
}

/*
 * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
 *                      If @mas->index cannot be found within the containing
 *                      node, we traverse to the last entry in the node.
 * @wr_mas: The maple write state
 *
 * Uses mas_slot_locked() and does not need to worry about dead nodes.
 */
static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;
	unsigned char count, offset;

	if (unlikely(ma_is_dense(wr_mas->type))) {
		/* Dense nodes have implied pivots: index maps directly. */
		wr_mas->r_max = wr_mas->r_min = mas->index;
		mas->offset = mas->index = mas->min;
		return;
	}

	wr_mas->node = mas_mn(wr_mas->mas);
	wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
	count = mas->end = ma_data_end(wr_mas->node, wr_mas->type,
				       wr_mas->pivots, mas->max);
	offset = mas->offset;

	/* Linear scan for the first pivot covering the index. */
	while (offset < count && mas->index > wr_mas->pivots[offset])
		offset++;

	wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max;
	wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset);
	wr_mas->offset_end = mas->offset = offset;
}

/*
 * rebalance_sib() - Descend from @parent into the sibling used for a
 * rebalance, preferring the right sibling to pull data leftwards.
 * @parent: The maple state of the parent node
 * @sib: The maple state to fill with the chosen sibling
 */
static inline void rebalance_sib(struct ma_state *parent, struct ma_state *sib)
{
	*sib = *parent;
	/* Prioritize move right to pull data left */
	if (sib->offset < sib->end)
		sib->offset++;
	else
		sib->offset--;

	mas_descend(sib);
	sib->end = mas_data_end(sib);
}

/*
 * spanning_sib() - Find the leaf-level neighbour of a spanning store,
 * searching right of the right state first, then left of the left state,
 * ascending until a sibling subtree exists.
 * @l_wr_mas: The left write maple state
 * @r_wr_mas: The right write maple state
 * @nneighbour: The maple state to fill with the neighbour
 */
static inline
void spanning_sib(struct ma_wr_state *l_wr_mas,
		struct ma_wr_state *r_wr_mas, struct ma_state *nneighbour)
{
	struct ma_state l_tmp = *l_wr_mas->mas;
	struct ma_state r_tmp = *r_wr_mas->mas;
	unsigned char depth = 0;

	do {
		mas_ascend(&r_tmp);
		mas_ascend(&l_tmp);
		depth++;
		if (r_tmp.offset < mas_data_end(&r_tmp)) {
			/* Right neighbour: descend its leftmost path. */
			r_tmp.offset++;
			mas_descend(&r_tmp);
			r_tmp.offset = 0;
			while (--depth)
				mas_descend(&r_tmp);

			r_tmp.end = mas_data_end(&r_tmp);
			*nneighbour = r_tmp;
			return;
		} else if (l_tmp.offset) {
			/* Left neighbour: descend its rightmost path. */
			l_tmp.offset--;
			do {
				mas_descend(&l_tmp);
				l_tmp.offset = mas_data_end(&l_tmp);
			} while (--depth);

			l_tmp.end = l_tmp.offset;
			*nneighbour = l_tmp;
			return;
		}
	} while (!mte_is_root(r_tmp.node));

	/* A spanning store below the root must have a neighbour. */
	WARN_ON_ONCE(1);
}

/*
 * mas_topiary_node() - Dispose of a single node
 * @mas: The maple state for pushing nodes
 * @in_rcu: If the tree is in rcu mode
 *
 * The node is marked dead and RCU freed.
 */
static inline void mas_topiary_node(struct ma_state *mas,
		struct ma_state *tmp_mas, bool in_rcu)
{
	struct maple_node *tmp;
	struct maple_enode *enode;

	/* NOTE(review): @mas and @in_rcu are currently unused here. */
	if (mas_is_none(tmp_mas))
		return;

	enode = tmp_mas->node;
	tmp = mte_to_node(enode);
	mte_set_node_dead(enode);
	ma_free_rcu(tmp);
}

/*
 * mas_topiary_replace() - Replace the data with new data, then repair the
 * parent links within the new tree. Iterate over the dead sub-tree and collect
 * the dead subtrees and topiary the nodes that are no longer of use.
 *
 * The new tree will have up to three children with the correct parent. Keep
 * track of the new entries as they need to be followed to find the next level
 * of new entries.
 *
 * The old tree will have up to three children with the old parent. Keep track
 * of the old entries as they may have more nodes below replaced. Nodes within
 * [index, last] are dead subtrees, others need to be freed and followed.
 *
 * @mas: The maple state pointing at the new data
 * @old_enode: The maple encoded node being replaced
 * @new_height: The new height of the tree as a result of the operation
 *
 */
static inline void mas_topiary_replace(struct ma_state *mas,
		struct maple_enode *old_enode, unsigned char new_height)
{
	struct ma_state tmp[3], tmp_next[3];
	MA_TOPIARY(subtrees, mas->tree);
	bool in_rcu;
	int i, n;

	/* Place data in tree & then mark node as old */
	mas_put_in_tree(mas, old_enode, new_height);

	/* Update the parent pointers in the tree */
	tmp[0] = *mas;
	tmp[0].offset = 0;
	tmp[1].status = ma_none;
	tmp[2].status = ma_none;
	while (!mte_is_leaf(tmp[0].node)) {
		n = 0;
		for (i = 0; i < 3; i++) {
			if (mas_is_none(&tmp[i]))
				continue;

			while (n < 3) {
				if (!mas_find_child(&tmp[i], &tmp_next[n]))
					break;
				n++;
			}

			mas_adopt_children(&tmp[i], tmp[i].node);
		}

		if (MAS_WARN_ON(mas, n == 0))
			break;

		while (n < 3)
			tmp_next[n++].status = ma_none;

		for (i = 0; i < 3; i++)
			tmp[i] = tmp_next[i];
	}

	/* Collect the old nodes that need to be discarded */
	if (mte_is_leaf(old_enode))
		return mas_free(mas, old_enode);

	tmp[0] = *mas;
	tmp[0].offset = 0;
	tmp[0].node = old_enode;
	tmp[1].status = ma_none;
	tmp[2].status = ma_none;
	in_rcu = mt_in_rcu(mas->tree);
	do {
		n = 0;
		for (i = 0; i < 3; i++) {
			if (mas_is_none(&tmp[i]))
				continue;

			while (n < 3) {
				if (!mas_find_child(&tmp[i], &tmp_next[n]))
					break;

				/*
				 * NOTE(review): tmp_next->index/last reads
				 * element 0 of the array - confirm this is the
				 * intended [index, last] range source.
				 */
				if ((tmp_next[n].min >= tmp_next->index) &&
				    (tmp_next[n].max <= tmp_next->last)) {
					mat_add(&subtrees, tmp_next[n].node);
					tmp_next[n].status = ma_none;
				} else {
					n++;
				}
			}
		}

		if (MAS_WARN_ON(mas, n == 0))
			break;

		while (n < 3)
			tmp_next[n++].status = ma_none;

		for (i = 0; i < 3; i++) {
			mas_topiary_node(mas, &tmp[i], in_rcu);
			tmp[i] = tmp_next[i];
		}
	} while (!mte_is_leaf(tmp[0].node));

	for (i = 0; i < 3; i++)
		mas_topiary_node(mas, &tmp[i], in_rcu);

	mas_mat_destroy(mas, &subtrees);
}

/*
 * node_copy() - Copy from one node to another.
 *
 * @mas: The maple state
 * @src: The source node
 * @start: The offset into the src to start copying
 * @size: The size to copy (non-zero)
 * @s_max: The source node max
 * @s_mt: The source maple node type
 * @dst: The destination
 * @d_start: The start location in the destination node
 * @d_mt: The destination maple node type
 *
 * Return: The maximum (last pivot) of the copied region.
 */
static inline
unsigned long node_copy(struct ma_state *mas, struct maple_node *src,
		unsigned char start, unsigned char size, unsigned long s_max,
		enum maple_type s_mt, struct maple_node *dst, unsigned char d_start,
		enum maple_type d_mt)
{
	unsigned long *s_pivots, *d_pivots;
	void __rcu **s_slots, **d_slots;
	unsigned long *s_gaps, *d_gaps;
	unsigned long d_max;

	d_slots = ma_slots(dst, d_mt) + d_start;
	d_pivots = ma_pivots(dst, d_mt) + d_start;
	s_slots = ma_slots(src, s_mt) + start;
	s_pivots = ma_pivots(src, s_mt) + start;
	memcpy(d_slots, s_slots, size * sizeof(void __rcu *));
	if (!ma_is_leaf(d_mt) && s_mt == maple_copy) {
		struct maple_enode *edst = mt_mk_node(dst, d_mt);

		/* Children copied from a maple_copy node need new parents. */
		for (int i = 0; i < size; i++)
			mas_set_parent(mas,
				       mt_slot_locked(mas->tree, d_slots, i),
				       edst, d_start + i);
	}

	d_gaps = ma_gaps(dst, d_mt);
	if (d_gaps) {
		s_gaps = ma_gaps(src, s_mt) + start;
		d_gaps += d_start;
		memcpy(d_gaps, s_gaps, size * sizeof(unsigned long));
	}

	/* The last copied pivot may be implied by the source node max. */
	if (start + size - 1 < mt_pivots[s_mt])
		d_max = s_pivots[size - 1];
	else
		d_max = s_max;

	if (d_start + size <= mt_pivots[d_mt])
		d_pivots[size - 1] = d_max;

	size--;
	if (size)
		memcpy(d_pivots, s_pivots, size * sizeof(unsigned long));

	return d_max;
}

/*
 * node_finalise() - Zero out unused area and populate metadata
 * @node: The maple node
 * @mt: The maple node type
 * @end: The end of the used area
 */
static inline
void node_finalise(struct maple_node *node, enum maple_type mt,
		unsigned char end)
{
	unsigned char max_end = mt_slots[mt];
	unsigned char size;
	unsigned long *gaps;
	unsigned char gap_slot;

	gaps = ma_gaps(node, mt);
	if (end < max_end - 1) {
		/* Clear the unused slots, gaps, and pivots past @end. */
		size = max_end - end;
		memset(ma_slots(node, mt) + end, 0, size * sizeof(void *));

		if (gaps)
			memset(gaps + end, 0, size * sizeof(unsigned long));

		/* One fewer pivot than slots. */
		if (--size)
			memset(ma_pivots(node, mt) + end, 0, size * sizeof(unsigned long));
	}

	gap_slot = 0;
	if (gaps && !ma_is_leaf(mt)) {
		unsigned long max_gap;

		/* Record the offset of the largest gap in the metadata. */
		max_gap = 0;
		for (int i = 0; i <= end; i++)
			if (gaps[i] > max_gap) {
				gap_slot = i;
				max_gap = gaps[i];
			}
	}

	if (mt == maple_arange_64)
		ma_set_meta(node, mt, gap_slot, end - 1);
	else if (end <= max_end - 1)
		ma_set_meta(node, mt, gap_slot, end - 1);
}

/*
 * mtree_range_walk() - Walk from the root to a leaf, recording the range of
 * the found entry in mas->index/last and the leaf range in mas->min/max.
 * @mas: The maple state, positioned at the root.
 *
 * Return: The entry found, or NULL (with @mas reset) on a dead node.
 */
static inline void *mtree_range_walk(struct ma_state *mas)
{
	unsigned long *pivots;
	unsigned char offset;
	struct maple_node *node;
	struct maple_enode *next, *last;
	enum maple_type type;
	void __rcu **slots;
	unsigned char end;
	unsigned long max, min;
	unsigned long prev_max, prev_min;

	next = mas->node;
	min = mas->min;
	max = mas->max;
	do {
		last = next;
		node = mte_to_node(next);
		type = mte_node_type(next);
		pivots = ma_pivots(node, type);
		end = ma_data_end(node, type, pivots, max);
		prev_min = min;
		prev_max = max;
		if (pivots[0] >= mas->index) {
			offset = 0;
			max = pivots[0];
			goto next;
		}

		offset = 1;
		while (offset < end) {
			if (pivots[offset] >= mas->index) {
				max = pivots[offset];
				break;
			}
			offset++;
		}

		min = pivots[offset - 1] + 1;
next:
		slots = ma_slots(node, type);
		next = mt_slot(mas->tree, slots, offset);
		/* Dead node check must follow the reads it validates. */
		if (unlikely(ma_dead_node(node)))
			goto dead_node;
	} while (!ma_is_leaf(type));

	mas->end = end;
	mas->offset = offset;
	mas->index = min;
	mas->last = max;
	mas->min = prev_min;
	mas->max = prev_max;
	mas->node = last;
	return (void *)next;

dead_node:
	mas_reset(mas);
	return NULL;
}

/*
 * mas_wmb_replace() - Write memory barrier and replace
 * @mas: The maple state
 * @cp: The maple copy node
 *
 * Updates gap as necessary.
 */
static inline void mas_wmb_replace(struct ma_state *mas, struct maple_copy *cp)
{
	struct maple_enode *old_enode;

	old_enode = mas->node;
	mas->node = mt_slot_locked(mas->tree, cp->slot, 0);
	/* Insert the new data in the tree */
	mas_topiary_replace(mas, old_enode, cp->height);
	if (!mte_is_leaf(mas->node))
		mas_update_gap(mas);

	mtree_range_walk(mas);
}


/*
 * cp_leaf_init() - Initialize a maple_copy node for the leaf level of a
 * spanning store
 * @cp: The maple copy node
 * @mas: The maple state
 * @l_wr_mas: The left write state of the spanning store
 * @r_wr_mas: The right write state of the spanning store
 */
static inline void cp_leaf_init(struct maple_copy *cp,
		struct ma_state *mas, struct ma_wr_state *l_wr_mas,
		struct ma_wr_state *r_wr_mas)
{
	unsigned char end = 0;

	/*
	 * WARNING: The use of RCU_INIT_POINTER() makes it extremely important
	 * to not expose the maple_copy node to any readers. Exposure may
	 * result in buggy code when a compiler reorders the instructions.
	 */

	cp->height = 1;
	/* Create entries to insert including split entries to left and right */
	if (l_wr_mas->r_min < mas->index) {
		/* Preserve the partially-overwritten left portion. */
		end++;
		RCU_INIT_POINTER(cp->slot[0], l_wr_mas->content);
		cp->pivot[0] = mas->index - 1;
	}
	RCU_INIT_POINTER(cp->slot[end], l_wr_mas->entry);
	cp->pivot[end] = mas->last;

	if (r_wr_mas->end_piv > mas->last) {
		/* Preserve the partially-overwritten right portion. */
		end++;
		RCU_INIT_POINTER(cp->slot[end],
				 r_wr_mas->slots[r_wr_mas->offset_end]);
		cp->pivot[end] = r_wr_mas->end_piv;
	}

	cp->min = l_wr_mas->r_min;
	cp->max = cp->pivot[end];
	cp->end = end;
}

/*
 * cp_data_calc() - Calculate the size of the data (1 indexed).
 * @cp: The maple copy struct with the new data populated.
 * @l_wr_mas: The maple write state containing the data to the left of the write
 * @r_wr_mas: The maple write state containing the data to the right of the
 * write
 *
 * cp->data is a size (not indexed by 0).
 */
static inline void cp_data_calc(struct maple_copy *cp,
		struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas)
{

	/* Add 1 every time for the 0th element */
	cp->data = l_wr_mas->mas->offset;
	/* Add the new data and any partial overwrites */
	cp->data += cp->end + 1;
	/* Data from right (offset + 1 to end), +1 for zero */
	cp->data += r_wr_mas->mas->end - r_wr_mas->offset_end;
}

/*
 * data_fits() - Check if the combined data of @sib and @cp fits into two
 * nodes with a slot to spare.
 * @sib: The sibling maple state
 * @mas: The maple state of the write
 * @cp: The maple copy node
 *
 * Return: true if the data fits, false otherwise.
 */
static bool data_fits(struct ma_state *sib, struct ma_state *mas,
		struct maple_copy *cp)
{
	unsigned char new_data;
	enum maple_type type;
	unsigned char space;
	unsigned char end;

	type = mte_node_type(mas->node);
	space = 2 * mt_slots[type];
	end = sib->end;

	new_data = end + 1 + cp->data;
	if (new_data > space)
		return false;

	/*
	 * This is off by one by design. The extra space is left to reduce
	 * jitter in operations that add then remove two entries.
	 *
	 * end is an index while new space and data are both sizes. Adding one
	 * to end to convert the index to a size means that the below
	 * calculation should be <=, but we want to keep an extra space in nodes
	 * to reduce jitter.
	 *
	 * Note that it is still possible to get a full node on the left by the
	 * NULL landing exactly on the split. The NULL ending of a node happens
	 * in the dst_setup() function, where we will either increase the split
	 * by one or decrease it by one, if possible. In the case of split
	 * (this case), it is always possible to shift the spilt by one - again
	 * because there is at least one slot free by the below checking.
	 */
	if (new_data < space)
		return true;

	return false;
}

/*
 * push_data_sib() - Find a sibling (left preferred) that can absorb the new
 * data; @sib->end is set to 0 when neither sibling fits.
 * @cp: The maple copy node
 * @mas: The maple state of the write
 * @sib: The maple state to fill with the chosen sibling
 * @parent: The maple state of the parent
 */
static inline void push_data_sib(struct maple_copy *cp, struct ma_state *mas,
		struct ma_state *sib, struct ma_state *parent)
{

	if (mte_is_root(mas->node))
		goto no_push;


	*sib = *parent;
	if (sib->offset) {
		sib->offset--;
		mas_descend(sib);
		sib->end = mas_data_end(sib);
		if (data_fits(sib, mas, cp)) /* Push left */
			return;

		*sib = *parent;
	}

	/* NOTE(review): relies on @parent->end being current - confirm. */
	if (sib->offset >= sib->end)
		goto no_push;

	sib->offset++;
	mas_descend(sib);
	sib->end = mas_data_end(sib);
	if (data_fits(sib, mas, cp)) /* Push right*/
		return;

no_push:
	sib->end = 0;
}

/*
 * rebalance_data() - Calculate the @cp data, populate @sib if insufficient or
 * if the data can be pushed into a sibling.
 * @cp: The maple copy node
 * @wr_mas: The left write maple state
 * @sib: The maple state of the sibling.
 *
 * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
 * indicate it will not be used.
2233 * 2234 */ 2235 static inline void rebalance_data(struct maple_copy *cp, 2236 struct ma_wr_state *wr_mas, struct ma_state *sib, 2237 struct ma_state *parent) 2238 { 2239 cp_data_calc(cp, wr_mas, wr_mas); 2240 sib->end = 0; 2241 if (cp->data > mt_slots[wr_mas->type]) { 2242 push_data_sib(cp, wr_mas->mas, sib, parent); 2243 if (sib->end) 2244 goto use_sib; 2245 } else if (cp->data <= mt_min_slots[wr_mas->type]) { 2246 if ((wr_mas->mas->min != 0) || 2247 (wr_mas->mas->max != ULONG_MAX)) { 2248 rebalance_sib(parent, sib); 2249 goto use_sib; 2250 } 2251 } 2252 2253 return; 2254 2255 use_sib: 2256 2257 cp->data += sib->end + 1; 2258 } 2259 2260 /* 2261 * spanning_data() - Calculate the @cp data and populate @sib if insufficient 2262 * @cp: The maple copy node 2263 * @l_wr_mas: The left write maple state 2264 * @r_wr_mas: The right write maple state 2265 * @sib: The maple state of the sibling. 2266 * 2267 * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to 2268 * indicate it will not be used. 2269 */ 2270 static inline void spanning_data(struct maple_copy *cp, 2271 struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas, 2272 struct ma_state *sib) 2273 { 2274 cp_data_calc(cp, l_wr_mas, r_wr_mas); 2275 if (((l_wr_mas->mas->min != 0) || (r_wr_mas->mas->max != ULONG_MAX)) && 2276 (cp->data <= mt_min_slots[l_wr_mas->type])) { 2277 spanning_sib(l_wr_mas, r_wr_mas, sib); 2278 cp->data += sib->end + 1; 2279 } else { 2280 sib->end = 0; 2281 } 2282 } 2283 2284 /* 2285 * dst_setup() - Set up one or more destinations for the new data. 2286 * @cp: The maple copy node 2287 * @mas: The maple state 2288 * @mt: The source node type 2289 */ 2290 static inline 2291 void dst_setup(struct maple_copy *cp, struct ma_state *mas, enum maple_type mt) 2292 { 2293 /* Data is 1 indexed, every src has +1 added. 
	 */

	/* Everything fits in a single node */
	if (cp->data <= mt_slots[mt]) {
		cp->split = cp->data - 1;
		cp->d_count = 1;
		goto node_setup;
	}

	/* Default to a 2-way split */
	cp->split = (cp->data - 1) / 2;
	cp->d_count = 2;
	if (cp->data < mt_slots[mt] * 2)
		goto node_setup;

	if (cp->data == mt_slots[mt] * 2) {
		unsigned char off;
		unsigned char s;

		if (!ma_is_leaf(mt))
			goto node_setup;

		/*
		 * Leaf nodes are a bit tricky because we cannot assume the data
		 * can fit due to the NULL limitation on node ends.
		 */
		off = cp->split;
		/* Locate the source and offset holding the split entry */
		for (s = 0; s < cp->s_count; s++) {
			unsigned char s_off;

			s_off = cp->src[s].end - cp->src[s].start;
			if (s_off >= off)
				break;

			s_off++;
			off -= s_off;
		}

		off += cp->src[s].start;
		if (ma_slots(cp->src[s].node, cp->src[s].mt)[off])
			goto node_setup;

		/* NULL at the split; try shifting the split right by one */
		cp->split++;
		if (cp->split < mt_slots[mt])
			goto node_setup;

		/* Otherwise shift the split left, if the right side fits */
		cp->split -= 2;
		if (cp->data - 2 - cp->split < mt_slots[mt])
			goto node_setup;

	}

	/* No other choice but to 3-way split the data */
	cp->split = (cp->data + 2) / 3;
	cp->d_count = 3;

node_setup:
	for (int i = 0; i < cp->d_count; i++) {
		cp->dst[i].mt = mt;
		cp->dst[i].node = ma_mnode_ptr(mas_pop_node(mas));
	}
}

/*
 * append_mas_cp() - Append a source taken from a maple state to the copy node.
 * @cp: The maple copy node
 * @mas: The maple state to copy from
 * @start: The first offset to copy
 * @end: The last offset to copy
 */
static inline void append_mas_cp(struct maple_copy *cp,
		struct ma_state *mas, unsigned char start, unsigned char end)
{
	struct maple_node *node;
	enum maple_type mt;
	unsigned char count;

	count = cp->s_count;
	node = mas_mn(mas);
	mt = mte_node_type(mas->node);
	cp->src[count].node = node;
	cp->src[count].mt = mt;
	/* The last offset uses the node max rather than a stored pivot */
	if (mas->end <= end)
		cp->src[count].max = mas->max;
	else
		cp->src[count].max = ma_pivots(node, mt)[end];

	cp->src[count].start = start;
	cp->src[count].end = end;
	cp->s_count++;
}

/*
 * append_wr_mas_cp() - Append a source taken from a write state to the copy
 * node.
 * @cp: The maple copy node
 * @wr_mas: The write maple state to copy from
 * @start: The first offset to copy
 * @end: The last offset to copy
 */
static inline void append_wr_mas_cp(struct
		maple_copy *cp,
		struct ma_wr_state *wr_mas, unsigned char start, unsigned char end)
{
	unsigned char count;

	count = cp->s_count;
	cp->src[count].node = wr_mas->node;
	cp->src[count].mt = wr_mas->type;
	/* The last offset uses the node max rather than a stored pivot */
	if (wr_mas->mas->end <= end)
		cp->src[count].max = wr_mas->mas->max;
	else
		cp->src[count].max = wr_mas->pivots[end];

	cp->src[count].start = start;
	cp->src[count].end = end;
	cp->s_count++;
}

/*
 * init_cp_src() - Append the maple copy node itself (the new data) as a
 * source.
 * @cp: The maple copy node
 */
static inline void init_cp_src(struct maple_copy *cp)
{
	cp->src[cp->s_count].node = ma_mnode_ptr(cp);
	cp->src[cp->s_count].mt = maple_copy;
	cp->src[cp->s_count].max = cp->max;
	cp->src[cp->s_count].start = 0;
	cp->src[cp->s_count].end = cp->end;
	cp->s_count++;
}

/*
 * multi_src_setup() - Set the @cp node up with multiple sources to copy from.
 * @cp: The maple copy node
 * @l_wr_mas: The left write maple state
 * @r_wr_mas: The right write maple state
 * @sib: The sibling maple state
 *
 * Note: @sib->end == 0 indicates no sibling will be used.
 */
static inline
void multi_src_setup(struct maple_copy *cp, struct ma_wr_state *l_wr_mas,
		struct ma_wr_state *r_wr_mas, struct ma_state *sib)
{
	cp->s_count = 0;
	/* A left sibling is copied in before the existing data */
	if (sib->end && sib->max < l_wr_mas->mas->min)
		append_mas_cp(cp, sib, 0, sib->end);

	/* Copy left 0 - offset */
	if (l_wr_mas->mas->offset) {
		unsigned char off = l_wr_mas->mas->offset - 1;

		append_wr_mas_cp(cp, l_wr_mas, 0, off);
		cp->src[cp->s_count - 1].max = cp->min - 1;
	}

	/* The new data sits between the left and right portions */
	init_cp_src(cp);

	/* Copy right either from offset or offset + 1 pending on r_max */
	if (r_wr_mas->mas->end != r_wr_mas->offset_end)
		append_wr_mas_cp(cp, r_wr_mas, r_wr_mas->offset_end + 1,
				r_wr_mas->mas->end);

	/* A right sibling is copied in after the existing data */
	if (sib->end && sib->min > r_wr_mas->mas->max)
		append_mas_cp(cp, sib, 0, sib->end);
}

/*
 * cp_data_write() - Copy the data from the sources into the destinations.
 * @cp: The maple copy node
 * @mas: The maple state
 *
 * Copies @cp->data entries from the @cp->src array into the @cp->dst nodes,
 * splitting at @cp->split and adjusting for trailing NULL entries.
 */
static inline
void cp_data_write(struct maple_copy *cp, struct ma_state *mas)
{
	struct maple_node *dst, *src;
	unsigned char s, d;
	unsigned char dst_offset;
	unsigned char data_offset;
	unsigned char src_end, s_offset;
	unsigned char split;
	unsigned long s_max, d_max;
	unsigned char dst_size;
	enum maple_type s_mt, d_mt;

	data_offset = 0;
	s = d = 0;
	/* Readability help */
	src = cp->src[s].node;
	dst = cp->dst[d].node;
	s_offset = cp->src[s].start;
	src_end = cp->src[s].end;
	split = cp->split;
	s_max = cp->src[s].max;
	s_mt = cp->src[s].mt;
	d_mt = cp->dst[d].mt;
	do {
		dst_offset = 0;
		d_max = 0;
		dst = cp->dst[d].node;
		d_mt = cp->dst[d].mt;
		dst_size = split + 1;

		while (dst_size) {
			unsigned char size;

			/*
			 * Copy the lesser of what remains in this source and
			 * what this destination still needs.
			 */
			if (src_end - s_offset + 1 < dst_size)
				size = src_end - s_offset + 1;
			else
				size = dst_size;

			d_max = node_copy(mas, src, s_offset, size, s_max, s_mt,
					dst, dst_offset, d_mt);

			dst_offset += size;
			s_offset += size;
			if (s_offset > src_end) {
				/* This source is exhausted */
				s++;
				if (s >= cp->s_count) {
					cp->dst[d].max = d_max;
					node_finalise(dst, d_mt, dst_offset);
					return;
				}
				/* Reset local src */
				src = cp->src[s].node;
				s_offset = cp->src[s].start;
				src_end = cp->src[s].end;
				s_max = cp->src[s].max;
				s_mt = cp->src[s].mt;
			}

			dst_size -= size;
			data_offset += size;
		}

		split = cp->split;
		cp->dst[d].max = d_max;
		/* Handle null entries */
		if (cp->dst[d].max != ULONG_MAX &&
		    !ma_slots(dst, d_mt)[dst_offset - 1]) {
			/*
			 * A node may not end in NULL; push the last copied
			 * entry back to the source so the next destination
			 * starts with it.
			 */
			if (s_offset == cp->src[s].start) {
				/* Stepped into a new source; step back out */
				s--;
				src = cp->src[s].node;
				src_end = cp->src[s].end;
				s_max = cp->src[s].max;
				s_mt = cp->src[s].mt;
				s_offset = src_end;
			} else {
				s_offset--;
			}
			/* Set dst max and clear pivot */
			split++;
			data_offset--;
			dst_offset--;
			cp->dst[d].max = ma_pivots(dst, d_mt)[dst_offset - 1];
		}

		node_finalise(dst, d_mt, dst_offset);
		++d; /* Next destination */
		/* The last destination takes all of the remaining data */
		if (d == cp->d_count - 1)
			split = cp->data - data_offset;

		if (d >= cp->d_count) {
			WARN_ON(data_offset < cp->data);
			return;
		}

	} while (data_offset <= cp->data);
}

/*
 * cp_dst_to_slots() - Migrate the maple copy destination to the maple copy
 * slots
 * @cp: The maple copy node
 * @min: The minimal value represented
 * @max: The maximum value represented
 * @mas: The maple state
 */
static inline void cp_dst_to_slots(struct maple_copy *cp, unsigned long min,
		unsigned long max, struct ma_state *mas)
{
	unsigned char d;
	unsigned long slot_min = min;

	for (d = 0; d < cp->d_count; d++) {
		struct maple_node *mn = cp->dst[d].node;
		enum maple_type mt = cp->dst[d].mt;
		unsigned long slot_max = cp->dst[d].max;

		/*
		 * Warning, see cp_leaf_init() comment and
rcu_assign_pointer()
		 * documentation. Since these are new nodes, there are no
		 * read-side operations that can view them until they are
		 * inserted into the tree after an rcu_assign_pointer() call.
		 */
		ma_init_slot(&cp->slot[d], mn, mt);
		cp->pivot[d] = slot_max;
		if (mt_is_alloc(mas->tree)) {
			/* Allocation trees track the largest gap per node */
			if (ma_is_leaf(mt)) {
				cp->gap[d] = ma_leaf_max_gap(mn, mt, slot_min,
						slot_max, ma_pivots(mn, mt),
						ma_slots(mn, mt));
			} else {
				unsigned long *gaps = ma_gaps(mn, mt);

				if (gaps) {
					unsigned char gap_slot;

					gap_slot = ma_meta_gap(mn);
					cp->gap[d] = gaps[gap_slot];
				}
			}
		}
		slot_min = slot_max + 1;
	}

	cp->end = cp->d_count - 1;
	cp->min = min;
	cp->max = max;
}

/*
 * cp_is_new_root() - Check if the copy node covers the entire range and, if
 * so, install it as the new root.
 * @cp: The maple copy node
 * @mas: The maple state
 *
 * Return: true if the tree now has a new root, false otherwise.
 */
static inline bool cp_is_new_root(struct maple_copy *cp, struct ma_state *mas)
{
	if (cp->min || cp->max != ULONG_MAX)
		return false;

	if (cp->d_count != 1) {
		/* Multiple destinations need one more node to be the root */
		enum maple_type mt = maple_arange_64;

		if (!mt_is_alloc(mas->tree))
			mt = maple_range_64;

		cp->data = cp->d_count;
		cp->s_count = 0;
		dst_setup(cp, mas, mt);
		init_cp_src(cp);
		node_copy(mas, cp->src[0].node, 0, cp->data, cp->max, maple_copy,
				cp->dst[0].node, 0, mt);
		node_finalise(cp->dst[0].node, mt, cp->end + 1);
		/*
		 * Warning, see cp_leaf_init() comment and rcu_assign_pointer()
		 * documentation. Since this is a new root, there are no
		 * read-side operations that can view it until it is inserted
		 * into the tree after an rcu_assign_pointer() call.
		 */
		ma_init_slot(&cp->slot[0], cp->dst[0].node, mt);
		cp->height++;
	}
	WARN_ON_ONCE(cp->dst[0].node != mte_to_node(
				mt_slot_locked(mas->tree, cp->slot, 0)));
	cp->dst[0].node->parent = ma_parent_ptr(mas_tree_parent(mas));
	mas->min = 0;
	mas->max = ULONG_MAX;
	mas->depth = 0;
	mas->node = mas_root_locked(mas);
	return true;
}

/*
 * cp_converged() - Check if the operation has converged to a single
 * replacement node with no sibling involved.
 * @cp: The maple copy node
 * @mas: The maple state
 * @sib: The sibling maple state
 *
 * Return: true if @cp->dst[0] can directly replace the current node.
 */
static inline bool cp_converged(struct maple_copy *cp, struct ma_state *mas,
		struct ma_state *sib)
{
	if (cp->d_count != 1 || sib->end)
		return false;

	cp->dst[0].node->parent = ma_parent_ptr(mas_mn(mas)->parent);
	return true;
}

/*
 * spanning_ascend() - See if a spanning store operation has to keep walking up
 * the tree
 * @cp: The maple_copy node
 * @mas: The maple state
 * @l_wr_mas: The left maple write state
 * @r_wr_mas: The right maple write state
 * @sib: the maple state of the sibling
 *
 * Returns: True if another iteration is necessary.
 */
static bool spanning_ascend(struct maple_copy *cp, struct ma_state *mas,
		struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas,
		struct ma_state *sib)
{
	/* A consumed sibling extends one side of the operation */
	if (sib->end) {
		if (sib->max < l_wr_mas->mas->min)
			*l_wr_mas->mas = *sib;
		else
			*r_wr_mas->mas = *sib;
	}

	cp_dst_to_slots(cp, l_wr_mas->mas->min, r_wr_mas->mas->max, mas);
	if (cp_is_new_root(cp, mas))
		return false;

	/* Converged and has a single destination */
	if ((cp->d_count == 1) &&
	    (l_wr_mas->mas->node == r_wr_mas->mas->node)) {
		cp->dst[0].node->parent = ma_parent_ptr(mas_mn(mas)->parent);
		return false;
	}

	cp->height++;
	wr_mas_ascend(l_wr_mas);
	wr_mas_ascend(r_wr_mas);
	return true;
}

/*
 * copy_tree_location() - Copy the tree position from one maple state to
 * another.
 * @src: The maple state to copy from
 * @dst: The maple state to copy to
 */
static inline
void copy_tree_location(const struct ma_state *src, struct ma_state *dst)
{
	dst->node = src->node;
	dst->offset = src->offset;
	dst->min = src->min;
	dst->max = src->max;
	dst->end = src->end;
	dst->depth = src->depth;
}

/*
 * rebalance_ascend() - Ascend the tree and set up for the next loop - if
 * necessary
 * @cp: The maple copy node
 * @wr_mas: The write maple state
 * @sib: The sibling maple state
 * @parent: The maple state of the parent
 *
 * Return: True if another rebalancing operation on the next level is
 * needed, false otherwise.
 */
static inline bool rebalance_ascend(struct maple_copy *cp,
		struct ma_wr_state *wr_mas, struct ma_state *sib,
		struct ma_state *parent)
{
	struct ma_state *mas;
	unsigned long min, max;

	mas = wr_mas->mas;
	if (!sib->end) {
		/* No sibling was used */
		min = mas->min;
		max = mas->max;
	} else if (sib->min > mas->max) { /* Move right succeeded */
		min = mas->min;
		max = sib->max;
		wr_mas->offset_end = parent->offset + 1;
	} else { /* Moved left */
		min = sib->min;
		max = mas->max;
		wr_mas->offset_end = parent->offset;
		parent->offset--;
	}

	cp_dst_to_slots(cp, min, max, mas);
	if (cp_is_new_root(cp, mas))
		return false;

	if (cp_converged(cp, mas, sib))
		return false;

	/* Continue rebalancing at the parent level */
	cp->height++;
	copy_tree_location(parent, mas);
	wr_mas_setup(wr_mas, mas);
	return true;
}

/*
 * mas_root_expand() - Expand a root to a node
 * @mas: The maple state
 * @entry: The entry to store into the tree
 */
static inline void mas_root_expand(struct ma_state *mas, void *entry)
{
	void *contents = mas_root_locked(mas);
	enum maple_type type = maple_leaf_64;
	struct maple_node *node;
	void __rcu **slots;
	unsigned long *pivots;
	int slot = 0;

	node = mas_pop_node(mas);
	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);
	node->parent = ma_parent_ptr(mas_tree_parent(mas));
	mas->node = mt_mk_node(node, type);
	mas->status = ma_active;

	if (mas->index) {
		/*
		 * A range starting above 0 needs the old root entry (if any)
		 * stored first, with a pivot ending at index - 1.
		 */
		if (contents) {
			rcu_assign_pointer(slots[slot], contents);
			if (likely(mas->index > 1))
				slot++;
		}
		pivots[slot++] = mas->index - 1;
	}

	rcu_assign_pointer(slots[slot], entry);
	mas->offset = slot;
	pivots[slot] = mas->last;
	if (mas->last != ULONG_MAX)
		pivots[++slot] = ULONG_MAX;

	mt_set_height(mas->tree, 1);
	ma_set_meta(node, maple_leaf_64, 0, slot);
	/* swap
the new root into the tree */
	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
}

/*
 * mas_store_root() - Storing value into root.
 * @mas: The maple state
 * @entry: The entry to store.
 *
 * There is no root node now and we are storing a value into the root - this
 * function either assigns the pointer or expands into a node.
 */
static inline void mas_store_root(struct ma_state *mas, void *entry)
{
	if (!entry) {
		/* Storing NULL at index 0 clears the root pointer */
		if (!mas->index)
			rcu_assign_pointer(mas->tree->ma_root, NULL);
	} else if (likely((mas->last != 0) || (mas->index != 0)))
		mas_root_expand(mas, entry);
	else if (((unsigned long) (entry) & 3) == 2)
		/*
		 * An entry with tag bits 0b10 would look like an internal node
		 * pointer (see xa_is_node()); store it in a real node instead.
		 */
		mas_root_expand(mas, entry);
	else {
		rcu_assign_pointer(mas->tree->ma_root, entry);
		mas->status = ma_start;
	}
}

/*
 * mas_is_span_wr() - Check if the write needs to be treated as a write that
 * spans the node.
 * @wr_mas: The maple write state
 *
 * Spanning writes are writes that start in one node and end in another OR if
 * the write of a %NULL will cause the node to end with a %NULL.
 *
 * Return: True if this is a spanning write, false otherwise.
 */
static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
{
	unsigned long max = wr_mas->r_max;
	unsigned long last = wr_mas->mas->last;
	enum maple_type type = wr_mas->type;
	void *entry = wr_mas->entry;

	/* Contained in this pivot, fast path */
	if (last < max)
		return false;

	if (ma_is_leaf(type)) {
		max = wr_mas->mas->max;
		if (last < max)
			return false;
	}

	if (last == max) {
		/*
		 * The last entry of leaf node cannot be NULL unless it is the
		 * rightmost node (writing ULONG_MAX), otherwise it spans slots.
		 */
		if (entry || last == ULONG_MAX)
			return false;
	}

	trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry);
	return true;
}

/* mas_wr_walk_descend() - Set the type, walk the node, and cache the slots. */
static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
{
	wr_mas->type = mte_node_type(wr_mas->mas->node);
	mas_wr_node_walk(wr_mas);
	wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
}

/* mas_wr_walk_traverse() - Step the maple state into the found child node. */
static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
{
	wr_mas->mas->max = wr_mas->r_max;
	wr_mas->mas->min = wr_mas->r_min;
	wr_mas->mas->node = wr_mas->content;
	wr_mas->mas->offset = 0;
	wr_mas->mas->depth++;
}
/*
 * mas_wr_walk() - Walk the tree for a write.
 * @wr_mas: The maple write state
 *
 * Uses mas_slot_locked() and does not need to worry about dead nodes.
 *
 * Return: True if it's contained in a node, false on spanning write.
 */
static bool mas_wr_walk(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;

	while (true) {
		mas_wr_walk_descend(wr_mas);
		if (unlikely(mas_is_span_wr(wr_mas)))
			return false;

		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
						mas->offset);
		if (ma_is_leaf(wr_mas->type))
			return true;

		/* Remember the lowest level seen with a free slot */
		if (mas->end < mt_slots[wr_mas->type] - 1)
			wr_mas->vacant_height = mas->depth + 1;

		if (ma_is_root(mas_mn(mas))) {
			/* root needs more than 2 entries to be sufficient + 1 */
			if (mas->end > 2)
				wr_mas->sufficient_height = 1;
		} else if (mas->end > mt_min_slots[wr_mas->type] + 1)
			wr_mas->sufficient_height = mas->depth + 1;

		mas_wr_walk_traverse(wr_mas);
	}

	return true;
}

/*
 * mas_wr_walk_index() - Walk the tree to the leaf for a write, without
 * checking for spanning writes.
 * @wr_mas: The maple write state
 */
static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;

	while (true) {
		mas_wr_walk_descend(wr_mas);
		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
						mas->offset);
		if (ma_is_leaf(wr_mas->type))
			return;
		mas_wr_walk_traverse(wr_mas);
	}
}
/*
 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
 * @l_wr_mas: The left maple write state
 * @r_wr_mas: The right maple write state
 */
static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
		struct ma_wr_state *r_wr_mas)
{
	struct ma_state *r_mas = r_wr_mas->mas;
	struct ma_state *l_mas = l_wr_mas->mas;
	unsigned char l_slot;

	l_slot = l_mas->offset;
	/* If the overwritten entry is NULL, absorb its whole range */
	if (!l_wr_mas->content)
		l_mas->index = l_wr_mas->r_min;

	/* Absorb a NULL in the slot to the left, if there is one */
	if ((l_mas->index == l_wr_mas->r_min) &&
	    (l_slot &&
	     !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
		if (l_slot > 1)
			l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
		else
			l_mas->index = l_mas->min;

		l_mas->offset = l_slot - 1;
		l_wr_mas->r_min = l_mas->index;
	}

	/* Mirror the logic on the right side: absorb trailing NULLs */
	if (!r_wr_mas->content) {
		if (r_mas->last < r_wr_mas->r_max)
			r_mas->last = r_wr_mas->r_max;
		r_mas->offset++;
	} else if ((r_mas->last == r_wr_mas->r_max) &&
	    (r_mas->last < r_mas->max) &&
	    !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
		r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
					r_wr_mas->type, r_mas->offset + 1);
		r_mas->offset++;
		r_wr_mas->r_max = r_mas->last;
	}
}

/*
 * mas_state_walk() - Start the maple state and walk to @mas->index.
 * @mas: The maple state
 *
 * Return: NULL for an empty tree, the root entry for a single-pointer tree,
 * otherwise the result of walking the ranges.
 */
static inline void *mas_state_walk(struct ma_state *mas)
{
	void *entry;

	entry = mas_start(mas);
	if (mas_is_none(mas))
		return NULL;

	if (mas_is_ptr(mas))
		return entry;

	return mtree_range_walk(mas);
}

/*
 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
 * to date.
 *
 * @mas: The maple state.
 *
 * Note: Leaves mas in undesirable state.
 * Return: The entry for @mas->index or %NULL on dead node.
 */
static inline void *mtree_lookup_walk(struct ma_state *mas)
{
	unsigned long *pivots;
	unsigned char offset;
	struct maple_node *node;
	struct maple_enode *next;
	enum maple_type type;
	void __rcu **slots;
	unsigned char end;

	next = mas->node;
	do {
		node = mte_to_node(next);
		type = mte_node_type(next);
		pivots = ma_pivots(node, type);
		end = mt_pivots[type];
		offset = 0;
		/* Find the first pivot covering the index */
		do {
			if (pivots[offset] >= mas->index)
				break;
		} while (++offset < end);

		slots = ma_slots(node, type);
		next = mt_slot(mas->tree, slots, offset);
		/* The dead node check comes after the slot is read */
		if (unlikely(ma_dead_node(node)))
			goto dead_node;
	} while (!ma_is_leaf(type));

	return (void *)next;

dead_node:
	mas_reset(mas);
	return NULL;
}

static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
/*
 * mas_new_root() - Create a new root node that only contains the entry passed
 * in.
 * @mas: The maple state
 * @entry: The entry to store.
 *
 * Only valid when the index == 0 and the last == ULONG_MAX
 */
static inline void mas_new_root(struct ma_state *mas, void *entry)
{
	struct maple_enode *root = mas_root_locked(mas);
	enum maple_type type = maple_leaf_64;
	struct maple_node *node;
	void __rcu **slots;
	unsigned long *pivots;

	WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX);

	if (!entry) {
		/* Storing NULL over the full range empties the tree */
		mt_set_height(mas->tree, 0);
		rcu_assign_pointer(mas->tree->ma_root, entry);
		mas->status = ma_start;
		goto done;
	}

	node = mas_pop_node(mas);
	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);
	node->parent = ma_parent_ptr(mas_tree_parent(mas));
	mas->node = mt_mk_node(node, type);
	mas->status = ma_active;
	rcu_assign_pointer(slots[0], entry);
	pivots[0] = mas->last;
	mt_set_height(mas->tree, 1);
	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));

done:
	/* Free the old tree, if there was one */
	if (xa_is_node(root))
		mte_destroy_walk(root, mas->tree);
}
/*
 * mas_wr_spanning_store() - Create a subtree with the store operation completed
 * and new nodes where necessary, then place the sub-tree in the actual tree.
 * Note that mas is expected to point to the node which caused the store to
 * span.
 * @wr_mas: The maple write state
 */
static void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
{
	struct maple_copy cp;
	struct ma_state *mas;
	struct ma_state sib;

	/* Left and Right side of spanning store */
	MA_STATE(r_mas, NULL, 0, 0);
	MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);

	/*
	 * A store operation that spans multiple nodes is called a spanning
	 * store and is handled early in the store call stack by the function
	 * mas_is_span_wr(). When a spanning store is identified, the maple
	 * state is duplicated. The first maple state walks the left tree path
	 * to ``index``, the duplicate walks the right tree path to ``last``.
	 * The data in the two nodes are combined into a single node, two nodes,
	 * or possibly three nodes (see the 3-way split above). A ``NULL``
	 * written to the last entry of a node is considered a spanning store as
	 * a rebalance is required for the operation to complete and an overflow
	 * of data may happen.
	 */
	mas = wr_mas->mas;
	trace_ma_op(TP_FCT, mas);

	if (unlikely(!mas->index && mas->last == ULONG_MAX))
		return mas_new_root(mas, wr_mas->entry);
	/*
	 * Node rebalancing may occur due to this store, so there may be three new
	 * entries per level plus a new root.
	 */

	/*
	 * Set up right side. Need to get to the next offset after the spanning
	 * store to ensure it's not NULL and to combine both the next node and
	 * the node with the start together.
	 */
	r_mas = *mas;
	/* Avoid overflow, walk to next slot in the tree. */
	if (r_mas.last + 1)
		r_mas.last++;

	r_mas.index = r_mas.last;
	mas_wr_walk_index(&r_wr_mas);
	r_mas.last = r_mas.index = mas->last;
	r_wr_mas.end_piv = r_wr_mas.r_max;

	/* Set up left side.
 */
	mas_wr_walk_index(wr_mas);

	if (!wr_mas->entry) {
		mas_extend_spanning_null(wr_mas, &r_wr_mas);
		mas->last = r_mas.last;
	}

	/* expanding NULLs may make this cover the entire range */
	if (!mas->index && r_mas.last == ULONG_MAX) {
		mas_set_range(mas, 0, ULONG_MAX);
		return mas_new_root(mas, wr_mas->entry);
	}

	cp_leaf_init(&cp, mas, wr_mas, &r_wr_mas);
	/* Combine, write, and ascend until the change is contained */
	do {
		spanning_data(&cp, wr_mas, &r_wr_mas, &sib);
		multi_src_setup(&cp, wr_mas, &r_wr_mas, &sib);
		dst_setup(&cp, mas, wr_mas->type);
		cp_data_write(&cp, mas);
	} while (spanning_ascend(&cp, mas, wr_mas, &r_wr_mas, &sib));

	mas_wmb_replace(mas, &cp);
}

/*
 * mas_wr_node_store() - Attempt to store the value in a node
 * @wr_mas: The maple write state
 *
 * Attempts to reuse the node, but may allocate.
 */
static inline void mas_wr_node_store(struct ma_wr_state *wr_mas)
{
	unsigned char dst_offset, offset_end;
	unsigned char copy_size, node_pivots;
	struct maple_node reuse, *newnode;
	unsigned long *dst_pivots;
	void __rcu **dst_slots;
	unsigned char new_end;
	struct ma_state *mas;
	bool in_rcu;

	mas = wr_mas->mas;
	trace_ma_op(TP_FCT, mas);
	in_rcu = mt_in_rcu(mas->tree);
	offset_end = wr_mas->offset_end;
	node_pivots = mt_pivots[wr_mas->type];
	/* Assume last adds an entry */
	new_end = mas->end + 1 - offset_end + mas->offset;
	if (mas->last == wr_mas->end_piv) {
		offset_end++; /* don't copy this offset */
		new_end--;
	}

	/* set up node.
 */
	if (in_rcu) {
		/* Readers may see the node; build a replacement to publish */
		newnode = mas_pop_node(mas);
	} else {
		/* No readers; stage the result on the stack and copy back */
		memset(&reuse, 0, sizeof(struct maple_node));
		newnode = &reuse;
	}

	newnode->parent = mas_mn(mas)->parent;
	dst_pivots = ma_pivots(newnode, wr_mas->type);
	dst_slots = ma_slots(newnode, wr_mas->type);
	/* Copy from start to insert point */
	if (mas->offset) {
		memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
		memcpy(dst_slots, wr_mas->slots, sizeof(void __rcu *) * mas->offset);
	}

	/* Handle insert of new range starting after old range */
	if (wr_mas->r_min < mas->index) {
		rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content);
		dst_pivots[mas->offset++] = mas->index - 1;
		new_end++;
	}

	/* Store the new entry and range end. */
	if (mas->offset < node_pivots)
		dst_pivots[mas->offset] = mas->last;
	rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry);

	/*
	 * this range wrote to the end of the node or it overwrote the rest of
	 * the data
	 */
	if (offset_end > mas->end)
		goto done;

	dst_offset = mas->offset + 1;
	/* Copy to the end of node if necessary.
 */
	copy_size = mas->end - offset_end + 1;
	memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end,
	       sizeof(void __rcu *) * copy_size);
	memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end,
	       sizeof(unsigned long) * (copy_size - 1));

	if (new_end < node_pivots)
		dst_pivots[new_end] = mas->max;

done:
	mas_leaf_set_meta(newnode, maple_leaf_64, new_end);
	if (in_rcu) {
		/* Publish the replacement node in place of the old one */
		struct maple_enode *old_enode = mas->node;

		mas->node = mt_mk_node(newnode, wr_mas->type);
		mas_replace_node(mas, old_enode, mas_mt_height(mas));
	} else {
		memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
	}
	trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
	mas_update_gap(mas);
	mas->end = new_end;
}

/*
 * mas_wr_slot_store: Attempt to store a value in a slot.
 * @wr_mas: the maple write state
 */
static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;
	unsigned char offset = mas->offset;
	void __rcu **slots = wr_mas->slots;
	bool gap = false;

	gap |= !mt_slot_locked(mas->tree, slots, offset);
	gap |= !mt_slot_locked(mas->tree, slots, offset + 1);

	if (wr_mas->offset_end - offset == 1) {
		if (mas->index == wr_mas->r_min) {
			/* Overwriting the range and a part of the next one */
			rcu_assign_pointer(slots[offset], wr_mas->entry);
			wr_mas->pivots[offset] = mas->last;
		} else {
			/* Overwriting a part of the range and the next one */
			rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
			wr_mas->pivots[offset] = mas->index - 1;
			mas->offset++; /* Keep mas accurate.
 */
		}
	} else {
		WARN_ON_ONCE(mt_in_rcu(mas->tree));
		/*
		 * Expand the range, only partially overwriting the previous and
		 * next ranges
		 */
		gap |= !mt_slot_locked(mas->tree, slots, offset + 2);
		rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
		wr_mas->pivots[offset] = mas->index - 1;
		wr_mas->pivots[offset + 1] = mas->last;
		mas->offset++; /* Keep mas accurate. */
	}

	trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
	/*
	 * Only update gap when the new entry is empty or there is an empty
	 * entry in the original two ranges.
	 */
	if (!wr_mas->entry || gap)
		mas_update_gap(mas);
}

/*
 * mas_wr_extend_null() - Extend a store of a %NULL to include neighbouring
 * %NULLs within the same node.
 * @wr_mas: The maple write state
 */
static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;

	if (!wr_mas->slots[wr_mas->offset_end]) {
		/* If this one is null, the next and prev are not */
		mas->last = wr_mas->end_piv;
	} else {
		/* Check next slot(s) if we are overwriting the end */
		if ((mas->last == wr_mas->end_piv) &&
		    (mas->end != wr_mas->offset_end) &&
		    !wr_mas->slots[wr_mas->offset_end + 1]) {
			wr_mas->offset_end++;
			if (wr_mas->offset_end == mas->end)
				mas->last = mas->max;
			else
				mas->last = wr_mas->pivots[wr_mas->offset_end];
			wr_mas->end_piv = mas->last;
		}
	}

	if (!wr_mas->content) {
		/* If this one is null, the next and prev are not */
		mas->index = wr_mas->r_min;
	} else {
		/* Check prev slot if we are overwriting the start */
		if (mas->index == wr_mas->r_min && mas->offset &&
		    !wr_mas->slots[mas->offset - 1]) {
			mas->offset--;
			wr_mas->r_min = mas->index =
				mas_safe_min(mas, wr_mas->pivots, mas->offset);
			wr_mas->r_max = wr_mas->pivots[mas->offset];
		}
	}
}

/*
 * mas_wr_end_piv() - Find the pivot that ends the write range.
 * @wr_mas: The maple write state
 *
 * Advances @wr_mas->offset_end to the offset containing @mas->last and sets
 * @wr_mas->end_piv to that range's end pivot.
 */
static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
{
	while ((wr_mas->offset_end < wr_mas->mas->end) &&
	       (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end]))
		wr_mas->offset_end++;

	if (wr_mas->offset_end < wr_mas->mas->end)
		wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end];
	else
		wr_mas->end_piv = wr_mas->mas->max;
}

/*
 * mas_wr_new_end() - Calculate the node end after the write completes.
 * @wr_mas: The maple write state
 *
 * Return: The new node end (an index, not a size).
 */
static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;
	unsigned char new_end = mas->end + 2;

	/* Slots fully covered by the write are reclaimed */
	new_end -= wr_mas->offset_end - mas->offset;
	if (wr_mas->r_min == mas->index)
		new_end--;

	if (wr_mas->end_piv == mas->last)
		new_end--;

	return new_end;
}

/*
 * mas_wr_append: Attempt to append
 * @wr_mas: the maple write state
 *
 * This is currently unsafe in rcu mode since the end of the node may be cached
 * by readers while the node contents may be updated which could result in
 * inaccurate information.
 */
static inline void mas_wr_append(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;
	void __rcu **slots;
	unsigned char end = mas->end;
	unsigned char new_end = mas_wr_new_end(wr_mas);

	if (new_end < mt_pivots[wr_mas->type]) {
		wr_mas->pivots[new_end] = wr_mas->pivots[end];
		ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end);
	}

	slots = wr_mas->slots;
	if (new_end == end + 1) {
		if (mas->last == wr_mas->r_max) {
			/* Append to end of range */
			rcu_assign_pointer(slots[new_end], wr_mas->entry);
			wr_mas->pivots[end] = mas->index - 1;
			mas->offset = new_end;
		} else {
			/* Append to start of range */
			rcu_assign_pointer(slots[new_end], wr_mas->content);
			wr_mas->pivots[end] = mas->last;
			rcu_assign_pointer(slots[end], wr_mas->entry);
		}
	} else {
		/* Append to the range without touching any boundaries.
 */
		rcu_assign_pointer(slots[new_end], wr_mas->content);
		wr_mas->pivots[end + 1] = mas->last;
		rcu_assign_pointer(slots[end + 1], wr_mas->entry);
		wr_mas->pivots[end] = mas->index - 1;
		mas->offset = end + 1;
	}

	if (!wr_mas->content || !wr_mas->entry)
		mas_update_gap(mas);

	mas->end = new_end;
	trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry);
}

/*
 * split_ascend() - See if a split operation has to keep walking up the tree
 * @cp: The maple_copy node
 * @wr_mas: The maple write state
 * @sib: the maple state of the sibling
 * @parent: The maple state of the parent
 *
 * Return: true if another split operation on the next level is needed, false
 * otherwise
 */
static inline bool split_ascend(struct maple_copy *cp,
		struct ma_wr_state *wr_mas, struct ma_state *sib,
		struct ma_state *parent)
{
	struct ma_state *mas;
	unsigned long min, max;

	mas = wr_mas->mas;
	min = mas->min; /* push right, or normal split */
	max = mas->max;
	wr_mas->offset_end = parent->offset;
	if (sib->end) {
		if (sib->max < mas->min) {
			min = sib->min; /* push left */
			parent->offset--;
		} else {
			max = sib->max; /* push right */
			wr_mas->offset_end++;
		}
	}

	cp_dst_to_slots(cp, min, max, mas);
	if (cp_is_new_root(cp, mas))
		return false;

	if (cp_converged(cp, mas, sib))
		return false;

	/* Continue splitting at the parent level */
	cp->height++;
	copy_tree_location(parent, mas);
	wr_mas_setup(wr_mas, mas);
	return true;
}

/*
 * split_data() - Calculate the @cp data, populate @sib if the data can be
 * pushed into a sibling.
 * @cp: The maple copy node
 * @wr_mas: The left write maple state
 * @sib: The maple state of the sibling.
 * @parent: The maple state of the parent
 *
 * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
 * indicate it will not be used.
 *
 */
static inline void split_data(struct maple_copy *cp,
		struct ma_wr_state *wr_mas, struct ma_state *sib,
		struct ma_state *parent)
{
	cp_data_calc(cp, wr_mas, wr_mas);
	/* Everything fits in one node; no sibling needed. */
	if (cp->data <= mt_slots[wr_mas->type]) {
		sib->end = 0;
		return;
	}

	push_data_sib(cp, wr_mas->mas, sib, parent);
	/* Account for the sibling's entries (sib->end is a last offset). */
	if (sib->end)
		cp->data += sib->end + 1;
}

/*
 * mas_wr_split() - Expand one node into two
 * @wr_mas: The write maple state
 */
static void mas_wr_split(struct ma_wr_state *wr_mas)
{
	struct ma_state parent;
	struct ma_state *mas;
	struct maple_copy cp;
	struct ma_state sib;

	mas = wr_mas->mas;
	trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry);
	parent = *mas;
	cp_leaf_init(&cp, mas, wr_mas, wr_mas);
	/* Walk up one level per iteration until the split converges. */
	do {
		if (!mte_is_root(parent.node)) {
			mas_ascend(&parent);
			parent.end = mas_data_end(&parent);
		}
		split_data(&cp, wr_mas, &sib, &parent);
		multi_src_setup(&cp, wr_mas, wr_mas, &sib);
		dst_setup(&cp, mas, wr_mas->type);
		cp_data_write(&cp, mas);
	} while (split_ascend(&cp, wr_mas, &sib, &parent));

	/* Publish the new subtree with the required memory barrier. */
	mas_wmb_replace(mas, &cp);
}

/*
 * mas_wr_rebalance() - Insufficient data in one node needs to either get data
 * from a sibling or absorb a sibling all together.
 * @wr_mas: The write maple state
 *
 * Rebalance is different than a spanning store in that the write state is
 * already at the leaf node that's being altered.
 */
static void mas_wr_rebalance(struct ma_wr_state *wr_mas)
{
	struct ma_state parent;
	struct ma_state *mas;
	struct maple_copy cp;
	struct ma_state sib;

	/*
	 * Rebalancing occurs if a node is insufficient. Data is rebalanced
	 * against the node to the right if it exists, otherwise the node to the
	 * left of this node is rebalanced against this node. If rebalancing
	 * causes just one node to be produced instead of two, then the parent
	 * is also examined and rebalanced if it is insufficient. Every level
	 * tries to combine the data in the same way. If one node contains the
	 * entire range of the tree, then that node is used as a new root node.
	 */

	mas = wr_mas->mas;
	trace_ma_op(TP_FCT, mas);
	parent = *mas;
	cp_leaf_init(&cp, mas, wr_mas, wr_mas);
	/* Walk up one level per iteration until the rebalance converges. */
	do {
		if (!mte_is_root(parent.node)) {
			mas_ascend(&parent);
			parent.end = mas_data_end(&parent);
		}
		rebalance_data(&cp, wr_mas, &sib, &parent);
		multi_src_setup(&cp, wr_mas, wr_mas, &sib);
		dst_setup(&cp, mas, wr_mas->type);
		cp_data_write(&cp, mas);
	} while (rebalance_ascend(&cp, wr_mas, &sib, &parent));

	/* Publish the new subtree with the required memory barrier. */
	mas_wmb_replace(mas, &cp);
}

/*
 * mas_wr_store_entry() - Internal call to store a value
 * @wr_mas: The maple write state
 *
 * Dispatches to the handler selected earlier by mas_wr_store_type().
 */
static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;

	switch (mas->store_type) {
	case wr_exact_fit:
		rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
		/* Gap tracking changes only on a NULL <-> non-NULL flip. */
		if (!!wr_mas->entry ^ !!wr_mas->content)
			mas_update_gap(mas);
		break;
	case wr_append:
		mas_wr_append(wr_mas);
		break;
	case wr_slot_store:
		mas_wr_slot_store(wr_mas);
		break;
	case wr_node_store:
		mas_wr_node_store(wr_mas);
		break;
	case wr_spanning_store:
		mas_wr_spanning_store(wr_mas);
		break;
	case wr_split_store:
		mas_wr_split(wr_mas);
		break;
	case wr_rebalance:
		mas_wr_rebalance(wr_mas);
		break;
	case wr_new_root:
		mas_new_root(mas, wr_mas->entry);
		break;
	case wr_store_root:
		mas_store_root(mas, wr_mas->entry);
		break;
	case wr_invalid:
		MT_BUG_ON(mas->tree, 1);
	}
}

/*
 * mas_wr_prealloc_setup() - Put the maple state into a valid position for a
 * write and read the existing contents.
 * @wr_mas: The maple write state
 */
static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;

	if (!mas_is_active(mas)) {
		if (mas_is_start(mas))
			goto set_content;

		if (unlikely(mas_is_paused(mas)))
			goto reset;

		if (unlikely(mas_is_none(mas)))
			goto reset;

		if (unlikely(mas_is_overflow(mas)))
			goto reset;

		if (unlikely(mas_is_underflow(mas)))
			goto reset;
	}

	/*
	 * A less strict version of mas_is_span_wr() where we allow spanning
	 * writes within this node. This is to stop partial walks in
	 * mas_prealloc() from being reset.
	 */
	if (mas->last > mas->max)
		goto reset;

	if (wr_mas->entry)
		goto set_content;

	if (mte_is_leaf(mas->node) && mas->last == mas->max)
		goto reset;

	goto set_content;

reset:
	mas_reset(mas);
set_content:
	wr_mas->content = mas_start(mas);
}

/**
 * mas_prealloc_calc() - Calculate number of nodes needed for a
 * given store operation
 * @wr_mas: The maple write state
 * @entry: The entry to store into the tree
 *
 * The number of nodes required for preallocation is written to
 * @wr_mas->mas->node_request.
 */
static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
{
	struct ma_state *mas = wr_mas->mas;
	unsigned char height = mas_mt_height(mas);
	int ret = height * 3 + 1;
	/* Levels between the tree height and the lowest vacant level. */
	unsigned char delta = height - wr_mas->vacant_height;

	switch (mas->store_type) {
	case wr_exact_fit:
	case wr_append:
	case wr_slot_store:
		ret = 0;
		break;
	case wr_spanning_store:
		if (wr_mas->sufficient_height < wr_mas->vacant_height)
			ret = (height - wr_mas->sufficient_height) * 3 + 1;
		else
			ret = delta * 3 + 1;
		break;
	case wr_split_store:
		ret = delta * 2 + 1;
		break;
	case wr_rebalance:
		if (wr_mas->sufficient_height < wr_mas->vacant_height)
			ret = (height - wr_mas->sufficient_height) * 2 + 1;
		else
			ret = delta * 2 + 1;
		break;
	case wr_node_store:
		/* RCU mode requires a new node to replace the old one. */
		ret = mt_in_rcu(mas->tree) ? 1 : 0;
		break;
	case wr_new_root:
		ret = 1;
		break;
	case wr_store_root:
		if (likely((mas->last != 0) || (mas->index != 0)))
			ret = 1;
		else if (((unsigned long) (entry) & 3) == 2)
			ret = 1;
		else
			ret = 0;
		break;
	case wr_invalid:
		WARN_ON_ONCE(1);
	}

	mas->node_request = ret;
}

/*
 * mas_wr_store_type() - Determine the store type for a given
 * store operation.
 * @wr_mas: The maple write state
 *
 * Return: the type of store needed for the operation
 */
static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;
	unsigned char new_end;

	if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
		return wr_store_root;

	if (unlikely(!mas_wr_walk(wr_mas)))
		return wr_spanning_store;

	/* At this point, we are at the leaf node that needs to be altered.
	 */
	mas_wr_end_piv(wr_mas);
	if (!wr_mas->entry)
		mas_wr_extend_null(wr_mas);

	if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last))
		return wr_exact_fit;

	if (unlikely(!mas->index && mas->last == ULONG_MAX))
		return wr_new_root;

	new_end = mas_wr_new_end(wr_mas);
	/* Potential spanning rebalance collapsing a node */
	if (new_end < mt_min_slots[wr_mas->type]) {
		if (!mte_is_root(mas->node))
			return wr_rebalance;
		return wr_node_store;
	}

	if (new_end >= mt_slots[wr_mas->type])
		return wr_split_store;

	/* Appending in place is only safe outside of RCU mode. */
	if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end))
		return wr_append;

	if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) ||
			(wr_mas->offset_end - mas->offset == 1)))
		return wr_slot_store;

	return wr_node_store;
}

/**
 * mas_wr_preallocate() - Preallocate enough nodes for a store operation
 * @wr_mas: The maple write state
 * @entry: The entry that will be stored
 *
 */
static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry)
{
	struct ma_state *mas = wr_mas->mas;

	mas_wr_prealloc_setup(wr_mas);
	mas->store_type = mas_wr_store_type(wr_mas);
	mas_prealloc_calc(wr_mas, entry);
	if (!mas->node_request)
		return;

	mas_alloc_nodes(mas, GFP_NOWAIT);
}

/**
 * mas_insert() - Internal call to insert a value
 * @mas: The maple state
 * @entry: The entry to store
 *
 * Return: %NULL or the contents that already exists at the requested index
 * otherwise. The maple state needs to be checked for error conditions.
 */
static inline void *mas_insert(struct ma_state *mas, void *entry)
{
	MA_WR_STATE(wr_mas, mas, entry);

	/*
	 * Inserting a new range inserts either 0, 1, or 2 pivots within the
	 * tree. If the insert fits exactly into an existing gap with a value
	 * of NULL, then the slot only needs to be written with the new value.
	 * If the range being inserted is adjacent to another range, then only a
	 * single pivot needs to be inserted (as well as writing the entry). If
	 * the new range is within a gap but does not touch any other ranges,
	 * then two pivots need to be inserted: the start - 1, and the end. As
	 * usual, the entry must be written. Most operations require a new node
	 * to be allocated and replace an existing node to ensure RCU safety,
	 * when in RCU mode. The exception to requiring a newly allocated node
	 * is when inserting at the end of a node (appending). When done
	 * carefully, appending can reuse the node in place.
	 */
	wr_mas.content = mas_start(mas);
	if (wr_mas.content)
		goto exists;

	mas_wr_preallocate(&wr_mas, entry);
	if (mas_is_err(mas))
		return NULL;

	/* spanning writes always overwrite something */
	if (mas->store_type == wr_spanning_store)
		goto exists;

	/* At this point, we are at the leaf node that needs to be altered. */
	if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) {
		wr_mas.offset_end = mas->offset;
		wr_mas.end_piv = wr_mas.r_max;

		/* Insert must not overwrite anything within the range. */
		if (wr_mas.content || (mas->last > wr_mas.r_max))
			goto exists;
	}

	mas_wr_store_entry(&wr_mas);
	return wr_mas.content;

exists:
	mas_set_err(mas, -EEXIST);
	return wr_mas.content;

}

/**
 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry
 * @mas: The maple state.
 * @startp: Pointer to ID.
 * @range_lo: Lower bound of range to search.
 * @range_hi: Upper bound of range to search.
 * @entry: The entry to store.
 * @next: Pointer to next ID to allocate.
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * Return: 0 if the allocation succeeded without wrapping, 1 if the
 * allocation succeeded after wrapping, or -EBUSY if there are no
 * free entries.
 */
int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
		void *entry, unsigned long range_lo, unsigned long range_hi,
		unsigned long *next, gfp_t gfp)
{
	unsigned long min = range_lo;
	int ret = 0;

	/* Start the search at the hint (*next), never below range_lo. */
	range_lo = max(min, *next);
	ret = mas_empty_area(mas, range_lo, range_hi, 1);
	if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
		mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
		ret = 1;
	}
	/* Nothing above the hint; wrap around and retry from range_lo. */
	if (ret < 0 && range_lo > min) {
		mas_reset(mas);
		ret = mas_empty_area(mas, min, range_hi, 1);
		if (ret == 0)
			ret = 1;
	}
	if (ret < 0)
		return ret;

	do {
		mas_insert(mas, entry);
	} while (mas_nomem(mas, gfp));
	if (mas_is_err(mas))
		return xa_err(mas->node);

	*startp = mas->index;
	*next = *startp + 1;
	if (*next == 0)
		mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;

	mas_destroy(mas);
	return ret;
}
EXPORT_SYMBOL(mas_alloc_cyclic);

/* Restart the walk at @index, retrying until a stable state is reached. */
static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
{
retry:
	mas_set(mas, index);
	mas_state_walk(mas);
	if (mas_is_start(mas))
		goto retry;
}

/* Rewalk from @index if @node died under us; return true if a rewalk ran. */
static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas,
		struct maple_node *node, const unsigned long index)
{
	if (unlikely(ma_dead_node(node))) {
		mas_rewalk(mas, index);
		return true;
	}
	return false;
}

/*
 * mas_prev_node() - Find the prev non-null entry at the same level in the
 * tree.
 * @mas: The maple state
 * @min: The lower limit to search
 *
 * The prev node value will be mas->node[mas->offset] or the status will be
 * ma_none.
 * Return: 1 if the node is dead, 0 otherwise.
 */
static int mas_prev_node(struct ma_state *mas, unsigned long min)
{
	enum maple_type mt;
	int offset, level;
	void __rcu **slots;
	struct maple_node *node;
	unsigned long *pivots;
	unsigned long max;

	node = mas_mn(mas);
	if (!mas->min)
		goto no_entry;

	max = mas->min - 1;
	if (max < min)
		goto no_entry;

	level = 0;
	do {
		if (ma_is_root(node))
			goto no_entry;

		/* Walk up. */
		if (unlikely(mas_ascend(mas)))
			return 1;
		offset = mas->offset;
		level++;
		node = mas_mn(mas);
	} while (!offset);

	offset--;
	mt = mte_node_type(mas->node);
	/* Walk back down the last slot of each level. */
	while (level > 1) {
		level--;
		slots = ma_slots(node, mt);
		mas->node = mas_slot(mas, slots, offset);
		/* Dead-node check must follow the slot read (RCU). */
		if (unlikely(ma_dead_node(node)))
			return 1;

		mt = mte_node_type(mas->node);
		node = mas_mn(mas);
		pivots = ma_pivots(node, mt);
		offset = ma_data_end(node, mt, pivots, max);
		if (unlikely(ma_dead_node(node)))
			return 1;
	}

	slots = ma_slots(node, mt);
	mas->node = mas_slot(mas, slots, offset);
	pivots = ma_pivots(node, mt);
	if (unlikely(ma_dead_node(node)))
		return 1;

	if (likely(offset))
		mas->min = pivots[offset - 1] + 1;
	mas->max = max;
	mas->offset = mas_data_end(mas);
	if (unlikely(mte_dead_node(mas->node)))
		return 1;

	mas->end = mas->offset;
	return 0;

no_entry:
	if (unlikely(ma_dead_node(node)))
		return 1;

	mas->status = ma_underflow;
	return 0;
}

/*
 * mas_prev_slot() - Get the entry in the previous slot
 *
 * @mas: The maple state
 * @min: The minimum starting range
 * @empty:
 Can be empty
 *
 * Return: The entry in the previous slot which is possibly NULL
 */
static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty)
{
	void *entry;
	void __rcu **slots;
	unsigned long pivot;
	enum maple_type type;
	unsigned long *pivots;
	struct maple_node *node;
	unsigned long save_point = mas->index;

retry:
	node = mas_mn(mas);
	type = mte_node_type(mas->node);
	pivots = ma_pivots(node, type);
	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
		goto retry;

	/* Already at or below the lower limit? */
	if (mas->min <= min) {
		pivot = mas_safe_min(mas, pivots, mas->offset);

		if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
			goto retry;

		if (pivot <= min)
			goto underflow;
	}

again:
	if (likely(mas->offset)) {
		/* Step back within the current node. */
		mas->offset--;
		mas->last = mas->index - 1;
		mas->index = mas_safe_min(mas, pivots, mas->offset);
	} else {
		if (mas->index <= min)
			goto underflow;

		/* Move to the previous node at this level. */
		if (mas_prev_node(mas, min)) {
			mas_rewalk(mas, save_point);
			goto retry;
		}

		if (WARN_ON_ONCE(mas_is_underflow(mas)))
			return NULL;

		mas->last = mas->max;
		node = mas_mn(mas);
		type = mte_node_type(mas->node);
		pivots = ma_pivots(node, type);
		mas->index = pivots[mas->offset - 1] + 1;
	}

	slots = ma_slots(node, type);
	entry = mas_slot(mas, slots, mas->offset);
	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
		goto retry;

	if (likely(entry))
		return entry;

	/* Skip NULL slots unless the caller accepts empty ranges. */
	if (!empty) {
		if (mas->index <= min)
			goto underflow;

		goto again;
	}

	return entry;

underflow:
	mas->status = ma_underflow;
	return NULL;
}

/*
 * mas_next_node() - Get the next node at the same level in the tree.
 * @mas: The maple state
 * @node: The maple node
 * @max: The maximum pivot value to check.
 *
 * The next value will be mas->node[mas->offset] or the status will have
 * overflowed.
 * Return: 1 on dead node, 0 otherwise.
 */
static int mas_next_node(struct ma_state *mas, struct maple_node *node,
		unsigned long max)
{
	unsigned long min;
	unsigned long *pivots;
	struct maple_enode *enode;
	struct maple_node *tmp;
	int level = 0;
	unsigned char node_end;
	enum maple_type mt;
	void __rcu **slots;

	if (mas->max >= max)
		goto overflow;

	min = mas->max + 1;
	level = 0;
	/* Walk up until there is a slot to the right. */
	do {
		if (ma_is_root(node))
			goto overflow;

		/* Walk up. */
		if (unlikely(mas_ascend(mas)))
			return 1;

		level++;
		node = mas_mn(mas);
		mt = mte_node_type(mas->node);
		pivots = ma_pivots(node, mt);
		node_end = ma_data_end(node, mt, pivots, mas->max);
		if (unlikely(ma_dead_node(node)))
			return 1;

	} while (unlikely(mas->offset == node_end));

	slots = ma_slots(node, mt);
	mas->offset++;
	enode = mas_slot(mas, slots, mas->offset);
	/* Dead-node check must follow the slot read (RCU). */
	if (unlikely(ma_dead_node(node)))
		return 1;

	if (level > 1)
		mas->offset = 0;

	/* Descend through the first slot of each level. */
	while (unlikely(level > 1)) {
		level--;
		mas->node = enode;
		node = mas_mn(mas);
		mt = mte_node_type(mas->node);
		slots = ma_slots(node, mt);
		enode = mas_slot(mas, slots, 0);
		if (unlikely(ma_dead_node(node)))
			return 1;
	}

	if (!mas->offset)
		pivots = ma_pivots(node, mt);

	mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt);
	tmp = mte_to_node(enode);
	mt = mte_node_type(enode);
	pivots = ma_pivots(tmp, mt);
	mas->end = ma_data_end(tmp, mt, pivots, mas->max);
	if (unlikely(ma_dead_node(node)))
		return 1;

	mas->node = enode;
	mas->min = min;
	return 0;

overflow:
	if (unlikely(ma_dead_node(node)))
		return 1;

	mas->status = ma_overflow;
	return 0;
}

/*
 * mas_next_slot() - Get the entry in the next slot
 *
 * @mas: The maple state
 * @max: The maximum starting range
 * @empty: Can be empty
 *
 * Return: The entry in the next slot which is possibly NULL
 */
static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty)
{
	void __rcu **slots;
	unsigned long *pivots;
	unsigned long pivot;
	enum maple_type type;
	struct maple_node *node;
	unsigned long save_point = mas->last;
	void *entry;

retry:
	node = mas_mn(mas);
	type = mte_node_type(mas->node);
	pivots = ma_pivots(node, type);
	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
		goto retry;

	/* Already at or beyond the upper limit? */
	if (mas->max >= max) {
		if (likely(mas->offset < mas->end))
			pivot = pivots[mas->offset];
		else
			pivot = mas->max;

		if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
			goto retry;

		if (pivot >= max) { /* Was at the limit, next will extend beyond */
			mas->status = ma_overflow;
			return NULL;
		}
	}

	if (likely(mas->offset < mas->end)) {
		/* Step forward within the current node. */
		mas->index = pivots[mas->offset] + 1;
again:
		mas->offset++;
		if (likely(mas->offset < mas->end))
			mas->last = pivots[mas->offset];
		else
			mas->last = mas->max;
	} else {
		if (mas->last >= max) {
			mas->status = ma_overflow;
			return NULL;
		}

		/* Move to the next node at this level. */
		if (mas_next_node(mas, node, max)) {
			mas_rewalk(mas, save_point);
			goto retry;
		}

		if (WARN_ON_ONCE(mas_is_overflow(mas)))
			return NULL;

		mas->offset = 0;
		mas->index = mas->min;
		node = mas_mn(mas);
		type = mte_node_type(mas->node);
		pivots = ma_pivots(node, type);
		mas->last = pivots[0];
	}

	slots = ma_slots(node, type);
	entry = mt_slot(mas->tree, slots, mas->offset);
	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
		goto retry;

	if (entry)
		return entry;


	/* Skip NULL slots unless the caller accepts empty ranges. */
	if (!empty) {
		if (mas->last >= max) {
			mas->status = ma_overflow;
			return NULL;
		}

		mas->index = mas->last + 1;
		goto again;
	}

	return entry;
}

/*
 * mas_rev_awalk() - Internal function.  Reverse allocation walk.  Find the
 * highest gap address of a given size in a given node and descend.
 * @mas: The maple state
 * @size: The needed size.
 * @gap_min: Set to the minimum of the found gap (leaf only)
 * @gap_max: Set to the maximum usable address of the found gap (leaf only)
 *
 * Return: True if found in a leaf, false otherwise.
 *
 */
static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
		unsigned long *gap_min, unsigned long *gap_max)
{
	enum maple_type type = mte_node_type(mas->node);
	struct maple_node *node = mas_mn(mas);
	unsigned long *pivots, *gaps;
	void __rcu **slots;
	unsigned long gap = 0;
	unsigned long max, min;
	unsigned char offset;

	if (unlikely(mas_is_err(mas)))
		return true;

	if (ma_is_dense(type)) {
		/* dense nodes. */
		mas->offset = (unsigned char)(mas->index - mas->min);
		return true;
	}

	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);
	gaps = ma_gaps(node, type);
	offset = mas->offset;
	min = mas_safe_min(mas, pivots, offset);
	/* Skip out of bounds. */
	while (mas->last < min)
		min = mas_safe_min(mas, pivots, --offset);

	max = mas_safe_pivot(mas, pivots, offset, type);
	/* Scan from high offsets towards low looking for a large enough gap. */
	while (mas->index <= max) {
		gap = 0;
		if (gaps)
			gap = gaps[offset];
		else if (!mas_slot(mas, slots, offset))
			gap = max - min + 1;

		if (gap) {
			if ((size <= gap) && (size <= mas->last - min + 1))
				break;

			if (!gaps) {
				/* Skip the next slot, it cannot be a gap.
				 */
				if (offset < 2)
					goto ascend;

				offset -= 2;
				max = pivots[offset];
				min = mas_safe_min(mas, pivots, offset);
				continue;
			}
		}

		if (!offset)
			goto ascend;

		offset--;
		max = min - 1;
		min = mas_safe_min(mas, pivots, offset);
	}

	if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
		goto no_space;

	if (unlikely(ma_is_leaf(type))) {
		mas->offset = offset;
		*gap_min = min;
		*gap_max = min + gap - 1;
		return true;
	}

	/* descend, only happens under lock. */
	mas->node = mas_slot(mas, slots, offset);
	mas->min = min;
	mas->max = max;
	mas->offset = mas_data_end(mas);
	return false;

ascend:
	if (!mte_is_root(mas->node))
		return false;

no_space:
	mas_set_err(mas, -EBUSY);
	return false;
}

/*
 * mas_anode_descend() - Find the first gap of at least @size in this node and
 * descend into the child holding it, if any.
 * @mas: The maple state
 * @size: The size of the gap required
 *
 * Return: true when the walk is finished (gap found in a leaf, dense node, or
 * -EBUSY set), false when descending into a child or when this node has no
 * suitable gap.
 */
static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
{
	enum maple_type type = mte_node_type(mas->node);
	unsigned long pivot, min, gap = 0;
	unsigned char offset, data_end;
	unsigned long *gaps, *pivots;
	void __rcu **slots;
	struct maple_node *node;
	bool found = false;

	if (ma_is_dense(type)) {
		mas->offset = (unsigned char)(mas->index - mas->min);
		return true;
	}

	node = mas_mn(mas);
	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);
	gaps = ma_gaps(node, type);
	offset = mas->offset;
	min = mas_safe_min(mas, pivots, offset);
	data_end = ma_data_end(node, type, pivots, mas->max);
	for (; offset <= data_end; offset++) {
		pivot = mas_safe_pivot(mas, pivots, offset, type);

		/* Not within lower bounds */
		if (mas->index > pivot)
			goto next_slot;

		if (gaps)
			gap = gaps[offset];
		else if (!mas_slot(mas, slots, offset))
			gap = min(pivot, mas->last) - max(mas->index, min) + 1;
		else
			goto next_slot;

		if (gap >= size) {
			if (ma_is_leaf(type)) {
				found = true;
				break;
			}

			/* Descend into the child that contains the gap. */
			mas->node = mas_slot(mas, slots, offset);
			mas->min = min;
			mas->max = pivot;
			offset = 0;
			break;
		}
next_slot:
		min = pivot + 1;
		if (mas->last <= pivot) {
			mas_set_err(mas, -EBUSY);
			return true;
		}
	}

	mas->offset = offset;
	return found;
}

/**
 * mas_walk() - Search for @mas->index in the tree.
 * @mas: The maple state.
 *
 * mas->index and mas->last will be set to the range if there is a value.  If
 * mas->status is ma_none, reset to ma_start
 *
 * Return: the entry at the location or %NULL.
 */
void *mas_walk(struct ma_state *mas)
{
	void *entry;

	if (!mas_is_active(mas) && !mas_is_start(mas))
		mas->status = ma_start;
retry:
	entry = mas_state_walk(mas);
	if (mas_is_start(mas)) {
		goto retry;
	} else if (mas_is_none(mas)) {
		mas->index = 0;
		mas->last = ULONG_MAX;
	} else if (mas_is_ptr(mas)) {
		/* Root is a single pointer; only index 0 holds the entry. */
		if (!mas->index) {
			mas->last = 0;
			return entry;
		}

		mas->index = 1;
		mas->last = ULONG_MAX;
		mas->status = ma_none;
		return NULL;
	}

	return entry;
}
EXPORT_SYMBOL_GPL(mas_walk);

/* Rewind to the previous slot, ascending as needed; false if exhausted. */
static inline bool mas_rewind_node(struct ma_state *mas)
{
	unsigned char slot;

	do {
		if (mte_is_root(mas->node)) {
			slot = mas->offset;
			if (!slot)
				return false;
		} else {
			mas_ascend(mas);
			slot = mas->offset;
		}
	} while (!slot);

	mas->offset = --slot;
	return true;
}

/*
 * mas_skip_node() - Internal function.  Skip over a node.
 * @mas: The maple state.
 *
 * Return: true if there is another node, false otherwise.
 */
static inline bool mas_skip_node(struct ma_state *mas)
{
	if (mas_is_err(mas))
		return false;

	/* Ascend until a node with a slot to the right is found. */
	do {
		if (mte_is_root(mas->node)) {
			if (mas->offset >= mas_data_end(mas)) {
				mas_set_err(mas, -EBUSY);
				return false;
			}
		} else {
			mas_ascend(mas);
		}
	} while (mas->offset >= mas_data_end(mas));

	mas->offset++;
	return true;
}

/*
 * mas_awalk() - Allocation walk.  Search from low address to high, for a gap of
 * @size
 * @mas: The maple state
 * @size: The size of the gap required
 *
 * Search between @mas->index and @mas->last for a gap of @size.
 */
static inline void mas_awalk(struct ma_state *mas, unsigned long size)
{
	struct maple_enode *last = NULL;

	/*
	 * There are 4 options:
	 * go to child (descend)
	 * go back to parent (ascend)
	 * no gap found. (return, error == -EBUSY)
	 * found the gap. (return)
	 */
	while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
		/* Same node twice in a row means it is exhausted; skip it. */
		if (last == mas->node)
			mas_skip_node(mas);
		else
			last = mas->node;
	}
}

/*
 * mas_sparse_area() - Internal function.  Return upper or lower limit when
 * searching for a gap in an empty tree.
 * @mas: The maple state
 * @min: the minimum range
 * @max: The maximum range
 * @size: The size of the gap
 * @fwd: Searching forward or back
 */
static inline int mas_sparse_area(struct ma_state *mas, unsigned long min,
		unsigned long max, unsigned long size, bool fwd)
{
	/* Index 0 is occupied by the root pointer entry; start above it. */
	if (!unlikely(mas_is_none(mas)) && min == 0) {
		min++;
		/*
		 * At this time, min is increased, we need to recheck whether
		 * the size is satisfied.
		 */
		if (min > max || max - min + 1 < size)
			return -EBUSY;
	}
	/* mas_is_ptr */

	if (fwd) {
		mas->index = min;
		mas->last = min + size - 1;
	} else {
		mas->last = max;
		mas->index = max - size + 1;
	}
	return 0;
}

/*
 * mas_empty_area() - Get the lowest address within the range that is
 * sufficient for the size requested.
 * @mas: The maple state
 * @min: The lowest value of the range
 * @max: The highest value of the range
 * @size: The size needed
 */
int mas_empty_area(struct ma_state *mas, unsigned long min,
		unsigned long max, unsigned long size)
{
	unsigned char offset;
	unsigned long *pivots;
	enum maple_type mt;
	struct maple_node *node;

	if (min > max)
		return -EINVAL;

	if (size == 0 || max - min < size - 1)
		return -EINVAL;

	if (mas_is_start(mas))
		mas_start(mas);
	else if (mas->offset >= 2)
		mas->offset -= 2;
	else if (!mas_skip_node(mas))
		return -EBUSY;

	/* Empty set */
	if (mas_is_none(mas) || mas_is_ptr(mas))
		return mas_sparse_area(mas, min, max, size, true);

	/* The start of the window can only be within these values */
	mas->index = min;
	mas->last = max;
	mas_awalk(mas, size);

	if (unlikely(mas_is_err(mas)))
		return xa_err(mas->node);

	offset = mas->offset;
	node = mas_mn(mas);
	mt = mte_node_type(mas->node);
	pivots = ma_pivots(node, mt);
	min = mas_safe_min(mas, pivots, offset);
	if (mas->index < min)
		mas->index = min;
	mas->last = mas->index + size - 1;
	mas->end = ma_data_end(node, mt, pivots, mas->max);
	return 0;
}
EXPORT_SYMBOL_GPL(mas_empty_area);

/*
 * mas_empty_area_rev() - Get the highest address within the range that is
 * sufficient for the size requested.
 * @mas: The maple state
 * @min: The lowest value of the range
 * @max: The highest value of the range
 * @size: The size needed
 */
int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
		unsigned long max, unsigned long size)
{
	struct maple_enode *last = mas->node;

	if (min > max)
		return -EINVAL;

	if (size == 0 || max - min < size - 1)
		return -EINVAL;

	if (mas_is_start(mas))
		mas_start(mas);
	else if ((mas->offset < 2) && (!mas_rewind_node(mas)))
		return -EBUSY;

	/* Empty set. */
	if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
		return mas_sparse_area(mas, min, max, size, false);
	else if (mas->offset >= 2)
		mas->offset -= 2;
	else
		mas->offset = mas_data_end(mas);


	/* The start of the window can only be within these values. */
	mas->index = min;
	mas->last = max;

	while (!mas_rev_awalk(mas, size, &min, &max)) {
		/* Same node twice in a row means it is exhausted; rewind. */
		if (last == mas->node) {
			if (!mas_rewind_node(mas))
				return -EBUSY;
		} else {
			last = mas->node;
		}
	}

	if (mas_is_err(mas))
		return xa_err(mas->node);

	if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
		return -EBUSY;

	/* Trim the upper limit to the max. */
	if (max < mas->last)
		mas->last = max;

	mas->index = mas->last - size + 1;
	mas->end = mas_data_end(mas);
	return 0;
}
EXPORT_SYMBOL_GPL(mas_empty_area_rev);

/*
 * mte_dead_leaves() - Mark all leaves of a node as dead.
 * @enode: the encoded node
 * @mt: the maple tree
 * @slots: Pointer to the slot array
 *
 * Must hold the write lock.
 *
 * Return: The number of leaves marked as dead.
 */
static inline
unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
			      void __rcu **slots)
{
	struct maple_node *node;
	enum maple_type type;
	void *entry;
	int offset;

	for (offset = 0; offset < mt_slot_count(enode); offset++) {
		entry = mt_slot(mt, slots, offset);
		type = mte_node_type(entry);
		node = mte_to_node(entry);
		/* Use both node and type to catch LE & BE metadata */
		if (!node || !type)
			break;

		mte_set_node_dead(entry);
		/* Preserve the type in the node for the freeing walk. */
		node->type = type;
		rcu_assign_pointer(slots[offset], node);
	}

	return offset;
}

/**
 * mte_dead_walk() - Walk down a dead tree to just before the leaves
 * @enode: The maple encoded node
 * @offset: The starting offset
 *
 * Note: This can only be used from the RCU callback context.
 */
static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
{
	struct maple_node *node, *next;
	void __rcu **slots = NULL;

	next = mte_to_node(*enode);
	/* Descend via @offset once, then via slot 0, until above a leaf. */
	do {
		*enode = ma_enode_ptr(next);
		node = mte_to_node(*enode);
		slots = ma_slots(node, node->type);
		next = rcu_dereference_protected(slots[offset],
				lock_is_held(&rcu_callback_map));
		offset = 0;
	} while (!ma_is_leaf(next->type));

	return slots;
}

/**
 * mt_free_walk() - Walk & free a tree in the RCU callback context
 * @head: The RCU head that's within the node.
 *
 * Note: This can only be used from the RCU callback context.
 */
static void mt_free_walk(struct rcu_head *head)
{
	void __rcu **slots;
	struct maple_node *node, *start;
	struct maple_enode *enode;
	unsigned char offset;
	enum maple_type type;

	node = container_of(head, struct maple_node, rcu);

	/* A dead leaf has no children to walk; free it directly. */
	if (ma_is_leaf(node->type))
		goto free_leaf;

	start = node;
	enode = mt_mk_node(node, node->type);
	slots = mte_dead_walk(&enode, 0);
	node = mte_to_node(enode);
	/*
	 * Bottom-up sweep: free each node's children in bulk, then climb via
	 * the piv_parent back-pointers stored when the tree was killed,
	 * descending again into any remaining live sibling subtree.
	 */
	do {
		mt_free_bulk(node->slot_len, slots);
		offset = node->parent_slot + 1;
		enode = node->piv_parent;
		/* A node that is its own parent marks the walk's root. */
		if (mte_to_node(enode) == node)
			goto free_leaf;

		type = mte_node_type(enode);
		slots = ma_slots(mte_to_node(enode), type);
		if ((offset < mt_slots[type]) &&
		    rcu_dereference_protected(slots[offset],
					      lock_is_held(&rcu_callback_map)))
			slots = mte_dead_walk(&enode, offset);
		node = mte_to_node(enode);
	} while ((node != start) || (node->slot_len < offset));

	slots = ma_slots(node, node->type);
	mt_free_bulk(node->slot_len, slots);

free_leaf:
	kfree(node);
}

/*
 * mte_destroy_descend() - Descend to the leftmost live leaf, marking each
 * node dead and recording back-pointers (piv_parent/parent_slot) so the
 * later bottom-up sweep can retrace the path.
 * @enode: Pointer to the encoded node to descend from; updated in place.
 * @mt: The maple tree (needed for locked slot access).
 * @prev: The parent to record in the first visited node.
 * @offset: The parent slot to record in the first visited node.
 *
 * Return: The slot array of the last internal node visited (whose children
 * are leaves).
 */
static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
	struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
{
	struct maple_node *node;
	struct maple_enode *next = *enode;
	void __rcu **slots = NULL;
	enum maple_type type;
	unsigned char next_offset = 0;

	do {
		*enode = next;
		node = mte_to_node(*enode);
		type = mte_node_type(*enode);
		slots = ma_slots(node, type);
		next = mt_slot_locked(mt, slots, next_offset);
		/* Skip over a first child already marked dead. */
		if ((mte_dead_node(next)))
			next = mt_slot_locked(mt, slots, ++next_offset);

		mte_set_node_dead(*enode);
		node->type = type;
		node->piv_parent = prev;
		node->parent_slot = offset;
		offset = next_offset;
		next_offset = 0;
		prev = *enode;
	} while (!mte_is_leaf(next));

	return slots;
}

/*
 * mt_destroy_walk() - Kill (and optionally free) an entire subtree.
 * @enode: The encoded root of the subtree.
 * @mt: The maple tree.
 * @free: Free nodes immediately when true; when false only mark them dead
 * (the actual freeing then happens later in the RCU callback).
 */
static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
			    bool free)
{
	void __rcu **slots;
	struct maple_node *node = mte_to_node(enode);
	struct maple_enode *start;

	if (mte_is_leaf(enode)) {
		mte_set_node_dead(enode);
		node->type = mte_node_type(enode);
		goto free_leaf;
	}

	start = enode;
	slots = mte_destroy_descend(&enode, mt, start, 0);
	node = mte_to_node(enode); // Updated in the above call.
	do {
		enum maple_type type;
		unsigned char offset;
		struct maple_enode *parent, *tmp;

		node->slot_len = mte_dead_leaves(enode, mt, slots);
		if (free)
			mt_free_bulk(node->slot_len, slots);
		offset = node->parent_slot + 1;
		enode = node->piv_parent;
		/* Reached the walk's root (node is its own recorded parent). */
		if (mte_to_node(enode) == node)
			goto free_leaf;

		type = mte_node_type(enode);
		slots = ma_slots(mte_to_node(enode), type);
		if (offset >= mt_slots[type])
			goto next;

		tmp = mt_slot_locked(mt, slots, offset);
		/* Descend into the next live sibling subtree, if any. */
		if (mte_node_type(tmp) && mte_to_node(tmp)) {
			parent = enode;
			enode = tmp;
			slots = mte_destroy_descend(&enode, mt, parent, offset);
		}
next:
		node = mte_to_node(enode);
	} while (start != enode);

	node = mte_to_node(enode);
	node->slot_len = mte_dead_leaves(enode, mt, slots);
	if (free)
		mt_free_bulk(node->slot_len, slots);

free_leaf:
	if (free)
		kfree(node);
	else
		mt_clear_meta(mt, node, node->type);
}

/*
 * mte_destroy_walk() - Free a tree or sub-tree.
 * @enode: the encoded maple node (maple_enode) to start
 * @mt: the tree to free - needed for node types.
 *
 * Must hold the write lock.
 */
static inline void mte_destroy_walk(struct maple_enode *enode,
				    struct maple_tree *mt)
{
	struct maple_node *node = mte_to_node(enode);

	/*
	 * Under RCU, readers may still hold references: mark the subtree dead
	 * now and defer the actual freeing to an RCU callback.  Otherwise the
	 * nodes can be freed immediately.
	 */
	if (mt_in_rcu(mt)) {
		mt_destroy_walk(enode, mt, false);
		call_rcu(&node->rcu, mt_free_walk);
	} else {
		mt_destroy_walk(enode, mt, true);
	}
}
/* Interface */

/**
 * mas_store() - Store an @entry.
 * @mas: The maple state.
 * @entry: The entry to store.
 *
 * The @mas->index and @mas->last are used to set the range for the @entry.
 *
 * Return: the first entry between mas->index and mas->last or %NULL.
 */
void *mas_store(struct ma_state *mas, void *entry)
{
	MA_WR_STATE(wr_mas, mas, entry);

	trace_ma_write(TP_FCT, mas, 0, entry);
#ifdef CONFIG_DEBUG_MAPLE_TREE
	if (MAS_WARN_ON(mas, mas->index > mas->last))
		pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
		       entry);

	if (mas->index > mas->last) {
		mas_set_err(mas, -EINVAL);
		return NULL;
	}

#endif

	/*
	 * Storing is the same operation as insert with the added caveat that it
	 * can overwrite entries.  Although this seems simple enough, one may
	 * want to examine what happens if a single store operation was to
	 * overwrite multiple entries within a self-balancing B-Tree.
	 */
	mas_wr_prealloc_setup(&wr_mas);
	mas->store_type = mas_wr_store_type(&wr_mas);
	/*
	 * With preallocated nodes the store must not fail; allocation at this
	 * point would be a bug (see MA_STATE_PREALLOC).
	 */
	if (mas->mas_flags & MA_STATE_PREALLOC) {
		mas_wr_store_entry(&wr_mas);
		MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
		return wr_mas.content;
	}

	mas_prealloc_calc(&wr_mas, entry);
	if (!mas->node_request)
		goto store;

	/* GFP_NOWAIT: the caller may hold locks that forbid sleeping here. */
	mas_alloc_nodes(mas, GFP_NOWAIT);
	if (mas_is_err(mas))
		return NULL;

store:
	mas_wr_store_entry(&wr_mas);
	mas_destroy(mas);
	return wr_mas.content;
}
EXPORT_SYMBOL_GPL(mas_store);

/**
 * mas_store_gfp() - Store a value into the tree.
 * @mas: The maple state
 * @entry: The entry to store
 * @gfp: The GFP_FLAGS to use for allocations if necessary.
 *
 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
 * be allocated.
 */
int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
{
	unsigned long index = mas->index;
	unsigned long last = mas->last;
	MA_WR_STATE(wr_mas, mas, entry);
	int ret = 0;

retry:
	mas_wr_preallocate(&wr_mas, entry);
	/* mas_nomem() may drop and retake the lock while allocating. */
	if (unlikely(mas_nomem(mas, gfp))) {
		if (!entry)
			__mas_set_range(mas, index, last);
		goto retry;
	}

	if (mas_is_err(mas)) {
		ret = xa_err(mas->node);
		goto out;
	}

	mas_wr_store_entry(&wr_mas);
out:
	mas_destroy(mas);
	return ret;
}
EXPORT_SYMBOL_GPL(mas_store_gfp);

/**
 * mas_store_prealloc() - Store a value into the tree using memory
 * preallocated in the maple state.
 * @mas: The maple state
 * @entry: The entry to store.
 */
void mas_store_prealloc(struct ma_state *mas, void *entry)
{
	MA_WR_STATE(wr_mas, mas, entry);

	if (mas->store_type == wr_store_root) {
		mas_wr_prealloc_setup(&wr_mas);
		goto store;
	}

	mas_wr_walk_descend(&wr_mas);
	if (mas->store_type != wr_spanning_store) {
		/* set wr_mas->content to current slot */
		wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset);
		mas_wr_end_piv(&wr_mas);
	}

store:
	trace_ma_write(TP_FCT, mas, 0, entry);
	mas_wr_store_entry(&wr_mas);
	/* Nodes were preallocated, so the store must not fail. */
	MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
	mas_destroy(mas);
}
EXPORT_SYMBOL_GPL(mas_store_prealloc);

/**
 * mas_preallocate() - Preallocate enough nodes for a store operation
 * @mas: The maple state
 * @entry: The entry that will be stored
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * Return: 0 on success, -ENOMEM if memory could not be allocated.
 */
int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
{
	MA_WR_STATE(wr_mas, mas, entry);

	mas_wr_prealloc_setup(&wr_mas);
	mas->store_type = mas_wr_store_type(&wr_mas);
	mas_prealloc_calc(&wr_mas, entry);
	if (!mas->node_request)
		goto set_flag;

	mas->mas_flags &= ~MA_STATE_PREALLOC;
	mas_alloc_nodes(mas, gfp);
	if (mas_is_err(mas)) {
		int ret = xa_err(mas->node);

		/* Clean up any partial allocations before reporting failure. */
		mas->node_request = 0;
		mas_destroy(mas);
		mas_reset(mas);
		return ret;
	}

set_flag:
	mas->mas_flags |= MA_STATE_PREALLOC;
	return 0;
}
EXPORT_SYMBOL_GPL(mas_preallocate);

/*
 * mas_destroy() - destroy a maple state.
 * @mas: The maple state
 *
 * Upon completion, check the left-most node and rebalance against the node to
 * the right if necessary.  Frees any allocated nodes associated with this maple
 * state.
 */
void mas_destroy(struct ma_state *mas)
{
	mas->mas_flags &= ~MA_STATE_PREALLOC;
	mas_empty_nodes(mas);
}
EXPORT_SYMBOL_GPL(mas_destroy);

/*
 * mas_may_activate() - Set the maple state to active if the current position
 * is still valid, otherwise restart from scratch.
 * @mas: The maple state
 *
 * Used when resuming after an overflow/underflow: the limits may have moved
 * since the state last walked the tree.
 */
static void mas_may_activate(struct ma_state *mas)
{
	if (!mas->node) {
		mas->status = ma_start;
	} else if (mas->index > mas->max || mas->index < mas->min) {
		/* The cached node no longer covers mas->index; re-walk. */
		mas->status = ma_start;
	} else {
		mas->status = ma_active;
	}
}

/*
 * mas_next_setup() - Prepare the maple state for a forward iteration step.
 * @mas: The maple state
 * @max: The maximum index to check
 * @entry: Out-pointer for an entry found during setup
 *
 * Return: true if *entry is the final answer (or iteration must stop),
 * false if the caller should continue with mas_next_slot().
 */
static bool mas_next_setup(struct ma_state *mas, unsigned long max,
		void **entry)
{
	bool was_none = mas_is_none(mas);

	if (unlikely(mas->last >= max)) {
		mas->status = ma_overflow;
		return true;
	}

	switch (mas->status) {
	case ma_active:
		return false;
	case ma_none:
		fallthrough;
	case ma_pause:
		mas->status = ma_start;
		fallthrough;
	case ma_start:
		mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
		break;
	case ma_overflow:
		/* Overflowed before, but the max changed */
		mas_may_activate(mas);
		break;
	case ma_underflow:
		/* The user expects the mas to be one before where it is */
		mas_may_activate(mas);
		*entry = mas_walk(mas);
		if (*entry)
			return true;
		break;
	case ma_root:
		break;
	case ma_error:
		return true;
	}

	if (likely(mas_is_active(mas))) /* Fast path */
		return false;

	/* Single-entry (root pointer) tree: the only entry lives at index 0. */
	if (mas_is_ptr(mas)) {
		*entry = NULL;
		if (was_none && mas->index == 0) {
			mas->index = mas->last = 0;
			return true;
		}
		mas->index = 1;
		mas->last = ULONG_MAX;
		mas->status = ma_none;
		return true;
	}

	if (mas_is_none(mas))
		return true;

	return false;
}

/**
 * mas_next() - Get the next entry.
 * @mas: The maple state
 * @max: The maximum index to check.
 *
 * Returns the next entry after @mas->index.
 * Must hold rcu_read_lock or the write lock.
 * Can return the zero entry.
 *
 * Return: The next entry or %NULL
 */
void *mas_next(struct ma_state *mas, unsigned long max)
{
	void *entry = NULL;

	if (mas_next_setup(mas, max, &entry))
		return entry;

	/* Retries on dead nodes handled by mas_next_slot */
	return mas_next_slot(mas, max, false);
}
EXPORT_SYMBOL_GPL(mas_next);

/**
 * mas_next_range() - Advance the maple state to the next range
 * @mas: The maple state
 * @max: The maximum index to check.
 *
 * Sets @mas->index and @mas->last to the range.
 * Must hold rcu_read_lock or the write lock.
 * Can return the zero entry.
 *
 * Return: The next entry or %NULL
 */
void *mas_next_range(struct ma_state *mas, unsigned long max)
{
	void *entry = NULL;

	if (mas_next_setup(mas, max, &entry))
		return entry;

	/* Retries on dead nodes handled by mas_next_slot */
	return mas_next_slot(mas, max, true);
}
EXPORT_SYMBOL_GPL(mas_next_range);

/**
 * mt_next() - get the next value in the maple tree
 * @mt: The maple tree
 * @index: The start index
 * @max: The maximum index to check
 *
 * Takes RCU read lock internally to protect the search, which does not
 * protect the returned pointer after dropping RCU read lock.
 * See also: Documentation/core-api/maple_tree.rst
 *
 * Return: The entry higher than @index or %NULL if nothing is found.
 */
void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
{
	void *entry = NULL;
	MA_STATE(mas, mt, index, index);

	rcu_read_lock();
	entry = mas_next(&mas, max);
	rcu_read_unlock();
	return entry;
}
EXPORT_SYMBOL_GPL(mt_next);

/*
 * mas_prev_setup() - Prepare the maple state for a backward iteration step.
 * @mas: The maple state
 * @min: The minimum index to check
 * @entry: Out-pointer for an entry found during setup
 *
 * Return: true if *entry is the final answer (or iteration must stop),
 * false if the caller should continue with mas_prev_slot().
 */
static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry)
{
	if (unlikely(mas->index <= min)) {
		mas->status = ma_underflow;
		return true;
	}

	switch (mas->status) {
	case ma_active:
		return false;
	case ma_start:
		break;
	case ma_none:
		fallthrough;
	case ma_pause:
		mas->status = ma_start;
		break;
	case ma_underflow:
		/* underflowed before but the min changed */
		mas_may_activate(mas);
		break;
	case ma_overflow:
		/* User expects mas to be one after where it is */
		mas_may_activate(mas);
		*entry = mas_walk(mas);
		if (*entry)
			return true;
		break;
	case ma_root:
		break;
	case ma_error:
		return true;
	}

	if (mas_is_start(mas))
		mas_walk(mas);

	/* Single-entry (root pointer) tree: the only entry lives at index 0. */
	if (unlikely(mas_is_ptr(mas))) {
		if (!mas->index) {
			mas->status = ma_none;
			return true;
		}
		mas->index = mas->last = 0;
		*entry = mas_root(mas);
		return true;
	}

	if (mas_is_none(mas)) {
		if (mas->index) {
			/* Walked to out-of-range pointer? */
			mas->index = mas->last = 0;
			mas->status = ma_root;
			*entry = mas_root(mas);
			return true;
		}
		return true;
	}

	return false;
}

/**
 * mas_prev() - Get the previous entry
 * @mas: The maple state
 * @min: The minimum value to check.
 *
 * Must hold rcu_read_lock or the write lock.
 * Will reset mas to ma_start if the status is ma_none.  Will stop on not
 * searchable nodes.
 *
 * Return: the previous value or %NULL.
 */
void *mas_prev(struct ma_state *mas, unsigned long min)
{
	void *entry = NULL;

	if (mas_prev_setup(mas, min, &entry))
		return entry;

	return mas_prev_slot(mas, min, false);
}
EXPORT_SYMBOL_GPL(mas_prev);

/**
 * mas_prev_range() - Advance to the previous range
 * @mas: The maple state
 * @min: The minimum value to check.
 *
 * Sets @mas->index and @mas->last to the range.
 * Must hold rcu_read_lock or the write lock.
 * Will reset mas to ma_start if the node is ma_none.  Will stop on not
 * searchable nodes.
 *
 * Return: the previous value or %NULL.
 */
void *mas_prev_range(struct ma_state *mas, unsigned long min)
{
	void *entry = NULL;

	if (mas_prev_setup(mas, min, &entry))
		return entry;

	return mas_prev_slot(mas, min, true);
}
EXPORT_SYMBOL_GPL(mas_prev_range);

/**
 * mt_prev() - get the previous value in the maple tree
 * @mt: The maple tree
 * @index: The start index
 * @min: The minimum index to check
 *
 * Takes RCU read lock internally to protect the search, which does not
 * protect the returned pointer after dropping RCU read lock.
 * See also: Documentation/core-api/maple_tree.rst
 *
 * Return: The entry before @index or %NULL if nothing is found.
 */
void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
{
	void *entry = NULL;
	MA_STATE(mas, mt, index, index);

	rcu_read_lock();
	entry = mas_prev(&mas, min);
	rcu_read_unlock();
	return entry;
}
EXPORT_SYMBOL_GPL(mt_prev);

/**
 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
 * @mas: The maple state to pause
 *
 * Some users need to pause a walk and drop the lock they're holding in
 * order to yield to a higher priority thread or carry out an operation
 * on an entry.  Those users should call this function before they drop
 * the lock.  It resets the @mas to be suitable for the next iteration
 * of the loop after the user has reacquired the lock.  If most entries
 * found during a walk require you to call mas_pause(), the mt_for_each()
 * iterator may be more appropriate.
 *
 */
void mas_pause(struct ma_state *mas)
{
	mas->status = ma_pause;
	mas->node = NULL;
}
EXPORT_SYMBOL_GPL(mas_pause);

/**
 * mas_find_setup() - Internal function to set up mas_find*().
 * @mas: The maple state
 * @max: The maximum index
 * @entry: Pointer to the entry
 *
 * Returns: True if entry is the answer, false otherwise.
 */
static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry)
{
	switch (mas->status) {
	case ma_active:
		if (mas->last < max)
			return false;
		return true;
	case ma_start:
		break;
	case ma_pause:
		if (unlikely(mas->last >= max))
			return true;

		/* Resume one past the previously returned range. */
		mas->index = ++mas->last;
		mas->status = ma_start;
		break;
	case ma_none:
		if (unlikely(mas->last >= max))
			return true;

		mas->index = mas->last;
		mas->status = ma_start;
		break;
	case ma_underflow:
		/* mas is pointing at entry before unable to go lower */
		if (unlikely(mas->index >= max)) {
			mas->status = ma_overflow;
			return true;
		}

		mas_may_activate(mas);
		*entry = mas_walk(mas);
		if (*entry)
			return true;
		break;
	case ma_overflow:
		if (unlikely(mas->last >= max))
			return true;

		mas_may_activate(mas);
		*entry = mas_walk(mas);
		if (*entry)
			return true;
		break;
	case ma_root:
		break;
	case ma_error:
		return true;
	}

	if (mas_is_start(mas)) {
		/* First run or continue */
		if (mas->index > max)
			return true;

		*entry = mas_walk(mas);
		if (*entry)
			return true;

	}

	if (unlikely(mas_is_ptr(mas)))
		goto ptr_out_of_range;

	if (unlikely(mas_is_none(mas)))
		return true;

	if (mas->index == max)
		return true;

	return false;

ptr_out_of_range:
	/* Root-pointer tree but the search starts past index 0: no entries. */
	mas->status = ma_none;
	mas->index = 1;
	mas->last = ULONG_MAX;
	return true;
}

/**
 * mas_find() - On the first call, find the entry at or after mas->index up to
 * %max.  Otherwise, find the entry after mas->index.
 * @mas: The maple state
 * @max: The maximum value to check.
 *
 * Must hold rcu_read_lock or the write lock.
 * If an entry exists, last and index are updated accordingly.
 * May set @mas->status to ma_overflow.
 *
 * Return: The entry or %NULL.
 */
void *mas_find(struct ma_state *mas, unsigned long max)
{
	void *entry = NULL;

	if (mas_find_setup(mas, max, &entry))
		return entry;

	/* Retries on dead nodes handled by mas_next_slot */
	entry = mas_next_slot(mas, max, false);
	/* Ignore overflow */
	mas->status = ma_active;
	return entry;
}
EXPORT_SYMBOL_GPL(mas_find);

/**
 * mas_find_range() - On the first call, find the entry at or after
 * mas->index up to %max.  Otherwise, advance to the next slot mas->index.
 * @mas: The maple state
 * @max: The maximum value to check.
 *
 * Must hold rcu_read_lock or the write lock.
 * If an entry exists, last and index are updated accordingly.
 * May set @mas->status to ma_overflow.
 *
 * Return: The entry or %NULL.
 */
void *mas_find_range(struct ma_state *mas, unsigned long max)
{
	void *entry = NULL;

	if (mas_find_setup(mas, max, &entry))
		return entry;

	/* Retries on dead nodes handled by mas_next_slot */
	return mas_next_slot(mas, max, true);
}
EXPORT_SYMBOL_GPL(mas_find_range);

/**
 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev()
 * @mas: The maple state
 * @min: The minimum index
 * @entry: Pointer to the entry
 *
 * Returns: True if entry is the answer, false otherwise.
 */
static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min,
		void **entry)
{

	switch (mas->status) {
	case ma_active:
		goto active;
	case ma_start:
		break;
	case ma_pause:
		if (unlikely(mas->index <= min)) {
			mas->status = ma_underflow;
			return true;
		}
		/* Resume one before the previously returned range. */
		mas->last = --mas->index;
		mas->status = ma_start;
		break;
	case ma_none:
		if (mas->index <= min)
			goto none;

		mas->last = mas->index;
		mas->status = ma_start;
		break;
	case ma_overflow: /* user expects the mas to be one after where it is */
		if (unlikely(mas->index <= min)) {
			mas->status = ma_underflow;
			return true;
		}

		mas->status = ma_active;
		break;
	case ma_underflow: /* user expects the mas to be one before where it is */
		if (unlikely(mas->index <= min))
			return true;

		mas->status = ma_active;
		break;
	case ma_root:
		break;
	case ma_error:
		return true;
	}

	if (mas_is_start(mas)) {
		/* First run or continue */
		if (mas->index < min)
			return true;

		*entry = mas_walk(mas);
		if (*entry)
			return true;
	}

	if (unlikely(mas_is_ptr(mas)))
		goto none;

	if (unlikely(mas_is_none(mas))) {
		/*
		 * Walked to the location, and there was nothing so the previous
		 * location is 0.
		 */
		mas->last = mas->index = 0;
		mas->status = ma_root;
		*entry = mas_root(mas);
		return true;
	}

active:
	if (mas->index < min)
		return true;

	return false;

none:
	mas->status = ma_none;
	return true;
}

/**
 * mas_find_rev: On the first call, find the first non-null entry at or below
 * mas->index down to %min.  Otherwise find the first non-null entry below
 * mas->index down to %min.
 * @mas: The maple state
 * @min: The minimum value to check.
 *
 * Must hold rcu_read_lock or the write lock.
 * If an entry exists, last and index are updated accordingly.
 * May set @mas->status to ma_underflow.
 *
 * Return: The entry or %NULL.
 */
void *mas_find_rev(struct ma_state *mas, unsigned long min)
{
	void *entry = NULL;

	if (mas_find_rev_setup(mas, min, &entry))
		return entry;

	/* Retries on dead nodes handled by mas_prev_slot */
	return mas_prev_slot(mas, min, false);

}
EXPORT_SYMBOL_GPL(mas_find_rev);

/**
 * mas_find_range_rev: On the first call, find the first non-null entry at or
 * below mas->index down to %min.  Otherwise advance to the previous slot after
 * mas->index down to %min.
 * @mas: The maple state
 * @min: The minimum value to check.
 *
 * Must hold rcu_read_lock or the write lock.
 * If an entry exists, last and index are updated accordingly.
 * May set @mas->status to ma_underflow.
 *
 * Return: The entry or %NULL.
 */
void *mas_find_range_rev(struct ma_state *mas, unsigned long min)
{
	void *entry = NULL;

	if (mas_find_rev_setup(mas, min, &entry))
		return entry;

	/* Retries on dead nodes handled by mas_prev_slot */
	return mas_prev_slot(mas, min, true);
}
EXPORT_SYMBOL_GPL(mas_find_range_rev);

/**
 * mas_erase() - Find the range in which index resides and erase the entire
 * range.
 * @mas: The maple state
 *
 * Must hold the write lock.
 * Searches for @mas->index, sets @mas->index and @mas->last to the range and
 * erases that range.
 *
 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated.
 */
void *mas_erase(struct ma_state *mas)
{
	void *entry;
	unsigned long index = mas->index;
	MA_WR_STATE(wr_mas, mas, NULL);

	/*
	 * NOTE(review): this condition is true for every status (a state
	 * cannot be both active and start), so the state is always reset.
	 * Possibly '&&' was intended — confirm against upstream history
	 * before changing, as resetting is the safe behavior.
	 */
	if (!mas_is_active(mas) || !mas_is_start(mas))
		mas->status = ma_start;

write_retry:
	entry = mas_state_walk(mas);
	if (!entry)
		return NULL;

	/* Must reset to ensure spanning writes of last slot are detected */
	mas_reset(mas);
	mas_wr_preallocate(&wr_mas, NULL);
	if (mas_nomem(mas, GFP_KERNEL)) {
		/* in case the range of entry changed when unlocked */
		mas->index = mas->last = index;
		goto write_retry;
	}

	if (mas_is_err(mas))
		goto out;

	mas_wr_store_entry(&wr_mas);
out:
	mas_destroy(mas);
	return entry;
}
EXPORT_SYMBOL_GPL(mas_erase);

/**
 * mas_nomem() - Check if there was an error allocating and do the allocation
 * if necessary.
 * @mas: The maple state
 * @gfp: The GFP_FLAGS to use for allocations
 * Return: true on allocation, false otherwise.
 */
bool mas_nomem(struct ma_state *mas, gfp_t gfp)
	__must_hold(mas->tree->ma_lock)
{
	if (likely(mas->node != MA_ERROR(-ENOMEM)))
		return false;

	/*
	 * Drop the internal tree lock around a sleeping allocation; an
	 * externally locked tree must not have its lock dropped here.
	 */
	if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
		mtree_unlock(mas->tree);
		mas_alloc_nodes(mas, gfp);
		mtree_lock(mas->tree);
	} else {
		mas_alloc_nodes(mas, gfp);
	}

	if (!mas->sheaf && !mas->alloc)
		return false;

	mas->status = ma_start;
	return true;
}

void __init maple_tree_init(void)
{
	struct kmem_cache_args args = {
		.align	= sizeof(struct maple_node),
		.sheaf_capacity	= 32,
	};

	maple_node_cache = kmem_cache_create("maple_node",
			sizeof(struct maple_node), &args,
			SLAB_PANIC);
}

/**
 * mtree_load() - Load a value stored in a maple tree
 * @mt: The maple tree
 * @index: The index to load
 *
 * Return: the entry or %NULL
 */
void *mtree_load(struct maple_tree *mt, unsigned long index)
{
	MA_STATE(mas, mt, index, index);
	void *entry;

	trace_ma_read(TP_FCT, &mas);
	rcu_read_lock();
retry:
	entry = mas_start(&mas);
	if (unlikely(mas_is_none(&mas)))
		goto unlock;

	/* Root-pointer tree: the single entry is only valid at index 0. */
	if (unlikely(mas_is_ptr(&mas))) {
		if (index)
			entry = NULL;

		goto unlock;
	}

	entry = mtree_lookup_walk(&mas);
	/* A dead node resets the state to start; retry the walk. */
	if (!entry && unlikely(mas_is_start(&mas)))
		goto retry;
unlock:
	rcu_read_unlock();
	if (xa_is_zero(entry))
		return NULL;

	return entry;
}
EXPORT_SYMBOL(mtree_load);

/**
 * mtree_store_range() - Store an entry at a given range.
 * @mt: The maple tree
 * @index: The start of the range
 * @last: The end of the range
 * @entry: The entry to store
 * @gfp: The GFP_FLAGS to use for allocations
 *
 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
 * be allocated.
 */
int mtree_store_range(struct maple_tree *mt, unsigned long index,
		unsigned long last, void *entry, gfp_t gfp)
{
	MA_STATE(mas, mt, index, last);
	int ret = 0;

	trace_ma_write(TP_FCT, &mas, 0, entry);
	/* Advanced (internal-use) entries must not enter via this API. */
	if (WARN_ON_ONCE(xa_is_advanced(entry)))
		return -EINVAL;

	if (index > last)
		return -EINVAL;

	mtree_lock(mt);
	ret = mas_store_gfp(&mas, entry, gfp);
	mtree_unlock(mt);

	return ret;
}
EXPORT_SYMBOL(mtree_store_range);

/**
 * mtree_store() - Store an entry at a given index.
 * @mt: The maple tree
 * @index: The index to store the value
 * @entry: The entry to store
 * @gfp: The GFP_FLAGS to use for allocations
 *
 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
 * be allocated.
 */
int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
		 gfp_t gfp)
{
	return mtree_store_range(mt, index, index, entry, gfp);
}
EXPORT_SYMBOL(mtree_store);

/**
 * mtree_insert_range() - Insert an entry at a given range if there is no value.
 * @mt: The maple tree
 * @first: The start of the range
 * @last: The end of the range
 * @entry: The entry to store
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
 * request, -ENOMEM if memory could not be allocated.
 */
int mtree_insert_range(struct maple_tree *mt, unsigned long first,
		unsigned long last, void *entry, gfp_t gfp)
{
	MA_STATE(ms, mt, first, last);
	int ret = 0;

	if (WARN_ON_ONCE(xa_is_advanced(entry)))
		return -EINVAL;

	if (first > last)
		return -EINVAL;

	mtree_lock(mt);
retry:
	mas_insert(&ms, entry);
	/* mas_nomem() may drop and retake the lock while allocating. */
	if (mas_nomem(&ms, gfp))
		goto retry;

	mtree_unlock(mt);
	if (mas_is_err(&ms))
		ret = xa_err(ms.node);

	mas_destroy(&ms);
	return ret;
}
EXPORT_SYMBOL(mtree_insert_range);

/**
 * mtree_insert() - Insert an entry at a given index if there is no value.
 * @mt: The maple tree
 * @index : The index to store the value
 * @entry: The entry to store
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
 * request, -ENOMEM if memory could not be allocated.
 */
int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
		 gfp_t gfp)
{
	return mtree_insert_range(mt, index, index, entry, gfp);
}
EXPORT_SYMBOL(mtree_insert);

/*
 * mtree_alloc_range() - Find an empty span of at least @size between @min and
 * @max, insert @entry there and report the chosen start through @startp.
 */
int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
		void *entry, unsigned long size, unsigned long min,
		unsigned long max, gfp_t gfp)
{
	int ret = 0;

	MA_STATE(mas, mt, 0, 0);
	/* Gap tracking requires an allocation tree (MT_FLAGS_ALLOC_RANGE). */
	if (!mt_is_alloc(mt))
		return -EINVAL;

	if (WARN_ON_ONCE(mt_is_reserved(entry)))
		return -EINVAL;

	mtree_lock(mt);
retry:
	ret = mas_empty_area(&mas, min, max, size);
	if (ret)
		goto unlock;

	mas_insert(&mas, entry);
	/*
	 * mas_nomem() may release the lock, causing the allocated area
	 * to be unavailable, so try to allocate a free area again.
	 */
	if (mas_nomem(&mas, gfp))
		goto retry;

	if (mas_is_err(&mas))
		ret = xa_err(mas.node);
	else
		*startp = mas.index;

unlock:
	mtree_unlock(mt);
	mas_destroy(&mas);
	return ret;
}
EXPORT_SYMBOL(mtree_alloc_range);

/**
 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
 * @mt: The maple tree.
 * @startp: Pointer to ID.
 * @range_lo: Lower bound of range to search.
 * @range_hi: Upper bound of range to search.
 * @entry: The entry to store.
 * @next: Pointer to next ID to allocate.
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * Finds an empty entry in @mt after @next, stores the new index into
 * the @id pointer, stores the entry at that index, then updates @next.
 *
 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
 *
 * Context: Any context.  Takes and releases the mt.lock.  May sleep if
 * the @gfp flags permit.
 *
 * Return: 0 if the allocation succeeded without wrapping, 1 if the
 * allocation succeeded after wrapping, -ENOMEM if memory could not be
 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
 * free entries.
5845 */ 5846 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, 5847 void *entry, unsigned long range_lo, unsigned long range_hi, 5848 unsigned long *next, gfp_t gfp) 5849 { 5850 int ret; 5851 5852 MA_STATE(mas, mt, 0, 0); 5853 5854 if (!mt_is_alloc(mt)) 5855 return -EINVAL; 5856 if (WARN_ON_ONCE(mt_is_reserved(entry))) 5857 return -EINVAL; 5858 mtree_lock(mt); 5859 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, 5860 next, gfp); 5861 mtree_unlock(mt); 5862 return ret; 5863 } 5864 EXPORT_SYMBOL(mtree_alloc_cyclic); 5865 5866 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 5867 void *entry, unsigned long size, unsigned long min, 5868 unsigned long max, gfp_t gfp) 5869 { 5870 int ret = 0; 5871 5872 MA_STATE(mas, mt, 0, 0); 5873 if (!mt_is_alloc(mt)) 5874 return -EINVAL; 5875 5876 if (WARN_ON_ONCE(mt_is_reserved(entry))) 5877 return -EINVAL; 5878 5879 mtree_lock(mt); 5880 retry: 5881 ret = mas_empty_area_rev(&mas, min, max, size); 5882 if (ret) 5883 goto unlock; 5884 5885 mas_insert(&mas, entry); 5886 /* 5887 * mas_nomem() may release the lock, causing the allocated area 5888 * to be unavailable, so try to allocate a free area again. 5889 */ 5890 if (mas_nomem(&mas, gfp)) 5891 goto retry; 5892 5893 if (mas_is_err(&mas)) 5894 ret = xa_err(mas.node); 5895 else 5896 *startp = mas.index; 5897 5898 unlock: 5899 mtree_unlock(mt); 5900 mas_destroy(&mas); 5901 return ret; 5902 } 5903 EXPORT_SYMBOL(mtree_alloc_rrange); 5904 5905 /** 5906 * mtree_erase() - Find an index and erase the entire range. 5907 * @mt: The maple tree 5908 * @index: The index to erase 5909 * 5910 * Erasing is the same as a walk to an entry then a store of a NULL to that 5911 * ENTIRE range. In fact, it is implemented as such using the advanced API. 
 *
 * Return: The entry stored at the @index or %NULL
 */
void *mtree_erase(struct maple_tree *mt, unsigned long index)
{
	void *entry = NULL;

	MA_STATE(mas, mt, index, index);
	trace_ma_op(TP_FCT, &mas);

	mtree_lock(mt);
	entry = mas_erase(&mas);
	mtree_unlock(mt);

	return entry;
}
EXPORT_SYMBOL(mtree_erase);

/*
 * mas_dup_free() - Free an incomplete duplication of a tree.
 * @mas: The maple state of an incomplete tree.
 *
 * The parameter @mas->node passed in indicates that the allocation failed on
 * this node. This function frees all nodes starting from @mas->node in the
 * reverse order of mas_dup_build(). There is no need to hold the source tree
 * lock at this time.
 */
static void mas_dup_free(struct ma_state *mas)
{
	struct maple_node *node;
	enum maple_type type;
	void __rcu **slots;
	unsigned char count, i;

	/* Maybe the first node allocation failed. */
	if (mas_is_none(mas))
		return;

	/* Walk the partially-built tree bottom-up, right-to-left. */
	while (!mte_is_root(mas->node)) {
		mas_ascend(mas);
		if (mas->offset) {
			mas->offset--;
			do {
				mas_descend(mas);
				mas->offset = mas_data_end(mas);
			} while (!mte_is_leaf(mas->node));

			mas_ascend(mas);
		}

		node = mte_to_node(mas->node);
		type = mte_node_type(mas->node);
		slots = ma_slots(node, type);
		count = mas_data_end(mas) + 1;
		/*
		 * Strip the encoded-node metadata bits so the slots become
		 * plain node pointers suitable for mt_free_bulk().
		 */
		for (i = 0; i < count; i++)
			((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
		mt_free_bulk(count, slots);
	}

	/* Finally free the (partial) root itself. */
	node = mte_to_node(mas->node);
	kfree(node);
}

/*
 * mas_copy_node() - Copy a maple node and replace the parent.
 * @mas: The maple state of source tree.
 * @new_mas: The maple state of new tree.
 * @parent: The parent of the new node.
 *
 * Copy @mas->node to @new_mas->node, set @parent to be the parent of
 * @new_mas->node.
 * This function performs no allocation itself and cannot fail.
 */
static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas,
		struct maple_pnode *parent)
{
	struct maple_node *node = mte_to_node(mas->node);
	struct maple_node *new_node = mte_to_node(new_mas->node);
	unsigned long val;

	/* Copy the node completely. */
	memcpy(new_node, node, sizeof(struct maple_node));
	/* Update the parent node pointer, keeping the low metadata bits. */
	val = (unsigned long)node->parent & MAPLE_NODE_MASK;
	new_node->parent = ma_parent_ptr(val | (unsigned long)parent);
}

/*
 * mas_dup_alloc() - Allocate child nodes for a maple node.
 * @mas: The maple state of source tree.
 * @new_mas: The maple state of new tree.
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * This function allocates child nodes for @new_mas->node during the duplication
 * process. If memory allocation fails, @mas is set to -ENOMEM.
 */
static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
		gfp_t gfp)
{
	struct maple_node *node = mte_to_node(mas->node);
	struct maple_node *new_node = mte_to_node(new_mas->node);
	enum maple_type type;
	unsigned char count, i;
	void __rcu **slots;
	void __rcu **new_slots;
	unsigned long val;

	/* Allocate memory for child nodes. */
	type = mte_node_type(mas->node);
	new_slots = ma_slots(new_node, type);
	count = mas->node_request = mas_data_end(mas) + 1;
	mas_alloc_nodes(mas, gfp);
	if (unlikely(mas_is_err(mas)))
		return;

	/*
	 * Install a fresh node in each slot, preserving the source slot's
	 * encoded metadata bits (node type etc.).
	 */
	slots = ma_slots(node, type);
	for (i = 0; i < count; i++) {
		val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
		val &= MAPLE_NODE_MASK;
		/*
		 * Warning, see rcu_assign_pointer() documentation. Since this
		 * is a duplication of a tree, there are no readers walking the
		 * tree until after the rcu_assign_pointer() call in
		 * mas_dup_build().
		 */
		RCU_INIT_POINTER(new_slots[i],
				 ma_mnode_ptr((unsigned long)mas_pop_node(mas) |
					      val));
	}
}

/*
 * mas_dup_build() - Build a new maple tree from a source tree
 * @mas: The maple state of source tree, need to be in MAS_START state.
 * @new_mas: The maple state of new tree, need to be in MAS_START state.
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * This function builds a new tree in DFS preorder. If the memory allocation
 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the
 * last node. mas_dup_free() will free the incomplete duplication of a tree.
 *
 * Note that the attributes of the two trees need to be exactly the same, and the
 * new tree needs to be empty, otherwise -EINVAL will be set in @mas.
 */
static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
		gfp_t gfp)
{
	struct maple_node *node;
	struct maple_pnode *parent = NULL;
	struct maple_enode *root;
	enum maple_type type;

	if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) ||
	    unlikely(!mtree_empty(new_mas->tree))) {
		mas_set_err(mas, -EINVAL);
		return;
	}

	root = mas_start(mas);
	/* An empty tree or a single root entry needs no node copies. */
	if (mas_is_ptr(mas) || mas_is_none(mas))
		goto set_new_tree;

	node = mt_alloc_one(gfp);
	if (!node) {
		/* ma_none tells mas_dup_free() there is nothing to free. */
		new_mas->status = ma_none;
		mas_set_err(mas, -ENOMEM);
		return;
	}

	type = mte_node_type(mas->node);
	root = mt_mk_node(node, type);
	new_mas->node = root;
	new_mas->min = 0;
	new_mas->max = ULONG_MAX;
	root = mte_mk_root(root);
	while (1) {
		mas_copy_node(mas, new_mas, parent);
		if (!mte_is_leaf(mas->node)) {
			/* Only allocate child nodes for non-leaf nodes. */
			mas_dup_alloc(mas, new_mas, gfp);
			if (unlikely(mas_is_err(mas)))
				goto empty_mas;
		} else {
			/*
			 * This is the last leaf node and duplication is
			 * completed.
			 */
			if (mas->max == ULONG_MAX)
				goto done;

			/* This is not the last leaf node and needs to go up. */
			do {
				mas_ascend(mas);
				mas_ascend(new_mas);
			} while (mas->offset == mas_data_end(mas));

			/* Move to the next subtree. */
			mas->offset++;
			new_mas->offset++;
		}

		mas_descend(mas);
		parent = ma_parent_ptr(mte_to_node(new_mas->node));
		mas_descend(new_mas);
		mas->offset = 0;
		new_mas->offset = 0;
	}
done:
	/* Specially handle the parent of the root node. */
	mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas));
set_new_tree:
	/* Make them the same height (the height lives in ma_flags). */
	new_mas->tree->ma_flags = mas->tree->ma_flags;
	rcu_assign_pointer(new_mas->tree->ma_root, root);
empty_mas:
	mas_empty_nodes(mas);
}

/**
 * __mt_dup(): Duplicate an entire maple tree
 * @mt: The source maple tree
 * @new: The new maple tree
 * @gfp: The GFP_FLAGS to use for allocations
 *
 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
 * traversal. It uses memcpy() to copy nodes in the source tree and allocate
 * new child nodes in non-leaf nodes. The new node is exactly the same as the
 * source node except for all the addresses stored in it. It will be faster than
 * traversing all elements in the source tree and inserting them one by one into
 * the new tree.
 * The user needs to ensure that the attributes of the source tree and the new
 * tree are the same, and the new tree needs to be an empty tree, otherwise
 * -EINVAL will be returned.
 * Note that the user needs to manually lock the source tree and the new tree.
 *
 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
 * the attributes of the two trees are different or the new tree is not an empty
 * tree.
 */
int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
{
	int ret = 0;
	MA_STATE(mas, mt, 0, 0);
	MA_STATE(new_mas, new, 0, 0);

	mas_dup_build(&mas, &new_mas, gfp);
	if (unlikely(mas_is_err(&mas))) {
		ret = xa_err(mas.node);
		/* Only a partial build (-ENOMEM) leaves nodes to free. */
		if (ret == -ENOMEM)
			mas_dup_free(&new_mas);
	}

	return ret;
}
EXPORT_SYMBOL(__mt_dup);

/**
 * mtree_dup(): Duplicate an entire maple tree
 * @mt: The source maple tree
 * @new: The new maple tree
 * @gfp: The GFP_FLAGS to use for allocations
 *
 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
 * traversal. It uses memcpy() to copy nodes in the source tree and allocate
 * new child nodes in non-leaf nodes. The new node is exactly the same as the
 * source node except for all the addresses stored in it. It will be faster than
 * traversing all elements in the source tree and inserting them one by one into
 * the new tree.
 * The user needs to ensure that the attributes of the source tree and the new
 * tree are the same, and the new tree needs to be an empty tree, otherwise
 * -EINVAL will be returned.
 */
int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
{
	int ret = 0;
	MA_STATE(mas, mt, 0, 0);
	MA_STATE(new_mas, new, 0, 0);

	/*
	 * Lock the destination first, then the source with a nested
	 * annotation — presumably to keep lockdep happy since both locks
	 * are of the same class; confirm against lockdep documentation.
	 */
	mas_lock(&new_mas);
	mas_lock_nested(&mas, SINGLE_DEPTH_NESTING);
	mas_dup_build(&mas, &new_mas, gfp);
	mas_unlock(&mas);
	if (unlikely(mas_is_err(&mas))) {
		ret = xa_err(mas.node);
		/* Only a partial build (-ENOMEM) leaves nodes to free. */
		if (ret == -ENOMEM)
			mas_dup_free(&new_mas);
	}

	mas_unlock(&new_mas);
	return ret;
}
EXPORT_SYMBOL(mtree_dup);

/**
 * __mt_destroy() - Walk and free all nodes of a locked maple tree.
 * @mt: The maple tree
 *
 * Note: Does not handle locking.
 */
void __mt_destroy(struct maple_tree *mt)
{
	void *root = mt_root_locked(mt);

	rcu_assign_pointer(mt->ma_root, NULL);
	if (xa_is_node(root))
		mte_destroy_walk(root, mt);

	/* Reset the flags to the initial attributes (drops height etc.). */
	mt->ma_flags = mt_attr(mt);
}
EXPORT_SYMBOL_GPL(__mt_destroy);

/**
 * mtree_destroy() - Destroy a maple tree
 * @mt: The maple tree
 *
 * Frees all resources used by the tree. Handles locking.
 */
void mtree_destroy(struct maple_tree *mt)
{
	mtree_lock(mt);
	__mt_destroy(mt);
	mtree_unlock(mt);
}
EXPORT_SYMBOL(mtree_destroy);

/**
 * mt_find() - Search from the start up until an entry is found.
 * @mt: The maple tree
 * @index: Pointer which contains the start location of the search
 * @max: The maximum value of the search range
 *
 * Takes RCU read lock internally to protect the search, which does not
 * protect the returned pointer after dropping RCU read lock.
 * See also: Documentation/core-api/maple_tree.rst
 *
 * In case that an entry is found @index is updated to point to the next
 * possible entry independent whether the found entry is occupying a
 * single index or a range of indices.
 *
 * Return: The entry at or after the @index or %NULL
 */
void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
{
	MA_STATE(mas, mt, *index, *index);
	void *entry;
#ifdef CONFIG_DEBUG_MAPLE_TREE
	unsigned long copy = *index;	/* Saved to verify @index advances. */
#endif

	trace_ma_read(TP_FCT, &mas);

	if ((*index) > max)
		return NULL;

	rcu_read_lock();
retry:
	entry = mas_state_walk(&mas);
	/* A restarted state means the walk raced a modification; redo it. */
	if (mas_is_start(&mas))
		goto retry;

	if (unlikely(xa_is_zero(entry)))
		entry = NULL;

	if (entry)
		goto unlock;

	/* Nothing at *index itself; scan forward up to @max. */
	while (mas_is_active(&mas) && (mas.last < max)) {
		entry = mas_next_slot(&mas, max, false);
		if (likely(entry && !xa_is_zero(entry)))
			break;
	}

	if (unlikely(xa_is_zero(entry)))
		entry = NULL;
unlock:
	rcu_read_unlock();
	if (likely(entry)) {
		/* Advance past the entire range occupied by the entry. */
		*index = mas.last + 1;
#ifdef CONFIG_DEBUG_MAPLE_TREE
		if (MT_WARN_ON(mt, (*index) && ((*index) <= copy)))
			pr_err("index not increased! %lx <= %lx\n",
			       *index, copy);
#endif
	}

	return entry;
}
EXPORT_SYMBOL(mt_find);

/**
 * mt_find_after() - Search from the start up until an entry is found.
 * @mt: The maple tree
 * @index: Pointer which contains the start location of the search
 * @max: The maximum value to check
 *
 * Same as mt_find() except that it checks @index for 0 before
 * searching. If @index == 0, the search is aborted. This covers a wrap
 * around of @index to 0 in an iterator loop.
 *
 * Return: The entry at or after the @index or %NULL
 */
void *mt_find_after(struct maple_tree *mt, unsigned long *index,
		    unsigned long max)
{
	if (!(*index))
		return NULL;

	return mt_find(mt, index, max);
}
EXPORT_SYMBOL(mt_find_after);

#ifdef CONFIG_DEBUG_MAPLE_TREE
atomic_t maple_tree_tests_run;
EXPORT_SYMBOL_GPL(maple_tree_tests_run);
atomic_t maple_tree_tests_passed;
EXPORT_SYMBOL_GPL(maple_tree_tests_passed);

#ifndef __KERNEL__
/*
 * Userland-only hooks into the slab shim; the kmem_cache_* externs below
 * are presumably provided by the userspace test harness build.
 */
extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
void mt_set_non_kernel(unsigned int val)
{
	kmem_cache_set_non_kernel(maple_node_cache, val);
}

extern void kmem_cache_set_callback(struct kmem_cache *cachep,
		void (*callback)(void *));
void mt_set_callback(void (*callback)(void *))
{
	kmem_cache_set_callback(maple_node_cache, callback);
}

extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private);
void mt_set_private(void *private)
{
	kmem_cache_set_private(maple_node_cache, private);
}

extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
unsigned long mt_get_alloc_size(void)
{
	return kmem_cache_get_alloc(maple_node_cache);
}

extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
void mt_zero_nr_tallocated(void)
{
	kmem_cache_zero_nr_tallocated(maple_node_cache);
}

extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
unsigned int mt_nr_tallocated(void)
{
	return kmem_cache_nr_tallocated(maple_node_cache);
}

extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
unsigned int mt_nr_allocated(void)
{
	return kmem_cache_nr_allocated(maple_node_cache);
}

/* No-op in userland; the kernel variant below shrinks the slab cache. */
void mt_cache_shrink(void)
{
}
#else
/*
 * mt_cache_shrink() - For
 * testing, don't use this.
 *
 * Certain testcases can trigger an OOM when combined with other memory
 * debugging configuration options. This function is used to reduce the
 * possibility of an out of memory event due to kmem_cache objects remaining
 * around for longer than usual.
 */
void mt_cache_shrink(void)
{
	kmem_cache_shrink(maple_node_cache);

}
EXPORT_SYMBOL_GPL(mt_cache_shrink);

#endif /* not defined __KERNEL__ */
/*
 * mas_get_slot() - Get the entry in the maple state node stored at @offset.
 * @mas: The maple state
 * @offset: The offset into the slot array to fetch.
 *
 * Return: The entry stored at @offset.
 */
static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
		unsigned char offset)
{
	return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
			offset);
}

/* Depth first search, post-order */
static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
{

	struct maple_enode *p, *mn = mas->node;
	unsigned long p_min, p_max;

	mas_next_node(mas, mas_mn(mas), max);
	if (!mas_is_overflow(mas))
		return;

	if (mte_is_root(mn))
		return;

	/* No sibling to the right; visit the parent's subtree instead. */
	mas->node = mn;
	mas_ascend(mas);
	do {
		p = mas->node;
		p_min = mas->min;
		p_max = mas->max;
		mas_prev_node(mas, 0);
	} while (!mas_is_underflow(mas));

	mas->node = p;
	mas->max = p_max;
	mas->min = p_min;
}

/* Tree validations */
static void mt_dump_node(const struct maple_tree *mt, void *entry,
		unsigned long min, unsigned long max, unsigned int depth,
		enum mt_dump_format format);
/* Print the "min-max: " prefix, indented two spaces per tree level. */
static void mt_dump_range(unsigned long min, unsigned long max,
		unsigned int depth, enum mt_dump_format format)
{
	static const char spaces[] = "                                ";

	switch(format) {
	case mt_dump_hex:
		if (min == max)
			pr_info("%.*s%lx: ", depth * 2, spaces, min);
		else
			pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max);
		break;
	case mt_dump_dec:
		if (min == max)
			pr_info("%.*s%lu: ", depth * 2, spaces, min);
		else
			pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
	}
}

/* Print a single entry, distinguishing value/zero/reserved entries. */
static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
		unsigned int depth, enum mt_dump_format format)
{
	mt_dump_range(min, max, depth, format);

	if (xa_is_value(entry))
		pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry),
			xa_to_value(entry), entry);
	else if (xa_is_zero(entry))
		pr_cont("zero (%ld)\n", xa_to_internal(entry));
	else if (mt_is_reserved(entry))
		pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry);
	else
		pr_cont(PTR_FMT "\n", entry);
}

/* Dump a range64 node: slot/pivot contents, then recurse into children. */
static void mt_dump_range64(const struct maple_tree *mt, void *entry,
		unsigned long min, unsigned long max, unsigned int depth,
		enum mt_dump_format format)
{
	struct maple_range_64 *node = &mte_to_node(entry)->mr64;
	bool leaf = mte_is_leaf(entry);
	unsigned long first = min;
	int i;

	pr_cont(" contents: ");
	for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) {
		switch(format) {
		case mt_dump_hex:
			pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
			break;
		case mt_dump_dec:
			pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
		}
	}
	pr_cont(PTR_FMT "\n", node->slot[i]);
	for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
		unsigned long last = max;

		if (i < (MAPLE_RANGE64_SLOTS - 1))
			last = node->pivot[i];
		else if (!node->slot[i] && max != mt_node_max(entry))
			break;
		if (last == 0 && i > 0)
			break;
		if (leaf)
			mt_dump_entry(mt_slot(mt, node->slot, i),
					first, last, depth + 1, format);
		else if (node->slot[i])
			mt_dump_node(mt, mt_slot(mt, node->slot, i),
					first, last, depth + 1, format);

		if (last == max)
			break;
		if (last > max) {
			switch(format) {
			case mt_dump_hex:
				pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
					node, last, max, i);
				break;
			case mt_dump_dec:
				pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
					node, last, max, i);
			}
		}
		first = last + 1;
	}
}

/* Dump an arange64 node: gaps, metadata, slots/pivots, then children. */
static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
		unsigned long min, unsigned long max, unsigned int depth,
		enum mt_dump_format format)
{
	struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
	unsigned long first = min;
	int i;

	pr_cont(" contents: ");
	for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
		switch (format) {
		case mt_dump_hex:
			pr_cont("%lx ", node->gap[i]);
			break;
		case mt_dump_dec:
			pr_cont("%lu ", node->gap[i]);
		}
	}
	pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
	for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) {
		switch (format) {
		case mt_dump_hex:
			pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
			break;
		case mt_dump_dec:
			pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
		}
	}
	pr_cont(PTR_FMT "\n", node->slot[i]);
	for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
		unsigned long last = max;

		if (i < (MAPLE_ARANGE64_SLOTS - 1))
			last = node->pivot[i];
		else if (!node->slot[i])
			break;
		if (last == 0 && i > 0)
			break;
		if (node->slot[i])
			mt_dump_node(mt, mt_slot(mt, node->slot, i),
					first, last, depth + 1, format);

		if (last == max)
			break;
		if (last > max) {
			switch(format) {
			case mt_dump_hex:
				pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
					node, last, max, i);
				break;
			case mt_dump_dec:
				pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
					node, last, max, i);
			}
		}
		first = last + 1;
	}
}

/* Dispatch the dump on the node type. */
static void mt_dump_node(const struct maple_tree *mt, void *entry,
		unsigned long min, unsigned long max, unsigned int depth,
		enum mt_dump_format format)
{
	struct maple_node *node = mte_to_node(entry);
	unsigned int type = mte_node_type(entry);
	unsigned int i;

	mt_dump_range(min, max, depth, format);

	pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node,
		depth, type, node ? node->parent : NULL);
	switch (type) {
	case maple_dense:
		pr_cont("\n");
		for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
			if (min + i > max)
				pr_cont("OUT OF RANGE: ");
			mt_dump_entry(mt_slot(mt, node->slot, i),
					min + i, min + i, depth, format);
		}
		break;
	case maple_leaf_64:
	case maple_range_64:
		mt_dump_range64(mt, entry, min, max, depth, format);
		break;
	case maple_arange_64:
		mt_dump_arange64(mt, entry, min, max, depth, format);
		break;

	default:
		pr_cont(" UNKNOWN TYPE\n");
	}
}

void mt_dump(const struct maple_tree *mt, enum mt_dump_format format)
{
	void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));

	pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n",
		 mt, mt->ma_flags, mt_height(mt), entry);
	if (xa_is_node(entry))
		mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format);
	else if (entry)
		mt_dump_entry(entry, 0, 0, 0, format);
	else
		pr_info("(empty)\n");
}
EXPORT_SYMBOL_GPL(mt_dump);

/*
 * Calculate the maximum gap in a node and check if that's what is reported in
 * the parent (unless root).
 */
static void mas_validate_gaps(struct ma_state *mas)
{
	struct maple_enode *mte = mas->node;
	struct maple_node *p_mn, *node = mte_to_node(mte);
	enum maple_type mt = mte_node_type(mas->node);
	unsigned long gap = 0, max_gap = 0;
	unsigned long p_end, p_start = mas->min;
	unsigned char p_slot, offset;
	unsigned long *gaps = NULL;
	unsigned long *pivots = ma_pivots(node, mt);
	unsigned int i;

	if (ma_is_dense(mt)) {
		/* Dense nodes: a gap is a run of empty slots. */
		for (i = 0; i < mt_slot_count(mte); i++) {
			if (mas_get_slot(mas, i)) {
				if (gap > max_gap)
					max_gap = gap;
				gap = 0;
				continue;
			}
			gap++;
		}
		goto counted;
	}

	gaps = ma_gaps(node, mt);
	for (i = 0; i < mt_slot_count(mte); i++) {
		p_end = mas_safe_pivot(mas, pivots, i, mt);

		if (!gaps) {
			/* No gap array (leaf): an empty slot is its own gap. */
			if (!mas_get_slot(mas, i))
				gap = p_end - p_start + 1;
		} else {
			void *entry = mas_get_slot(mas, i);

			gap = gaps[i];
			MT_BUG_ON(mas->tree, !entry);

			/* A recorded gap cannot exceed the slot's range. */
			if (gap > p_end - p_start + 1) {
				pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n",
				       mas_mn(mas), i, gap, p_end, p_start,
				       p_end - p_start + 1);
				MT_BUG_ON(mas->tree, gap > p_end - p_start + 1);
			}
		}

		if (gap > max_gap)
			max_gap = gap;

		p_start = p_end + 1;
		if (p_end >= mas->max)
			break;
	}

counted:
	if (mt == maple_arange_64) {
		/* The metadata must point at the largest gap in the node. */
		MT_BUG_ON(mas->tree, !gaps);
		offset = ma_meta_gap(node);
		if (offset > i) {
			pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset);
			MT_BUG_ON(mas->tree, 1);
		}

		if (gaps[offset] != max_gap) {
			pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n",
			       node, offset, max_gap);
			MT_BUG_ON(mas->tree, 1);
		}

		/* Slots past the end of data must have zero gaps. */
		for (i++ ; i < mt_slot_count(mte); i++) {
			if (gaps[i] != 0) {
				pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n",
				       node, i);
				MT_BUG_ON(mas->tree, 1);
			}
		}
	}

	if (mte_is_root(mte))
		return;

	/* The parent's gap entry for this node must match our max gap. */
	p_slot = mte_parent_slot(mas->node);
	p_mn = mte_parent(mte);
	MT_BUG_ON(mas->tree, max_gap > mas->max);
	if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) {
		pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap);
		mt_dump(mas->tree, mt_dump_hex);
		MT_BUG_ON(mas->tree, 1);
	}
}

/*
 * Verify this node appears exactly once in its parent's slots, at the
 * offset recorded in its parent pointer.
 */
static void mas_validate_parent_slot(struct ma_state *mas)
{
	struct maple_node *parent;
	struct maple_enode *node;
	enum maple_type p_type;
	unsigned char p_slot;
	void __rcu **slots;
	int i;

	if (mte_is_root(mas->node))
		return;

	p_slot = mte_parent_slot(mas->node);
	p_type = mas_parent_type(mas, mas->node);
	parent = mte_parent(mas->node);
	slots = ma_slots(parent, p_type);
	MT_BUG_ON(mas->tree, mas_mn(mas) == parent);

	/* Check prev/next parent slot for duplicate node entry */

	for (i = 0; i < mt_slots[p_type]; i++) {
		node = mas_slot(mas, slots, i);
		if (i == p_slot) {
			if (node != mas->node)
				pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n",
				       parent, i, mas_mn(mas));
			MT_BUG_ON(mas->tree, node != mas->node);
		} else if (node == mas->node) {
			pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n",
			       mas_mn(mas), parent, i, p_slot);
			MT_BUG_ON(mas->tree, node == mas->node);
		}
	}
}

/*
 * Verify every child of a non-leaf node exists and that each child's
 * parent pointer and parent slot refer back to this node.
 */
static void mas_validate_child_slot(struct ma_state *mas)
{
	enum maple_type type = mte_node_type(mas->node);
	void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
	unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
	struct maple_enode *child;
	unsigned char i;

	if (mte_is_leaf(mas->node))
		return;

	for (i = 0; i < mt_slots[type]; i++) {
		child = mas_slot(mas, slots, i);

		if (!child) {
			pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n",
			       mas_mn(mas), i);
			MT_BUG_ON(mas->tree, 1);
		}

		if (mte_parent_slot(child) != i) {
			pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n",
			       mas_mn(mas), i, mte_to_node(child),
			       mte_parent_slot(child));
			MT_BUG_ON(mas->tree, 1);
		}

		if (mte_parent(child) != mte_to_node(mas->node)) {
			pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n",
			       mte_to_node(child), mte_parent(child),
			       mte_to_node(mas->node));
			MT_BUG_ON(mas->tree, 1);
		}

		/* Reaching the node maximum means this was the last child. */
		if (i < mt_pivots[type] && pivots[i] == mas->max)
			break;
	}
}

/*
 * Validate all pivots are within mas->min and mas->max, check metadata ends
 * where the maximum ends and ensure there is no slots or pivots set outside of
 * the end of the data.
 */
static void mas_validate_limits(struct ma_state *mas)
{
	int i;
	unsigned long prev_piv = 0;
	enum maple_type type = mte_node_type(mas->node);
	void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
	unsigned long *pivots = ma_pivots(mas_mn(mas), type);

	for (i = 0; i < mt_slots[type]; i++) {
		unsigned long piv;

		piv = mas_safe_pivot(mas, pivots, i, type);

		if (!piv && (i != 0)) {
			pr_err("Missing node limit pivot at " PTR_FMT "[%u]",
			       mas_mn(mas), i);
			MAS_WARN_ON(mas, 1);
		}

		/* Pivots must be monotonically non-decreasing. */
		if (prev_piv > piv) {
			pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n",
			       mas_mn(mas), i, piv, prev_piv);
			MAS_WARN_ON(mas, piv < prev_piv);
		}

		if (piv < mas->min) {
			pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i,
			       piv, mas->min);
			MAS_WARN_ON(mas, piv < mas->min);
		}
		if (piv > mas->max) {
			pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i,
			       piv, mas->max);
			MAS_WARN_ON(mas, piv > mas->max);
		}
		prev_piv = piv;
		if (piv == mas->max)
			break;
	}

	if (mas_data_end(mas) != i) {
		pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n",
		       mas_mn(mas), mas_data_end(mas), i);
		MT_BUG_ON(mas->tree, 1);
	}

	/* Everything past the data end must be clear. */
	for (i += 1; i < mt_slots[type]; i++) {
		void *entry = mas_slot(mas, slots, i);

		if (entry && (i != mt_slots[type] - 1)) {
			pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n",
			       mas_mn(mas), i, entry);
			MT_BUG_ON(mas->tree, entry != NULL);
		}

		if (i < mt_pivots[type]) {
			unsigned long piv = pivots[i];

			if (!piv)
				continue;

			pr_err(PTR_FMT "[%u] should not have piv %lu\n",
			       mas_mn(mas), i, piv);
			MAS_WARN_ON(mas, i < mt_pivots[type] - 1);
		}
	}
}

/*
 * Walk the leaves in order and check that no two consecutive slots both
 * contain NULL (adjacent NULL ranges must be merged).
 */
static void mt_validate_nulls(struct maple_tree *mt)
{
	void *entry, *last = (void *)1;
	unsigned char offset = 0;
	void __rcu **slots;
	MA_STATE(mas, mt, 0, 0);

	mas_start(&mas);
	if (mas_is_none(&mas) || (mas_is_ptr(&mas)))
		return;

	while (!mte_is_leaf(mas.node))
		mas_descend(&mas);

	slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
	do {
		entry = mas_slot(&mas, slots, offset);
		if (!last && !entry) {
			pr_err("Sequential nulls end at " PTR_FMT "[%u]\n",
			       mas_mn(&mas), offset);
		}
		MT_BUG_ON(mt, !last && !entry);
		last = entry;
		if (offset == mas_data_end(&mas)) {
			mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
			if (mas_is_overflow(&mas))
				return;
			offset = 0;
			slots = ma_slots(mte_to_node(mas.node),
					 mte_node_type(mas.node));
		} else {
			offset++;
		}

	} while (!mas_is_overflow(&mas));
}

/*
 * validate a maple tree by checking:
 * 1. The limits (pivots are within mas->min to mas->max)
 * 2.
 *    The gap is correctly set in the parents
 */
void mt_validate(struct maple_tree *mt)
	__must_hold(mas->tree->ma_lock)
{
	unsigned char end;

	MA_STATE(mas, mt, 0, 0);
	mas_start(&mas);
	if (!mas_is_active(&mas))
		return;

	/* Start at the leftmost leaf; mas_dfs_postorder() walks the rest. */
	while (!mte_is_leaf(mas.node))
		mas_descend(&mas);

	while (!mas_is_overflow(&mas)) {
		MAS_WARN_ON(&mas, mte_dead_node(mas.node));
		end = mas_data_end(&mas);
		/* Non-root nodes must hold at least the minimum occupancy. */
		if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) &&
				(!mte_is_root(mas.node)))) {
			pr_err("Invalid size %u of " PTR_FMT "\n",
			       end, mas_mn(&mas));
		}

		mas_validate_parent_slot(&mas);
		mas_validate_limits(&mas);
		mas_validate_child_slot(&mas);
		if (mt_is_alloc(mt))
			mas_validate_gaps(&mas);
		mas_dfs_postorder(&mas, ULONG_MAX);
	}
	mt_validate_nulls(mt);
}
EXPORT_SYMBOL_GPL(mt_validate);

/* Print the complete maple state for debugging. */
void mas_dump(const struct ma_state *mas)
{
	pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ",
	       mas->tree, mas->node);
	switch (mas->status) {
	case ma_active:
		pr_err("(ma_active)");
		break;
	case ma_none:
		pr_err("(ma_none)");
		break;
	case ma_root:
		pr_err("(ma_root)");
		break;
	case ma_start:
		pr_err("(ma_start) ");
		break;
	case ma_pause:
		pr_err("(ma_pause) ");
		break;
	case ma_overflow:
		pr_err("(ma_overflow) ");
		break;
	case ma_underflow:
		pr_err("(ma_underflow) ");
		break;
	case ma_error:
		pr_err("(ma_error) ");
		break;
	}

	pr_err("Store Type: ");
	switch (mas->store_type) {
	case wr_invalid:
		pr_err("invalid store type\n");
		break;
	case wr_new_root:
		pr_err("new_root\n");
		break;
	case wr_store_root:
		pr_err("store_root\n");
		break;
	case wr_exact_fit:
		pr_err("exact_fit\n");
		break;
	case wr_split_store:
		pr_err("split_store\n");
		break;
	case wr_slot_store:
		pr_err("slot_store\n");
		break;
	case wr_append:
		pr_err("append\n");
		break;
	case wr_node_store:
		pr_err("node_store\n");
		break;
	case wr_spanning_store:
		pr_err("spanning_store\n");
		break;
	case wr_rebalance:
		pr_err("rebalance\n");
		break;
	}

	pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end,
	       mas->index, mas->last);
	pr_err("     min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n",
	       mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth,
	       mas->mas_flags);
	if (mas->index > mas->last)
		pr_err("Check index & last\n");
}
EXPORT_SYMBOL_GPL(mas_dump);

/* Print the write state of a maple operation for debugging. */
void mas_wr_dump(const struct ma_wr_state *wr_mas)
{
	pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n",
	       wr_mas->node, wr_mas->r_min, wr_mas->r_max);
	pr_err("        type=%u off_end=%u, node_end=%u, end_piv=%lx\n",
	       wr_mas->type, wr_mas->offset_end, wr_mas->mas->end,
	       wr_mas->end_piv);
}
EXPORT_SYMBOL_GPL(mas_wr_dump);

#endif /* CONFIG_DEBUG_MAPLE_TREE */