// SPDX-License-Identifier: GPL-2.0+
/*
 * Maple Tree implementation
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 * Copyright (c) 2023 ByteDance
 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
 */

/*
 * DOC: Interesting implementation details of the Maple Tree
 *
 * Each node type has a number of slots for entries and a number of slots for
 * pivots.  In the case of dense nodes, the pivots are implied by the position
 * and are simply the slot index + the minimum of the node.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges.  Pivots may appear in the
 * subtree with an entry attached to the value whereas keys are unique to a
 * specific position of a B-tree.  Pivot values are inclusive of the slot with
 * the same index.
 *
 *
 * The following illustrates the layout of a range64 node's slots and pivots.
 *
 *
 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
 *           │   │   │   │     │    │    │    │    └─ Implied maximum
 *           │   │   │   │     │    │    │    └─ Pivot 14
 *           │   │   │   │     │    │    └─ Pivot 13
 *           │   │   │   │     │    └─ Pivot 12
 *           │   │   │   │     └─ Pivot 11
 *           │   │   │   └─ Pivot 2
 *           │   │   └─ Pivot 1
 *           │   └─ Pivot 0
 *           └─  Implied minimum
 *
 * Slot contents:
 *  Internal (non-leaf) nodes contain pointers to other nodes.
 *  Leaf nodes contain entries.
 *
 * The location of interest is often referred to as an offset.  All offsets have
 * a slot, but the last offset has an implied pivot from the node above (or
 * ULONG_MAX for the root node).
 *
 * Ranges complicate certain write activities.  When modifying any of
 * the B-tree variants, it is known that one entry will either be added or
 * deleted.  When modifying the Maple Tree, one store operation may overwrite
 * the entire data set, or one half of the tree, or the middle half of the tree.
 *
 */


#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

/*
 * Kernel pointer hashing renders much of the maple tree dump useless as tagged
 * pointers get hashed to arbitrary values.
 *
 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
 * permissible to bypass this. Otherwise remain cautious and retain the hashing.
 *
 * Userland doesn't know about %px so also use %p there.
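 *
 * For example (illustrative only), a debug dump helper would emit an unhashed
 * pointer only when the debug config allows it:
 *
 *	pr_info("node " PTR_FMT "\n", node);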
 */
#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE)
#define PTR_FMT "%px"
#else
#define PTR_FMT "%p"
#endif

#define MA_ROOT_PARENT 1

/*
 * Maple state flags
 * * MA_STATE_BULK		- Bulk insert mode
 * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
 * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
 */
#define MA_STATE_BULK		1
#define MA_STATE_REBALANCE	2
#define MA_STATE_PREALLOC	4

#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
#define ma_mnode_ptr(x) ((struct maple_node *)(x))
#define ma_enode_ptr(x) ((struct maple_enode *)(x))
static struct kmem_cache *maple_node_cache;

#ifdef CONFIG_DEBUG_MAPLE_TREE
static const unsigned long mt_max[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= ULONG_MAX,
	[maple_range_64]	= ULONG_MAX,
	[maple_arange_64]	= ULONG_MAX,
};
#define mt_node_max(x) mt_max[mte_node_type(x)]
#endif

static const unsigned char mt_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
};
#define mt_slot_count(x) mt_slots[mte_node_type(x)]

static const unsigned char mt_pivots[] = {
	[maple_dense]		= 0,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
};
#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]

static const unsigned char mt_min_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
};
#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]

#define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
#define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)

struct maple_big_node {
	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
	union {
		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
		struct {
			unsigned long padding[MAPLE_BIG_NODE_GAPS];
			unsigned long gap[MAPLE_BIG_NODE_GAPS];
		};
	};
	unsigned char b_end;
	enum maple_type type;
};

/*
 * The maple_subtree_state is used to build a tree to replace a segment of an
 * existing tree in a more atomic way.  Any walkers of the older tree will hit a
 * dead node and restart on updates.
 */
struct maple_subtree_state {
	struct ma_state *orig_l;	/* Original left side of subtree */
	struct ma_state *orig_r;	/* Original right side of subtree */
	struct ma_state *l;		/* New left side of subtree */
	struct ma_state *m;		/* New middle of subtree (rare) */
	struct ma_state *r;		/* New right side of subtree */
	struct ma_topiary *free;	/* nodes to be freed */
	struct ma_topiary *destroy;	/* Nodes to be destroyed (walked and freed) */
	struct maple_big_node *bn;
};

#ifdef CONFIG_KASAN_STACK
/* Prevent mas_wr_bnode() from exceeding the stack frame limit */
#define noinline_for_kasan noinline_for_stack
#else
#define noinline_for_kasan inline
#endif

/* Functions */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
{
	return kmem_cache_alloc(maple_node_cache, gfp);
}

static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
{
	return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
}

static inline void mt_free_one(struct maple_node *node)
{
	kmem_cache_free(maple_node_cache, node);
}

static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
}

static void mt_free_rcu(struct rcu_head *head)
{
	struct maple_node *node = container_of(head, struct maple_node, rcu);

	kmem_cache_free(maple_node_cache, node);
}

/*
 * ma_free_rcu() - Use rcu callback to free a maple node
 * @node: The node to free
 *
 * The maple tree uses the parent pointer to indicate this node is no longer in
 * use and will be freed.
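 *
 * The node must already have been marked dead by pointing its parent pointer
 * at itself, as mte_set_node_dead() does:
 *
 *	node->parent = ma_parent_ptr(node);
 *
 * which is what the WARN_ON() below verifies before queueing the RCU free.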
 */
static void ma_free_rcu(struct maple_node *node)
{
	WARN_ON(node->parent != ma_parent_ptr(node));
	call_rcu(&node->rcu, mt_free_rcu);
}

static void mas_set_height(struct ma_state *mas)
{
	unsigned int new_flags = mas->tree->ma_flags;

	new_flags &= ~MT_FLAGS_HEIGHT_MASK;
	MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX);
	new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
	mas->tree->ma_flags = new_flags;
}

static unsigned int mas_mt_height(struct ma_state *mas)
{
	return mt_height(mas->tree);
}

static inline unsigned int mt_attr(struct maple_tree *mt)
{
	return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
}

static __always_inline enum maple_type mte_node_type(
		const struct maple_enode *entry)
{
	return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
		MAPLE_NODE_TYPE_MASK;
}

static __always_inline bool ma_is_dense(const enum maple_type type)
{
	return type < maple_leaf_64;
}

static __always_inline bool ma_is_leaf(const enum maple_type type)
{
	return type < maple_range_64;
}

static __always_inline bool mte_is_leaf(const struct maple_enode *entry)
{
	return ma_is_leaf(mte_node_type(entry));
}

/*
 * We also reserve values with the bottom two bits set to '10' which are
 * below 4096
 */
static __always_inline bool mt_is_reserved(const void *entry)
{
	return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
		xa_is_internal(entry);
}

static __always_inline void mas_set_err(struct ma_state *mas, long err)
{
	mas->node = MA_ERROR(err);
	mas->status = ma_error;
}

static __always_inline bool mas_is_ptr(const struct ma_state *mas)
{
	return mas->status == ma_root;
}

static __always_inline bool mas_is_start(const struct ma_state *mas)
{
	return mas->status == ma_start;
}

static __always_inline bool mas_is_none(const struct ma_state *mas)
{
	return mas->status == ma_none;
}

static __always_inline bool mas_is_paused(const struct ma_state *mas)
{
	return mas->status == ma_pause;
}

static __always_inline bool mas_is_overflow(struct ma_state *mas)
{
	return mas->status == ma_overflow;
}

static inline bool mas_is_underflow(struct ma_state *mas)
{
	return mas->status == ma_underflow;
}

static __always_inline struct maple_node *mte_to_node(
		const struct maple_enode *entry)
{
	return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
}

/*
 * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
 * @entry: The maple encoded node
 *
 * Return: a maple topiary pointer
 */
static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
{
	return (struct maple_topiary *)
		((unsigned long)entry & ~MAPLE_NODE_MASK);
}

/*
 * mas_mn() - Get the maple state node.
 * @mas: The maple state
 *
 * Return: the maple node (not encoded - bare pointer).
 */
static inline struct maple_node *mas_mn(const struct ma_state *mas)
{
	return mte_to_node(mas->node);
}

/*
 * mte_set_node_dead() - Set a maple encoded node as dead.
 * @mn: The maple encoded node.
 */
static inline void mte_set_node_dead(struct maple_enode *mn)
{
	mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
	smp_wmb(); /* Needed for RCU */
}

/* Bit 1 indicates the root is a node */
#define MAPLE_ROOT_NODE			0x02
/* maple_type stored bit 3-6 */
#define MAPLE_ENODE_TYPE_SHIFT		0x03
/* Bit 2 means a NULL somewhere below */
#define MAPLE_ENODE_NULL		0x04

static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
					     enum maple_type type)
{
	return (void *)((unsigned long)node |
			(type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
}

static inline void *mte_mk_root(const struct maple_enode *node)
{
	return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
}

static inline void *mte_safe_root(const struct maple_enode *node)
{
	return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
}

static inline void __maybe_unused *mte_set_full(const struct maple_enode *node)
{
	return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
}

static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node)
{
	return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
}

static inline bool __maybe_unused mte_has_null(const struct maple_enode *node)
{
	return (unsigned long)node & MAPLE_ENODE_NULL;
}

static __always_inline bool ma_is_root(struct maple_node *node)
{
	return ((unsigned long)node->parent & MA_ROOT_PARENT);
}

static __always_inline bool mte_is_root(const struct maple_enode *node)
{
	return ma_is_root(mte_to_node(node));
}

static inline bool mas_is_root_limits(const struct ma_state *mas)
{
	return !mas->min && mas->max == ULONG_MAX;
}

static __always_inline bool mt_is_alloc(struct maple_tree *mt)
{
	return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
}

/*
 * The Parent Pointer
 * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
 * When storing 32 or 64 bit values, the offset can fit into 5 bits.  The 16
 * bit values need an extra bit to store the offset.  This extra bit comes from
 * a reuse of the last bit in the node type.  This is possible by using bit 1 to
 * indicate if bit 2 is part of the type or the slot.
 *
 * Node types:
 *  0x??1 = Root
 *  0x?00 = 16 bit nodes
 *  0x010 = 32 bit nodes
 *  0x110 = 64 bit nodes
 *
 * Slot size and alignment
 *  0b??1 : Root
 *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
 *  0b010 : 32 bit values, type in 0-2, slot in 3-7
 *  0b110 : 64 bit values, type in 0-2, slot in 3-7
 */

#define MAPLE_PARENT_ROOT		0x01

#define MAPLE_PARENT_SLOT_SHIFT		0x03
#define MAPLE_PARENT_SLOT_MASK		0xF8

#define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
#define MAPLE_PARENT_16B_SLOT_MASK	0xFC

#define MAPLE_PARENT_RANGE64		0x06
#define MAPLE_PARENT_RANGE32		0x04
#define MAPLE_PARENT_NOT_RANGE16	0x02

/*
 * mte_parent_shift() - Get the parent shift for the slot storage.
 * @parent: The parent pointer cast as an unsigned long
 * Return: The shift into that pointer to the start of the slot
 */
static inline unsigned long mte_parent_shift(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_SHIFT;

	return MAPLE_PARENT_16B_SLOT_SHIFT;
}

/*
 * mte_parent_slot_mask() - Get the slot mask for the parent.
 * @parent: The parent pointer cast as an unsigned long.
 * Return: The slot mask for that parent.
 */
static inline unsigned long mte_parent_slot_mask(unsigned long parent)
{
	/* Note bit 1 == 0 means 16B */
	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
		return MAPLE_PARENT_SLOT_MASK;

	return MAPLE_PARENT_16B_SLOT_MASK;
}

/*
 * mas_parent_type() - Return the maple_type of the parent from the stored
 * parent type.
 * @mas: The maple state
 * @enode: The maple_enode to extract the parent's enum
 * Return: The node->parent maple_type
 */
static inline
enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
{
	unsigned long p_type;

	p_type = (unsigned long)mte_to_node(enode)->parent;
	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
		return 0;

	p_type &= MAPLE_NODE_MASK;
	p_type &= ~mte_parent_slot_mask(p_type);
	switch (p_type) {
	case MAPLE_PARENT_RANGE64:		/* or MAPLE_PARENT_ARANGE64 */
		if (mt_is_alloc(mas->tree))
			return maple_arange_64;
		return maple_range_64;
	}

	return 0;
}

/*
 * mas_set_parent() - Set the parent node and encode the slot
 * @mas: The maple state
 * @enode: The encoded maple node.
 * @parent: The encoded maple node that is the parent of @enode.
 * @slot: The slot that @enode resides in @parent.
 *
 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
 * parent type.
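 *
 * For example (illustrative), storing a maple_range_64 child at slot 2 of
 * @parent results in:
 *
 *	enode->parent = ma_parent_ptr((unsigned long)parent |
 *				      (2 << MAPLE_PARENT_SLOT_SHIFT) |
 *				      MAPLE_PARENT_RANGE64);
 *
 * after the metadata bits of @parent (MAPLE_NODE_MASK) have been cleared.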
498 */ 499 static inline mas_set_parent(struct ma_state * mas,struct maple_enode * enode,const struct maple_enode * parent,unsigned char slot)500 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, 501 const struct maple_enode *parent, unsigned char slot) 502 { 503 unsigned long val = (unsigned long)parent; 504 unsigned long shift; 505 unsigned long type; 506 enum maple_type p_type = mte_node_type(parent); 507 508 MAS_BUG_ON(mas, p_type == maple_dense); 509 MAS_BUG_ON(mas, p_type == maple_leaf_64); 510 511 switch (p_type) { 512 case maple_range_64: 513 case maple_arange_64: 514 shift = MAPLE_PARENT_SLOT_SHIFT; 515 type = MAPLE_PARENT_RANGE64; 516 break; 517 default: 518 case maple_dense: 519 case maple_leaf_64: 520 shift = type = 0; 521 break; 522 } 523 524 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ 525 val |= (slot << shift) | type; 526 mte_to_node(enode)->parent = ma_parent_ptr(val); 527 } 528 529 /* 530 * mte_parent_slot() - get the parent slot of @enode. 531 * @enode: The encoded maple node. 532 * 533 * Return: The slot in the parent node where @enode resides. 534 */ 535 static __always_inline mte_parent_slot(const struct maple_enode * enode)536 unsigned int mte_parent_slot(const struct maple_enode *enode) 537 { 538 unsigned long val = (unsigned long)mte_to_node(enode)->parent; 539 540 if (unlikely(val & MA_ROOT_PARENT)) 541 return 0; 542 543 /* 544 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost 545 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT 546 */ 547 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); 548 } 549 550 /* 551 * mte_parent() - Get the parent of @node. 552 * @enode: The encoded maple node. 553 * 554 * Return: The parent maple node. 555 */ 556 static __always_inline mte_parent(const struct maple_enode * enode)557 struct maple_node *mte_parent(const struct maple_enode *enode) 558 { 559 return (void *)((unsigned long) 560 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); 561 } 562 563 /* 564 * ma_dead_node() - check if the @enode is dead. 565 * @enode: The encoded maple node 566 * 567 * Return: true if dead, false otherwise. 568 */ ma_dead_node(const struct maple_node * node)569 static __always_inline bool ma_dead_node(const struct maple_node *node) 570 { 571 struct maple_node *parent; 572 573 /* Do not reorder reads from the node prior to the parent check */ 574 smp_rmb(); 575 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); 576 return (parent == node); 577 } 578 579 /* 580 * mte_dead_node() - check if the @enode is dead. 581 * @enode: The encoded maple node 582 * 583 * Return: true if dead, false otherwise. 584 */ mte_dead_node(const struct maple_enode * enode)585 static __always_inline bool mte_dead_node(const struct maple_enode *enode) 586 { 587 struct maple_node *parent, *node; 588 589 node = mte_to_node(enode); 590 /* Do not reorder reads from the node prior to the parent check */ 591 smp_rmb(); 592 parent = mte_parent(enode); 593 return (parent == node); 594 } 595 596 /* 597 * mas_allocated() - Get the number of nodes allocated in a maple state. 598 * @mas: The maple state 599 * 600 * The ma_state alloc member is overloaded to hold a pointer to the first 601 * allocated node or to the number of requested nodes to allocate. If bit 0 is 602 * set, then the alloc contains the number of requested nodes. If there is an 603 * allocated node, then the total allocated nodes is in that node. 
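 *
 * For example (illustrative), a pending request for 3 nodes with nothing yet
 * allocated is encoded directly in the pointer with bit 0 set:
 *
 *	mas->alloc == (struct maple_alloc *)((3 << 1) | 1)
 *
 * in which case this function returns 0 until nodes are actually allocated.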
604 * 605 * Return: The total number of nodes allocated 606 */ mas_allocated(const struct ma_state * mas)607 static inline unsigned long mas_allocated(const struct ma_state *mas) 608 { 609 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) 610 return 0; 611 612 return mas->alloc->total; 613 } 614 615 /* 616 * mas_set_alloc_req() - Set the requested number of allocations. 617 * @mas: the maple state 618 * @count: the number of allocations. 619 * 620 * The requested number of allocations is either in the first allocated node, 621 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is 622 * no allocated node. Set the request either in the node or do the necessary 623 * encoding to store in @mas->alloc directly. 624 */ mas_set_alloc_req(struct ma_state * mas,unsigned long count)625 static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) 626 { 627 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { 628 if (!count) 629 mas->alloc = NULL; 630 else 631 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); 632 return; 633 } 634 635 mas->alloc->request_count = count; 636 } 637 638 /* 639 * mas_alloc_req() - get the requested number of allocations. 640 * @mas: The maple state 641 * 642 * The alloc count is either stored directly in @mas, or in 643 * @mas->alloc->request_count if there is at least one node allocated. Decode 644 * the request count if it's stored directly in @mas->alloc. 645 * 646 * Return: The allocation request count. 647 */ mas_alloc_req(const struct ma_state * mas)648 static inline unsigned int mas_alloc_req(const struct ma_state *mas) 649 { 650 if ((unsigned long)mas->alloc & 0x1) 651 return (unsigned long)(mas->alloc) >> 1; 652 else if (mas->alloc) 653 return mas->alloc->request_count; 654 return 0; 655 } 656 657 /* 658 * ma_pivots() - Get a pointer to the maple node pivots. 659 * @node: the maple node 660 * @type: the node type 661 * 662 * In the event of a dead node, this array may be %NULL 663 * 664 * Return: A pointer to the maple node pivots 665 */ ma_pivots(struct maple_node * node,enum maple_type type)666 static inline unsigned long *ma_pivots(struct maple_node *node, 667 enum maple_type type) 668 { 669 switch (type) { 670 case maple_arange_64: 671 return node->ma64.pivot; 672 case maple_range_64: 673 case maple_leaf_64: 674 return node->mr64.pivot; 675 case maple_dense: 676 return NULL; 677 } 678 return NULL; 679 } 680 681 /* 682 * ma_gaps() - Get a pointer to the maple node gaps. 683 * @node: the maple node 684 * @type: the node type 685 * 686 * Return: A pointer to the maple node gaps 687 */ ma_gaps(struct maple_node * node,enum maple_type type)688 static inline unsigned long *ma_gaps(struct maple_node *node, 689 enum maple_type type) 690 { 691 switch (type) { 692 case maple_arange_64: 693 return node->ma64.gap; 694 case maple_range_64: 695 case maple_leaf_64: 696 case maple_dense: 697 return NULL; 698 } 699 return NULL; 700 } 701 702 /* 703 * mas_safe_pivot() - get the pivot at @piv or mas->max. 704 * @mas: The maple state 705 * @pivots: The pointer to the maple node pivots 706 * @piv: The pivot to fetch 707 * @type: The maple node type 708 * 709 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max 710 * otherwise. 
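 *
 * For example, the last slot of a node has no stored pivot, so asking for
 * piv == mt_pivots[type] falls outside the pivot array and @mas->max (the
 * implied pivot from the node above) is returned instead.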
711 */ 712 static __always_inline unsigned long mas_safe_pivot(const struct ma_state * mas,unsigned long * pivots,unsigned char piv,enum maple_type type)713 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, 714 unsigned char piv, enum maple_type type) 715 { 716 if (piv >= mt_pivots[type]) 717 return mas->max; 718 719 return pivots[piv]; 720 } 721 722 /* 723 * mas_safe_min() - Return the minimum for a given offset. 724 * @mas: The maple state 725 * @pivots: The pointer to the maple node pivots 726 * @offset: The offset into the pivot array 727 * 728 * Return: The minimum range value that is contained in @offset. 729 */ 730 static inline unsigned long mas_safe_min(struct ma_state * mas,unsigned long * pivots,unsigned char offset)731 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) 732 { 733 if (likely(offset)) 734 return pivots[offset - 1] + 1; 735 736 return mas->min; 737 } 738 739 /* 740 * mte_set_pivot() - Set a pivot to a value in an encoded maple node. 741 * @mn: The encoded maple node 742 * @piv: The pivot offset 743 * @val: The value of the pivot 744 */ mte_set_pivot(struct maple_enode * mn,unsigned char piv,unsigned long val)745 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, 746 unsigned long val) 747 { 748 struct maple_node *node = mte_to_node(mn); 749 enum maple_type type = mte_node_type(mn); 750 751 BUG_ON(piv >= mt_pivots[type]); 752 switch (type) { 753 case maple_range_64: 754 case maple_leaf_64: 755 node->mr64.pivot[piv] = val; 756 break; 757 case maple_arange_64: 758 node->ma64.pivot[piv] = val; 759 break; 760 case maple_dense: 761 break; 762 } 763 764 } 765 766 /* 767 * ma_slots() - Get a pointer to the maple node slots. 768 * @mn: The maple node 769 * @mt: The maple node type 770 * 771 * Return: A pointer to the maple node slots 772 */ ma_slots(struct maple_node * mn,enum maple_type mt)773 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) 774 { 775 switch (mt) { 776 case maple_arange_64: 777 return mn->ma64.slot; 778 case maple_range_64: 779 case maple_leaf_64: 780 return mn->mr64.slot; 781 case maple_dense: 782 return mn->slot; 783 } 784 785 return NULL; 786 } 787 mt_write_locked(const struct maple_tree * mt)788 static inline bool mt_write_locked(const struct maple_tree *mt) 789 { 790 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : 791 lockdep_is_held(&mt->ma_lock); 792 } 793 mt_locked(const struct maple_tree * mt)794 static __always_inline bool mt_locked(const struct maple_tree *mt) 795 { 796 return mt_external_lock(mt) ? mt_lock_is_held(mt) : 797 lockdep_is_held(&mt->ma_lock); 798 } 799 mt_slot(const struct maple_tree * mt,void __rcu ** slots,unsigned char offset)800 static __always_inline void *mt_slot(const struct maple_tree *mt, 801 void __rcu **slots, unsigned char offset) 802 { 803 return rcu_dereference_check(slots[offset], mt_locked(mt)); 804 } 805 mt_slot_locked(struct maple_tree * mt,void __rcu ** slots,unsigned char offset)806 static __always_inline void *mt_slot_locked(struct maple_tree *mt, 807 void __rcu **slots, unsigned char offset) 808 { 809 return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); 810 } 811 /* 812 * mas_slot_locked() - Get the slot value when holding the maple tree lock. 813 * @mas: The maple state 814 * @slots: The pointer to the slots 815 * @offset: The offset into the slots array to fetch 816 * 817 * Return: The entry stored in @slots at the @offset. 
818 */ mas_slot_locked(struct ma_state * mas,void __rcu ** slots,unsigned char offset)819 static __always_inline void *mas_slot_locked(struct ma_state *mas, 820 void __rcu **slots, unsigned char offset) 821 { 822 return mt_slot_locked(mas->tree, slots, offset); 823 } 824 825 /* 826 * mas_slot() - Get the slot value when not holding the maple tree lock. 827 * @mas: The maple state 828 * @slots: The pointer to the slots 829 * @offset: The offset into the slots array to fetch 830 * 831 * Return: The entry stored in @slots at the @offset 832 */ mas_slot(struct ma_state * mas,void __rcu ** slots,unsigned char offset)833 static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots, 834 unsigned char offset) 835 { 836 return mt_slot(mas->tree, slots, offset); 837 } 838 839 /* 840 * mas_root() - Get the maple tree root. 841 * @mas: The maple state. 842 * 843 * Return: The pointer to the root of the tree 844 */ mas_root(struct ma_state * mas)845 static __always_inline void *mas_root(struct ma_state *mas) 846 { 847 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); 848 } 849 mt_root_locked(struct maple_tree * mt)850 static inline void *mt_root_locked(struct maple_tree *mt) 851 { 852 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); 853 } 854 855 /* 856 * mas_root_locked() - Get the maple tree root when holding the maple tree lock. 857 * @mas: The maple state. 858 * 859 * Return: The pointer to the root of the tree 860 */ mas_root_locked(struct ma_state * mas)861 static inline void *mas_root_locked(struct ma_state *mas) 862 { 863 return mt_root_locked(mas->tree); 864 } 865 ma_meta(struct maple_node * mn,enum maple_type mt)866 static inline struct maple_metadata *ma_meta(struct maple_node *mn, 867 enum maple_type mt) 868 { 869 switch (mt) { 870 case maple_arange_64: 871 return &mn->ma64.meta; 872 default: 873 return &mn->mr64.meta; 874 } 875 } 876 877 /* 878 * ma_set_meta() - Set the metadata information of a node. 879 * @mn: The maple node 880 * @mt: The maple node type 881 * @offset: The offset of the highest sub-gap in this node. 882 * @end: The end of the data in this node. 
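 *
 * For example (illustrative), ma_set_meta(mn, maple_arange_64, 3, 9) records
 * that the largest gap is at offset 3 and that offset 9 is the last offset
 * containing data.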
883 */ ma_set_meta(struct maple_node * mn,enum maple_type mt,unsigned char offset,unsigned char end)884 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, 885 unsigned char offset, unsigned char end) 886 { 887 struct maple_metadata *meta = ma_meta(mn, mt); 888 889 meta->gap = offset; 890 meta->end = end; 891 } 892 893 /* 894 * mt_clear_meta() - clear the metadata information of a node, if it exists 895 * @mt: The maple tree 896 * @mn: The maple node 897 * @type: The maple node type 898 */ mt_clear_meta(struct maple_tree * mt,struct maple_node * mn,enum maple_type type)899 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, 900 enum maple_type type) 901 { 902 struct maple_metadata *meta; 903 unsigned long *pivots; 904 void __rcu **slots; 905 void *next; 906 907 switch (type) { 908 case maple_range_64: 909 pivots = mn->mr64.pivot; 910 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { 911 slots = mn->mr64.slot; 912 next = mt_slot_locked(mt, slots, 913 MAPLE_RANGE64_SLOTS - 1); 914 if (unlikely((mte_to_node(next) && 915 mte_node_type(next)))) 916 return; /* no metadata, could be node */ 917 } 918 fallthrough; 919 case maple_arange_64: 920 meta = ma_meta(mn, type); 921 break; 922 default: 923 return; 924 } 925 926 meta->gap = 0; 927 meta->end = 0; 928 } 929 930 /* 931 * ma_meta_end() - Get the data end of a node from the metadata 932 * @mn: The maple node 933 * @mt: The maple node type 934 */ ma_meta_end(struct maple_node * mn,enum maple_type mt)935 static inline unsigned char ma_meta_end(struct maple_node *mn, 936 enum maple_type mt) 937 { 938 struct maple_metadata *meta = ma_meta(mn, mt); 939 940 return meta->end; 941 } 942 943 /* 944 * ma_meta_gap() - Get the largest gap location of a node from the metadata 945 * @mn: The maple node 946 */ ma_meta_gap(struct maple_node * mn)947 static inline unsigned char ma_meta_gap(struct maple_node *mn) 948 { 949 return mn->ma64.meta.gap; 950 } 951 952 /* 953 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata 954 * @mn: The maple node 955 * @mt: The maple node type 956 * @offset: The location of the largest gap. 957 */ ma_set_meta_gap(struct maple_node * mn,enum maple_type mt,unsigned char offset)958 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, 959 unsigned char offset) 960 { 961 962 struct maple_metadata *meta = ma_meta(mn, mt); 963 964 meta->gap = offset; 965 } 966 967 /* 968 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. 969 * @mat: the ma_topiary, a linked list of dead nodes. 970 * @dead_enode: the node to be marked as dead and added to the tail of the list 971 * 972 * Add the @dead_enode to the linked list in @mat. 973 */ mat_add(struct ma_topiary * mat,struct maple_enode * dead_enode)974 static inline void mat_add(struct ma_topiary *mat, 975 struct maple_enode *dead_enode) 976 { 977 mte_set_node_dead(dead_enode); 978 mte_to_mat(dead_enode)->next = NULL; 979 if (!mat->tail) { 980 mat->tail = mat->head = dead_enode; 981 return; 982 } 983 984 mte_to_mat(mat->tail)->next = dead_enode; 985 mat->tail = dead_enode; 986 } 987 988 static void mt_free_walk(struct rcu_head *head); 989 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 990 bool free); 991 /* 992 * mas_mat_destroy() - Free all nodes and subtrees in a dead list. 993 * @mas: the maple state 994 * @mat: the ma_topiary linked list of dead nodes to free. 995 * 996 * Destroy walk a dead list. 
997 */ mas_mat_destroy(struct ma_state * mas,struct ma_topiary * mat)998 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) 999 { 1000 struct maple_enode *next; 1001 struct maple_node *node; 1002 bool in_rcu = mt_in_rcu(mas->tree); 1003 1004 while (mat->head) { 1005 next = mte_to_mat(mat->head)->next; 1006 node = mte_to_node(mat->head); 1007 mt_destroy_walk(mat->head, mas->tree, !in_rcu); 1008 if (in_rcu) 1009 call_rcu(&node->rcu, mt_free_walk); 1010 mat->head = next; 1011 } 1012 } 1013 /* 1014 * mas_descend() - Descend into the slot stored in the ma_state. 1015 * @mas: the maple state. 1016 * 1017 * Note: Not RCU safe, only use in write side or debug code. 1018 */ mas_descend(struct ma_state * mas)1019 static inline void mas_descend(struct ma_state *mas) 1020 { 1021 enum maple_type type; 1022 unsigned long *pivots; 1023 struct maple_node *node; 1024 void __rcu **slots; 1025 1026 node = mas_mn(mas); 1027 type = mte_node_type(mas->node); 1028 pivots = ma_pivots(node, type); 1029 slots = ma_slots(node, type); 1030 1031 if (mas->offset) 1032 mas->min = pivots[mas->offset - 1] + 1; 1033 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); 1034 mas->node = mas_slot(mas, slots, mas->offset); 1035 } 1036 1037 /* 1038 * mte_set_gap() - Set a maple node gap. 1039 * @mn: The encoded maple node 1040 * @gap: The offset of the gap to set 1041 * @val: The gap value 1042 */ mte_set_gap(const struct maple_enode * mn,unsigned char gap,unsigned long val)1043 static inline void mte_set_gap(const struct maple_enode *mn, 1044 unsigned char gap, unsigned long val) 1045 { 1046 switch (mte_node_type(mn)) { 1047 default: 1048 break; 1049 case maple_arange_64: 1050 mte_to_node(mn)->ma64.gap[gap] = val; 1051 break; 1052 } 1053 } 1054 1055 /* 1056 * mas_ascend() - Walk up a level of the tree. 1057 * @mas: The maple state 1058 * 1059 * Sets the @mas->max and @mas->min to the correct values when walking up. This 1060 * may cause several levels of walking up to find the correct min and max. 1061 * May find a dead node which will cause a premature return. 1062 * Return: 1 on dead node, 0 otherwise 1063 */ mas_ascend(struct ma_state * mas)1064 static int mas_ascend(struct ma_state *mas) 1065 { 1066 struct maple_enode *p_enode; /* parent enode. */ 1067 struct maple_enode *a_enode; /* ancestor enode. */ 1068 struct maple_node *a_node; /* ancestor node. */ 1069 struct maple_node *p_node; /* parent node. 
*/ 1070 unsigned char a_slot; 1071 enum maple_type a_type; 1072 unsigned long min, max; 1073 unsigned long *pivots; 1074 bool set_max = false, set_min = false; 1075 1076 a_node = mas_mn(mas); 1077 if (ma_is_root(a_node)) { 1078 mas->offset = 0; 1079 return 0; 1080 } 1081 1082 p_node = mte_parent(mas->node); 1083 if (unlikely(a_node == p_node)) 1084 return 1; 1085 1086 a_type = mas_parent_type(mas, mas->node); 1087 mas->offset = mte_parent_slot(mas->node); 1088 a_enode = mt_mk_node(p_node, a_type); 1089 1090 /* Check to make sure all parent information is still accurate */ 1091 if (p_node != mte_parent(mas->node)) 1092 return 1; 1093 1094 mas->node = a_enode; 1095 1096 if (mte_is_root(a_enode)) { 1097 mas->max = ULONG_MAX; 1098 mas->min = 0; 1099 return 0; 1100 } 1101 1102 min = 0; 1103 max = ULONG_MAX; 1104 if (!mas->offset) { 1105 min = mas->min; 1106 set_min = true; 1107 } 1108 1109 if (mas->max == ULONG_MAX) 1110 set_max = true; 1111 1112 do { 1113 p_enode = a_enode; 1114 a_type = mas_parent_type(mas, p_enode); 1115 a_node = mte_parent(p_enode); 1116 a_slot = mte_parent_slot(p_enode); 1117 a_enode = mt_mk_node(a_node, a_type); 1118 pivots = ma_pivots(a_node, a_type); 1119 1120 if (unlikely(ma_dead_node(a_node))) 1121 return 1; 1122 1123 if (!set_min && a_slot) { 1124 set_min = true; 1125 min = pivots[a_slot - 1] + 1; 1126 } 1127 1128 if (!set_max && a_slot < mt_pivots[a_type]) { 1129 set_max = true; 1130 max = pivots[a_slot]; 1131 } 1132 1133 if (unlikely(ma_dead_node(a_node))) 1134 return 1; 1135 1136 if (unlikely(ma_is_root(a_node))) 1137 break; 1138 1139 } while (!set_min || !set_max); 1140 1141 mas->max = max; 1142 mas->min = min; 1143 return 0; 1144 } 1145 1146 /* 1147 * mas_pop_node() - Get a previously allocated maple node from the maple state. 1148 * @mas: The maple state 1149 * 1150 * Return: A pointer to a maple node. 1151 */ mas_pop_node(struct ma_state * mas)1152 static inline struct maple_node *mas_pop_node(struct ma_state *mas) 1153 { 1154 struct maple_alloc *ret, *node = mas->alloc; 1155 unsigned long total = mas_allocated(mas); 1156 unsigned int req = mas_alloc_req(mas); 1157 1158 /* nothing or a request pending. */ 1159 if (WARN_ON(!total)) 1160 return NULL; 1161 1162 if (total == 1) { 1163 /* single allocation in this ma_state */ 1164 mas->alloc = NULL; 1165 ret = node; 1166 goto single_node; 1167 } 1168 1169 if (node->node_count == 1) { 1170 /* Single allocation in this node. */ 1171 mas->alloc = node->slot[0]; 1172 mas->alloc->total = node->total - 1; 1173 ret = node; 1174 goto new_head; 1175 } 1176 node->total--; 1177 ret = node->slot[--node->node_count]; 1178 node->slot[node->node_count] = NULL; 1179 1180 single_node: 1181 new_head: 1182 if (req) { 1183 req++; 1184 mas_set_alloc_req(mas, req); 1185 } 1186 1187 memset(ret, 0, sizeof(*ret)); 1188 return (struct maple_node *)ret; 1189 } 1190 1191 /* 1192 * mas_push_node() - Push a node back on the maple state allocation. 1193 * @mas: The maple state 1194 * @used: The used maple node 1195 * 1196 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and 1197 * requested node count as necessary. 
1198 */ mas_push_node(struct ma_state * mas,struct maple_node * used)1199 static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) 1200 { 1201 struct maple_alloc *reuse = (struct maple_alloc *)used; 1202 struct maple_alloc *head = mas->alloc; 1203 unsigned long count; 1204 unsigned int requested = mas_alloc_req(mas); 1205 1206 count = mas_allocated(mas); 1207 1208 reuse->request_count = 0; 1209 reuse->node_count = 0; 1210 if (count) { 1211 if (head->node_count < MAPLE_ALLOC_SLOTS) { 1212 head->slot[head->node_count++] = reuse; 1213 head->total++; 1214 goto done; 1215 } 1216 reuse->slot[0] = head; 1217 reuse->node_count = 1; 1218 } 1219 1220 reuse->total = count + 1; 1221 mas->alloc = reuse; 1222 done: 1223 if (requested > 1) 1224 mas_set_alloc_req(mas, requested - 1); 1225 } 1226 1227 /* 1228 * mas_alloc_nodes() - Allocate nodes into a maple state 1229 * @mas: The maple state 1230 * @gfp: The GFP Flags 1231 */ mas_alloc_nodes(struct ma_state * mas,gfp_t gfp)1232 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) 1233 { 1234 struct maple_alloc *node; 1235 unsigned long allocated = mas_allocated(mas); 1236 unsigned int requested = mas_alloc_req(mas); 1237 unsigned int count; 1238 void **slots = NULL; 1239 unsigned int max_req = 0; 1240 1241 if (!requested) 1242 return; 1243 1244 mas_set_alloc_req(mas, 0); 1245 if (mas->mas_flags & MA_STATE_PREALLOC) { 1246 if (allocated) 1247 return; 1248 BUG_ON(!allocated); 1249 WARN_ON(!allocated); 1250 } 1251 1252 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { 1253 node = (struct maple_alloc *)mt_alloc_one(gfp); 1254 if (!node) 1255 goto nomem_one; 1256 1257 if (allocated) { 1258 node->slot[0] = mas->alloc; 1259 node->node_count = 1; 1260 } else { 1261 node->node_count = 0; 1262 } 1263 1264 mas->alloc = node; 1265 node->total = ++allocated; 1266 node->request_count = 0; 1267 requested--; 1268 } 1269 1270 node = mas->alloc; 1271 while (requested) { 1272 max_req = MAPLE_ALLOC_SLOTS - node->node_count; 1273 slots = (void **)&node->slot[node->node_count]; 1274 max_req = min(requested, max_req); 1275 count = mt_alloc_bulk(gfp, max_req, slots); 1276 if (!count) 1277 goto nomem_bulk; 1278 1279 if (node->node_count == 0) { 1280 node->slot[0]->node_count = 0; 1281 node->slot[0]->request_count = 0; 1282 } 1283 1284 node->node_count += count; 1285 allocated += count; 1286 /* find a non-full node*/ 1287 do { 1288 node = node->slot[0]; 1289 } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS)); 1290 requested -= count; 1291 } 1292 mas->alloc->total = allocated; 1293 return; 1294 1295 nomem_bulk: 1296 /* Clean up potential freed allocations on bulk failure */ 1297 memset(slots, 0, max_req * sizeof(unsigned long)); 1298 mas->alloc->total = allocated; 1299 nomem_one: 1300 mas_set_alloc_req(mas, requested); 1301 mas_set_err(mas, -ENOMEM); 1302 } 1303 1304 /* 1305 * mas_free() - Free an encoded maple node 1306 * @mas: The maple state 1307 * @used: The encoded maple node to free. 1308 * 1309 * Uses rcu free if necessary, pushes @used back on the maple state allocations 1310 * otherwise. 
1311 */ mas_free(struct ma_state * mas,struct maple_enode * used)1312 static inline void mas_free(struct ma_state *mas, struct maple_enode *used) 1313 { 1314 struct maple_node *tmp = mte_to_node(used); 1315 1316 if (mt_in_rcu(mas->tree)) 1317 ma_free_rcu(tmp); 1318 else 1319 mas_push_node(mas, tmp); 1320 } 1321 1322 /* 1323 * mas_node_count_gfp() - Check if enough nodes are allocated and request more 1324 * if there is not enough nodes. 1325 * @mas: The maple state 1326 * @count: The number of nodes needed 1327 * @gfp: the gfp flags 1328 */ mas_node_count_gfp(struct ma_state * mas,int count,gfp_t gfp)1329 static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) 1330 { 1331 unsigned long allocated = mas_allocated(mas); 1332 1333 if (allocated < count) { 1334 mas_set_alloc_req(mas, count - allocated); 1335 mas_alloc_nodes(mas, gfp); 1336 } 1337 } 1338 1339 /* 1340 * mas_node_count() - Check if enough nodes are allocated and request more if 1341 * there is not enough nodes. 1342 * @mas: The maple state 1343 * @count: The number of nodes needed 1344 * 1345 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. 1346 */ mas_node_count(struct ma_state * mas,int count)1347 static void mas_node_count(struct ma_state *mas, int count) 1348 { 1349 return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); 1350 } 1351 1352 /* 1353 * mas_start() - Sets up maple state for operations. 1354 * @mas: The maple state. 1355 * 1356 * If mas->status == mas_start, then set the min, max and depth to 1357 * defaults. 1358 * 1359 * Return: 1360 * - If mas->node is an error or not mas_start, return NULL. 1361 * - If it's an empty tree: NULL & mas->status == ma_none 1362 * - If it's a single entry: The entry & mas->status == ma_root 1363 * - If it's a tree: NULL & mas->status == ma_active 1364 */ mas_start(struct ma_state * mas)1365 static inline struct maple_enode *mas_start(struct ma_state *mas) 1366 { 1367 if (likely(mas_is_start(mas))) { 1368 struct maple_enode *root; 1369 1370 mas->min = 0; 1371 mas->max = ULONG_MAX; 1372 1373 retry: 1374 mas->depth = 0; 1375 root = mas_root(mas); 1376 /* Tree with nodes */ 1377 if (likely(xa_is_node(root))) { 1378 mas->depth = 1; 1379 mas->status = ma_active; 1380 mas->node = mte_safe_root(root); 1381 mas->offset = 0; 1382 if (mte_dead_node(mas->node)) 1383 goto retry; 1384 1385 return NULL; 1386 } 1387 1388 mas->node = NULL; 1389 /* empty tree */ 1390 if (unlikely(!root)) { 1391 mas->status = ma_none; 1392 mas->offset = MAPLE_NODE_SLOTS; 1393 return NULL; 1394 } 1395 1396 /* Single entry tree */ 1397 mas->status = ma_root; 1398 mas->offset = MAPLE_NODE_SLOTS; 1399 1400 /* Single entry tree. */ 1401 if (mas->index > 0) 1402 return NULL; 1403 1404 return root; 1405 } 1406 1407 return NULL; 1408 } 1409 1410 /* 1411 * ma_data_end() - Find the end of the data in a node. 1412 * @node: The maple node 1413 * @type: The maple node type 1414 * @pivots: The array of pivots in the node 1415 * @max: The maximum value in the node 1416 * 1417 * Uses metadata to find the end of the data when possible. 1418 * Return: The zero indexed last slot with data (may be null). 
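 *
 * For example, if the last pivot of the node is zero the end is read from the
 * metadata, if the last pivot equals @max the node ends exactly at that
 * offset, and otherwise every slot is in use and mt_pivots[type] is returned.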
1419 */ ma_data_end(struct maple_node * node,enum maple_type type,unsigned long * pivots,unsigned long max)1420 static __always_inline unsigned char ma_data_end(struct maple_node *node, 1421 enum maple_type type, unsigned long *pivots, unsigned long max) 1422 { 1423 unsigned char offset; 1424 1425 if (!pivots) 1426 return 0; 1427 1428 if (type == maple_arange_64) 1429 return ma_meta_end(node, type); 1430 1431 offset = mt_pivots[type] - 1; 1432 if (likely(!pivots[offset])) 1433 return ma_meta_end(node, type); 1434 1435 if (likely(pivots[offset] == max)) 1436 return offset; 1437 1438 return mt_pivots[type]; 1439 } 1440 1441 /* 1442 * mas_data_end() - Find the end of the data (slot). 1443 * @mas: the maple state 1444 * 1445 * This method is optimized to check the metadata of a node if the node type 1446 * supports data end metadata. 1447 * 1448 * Return: The zero indexed last slot with data (may be null). 1449 */ mas_data_end(struct ma_state * mas)1450 static inline unsigned char mas_data_end(struct ma_state *mas) 1451 { 1452 enum maple_type type; 1453 struct maple_node *node; 1454 unsigned char offset; 1455 unsigned long *pivots; 1456 1457 type = mte_node_type(mas->node); 1458 node = mas_mn(mas); 1459 if (type == maple_arange_64) 1460 return ma_meta_end(node, type); 1461 1462 pivots = ma_pivots(node, type); 1463 if (unlikely(ma_dead_node(node))) 1464 return 0; 1465 1466 offset = mt_pivots[type] - 1; 1467 if (likely(!pivots[offset])) 1468 return ma_meta_end(node, type); 1469 1470 if (likely(pivots[offset] == mas->max)) 1471 return offset; 1472 1473 return mt_pivots[type]; 1474 } 1475 1476 /* 1477 * mas_leaf_max_gap() - Returns the largest gap in a leaf node 1478 * @mas: the maple state 1479 * 1480 * Return: The maximum gap in the leaf. 1481 */ mas_leaf_max_gap(struct ma_state * mas)1482 static unsigned long mas_leaf_max_gap(struct ma_state *mas) 1483 { 1484 enum maple_type mt; 1485 unsigned long pstart, gap, max_gap; 1486 struct maple_node *mn; 1487 unsigned long *pivots; 1488 void __rcu **slots; 1489 unsigned char i; 1490 unsigned char max_piv; 1491 1492 mt = mte_node_type(mas->node); 1493 mn = mas_mn(mas); 1494 slots = ma_slots(mn, mt); 1495 max_gap = 0; 1496 if (unlikely(ma_is_dense(mt))) { 1497 gap = 0; 1498 for (i = 0; i < mt_slots[mt]; i++) { 1499 if (slots[i]) { 1500 if (gap > max_gap) 1501 max_gap = gap; 1502 gap = 0; 1503 } else { 1504 gap++; 1505 } 1506 } 1507 if (gap > max_gap) 1508 max_gap = gap; 1509 return max_gap; 1510 } 1511 1512 /* 1513 * Check the first implied pivot optimizes the loop below and slot 1 may 1514 * be skipped if there is a gap in slot 0. 1515 */ 1516 pivots = ma_pivots(mn, mt); 1517 if (likely(!slots[0])) { 1518 max_gap = pivots[0] - mas->min + 1; 1519 i = 2; 1520 } else { 1521 i = 1; 1522 } 1523 1524 /* reduce max_piv as the special case is checked before the loop */ 1525 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; 1526 /* 1527 * Check end implied pivot which can only be a gap on the right most 1528 * node. 1529 */ 1530 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { 1531 gap = ULONG_MAX - pivots[max_piv]; 1532 if (gap > max_gap) 1533 max_gap = gap; 1534 1535 if (max_gap > pivots[max_piv] - mas->min) 1536 return max_gap; 1537 } 1538 1539 for (; i <= max_piv; i++) { 1540 /* data == no gap. */ 1541 if (likely(slots[i])) 1542 continue; 1543 1544 pstart = pivots[i - 1]; 1545 gap = pivots[i] - pstart; 1546 if (gap > max_gap) 1547 max_gap = gap; 1548 1549 /* There cannot be two gaps in a row. 
*/ 1550 i++; 1551 } 1552 return max_gap; 1553 } 1554 1555 /* 1556 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) 1557 * @node: The maple node 1558 * @gaps: The pointer to the gaps 1559 * @mt: The maple node type 1560 * @off: Pointer to store the offset location of the gap. 1561 * 1562 * Uses the metadata data end to scan backwards across set gaps. 1563 * 1564 * Return: The maximum gap value 1565 */ 1566 static inline unsigned long ma_max_gap(struct maple_node * node,unsigned long * gaps,enum maple_type mt,unsigned char * off)1567 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, 1568 unsigned char *off) 1569 { 1570 unsigned char offset, i; 1571 unsigned long max_gap = 0; 1572 1573 i = offset = ma_meta_end(node, mt); 1574 do { 1575 if (gaps[i] > max_gap) { 1576 max_gap = gaps[i]; 1577 offset = i; 1578 } 1579 } while (i--); 1580 1581 *off = offset; 1582 return max_gap; 1583 } 1584 1585 /* 1586 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. 1587 * @mas: The maple state. 1588 * 1589 * Return: The gap value. 1590 */ mas_max_gap(struct ma_state * mas)1591 static inline unsigned long mas_max_gap(struct ma_state *mas) 1592 { 1593 unsigned long *gaps; 1594 unsigned char offset; 1595 enum maple_type mt; 1596 struct maple_node *node; 1597 1598 mt = mte_node_type(mas->node); 1599 if (ma_is_leaf(mt)) 1600 return mas_leaf_max_gap(mas); 1601 1602 node = mas_mn(mas); 1603 MAS_BUG_ON(mas, mt != maple_arange_64); 1604 offset = ma_meta_gap(node); 1605 gaps = ma_gaps(node, mt); 1606 return gaps[offset]; 1607 } 1608 1609 /* 1610 * mas_parent_gap() - Set the parent gap and any gaps above, as needed 1611 * @mas: The maple state 1612 * @offset: The gap offset in the parent to set 1613 * @new: The new gap value. 1614 * 1615 * Set the parent gap then continue to set the gap upwards, using the metadata 1616 * of the parent to see if it is necessary to check the node above. 1617 */ mas_parent_gap(struct ma_state * mas,unsigned char offset,unsigned long new)1618 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, 1619 unsigned long new) 1620 { 1621 unsigned long meta_gap = 0; 1622 struct maple_node *pnode; 1623 struct maple_enode *penode; 1624 unsigned long *pgaps; 1625 unsigned char meta_offset; 1626 enum maple_type pmt; 1627 1628 pnode = mte_parent(mas->node); 1629 pmt = mas_parent_type(mas, mas->node); 1630 penode = mt_mk_node(pnode, pmt); 1631 pgaps = ma_gaps(pnode, pmt); 1632 1633 ascend: 1634 MAS_BUG_ON(mas, pmt != maple_arange_64); 1635 meta_offset = ma_meta_gap(pnode); 1636 meta_gap = pgaps[meta_offset]; 1637 1638 pgaps[offset] = new; 1639 1640 if (meta_gap == new) 1641 return; 1642 1643 if (offset != meta_offset) { 1644 if (meta_gap > new) 1645 return; 1646 1647 ma_set_meta_gap(pnode, pmt, offset); 1648 } else if (new < meta_gap) { 1649 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); 1650 ma_set_meta_gap(pnode, pmt, meta_offset); 1651 } 1652 1653 if (ma_is_root(pnode)) 1654 return; 1655 1656 /* Go to the parent node. */ 1657 pnode = mte_parent(penode); 1658 pmt = mas_parent_type(mas, penode); 1659 pgaps = ma_gaps(pnode, pmt); 1660 offset = mte_parent_slot(penode); 1661 penode = mt_mk_node(pnode, pmt); 1662 goto ascend; 1663 } 1664 1665 /* 1666 * mas_update_gap() - Update a nodes gaps and propagate up if necessary. 1667 * @mas: the maple state. 
1668 */ mas_update_gap(struct ma_state * mas)1669 static inline void mas_update_gap(struct ma_state *mas) 1670 { 1671 unsigned char pslot; 1672 unsigned long p_gap; 1673 unsigned long max_gap; 1674 1675 if (!mt_is_alloc(mas->tree)) 1676 return; 1677 1678 if (mte_is_root(mas->node)) 1679 return; 1680 1681 max_gap = mas_max_gap(mas); 1682 1683 pslot = mte_parent_slot(mas->node); 1684 p_gap = ma_gaps(mte_parent(mas->node), 1685 mas_parent_type(mas, mas->node))[pslot]; 1686 1687 if (p_gap != max_gap) 1688 mas_parent_gap(mas, pslot, max_gap); 1689 } 1690 1691 /* 1692 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to 1693 * @parent with the slot encoded. 1694 * @mas: the maple state (for the tree) 1695 * @parent: the maple encoded node containing the children. 1696 */ mas_adopt_children(struct ma_state * mas,struct maple_enode * parent)1697 static inline void mas_adopt_children(struct ma_state *mas, 1698 struct maple_enode *parent) 1699 { 1700 enum maple_type type = mte_node_type(parent); 1701 struct maple_node *node = mte_to_node(parent); 1702 void __rcu **slots = ma_slots(node, type); 1703 unsigned long *pivots = ma_pivots(node, type); 1704 struct maple_enode *child; 1705 unsigned char offset; 1706 1707 offset = ma_data_end(node, type, pivots, mas->max); 1708 do { 1709 child = mas_slot_locked(mas, slots, offset); 1710 mas_set_parent(mas, child, parent, offset); 1711 } while (offset--); 1712 } 1713 1714 /* 1715 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old 1716 * node as dead. 1717 * @mas: the maple state with the new node 1718 * @old_enode: The old maple encoded node to replace. 1719 */ mas_put_in_tree(struct ma_state * mas,struct maple_enode * old_enode)1720 static inline void mas_put_in_tree(struct ma_state *mas, 1721 struct maple_enode *old_enode) 1722 __must_hold(mas->tree->ma_lock) 1723 { 1724 unsigned char offset; 1725 void __rcu **slots; 1726 1727 if (mte_is_root(mas->node)) { 1728 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); 1729 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 1730 mas_set_height(mas); 1731 } else { 1732 1733 offset = mte_parent_slot(mas->node); 1734 slots = ma_slots(mte_parent(mas->node), 1735 mas_parent_type(mas, mas->node)); 1736 rcu_assign_pointer(slots[offset], mas->node); 1737 } 1738 1739 mte_set_node_dead(old_enode); 1740 } 1741 1742 /* 1743 * mas_replace_node() - Replace a node by putting it in the tree, marking it 1744 * dead, and freeing it. 1745 * the parent encoding to locate the maple node in the tree. 1746 * @mas: the ma_state with @mas->node pointing to the new node. 1747 * @old_enode: The old maple encoded node. 1748 */ mas_replace_node(struct ma_state * mas,struct maple_enode * old_enode)1749 static inline void mas_replace_node(struct ma_state *mas, 1750 struct maple_enode *old_enode) 1751 __must_hold(mas->tree->ma_lock) 1752 { 1753 mas_put_in_tree(mas, old_enode); 1754 mas_free(mas, old_enode); 1755 } 1756 1757 /* 1758 * mas_find_child() - Find a child who has the parent @mas->node. 1759 * @mas: the maple state with the parent. 1760 * @child: the maple state to store the child. 
1761 */ mas_find_child(struct ma_state * mas,struct ma_state * child)1762 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) 1763 __must_hold(mas->tree->ma_lock) 1764 { 1765 enum maple_type mt; 1766 unsigned char offset; 1767 unsigned char end; 1768 unsigned long *pivots; 1769 struct maple_enode *entry; 1770 struct maple_node *node; 1771 void __rcu **slots; 1772 1773 mt = mte_node_type(mas->node); 1774 node = mas_mn(mas); 1775 slots = ma_slots(node, mt); 1776 pivots = ma_pivots(node, mt); 1777 end = ma_data_end(node, mt, pivots, mas->max); 1778 for (offset = mas->offset; offset <= end; offset++) { 1779 entry = mas_slot_locked(mas, slots, offset); 1780 if (mte_parent(entry) == node) { 1781 *child = *mas; 1782 mas->offset = offset + 1; 1783 child->offset = offset; 1784 mas_descend(child); 1785 child->offset = 0; 1786 return true; 1787 } 1788 } 1789 return false; 1790 } 1791 1792 /* 1793 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the 1794 * old data or set b_node->b_end. 1795 * @b_node: the maple_big_node 1796 * @shift: the shift count 1797 */ mab_shift_right(struct maple_big_node * b_node,unsigned char shift)1798 static inline void mab_shift_right(struct maple_big_node *b_node, 1799 unsigned char shift) 1800 { 1801 unsigned long size = b_node->b_end * sizeof(unsigned long); 1802 1803 memmove(b_node->pivot + shift, b_node->pivot, size); 1804 memmove(b_node->slot + shift, b_node->slot, size); 1805 if (b_node->type == maple_arange_64) 1806 memmove(b_node->gap + shift, b_node->gap, size); 1807 } 1808 1809 /* 1810 * mab_middle_node() - Check if a middle node is needed (unlikely) 1811 * @b_node: the maple_big_node that contains the data. 1812 * @split: the potential split location 1813 * @slot_count: the size that can be stored in a single node being considered. 1814 * 1815 * Return: true if a middle node is required. 1816 */ mab_middle_node(struct maple_big_node * b_node,int split,unsigned char slot_count)1817 static inline bool mab_middle_node(struct maple_big_node *b_node, int split, 1818 unsigned char slot_count) 1819 { 1820 unsigned char size = b_node->b_end; 1821 1822 if (size >= 2 * slot_count) 1823 return true; 1824 1825 if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) 1826 return true; 1827 1828 return false; 1829 } 1830 1831 /* 1832 * mab_no_null_split() - ensure the split doesn't fall on a NULL 1833 * @b_node: the maple_big_node with the data 1834 * @split: the suggested split location 1835 * @slot_count: the number of slots in the node being considered. 1836 * 1837 * Return: the split location. 1838 */ mab_no_null_split(struct maple_big_node * b_node,unsigned char split,unsigned char slot_count)1839 static inline int mab_no_null_split(struct maple_big_node *b_node, 1840 unsigned char split, unsigned char slot_count) 1841 { 1842 if (!b_node->slot[split]) { 1843 /* 1844 * If the split is less than the max slot && the right side will 1845 * still be sufficient, then increment the split on NULL. 1846 */ 1847 if ((split < slot_count - 1) && 1848 (b_node->b_end - split) > (mt_min_slots[b_node->type])) 1849 split++; 1850 else 1851 split--; 1852 } 1853 return split; 1854 } 1855 1856 /* 1857 * mab_calc_split() - Calculate the split location and if there needs to be two 1858 * splits. 1859 * @mas: The maple state 1860 * @bn: The maple_big_node with the data 1861 * @mid_split: The second split, if required. 0 otherwise. 1862 * 1863 * Return: The first split location. The middle split is set in @mid_split. 
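 *
 * For example (illustrative, assuming 16 slots per node), a big node with
 * b_end == 32 cannot fit into two nodes, so a 3-way split is used: split
 * becomes 32 / 3 == 10 and *mid_split becomes 20, before both values are
 * adjusted by mab_no_null_split() to avoid ending a node on a NULL entry.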
1864 */ mab_calc_split(struct ma_state * mas,struct maple_big_node * bn,unsigned char * mid_split)1865 static inline int mab_calc_split(struct ma_state *mas, 1866 struct maple_big_node *bn, unsigned char *mid_split) 1867 { 1868 unsigned char b_end = bn->b_end; 1869 int split = b_end / 2; /* Assume equal split. */ 1870 unsigned char slot_count = mt_slots[bn->type]; 1871 1872 /* 1873 * To support gap tracking, all NULL entries are kept together and a node cannot 1874 * end on a NULL entry, with the exception of the left-most leaf. The 1875 * limitation means that the split of a node must be checked for this condition 1876 * and be able to put more data in one direction or the other. 1877 */ 1878 if (unlikely((mas->mas_flags & MA_STATE_BULK))) { 1879 *mid_split = 0; 1880 split = b_end - mt_min_slots[bn->type]; 1881 1882 if (!ma_is_leaf(bn->type)) 1883 return split; 1884 1885 mas->mas_flags |= MA_STATE_REBALANCE; 1886 if (!bn->slot[split]) 1887 split--; 1888 return split; 1889 } 1890 1891 /* 1892 * Although extremely rare, it is possible to enter what is known as the 3-way 1893 * split scenario. The 3-way split comes about by means of a store of a range 1894 * that overwrites the end and beginning of two full nodes. The result is a set 1895 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can 1896 * also be located in different parent nodes which are also full. This can 1897 * carry upwards all the way to the root in the worst case. 1898 */ 1899 if (unlikely(mab_middle_node(bn, split, slot_count))) { 1900 split = b_end / 3; 1901 *mid_split = split * 2; 1902 } else { 1903 *mid_split = 0; 1904 } 1905 1906 /* Avoid ending a node on a NULL entry */ 1907 split = mab_no_null_split(bn, split, slot_count); 1908 1909 if (unlikely(*mid_split)) 1910 *mid_split = mab_no_null_split(bn, *mid_split, slot_count); 1911 1912 return split; 1913 } 1914 1915 /* 1916 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node 1917 * and set @b_node->b_end to the next free slot. 1918 * @mas: The maple state 1919 * @mas_start: The starting slot to copy 1920 * @mas_end: The end slot to copy (inclusively) 1921 * @b_node: The maple_big_node to place the data 1922 * @mab_start: The starting location in maple_big_node to store the data. 
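 *
 * For example, mas_mab_cp(mas, 0, 4, b_node, 0) copies offsets 0-4 of the
 * node into @b_node offsets 0-4 and leaves @b_node->b_end at 5, unless the
 * copy is cut short by a zero pivot or by reaching @mas->max.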
1923 */ mas_mab_cp(struct ma_state * mas,unsigned char mas_start,unsigned char mas_end,struct maple_big_node * b_node,unsigned char mab_start)1924 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, 1925 unsigned char mas_end, struct maple_big_node *b_node, 1926 unsigned char mab_start) 1927 { 1928 enum maple_type mt; 1929 struct maple_node *node; 1930 void __rcu **slots; 1931 unsigned long *pivots, *gaps; 1932 int i = mas_start, j = mab_start; 1933 unsigned char piv_end; 1934 1935 node = mas_mn(mas); 1936 mt = mte_node_type(mas->node); 1937 pivots = ma_pivots(node, mt); 1938 if (!i) { 1939 b_node->pivot[j] = pivots[i++]; 1940 if (unlikely(i > mas_end)) 1941 goto complete; 1942 j++; 1943 } 1944 1945 piv_end = min(mas_end, mt_pivots[mt]); 1946 for (; i < piv_end; i++, j++) { 1947 b_node->pivot[j] = pivots[i]; 1948 if (unlikely(!b_node->pivot[j])) 1949 goto complete; 1950 1951 if (unlikely(mas->max == b_node->pivot[j])) 1952 goto complete; 1953 } 1954 1955 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); 1956 1957 complete: 1958 b_node->b_end = ++j; 1959 j -= mab_start; 1960 slots = ma_slots(node, mt); 1961 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); 1962 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { 1963 gaps = ma_gaps(node, mt); 1964 memcpy(b_node->gap + mab_start, gaps + mas_start, 1965 sizeof(unsigned long) * j); 1966 } 1967 } 1968 1969 /* 1970 * mas_leaf_set_meta() - Set the metadata of a leaf if possible. 1971 * @node: The maple node 1972 * @mt: The maple type 1973 * @end: The node end 1974 */ mas_leaf_set_meta(struct maple_node * node,enum maple_type mt,unsigned char end)1975 static inline void mas_leaf_set_meta(struct maple_node *node, 1976 enum maple_type mt, unsigned char end) 1977 { 1978 if (end < mt_slots[mt] - 1) 1979 ma_set_meta(node, mt, 0, end); 1980 } 1981 1982 /* 1983 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. 1984 * @b_node: the maple_big_node that has the data 1985 * @mab_start: the start location in @b_node. 1986 * @mab_end: The end location in @b_node (inclusively) 1987 * @mas: The maple state with the maple encoded node. 
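 * @new_max: Update @mas->max to the last pivot copied when true.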
1988 */ mab_mas_cp(struct maple_big_node * b_node,unsigned char mab_start,unsigned char mab_end,struct ma_state * mas,bool new_max)1989 static inline void mab_mas_cp(struct maple_big_node *b_node, 1990 unsigned char mab_start, unsigned char mab_end, 1991 struct ma_state *mas, bool new_max) 1992 { 1993 int i, j = 0; 1994 enum maple_type mt = mte_node_type(mas->node); 1995 struct maple_node *node = mte_to_node(mas->node); 1996 void __rcu **slots = ma_slots(node, mt); 1997 unsigned long *pivots = ma_pivots(node, mt); 1998 unsigned long *gaps = NULL; 1999 unsigned char end; 2000 2001 if (mab_end - mab_start > mt_pivots[mt]) 2002 mab_end--; 2003 2004 if (!pivots[mt_pivots[mt] - 1]) 2005 slots[mt_pivots[mt]] = NULL; 2006 2007 i = mab_start; 2008 do { 2009 pivots[j++] = b_node->pivot[i++]; 2010 } while (i <= mab_end && likely(b_node->pivot[i])); 2011 2012 memcpy(slots, b_node->slot + mab_start, 2013 sizeof(void *) * (i - mab_start)); 2014 2015 if (new_max) 2016 mas->max = b_node->pivot[i - 1]; 2017 2018 end = j - 1; 2019 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { 2020 unsigned long max_gap = 0; 2021 unsigned char offset = 0; 2022 2023 gaps = ma_gaps(node, mt); 2024 do { 2025 gaps[--j] = b_node->gap[--i]; 2026 if (gaps[j] > max_gap) { 2027 offset = j; 2028 max_gap = gaps[j]; 2029 } 2030 } while (j); 2031 2032 ma_set_meta(node, mt, offset, end); 2033 } else { 2034 mas_leaf_set_meta(node, mt, end); 2035 } 2036 } 2037 2038 /* 2039 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 2040 * @mas: The maple state 2041 * @end: The maple node end 2042 * @mt: The maple node type 2043 */ mas_bulk_rebalance(struct ma_state * mas,unsigned char end,enum maple_type mt)2044 static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, 2045 enum maple_type mt) 2046 { 2047 if (!(mas->mas_flags & MA_STATE_BULK)) 2048 return; 2049 2050 if (mte_is_root(mas->node)) 2051 return; 2052 2053 if (end > mt_min_slots[mt]) { 2054 mas->mas_flags &= ~MA_STATE_REBALANCE; 2055 return; 2056 } 2057 } 2058 2059 /* 2060 * mas_store_b_node() - Store an @entry into the b_node while also copying the 2061 * data from a maple encoded node. 2062 * @wr_mas: the maple write state 2063 * @b_node: the maple_big_node to fill with data 2064 * @offset_end: the offset to end copying 2065 * 2066 * Return: The actual end of the data stored in @b_node 2067 */ mas_store_b_node(struct ma_wr_state * wr_mas,struct maple_big_node * b_node,unsigned char offset_end)2068 static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, 2069 struct maple_big_node *b_node, unsigned char offset_end) 2070 { 2071 unsigned char slot; 2072 unsigned char b_end; 2073 /* Possible underflow of piv will wrap back to 0 before use. */ 2074 unsigned long piv; 2075 struct ma_state *mas = wr_mas->mas; 2076 2077 b_node->type = wr_mas->type; 2078 b_end = 0; 2079 slot = mas->offset; 2080 if (slot) { 2081 /* Copy start data up to insert. */ 2082 mas_mab_cp(mas, 0, slot - 1, b_node, 0); 2083 b_end = b_node->b_end; 2084 piv = b_node->pivot[b_end - 1]; 2085 } else 2086 piv = mas->min - 1; 2087 2088 if (piv + 1 < mas->index) { 2089 /* Handle range starting after old range */ 2090 b_node->slot[b_end] = wr_mas->content; 2091 if (!wr_mas->content) 2092 b_node->gap[b_end] = mas->index - 1 - piv; 2093 b_node->pivot[b_end++] = mas->index - 1; 2094 } 2095 2096 /* Store the new entry. */ 2097 mas->offset = b_end; 2098 b_node->slot[b_end] = wr_mas->entry; 2099 b_node->pivot[b_end] = mas->last; 2100 2101 /* Appended. 
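 * The new entry ends at or beyond this node's maximum, so there is no
 * remaining data to copy after it.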
*/ 2102 if (mas->last >= mas->max) 2103 goto b_end; 2104 2105 /* Handle new range ending before old range ends */ 2106 piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); 2107 if (piv > mas->last) { 2108 if (piv == ULONG_MAX) 2109 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); 2110 2111 if (offset_end != slot) 2112 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 2113 offset_end); 2114 2115 b_node->slot[++b_end] = wr_mas->content; 2116 if (!wr_mas->content) 2117 b_node->gap[b_end] = piv - mas->last + 1; 2118 b_node->pivot[b_end] = piv; 2119 } 2120 2121 slot = offset_end + 1; 2122 if (slot > mas->end) 2123 goto b_end; 2124 2125 /* Copy end data to the end of the node. */ 2126 mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end); 2127 b_node->b_end--; 2128 return; 2129 2130 b_end: 2131 b_node->b_end = b_end; 2132 } 2133 2134 /* 2135 * mas_prev_sibling() - Find the previous node with the same parent. 2136 * @mas: the maple state 2137 * 2138 * Return: True if there is a previous sibling, false otherwise. 2139 */ mas_prev_sibling(struct ma_state * mas)2140 static inline bool mas_prev_sibling(struct ma_state *mas) 2141 { 2142 unsigned int p_slot = mte_parent_slot(mas->node); 2143 2144 /* For root node, p_slot is set to 0 by mte_parent_slot(). */ 2145 if (!p_slot) 2146 return false; 2147 2148 mas_ascend(mas); 2149 mas->offset = p_slot - 1; 2150 mas_descend(mas); 2151 return true; 2152 } 2153 2154 /* 2155 * mas_next_sibling() - Find the next node with the same parent. 2156 * @mas: the maple state 2157 * 2158 * Return: true if there is a next sibling, false otherwise. 2159 */ mas_next_sibling(struct ma_state * mas)2160 static inline bool mas_next_sibling(struct ma_state *mas) 2161 { 2162 MA_STATE(parent, mas->tree, mas->index, mas->last); 2163 2164 if (mte_is_root(mas->node)) 2165 return false; 2166 2167 parent = *mas; 2168 mas_ascend(&parent); 2169 parent.offset = mte_parent_slot(mas->node) + 1; 2170 if (parent.offset > mas_data_end(&parent)) 2171 return false; 2172 2173 *mas = parent; 2174 mas_descend(mas); 2175 return true; 2176 } 2177 2178 /* 2179 * mas_node_or_none() - Set the enode and state. 2180 * @mas: the maple state 2181 * @enode: The encoded maple node. 2182 * 2183 * Set the node to the enode and the status. 2184 */ mas_node_or_none(struct ma_state * mas,struct maple_enode * enode)2185 static inline void mas_node_or_none(struct ma_state *mas, 2186 struct maple_enode *enode) 2187 { 2188 if (enode) { 2189 mas->node = enode; 2190 mas->status = ma_active; 2191 } else { 2192 mas->node = NULL; 2193 mas->status = ma_none; 2194 } 2195 } 2196 2197 /* 2198 * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 2199 * If @mas->index cannot be found within the containing 2200 * node, we traverse to the last entry in the node. 2201 * @wr_mas: The maple write state 2202 * 2203 * Uses mas_slot_locked() and does not need to worry about dead nodes. 
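 *
 * For example, with pivots {10, 20, 30} and @mas->index == 25, the walk
 * (starting at offset 0) stops at offset 2 with r_min == 21 and
 * r_max == 30.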
2204 */ mas_wr_node_walk(struct ma_wr_state * wr_mas)2205 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) 2206 { 2207 struct ma_state *mas = wr_mas->mas; 2208 unsigned char count, offset; 2209 2210 if (unlikely(ma_is_dense(wr_mas->type))) { 2211 wr_mas->r_max = wr_mas->r_min = mas->index; 2212 mas->offset = mas->index = mas->min; 2213 return; 2214 } 2215 2216 wr_mas->node = mas_mn(wr_mas->mas); 2217 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); 2218 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, 2219 wr_mas->pivots, mas->max); 2220 offset = mas->offset; 2221 2222 while (offset < count && mas->index > wr_mas->pivots[offset]) 2223 offset++; 2224 2225 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; 2226 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); 2227 wr_mas->offset_end = mas->offset = offset; 2228 } 2229 2230 /* 2231 * mast_rebalance_next() - Rebalance against the next node 2232 * @mast: The maple subtree state 2233 */ mast_rebalance_next(struct maple_subtree_state * mast)2234 static inline void mast_rebalance_next(struct maple_subtree_state *mast) 2235 { 2236 unsigned char b_end = mast->bn->b_end; 2237 2238 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), 2239 mast->bn, b_end); 2240 mast->orig_r->last = mast->orig_r->max; 2241 } 2242 2243 /* 2244 * mast_rebalance_prev() - Rebalance against the previous node 2245 * @mast: The maple subtree state 2246 */ mast_rebalance_prev(struct maple_subtree_state * mast)2247 static inline void mast_rebalance_prev(struct maple_subtree_state *mast) 2248 { 2249 unsigned char end = mas_data_end(mast->orig_l) + 1; 2250 unsigned char b_end = mast->bn->b_end; 2251 2252 mab_shift_right(mast->bn, end); 2253 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); 2254 mast->l->min = mast->orig_l->min; 2255 mast->orig_l->index = mast->orig_l->min; 2256 mast->bn->b_end = end + b_end; 2257 mast->l->offset += end; 2258 } 2259 2260 /* 2261 * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring 2262 * the node to the right. Checking the nodes to the right then the left at each 2263 * level upwards until root is reached. 2264 * Data is copied into the @mast->bn. 2265 * @mast: The maple_subtree_state. 2266 */ 2267 static inline mast_spanning_rebalance(struct maple_subtree_state * mast)2268 bool mast_spanning_rebalance(struct maple_subtree_state *mast) 2269 { 2270 struct ma_state r_tmp = *mast->orig_r; 2271 struct ma_state l_tmp = *mast->orig_l; 2272 unsigned char depth = 0; 2273 2274 do { 2275 mas_ascend(mast->orig_r); 2276 mas_ascend(mast->orig_l); 2277 depth++; 2278 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { 2279 mast->orig_r->offset++; 2280 do { 2281 mas_descend(mast->orig_r); 2282 mast->orig_r->offset = 0; 2283 } while (--depth); 2284 2285 mast_rebalance_next(mast); 2286 *mast->orig_l = l_tmp; 2287 return true; 2288 } else if (mast->orig_l->offset != 0) { 2289 mast->orig_l->offset--; 2290 do { 2291 mas_descend(mast->orig_l); 2292 mast->orig_l->offset = 2293 mas_data_end(mast->orig_l); 2294 } while (--depth); 2295 2296 mast_rebalance_prev(mast); 2297 *mast->orig_r = r_tmp; 2298 return true; 2299 } 2300 } while (!mte_is_root(mast->orig_r->node)); 2301 2302 *mast->orig_r = r_tmp; 2303 *mast->orig_l = l_tmp; 2304 return false; 2305 } 2306 2307 /* 2308 * mast_ascend() - Ascend the original left and right maple states. 2309 * @mast: the maple subtree state. 2310 * 2311 * Ascend the original left and right sides. 
Set the offsets to point to the 2312 * data already in the new tree (@mast->l and @mast->r). 2313 */ mast_ascend(struct maple_subtree_state * mast)2314 static inline void mast_ascend(struct maple_subtree_state *mast) 2315 { 2316 MA_WR_STATE(wr_mas, mast->orig_r, NULL); 2317 mas_ascend(mast->orig_l); 2318 mas_ascend(mast->orig_r); 2319 2320 mast->orig_r->offset = 0; 2321 mast->orig_r->index = mast->r->max; 2322 /* last should be larger than or equal to index */ 2323 if (mast->orig_r->last < mast->orig_r->index) 2324 mast->orig_r->last = mast->orig_r->index; 2325 2326 wr_mas.type = mte_node_type(mast->orig_r->node); 2327 mas_wr_node_walk(&wr_mas); 2328 /* Set up the left side of things */ 2329 mast->orig_l->offset = 0; 2330 mast->orig_l->index = mast->l->min; 2331 wr_mas.mas = mast->orig_l; 2332 wr_mas.type = mte_node_type(mast->orig_l->node); 2333 mas_wr_node_walk(&wr_mas); 2334 2335 mast->bn->type = wr_mas.type; 2336 } 2337 2338 /* 2339 * mas_new_ma_node() - Create and return a new maple node. Helper function. 2340 * @mas: the maple state with the allocations. 2341 * @b_node: the maple_big_node with the type encoding. 2342 * 2343 * Use the node type from the maple_big_node to allocate a new node from the 2344 * ma_state. This function exists mainly for code readability. 2345 * 2346 * Return: A new maple encoded node 2347 */ 2348 static inline struct maple_enode mas_new_ma_node(struct ma_state * mas,struct maple_big_node * b_node)2349 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) 2350 { 2351 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); 2352 } 2353 2354 /* 2355 * mas_mab_to_node() - Set up right and middle nodes 2356 * 2357 * @mas: the maple state that contains the allocations. 2358 * @b_node: the node which contains the data. 2359 * @left: The pointer which will have the left node 2360 * @right: The pointer which may have the right node 2361 * @middle: the pointer which may have the middle node (rare) 2362 * @mid_split: the split location for the middle node 2363 * 2364 * Return: the split of left. 2365 */ mas_mab_to_node(struct ma_state * mas,struct maple_big_node * b_node,struct maple_enode ** left,struct maple_enode ** right,struct maple_enode ** middle,unsigned char * mid_split)2366 static inline unsigned char mas_mab_to_node(struct ma_state *mas, 2367 struct maple_big_node *b_node, struct maple_enode **left, 2368 struct maple_enode **right, struct maple_enode **middle, 2369 unsigned char *mid_split) 2370 { 2371 unsigned char split = 0; 2372 unsigned char slot_count = mt_slots[b_node->type]; 2373 2374 *left = mas_new_ma_node(mas, b_node); 2375 *right = NULL; 2376 *middle = NULL; 2377 *mid_split = 0; 2378 2379 if (b_node->b_end < slot_count) { 2380 split = b_node->b_end; 2381 } else { 2382 split = mab_calc_split(mas, b_node, mid_split); 2383 *right = mas_new_ma_node(mas, b_node); 2384 } 2385 2386 if (*mid_split) 2387 *middle = mas_new_ma_node(mas, b_node); 2388 2389 return split; 2390 2391 } 2392 2393 /* 2394 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end 2395 * pointer. 2396 * @b_node: the big node to add the entry 2397 * @mas: the maple state to get the pivot (mas->max) 2398 * @entry: the entry to add, if NULL nothing happens. 
2399 */ mab_set_b_end(struct maple_big_node * b_node,struct ma_state * mas,void * entry)2400 static inline void mab_set_b_end(struct maple_big_node *b_node, 2401 struct ma_state *mas, 2402 void *entry) 2403 { 2404 if (!entry) 2405 return; 2406 2407 b_node->slot[b_node->b_end] = entry; 2408 if (mt_is_alloc(mas->tree)) 2409 b_node->gap[b_node->b_end] = mas_max_gap(mas); 2410 b_node->pivot[b_node->b_end++] = mas->max; 2411 } 2412 2413 /* 2414 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent 2415 * of @mas->node to either @left or @right, depending on @slot and @split 2416 * 2417 * @mas: the maple state with the node that needs a parent 2418 * @left: possible parent 1 2419 * @right: possible parent 2 2420 * @slot: the slot the mas->node was placed 2421 * @split: the split location between @left and @right 2422 */ mas_set_split_parent(struct ma_state * mas,struct maple_enode * left,struct maple_enode * right,unsigned char * slot,unsigned char split)2423 static inline void mas_set_split_parent(struct ma_state *mas, 2424 struct maple_enode *left, 2425 struct maple_enode *right, 2426 unsigned char *slot, unsigned char split) 2427 { 2428 if (mas_is_none(mas)) 2429 return; 2430 2431 if ((*slot) <= split) 2432 mas_set_parent(mas, mas->node, left, *slot); 2433 else if (right) 2434 mas_set_parent(mas, mas->node, right, (*slot) - split - 1); 2435 2436 (*slot)++; 2437 } 2438 2439 /* 2440 * mte_mid_split_check() - Check if the next node passes the mid-split 2441 * @l: Pointer to left encoded maple node. 2442 * @m: Pointer to middle encoded maple node. 2443 * @r: Pointer to right encoded maple node. 2444 * @slot: The offset 2445 * @split: The split location. 2446 * @mid_split: The middle split. 2447 */ mte_mid_split_check(struct maple_enode ** l,struct maple_enode ** r,struct maple_enode * right,unsigned char slot,unsigned char * split,unsigned char mid_split)2448 static inline void mte_mid_split_check(struct maple_enode **l, 2449 struct maple_enode **r, 2450 struct maple_enode *right, 2451 unsigned char slot, 2452 unsigned char *split, 2453 unsigned char mid_split) 2454 { 2455 if (*r == right) 2456 return; 2457 2458 if (slot < mid_split) 2459 return; 2460 2461 *l = *r; 2462 *r = right; 2463 *split = mid_split; 2464 } 2465 2466 /* 2467 * mast_set_split_parents() - Helper function to set three nodes parents. Slot 2468 * is taken from @mast->l. 2469 * @mast: the maple subtree state 2470 * @left: the left node 2471 * @right: the right node 2472 * @split: the split location. 
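 * @middle: the middle node, or NULL if there is no middle node
 * @mid_split: the split location between @middle and @right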
2473 */ mast_set_split_parents(struct maple_subtree_state * mast,struct maple_enode * left,struct maple_enode * middle,struct maple_enode * right,unsigned char split,unsigned char mid_split)2474 static inline void mast_set_split_parents(struct maple_subtree_state *mast, 2475 struct maple_enode *left, 2476 struct maple_enode *middle, 2477 struct maple_enode *right, 2478 unsigned char split, 2479 unsigned char mid_split) 2480 { 2481 unsigned char slot; 2482 struct maple_enode *l = left; 2483 struct maple_enode *r = right; 2484 2485 if (mas_is_none(mast->l)) 2486 return; 2487 2488 if (middle) 2489 r = middle; 2490 2491 slot = mast->l->offset; 2492 2493 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2494 mas_set_split_parent(mast->l, l, r, &slot, split); 2495 2496 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2497 mas_set_split_parent(mast->m, l, r, &slot, split); 2498 2499 mte_mid_split_check(&l, &r, right, slot, &split, mid_split); 2500 mas_set_split_parent(mast->r, l, r, &slot, split); 2501 } 2502 2503 /* 2504 * mas_topiary_node() - Dispose of a single node 2505 * @mas: The maple state for pushing nodes 2506 * @in_rcu: If the tree is in rcu mode 2507 * 2508 * The node will either be RCU freed or pushed back on the maple state. 2509 */ mas_topiary_node(struct ma_state * mas,struct ma_state * tmp_mas,bool in_rcu)2510 static inline void mas_topiary_node(struct ma_state *mas, 2511 struct ma_state *tmp_mas, bool in_rcu) 2512 { 2513 struct maple_node *tmp; 2514 struct maple_enode *enode; 2515 2516 if (mas_is_none(tmp_mas)) 2517 return; 2518 2519 enode = tmp_mas->node; 2520 tmp = mte_to_node(enode); 2521 mte_set_node_dead(enode); 2522 if (in_rcu) 2523 ma_free_rcu(tmp); 2524 else 2525 mas_push_node(mas, tmp); 2526 } 2527 2528 /* 2529 * mas_topiary_replace() - Replace the data with new data, then repair the 2530 * parent links within the new tree. Iterate over the dead sub-tree and collect 2531 * the dead subtrees and topiary the nodes that are no longer of use. 2532 * 2533 * The new tree will have up to three children with the correct parent. Keep 2534 * track of the new entries as they need to be followed to find the next level 2535 * of new entries. 2536 * 2537 * The old tree will have up to three children with the old parent. Keep track 2538 * of the old entries as they may have more nodes below replaced. Nodes within 2539 * [index, last] are dead subtrees, others need to be freed and followed. 
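 *
 * This results in two passes: the first walks the new nodes and repairs the
 * parent pointers of their children, the second walks the old nodes,
 * collecting fully dead subtrees in a topiary list and disposing of the
 * remaining old nodes one at a time.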
2540 * 2541 * @mas: The maple state pointing at the new data 2542 * @old_enode: The maple encoded node being replaced 2543 * 2544 */ mas_topiary_replace(struct ma_state * mas,struct maple_enode * old_enode)2545 static inline void mas_topiary_replace(struct ma_state *mas, 2546 struct maple_enode *old_enode) 2547 { 2548 struct ma_state tmp[3], tmp_next[3]; 2549 MA_TOPIARY(subtrees, mas->tree); 2550 bool in_rcu; 2551 int i, n; 2552 2553 /* Place data in tree & then mark node as old */ 2554 mas_put_in_tree(mas, old_enode); 2555 2556 /* Update the parent pointers in the tree */ 2557 tmp[0] = *mas; 2558 tmp[0].offset = 0; 2559 tmp[1].status = ma_none; 2560 tmp[2].status = ma_none; 2561 while (!mte_is_leaf(tmp[0].node)) { 2562 n = 0; 2563 for (i = 0; i < 3; i++) { 2564 if (mas_is_none(&tmp[i])) 2565 continue; 2566 2567 while (n < 3) { 2568 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2569 break; 2570 n++; 2571 } 2572 2573 mas_adopt_children(&tmp[i], tmp[i].node); 2574 } 2575 2576 if (MAS_WARN_ON(mas, n == 0)) 2577 break; 2578 2579 while (n < 3) 2580 tmp_next[n++].status = ma_none; 2581 2582 for (i = 0; i < 3; i++) 2583 tmp[i] = tmp_next[i]; 2584 } 2585 2586 /* Collect the old nodes that need to be discarded */ 2587 if (mte_is_leaf(old_enode)) 2588 return mas_free(mas, old_enode); 2589 2590 tmp[0] = *mas; 2591 tmp[0].offset = 0; 2592 tmp[0].node = old_enode; 2593 tmp[1].status = ma_none; 2594 tmp[2].status = ma_none; 2595 in_rcu = mt_in_rcu(mas->tree); 2596 do { 2597 n = 0; 2598 for (i = 0; i < 3; i++) { 2599 if (mas_is_none(&tmp[i])) 2600 continue; 2601 2602 while (n < 3) { 2603 if (!mas_find_child(&tmp[i], &tmp_next[n])) 2604 break; 2605 2606 if ((tmp_next[n].min >= tmp_next->index) && 2607 (tmp_next[n].max <= tmp_next->last)) { 2608 mat_add(&subtrees, tmp_next[n].node); 2609 tmp_next[n].status = ma_none; 2610 } else { 2611 n++; 2612 } 2613 } 2614 } 2615 2616 if (MAS_WARN_ON(mas, n == 0)) 2617 break; 2618 2619 while (n < 3) 2620 tmp_next[n++].status = ma_none; 2621 2622 for (i = 0; i < 3; i++) { 2623 mas_topiary_node(mas, &tmp[i], in_rcu); 2624 tmp[i] = tmp_next[i]; 2625 } 2626 } while (!mte_is_leaf(tmp[0].node)); 2627 2628 for (i = 0; i < 3; i++) 2629 mas_topiary_node(mas, &tmp[i], in_rcu); 2630 2631 mas_mat_destroy(mas, &subtrees); 2632 } 2633 2634 /* 2635 * mas_wmb_replace() - Write memory barrier and replace 2636 * @mas: The maple state 2637 * @old_enode: The old maple encoded node that is being replaced. 2638 * 2639 * Updates gap as necessary. 2640 */ mas_wmb_replace(struct ma_state * mas,struct maple_enode * old_enode)2641 static inline void mas_wmb_replace(struct ma_state *mas, 2642 struct maple_enode *old_enode) 2643 { 2644 /* Insert the new data in the tree */ 2645 mas_topiary_replace(mas, old_enode); 2646 2647 if (mte_is_leaf(mas->node)) 2648 return; 2649 2650 mas_update_gap(mas); 2651 } 2652 2653 /* 2654 * mast_cp_to_nodes() - Copy data out to nodes. 2655 * @mast: The maple subtree state 2656 * @left: The left encoded maple node 2657 * @middle: The middle encoded maple node 2658 * @right: The right encoded maple node 2659 * @split: The location to split between left and (middle ? middle : right) 2660 * @mid_split: The location to split between middle and right. 
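 *
 * @middle and @right may be NULL, in which case the matching maple state is
 * set to ma_none and no data is copied into it.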
2661 */ mast_cp_to_nodes(struct maple_subtree_state * mast,struct maple_enode * left,struct maple_enode * middle,struct maple_enode * right,unsigned char split,unsigned char mid_split)2662 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, 2663 struct maple_enode *left, struct maple_enode *middle, 2664 struct maple_enode *right, unsigned char split, unsigned char mid_split) 2665 { 2666 bool new_lmax = true; 2667 2668 mas_node_or_none(mast->l, left); 2669 mas_node_or_none(mast->m, middle); 2670 mas_node_or_none(mast->r, right); 2671 2672 mast->l->min = mast->orig_l->min; 2673 if (split == mast->bn->b_end) { 2674 mast->l->max = mast->orig_r->max; 2675 new_lmax = false; 2676 } 2677 2678 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); 2679 2680 if (middle) { 2681 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); 2682 mast->m->min = mast->bn->pivot[split] + 1; 2683 split = mid_split; 2684 } 2685 2686 mast->r->max = mast->orig_r->max; 2687 if (right) { 2688 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); 2689 mast->r->min = mast->bn->pivot[split] + 1; 2690 } 2691 } 2692 2693 /* 2694 * mast_combine_cp_left - Copy in the original left side of the tree into the 2695 * combined data set in the maple subtree state big node. 2696 * @mast: The maple subtree state 2697 */ mast_combine_cp_left(struct maple_subtree_state * mast)2698 static inline void mast_combine_cp_left(struct maple_subtree_state *mast) 2699 { 2700 unsigned char l_slot = mast->orig_l->offset; 2701 2702 if (!l_slot) 2703 return; 2704 2705 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); 2706 } 2707 2708 /* 2709 * mast_combine_cp_right: Copy in the original right side of the tree into the 2710 * combined data set in the maple subtree state big node. 2711 * @mast: The maple subtree state 2712 */ mast_combine_cp_right(struct maple_subtree_state * mast)2713 static inline void mast_combine_cp_right(struct maple_subtree_state *mast) 2714 { 2715 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) 2716 return; 2717 2718 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, 2719 mt_slot_count(mast->orig_r->node), mast->bn, 2720 mast->bn->b_end); 2721 mast->orig_r->last = mast->orig_r->max; 2722 } 2723 2724 /* 2725 * mast_sufficient: Check if the maple subtree state has enough data in the big 2726 * node to create at least one sufficient node 2727 * @mast: the maple subtree state 2728 */ mast_sufficient(struct maple_subtree_state * mast)2729 static inline bool mast_sufficient(struct maple_subtree_state *mast) 2730 { 2731 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) 2732 return true; 2733 2734 return false; 2735 } 2736 2737 /* 2738 * mast_overflow: Check if there is too much data in the subtree state for a 2739 * single node. 
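 * Overflow here means @mast->bn holds at least as many entries as a single
 * node of this type has slots.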
2740 * @mast: The maple subtree state 2741 */ mast_overflow(struct maple_subtree_state * mast)2742 static inline bool mast_overflow(struct maple_subtree_state *mast) 2743 { 2744 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) 2745 return true; 2746 2747 return false; 2748 } 2749 mtree_range_walk(struct ma_state * mas)2750 static inline void *mtree_range_walk(struct ma_state *mas) 2751 { 2752 unsigned long *pivots; 2753 unsigned char offset; 2754 struct maple_node *node; 2755 struct maple_enode *next, *last; 2756 enum maple_type type; 2757 void __rcu **slots; 2758 unsigned char end; 2759 unsigned long max, min; 2760 unsigned long prev_max, prev_min; 2761 2762 next = mas->node; 2763 min = mas->min; 2764 max = mas->max; 2765 do { 2766 last = next; 2767 node = mte_to_node(next); 2768 type = mte_node_type(next); 2769 pivots = ma_pivots(node, type); 2770 end = ma_data_end(node, type, pivots, max); 2771 prev_min = min; 2772 prev_max = max; 2773 if (pivots[0] >= mas->index) { 2774 offset = 0; 2775 max = pivots[0]; 2776 goto next; 2777 } 2778 2779 offset = 1; 2780 while (offset < end) { 2781 if (pivots[offset] >= mas->index) { 2782 max = pivots[offset]; 2783 break; 2784 } 2785 offset++; 2786 } 2787 2788 min = pivots[offset - 1] + 1; 2789 next: 2790 slots = ma_slots(node, type); 2791 next = mt_slot(mas->tree, slots, offset); 2792 if (unlikely(ma_dead_node(node))) 2793 goto dead_node; 2794 } while (!ma_is_leaf(type)); 2795 2796 mas->end = end; 2797 mas->offset = offset; 2798 mas->index = min; 2799 mas->last = max; 2800 mas->min = prev_min; 2801 mas->max = prev_max; 2802 mas->node = last; 2803 return (void *)next; 2804 2805 dead_node: 2806 mas_reset(mas); 2807 return NULL; 2808 } 2809 2810 /* 2811 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. 2812 * @mas: The starting maple state 2813 * @mast: The maple_subtree_state, keeps track of 4 maple states. 2814 * @count: The estimated count of iterations needed. 2815 * 2816 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root 2817 * is hit. First @b_node is split into two entries which are inserted into the 2818 * next iteration of the loop. @b_node is returned populated with the final 2819 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the 2820 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last 2821 * to account of what has been copied into the new sub-tree. The update of 2822 * orig_l_mas->last is used in mas_consume to find the slots that will need to 2823 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of 2824 * the new sub-tree in case the sub-tree becomes the full tree. 2825 */ mas_spanning_rebalance(struct ma_state * mas,struct maple_subtree_state * mast,unsigned char count)2826 static void mas_spanning_rebalance(struct ma_state *mas, 2827 struct maple_subtree_state *mast, unsigned char count) 2828 { 2829 unsigned char split, mid_split; 2830 unsigned char slot = 0; 2831 struct maple_enode *left = NULL, *middle = NULL, *right = NULL; 2832 struct maple_enode *old_enode; 2833 2834 MA_STATE(l_mas, mas->tree, mas->index, mas->index); 2835 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2836 MA_STATE(m_mas, mas->tree, mas->index, mas->index); 2837 2838 /* 2839 * The tree needs to be rebalanced and leaves need to be kept at the same level. 2840 * Rebalancing is done by use of the ``struct maple_topiary``. 
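 * The new subtree is built up through mast->l, mast->m and mast->r while
 * mast->orig_l and mast->orig_r track the corresponding position in the
 * original tree.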
2841 */ 2842 mast->l = &l_mas; 2843 mast->m = &m_mas; 2844 mast->r = &r_mas; 2845 l_mas.status = r_mas.status = m_mas.status = ma_none; 2846 2847 /* Check if this is not root and has sufficient data. */ 2848 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && 2849 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) 2850 mast_spanning_rebalance(mast); 2851 2852 l_mas.depth = 0; 2853 2854 /* 2855 * Each level of the tree is examined and balanced, pushing data to the left or 2856 * right, or rebalancing against left or right nodes is employed to avoid 2857 * rippling up the tree to limit the amount of churn. Once a new sub-section of 2858 * the tree is created, there may be a mix of new and old nodes. The old nodes 2859 * will have the incorrect parent pointers and currently be in two trees: the 2860 * original tree and the partially new tree. To remedy the parent pointers in 2861 * the old tree, the new data is swapped into the active tree and a walk down 2862 * the tree is performed and the parent pointers are updated. 2863 * See mas_topiary_replace() for more information. 2864 */ 2865 while (count--) { 2866 mast->bn->b_end--; 2867 mast->bn->type = mte_node_type(mast->orig_l->node); 2868 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, 2869 &mid_split); 2870 mast_set_split_parents(mast, left, middle, right, split, 2871 mid_split); 2872 mast_cp_to_nodes(mast, left, middle, right, split, mid_split); 2873 2874 /* 2875 * Copy data from next level in the tree to mast->bn from next 2876 * iteration 2877 */ 2878 memset(mast->bn, 0, sizeof(struct maple_big_node)); 2879 mast->bn->type = mte_node_type(left); 2880 l_mas.depth++; 2881 2882 /* Root already stored in l->node. */ 2883 if (mas_is_root_limits(mast->l)) 2884 goto new_root; 2885 2886 mast_ascend(mast); 2887 mast_combine_cp_left(mast); 2888 l_mas.offset = mast->bn->b_end; 2889 mab_set_b_end(mast->bn, &l_mas, left); 2890 mab_set_b_end(mast->bn, &m_mas, middle); 2891 mab_set_b_end(mast->bn, &r_mas, right); 2892 2893 /* Copy anything necessary out of the right node. */ 2894 mast_combine_cp_right(mast); 2895 mast->orig_l->last = mast->orig_l->max; 2896 2897 if (mast_sufficient(mast)) 2898 continue; 2899 2900 if (mast_overflow(mast)) 2901 continue; 2902 2903 /* May be a new root stored in mast->bn */ 2904 if (mas_is_root_limits(mast->orig_l)) 2905 break; 2906 2907 mast_spanning_rebalance(mast); 2908 2909 /* rebalancing from other nodes may require another loop. */ 2910 if (!count) 2911 count++; 2912 } 2913 2914 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), 2915 mte_node_type(mast->orig_l->node)); 2916 l_mas.depth++; 2917 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); 2918 mas_set_parent(mas, left, l_mas.node, slot); 2919 if (middle) 2920 mas_set_parent(mas, middle, l_mas.node, ++slot); 2921 2922 if (right) 2923 mas_set_parent(mas, right, l_mas.node, ++slot); 2924 2925 if (mas_is_root_limits(mast->l)) { 2926 new_root: 2927 mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); 2928 while (!mte_is_root(mast->orig_l->node)) 2929 mast_ascend(mast); 2930 } else { 2931 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; 2932 } 2933 2934 old_enode = mast->orig_l->node; 2935 mas->depth = l_mas.depth; 2936 mas->node = l_mas.node; 2937 mas->min = l_mas.min; 2938 mas->max = l_mas.max; 2939 mas->offset = l_mas.offset; 2940 mas_wmb_replace(mas, old_enode); 2941 mtree_range_walk(mas); 2942 return; 2943 } 2944 2945 /* 2946 * mas_rebalance() - Rebalance a given node. 
2947 * @mas: The maple state 2948 * @b_node: The big maple node. 2949 * 2950 * Rebalance two nodes into a single node or two new nodes that are sufficient. 2951 * Continue upwards until tree is sufficient. 2952 */ mas_rebalance(struct ma_state * mas,struct maple_big_node * b_node)2953 static inline void mas_rebalance(struct ma_state *mas, 2954 struct maple_big_node *b_node) 2955 { 2956 char empty_count = mas_mt_height(mas); 2957 struct maple_subtree_state mast; 2958 unsigned char shift, b_end = ++b_node->b_end; 2959 2960 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2961 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2962 2963 trace_ma_op(__func__, mas); 2964 2965 /* 2966 * Rebalancing occurs if a node is insufficient. Data is rebalanced 2967 * against the node to the right if it exists, otherwise the node to the 2968 * left of this node is rebalanced against this node. If rebalancing 2969 * causes just one node to be produced instead of two, then the parent 2970 * is also examined and rebalanced if it is insufficient. Every level 2971 * tries to combine the data in the same way. If one node contains the 2972 * entire range of the tree, then that node is used as a new root node. 2973 */ 2974 2975 mast.orig_l = &l_mas; 2976 mast.orig_r = &r_mas; 2977 mast.bn = b_node; 2978 mast.bn->type = mte_node_type(mas->node); 2979 2980 l_mas = r_mas = *mas; 2981 2982 if (mas_next_sibling(&r_mas)) { 2983 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); 2984 r_mas.last = r_mas.index = r_mas.max; 2985 } else { 2986 mas_prev_sibling(&l_mas); 2987 shift = mas_data_end(&l_mas) + 1; 2988 mab_shift_right(b_node, shift); 2989 mas->offset += shift; 2990 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); 2991 b_node->b_end = shift + b_end; 2992 l_mas.index = l_mas.last = l_mas.min; 2993 } 2994 2995 return mas_spanning_rebalance(mas, &mast, empty_count); 2996 } 2997 2998 /* 2999 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple 3000 * state. 3001 * @mas: The maple state 3002 * @end: The end of the left-most node. 3003 * 3004 * During a mass-insert event (such as forking), it may be necessary to 3005 * rebalance the left-most node when it is not sufficient. 3006 */ mas_destroy_rebalance(struct ma_state * mas,unsigned char end)3007 static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) 3008 { 3009 enum maple_type mt = mte_node_type(mas->node); 3010 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; 3011 struct maple_enode *eparent, *old_eparent; 3012 unsigned char offset, tmp, split = mt_slots[mt] / 2; 3013 void __rcu **l_slots, **slots; 3014 unsigned long *l_pivs, *pivs, gap; 3015 bool in_rcu = mt_in_rcu(mas->tree); 3016 3017 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3018 3019 l_mas = *mas; 3020 mas_prev_sibling(&l_mas); 3021 3022 /* set up node. 
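 * In RCU mode a newly allocated node is required so that readers never see
 * a partially updated node; otherwise the data is rebuilt in the on-stack
 * 'reuse' node and copied back over the existing node.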
*/ 3023 if (in_rcu) { 3024 newnode = mas_pop_node(mas); 3025 } else { 3026 newnode = &reuse; 3027 } 3028 3029 node = mas_mn(mas); 3030 newnode->parent = node->parent; 3031 slots = ma_slots(newnode, mt); 3032 pivs = ma_pivots(newnode, mt); 3033 left = mas_mn(&l_mas); 3034 l_slots = ma_slots(left, mt); 3035 l_pivs = ma_pivots(left, mt); 3036 if (!l_slots[split]) 3037 split++; 3038 tmp = mas_data_end(&l_mas) - split; 3039 3040 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); 3041 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); 3042 pivs[tmp] = l_mas.max; 3043 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); 3044 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); 3045 3046 l_mas.max = l_pivs[split]; 3047 mas->min = l_mas.max + 1; 3048 old_eparent = mt_mk_node(mte_parent(l_mas.node), 3049 mas_parent_type(&l_mas, l_mas.node)); 3050 tmp += end; 3051 if (!in_rcu) { 3052 unsigned char max_p = mt_pivots[mt]; 3053 unsigned char max_s = mt_slots[mt]; 3054 3055 if (tmp < max_p) 3056 memset(pivs + tmp, 0, 3057 sizeof(unsigned long) * (max_p - tmp)); 3058 3059 if (tmp < mt_slots[mt]) 3060 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3061 3062 memcpy(node, newnode, sizeof(struct maple_node)); 3063 ma_set_meta(node, mt, 0, tmp - 1); 3064 mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), 3065 l_pivs[split]); 3066 3067 /* Remove data from l_pivs. */ 3068 tmp = split + 1; 3069 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); 3070 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); 3071 ma_set_meta(left, mt, 0, split); 3072 eparent = old_eparent; 3073 3074 goto done; 3075 } 3076 3077 /* RCU requires replacing both l_mas, mas, and parent. */ 3078 mas->node = mt_mk_node(newnode, mt); 3079 ma_set_meta(newnode, mt, 0, tmp); 3080 3081 new_left = mas_pop_node(mas); 3082 new_left->parent = left->parent; 3083 mt = mte_node_type(l_mas.node); 3084 slots = ma_slots(new_left, mt); 3085 pivs = ma_pivots(new_left, mt); 3086 memcpy(slots, l_slots, sizeof(void *) * split); 3087 memcpy(pivs, l_pivs, sizeof(unsigned long) * split); 3088 ma_set_meta(new_left, mt, 0, split); 3089 l_mas.node = mt_mk_node(new_left, mt); 3090 3091 /* replace parent. */ 3092 offset = mte_parent_slot(mas->node); 3093 mt = mas_parent_type(&l_mas, l_mas.node); 3094 parent = mas_pop_node(mas); 3095 slots = ma_slots(parent, mt); 3096 pivs = ma_pivots(parent, mt); 3097 memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); 3098 rcu_assign_pointer(slots[offset], mas->node); 3099 rcu_assign_pointer(slots[offset - 1], l_mas.node); 3100 pivs[offset - 1] = l_mas.max; 3101 eparent = mt_mk_node(parent, mt); 3102 done: 3103 gap = mas_leaf_max_gap(mas); 3104 mte_set_gap(eparent, mte_parent_slot(mas->node), gap); 3105 gap = mas_leaf_max_gap(&l_mas); 3106 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); 3107 mas_ascend(mas); 3108 3109 if (in_rcu) { 3110 mas_replace_node(mas, old_eparent); 3111 mas_adopt_children(mas, mas->node); 3112 } 3113 3114 mas_update_gap(mas); 3115 } 3116 3117 /* 3118 * mas_split_final_node() - Split the final node in a subtree operation. 3119 * @mast: the maple subtree state 3120 * @mas: The maple state 3121 * @height: The height of the tree in case it's a new root. 
3122 */ mas_split_final_node(struct maple_subtree_state * mast,struct ma_state * mas,int height)3123 static inline void mas_split_final_node(struct maple_subtree_state *mast, 3124 struct ma_state *mas, int height) 3125 { 3126 struct maple_enode *ancestor; 3127 3128 if (mte_is_root(mas->node)) { 3129 if (mt_is_alloc(mas->tree)) 3130 mast->bn->type = maple_arange_64; 3131 else 3132 mast->bn->type = maple_range_64; 3133 mas->depth = height; 3134 } 3135 /* 3136 * Only a single node is used here, could be root. 3137 * The Big_node data should just fit in a single node. 3138 */ 3139 ancestor = mas_new_ma_node(mas, mast->bn); 3140 mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); 3141 mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); 3142 mte_to_node(ancestor)->parent = mas_mn(mas)->parent; 3143 3144 mast->l->node = ancestor; 3145 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); 3146 mas->offset = mast->bn->b_end - 1; 3147 } 3148 3149 /* 3150 * mast_fill_bnode() - Copy data into the big node in the subtree state 3151 * @mast: The maple subtree state 3152 * @mas: the maple state 3153 * @skip: The number of entries to skip for new nodes insertion. 3154 */ mast_fill_bnode(struct maple_subtree_state * mast,struct ma_state * mas,unsigned char skip)3155 static inline void mast_fill_bnode(struct maple_subtree_state *mast, 3156 struct ma_state *mas, 3157 unsigned char skip) 3158 { 3159 bool cp = true; 3160 unsigned char split; 3161 3162 memset(mast->bn, 0, sizeof(struct maple_big_node)); 3163 3164 if (mte_is_root(mas->node)) { 3165 cp = false; 3166 } else { 3167 mas_ascend(mas); 3168 mas->offset = mte_parent_slot(mas->node); 3169 } 3170 3171 if (cp && mast->l->offset) 3172 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); 3173 3174 split = mast->bn->b_end; 3175 mab_set_b_end(mast->bn, mast->l, mast->l->node); 3176 mast->r->offset = mast->bn->b_end; 3177 mab_set_b_end(mast->bn, mast->r, mast->r->node); 3178 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) 3179 cp = false; 3180 3181 if (cp) 3182 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, 3183 mast->bn, mast->bn->b_end); 3184 3185 mast->bn->b_end--; 3186 mast->bn->type = mte_node_type(mas->node); 3187 } 3188 3189 /* 3190 * mast_split_data() - Split the data in the subtree state big node into regular 3191 * nodes. 3192 * @mast: The maple subtree state 3193 * @mas: The maple state 3194 * @split: The location to split the big node 3195 */ mast_split_data(struct maple_subtree_state * mast,struct ma_state * mas,unsigned char split)3196 static inline void mast_split_data(struct maple_subtree_state *mast, 3197 struct ma_state *mas, unsigned char split) 3198 { 3199 unsigned char p_slot; 3200 3201 mab_mas_cp(mast->bn, 0, split, mast->l, true); 3202 mte_set_pivot(mast->r->node, 0, mast->r->max); 3203 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); 3204 mast->l->offset = mte_parent_slot(mas->node); 3205 mast->l->max = mast->bn->pivot[split]; 3206 mast->r->min = mast->l->max + 1; 3207 if (mte_is_leaf(mas->node)) 3208 return; 3209 3210 p_slot = mast->orig_l->offset; 3211 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, 3212 &p_slot, split); 3213 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, 3214 &p_slot, split); 3215 } 3216 3217 /* 3218 * mas_push_data() - Instead of splitting a node, it is beneficial to push the 3219 * data to the right or left node if there is room. 
3220 * @mas: The maple state 3221 * @height: The current height of the maple state 3222 * @mast: The maple subtree state 3223 * @left: Push left or not. 3224 * 3225 * Keeping the height of the tree low means faster lookups. 3226 * 3227 * Return: True if pushed, false otherwise. 3228 */ mas_push_data(struct ma_state * mas,int height,struct maple_subtree_state * mast,bool left)3229 static inline bool mas_push_data(struct ma_state *mas, int height, 3230 struct maple_subtree_state *mast, bool left) 3231 { 3232 unsigned char slot_total = mast->bn->b_end; 3233 unsigned char end, space, split; 3234 3235 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); 3236 tmp_mas = *mas; 3237 tmp_mas.depth = mast->l->depth; 3238 3239 if (left && !mas_prev_sibling(&tmp_mas)) 3240 return false; 3241 else if (!left && !mas_next_sibling(&tmp_mas)) 3242 return false; 3243 3244 end = mas_data_end(&tmp_mas); 3245 slot_total += end; 3246 space = 2 * mt_slot_count(mas->node) - 2; 3247 /* -2 instead of -1 to ensure there isn't a triple split */ 3248 if (ma_is_leaf(mast->bn->type)) 3249 space--; 3250 3251 if (mas->max == ULONG_MAX) 3252 space--; 3253 3254 if (slot_total >= space) 3255 return false; 3256 3257 /* Get the data; Fill mast->bn */ 3258 mast->bn->b_end++; 3259 if (left) { 3260 mab_shift_right(mast->bn, end + 1); 3261 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); 3262 mast->bn->b_end = slot_total + 1; 3263 } else { 3264 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); 3265 } 3266 3267 /* Configure mast for splitting of mast->bn */ 3268 split = mt_slots[mast->bn->type] - 2; 3269 if (left) { 3270 /* Switch mas to prev node */ 3271 *mas = tmp_mas; 3272 /* Start using mast->l for the left side. */ 3273 tmp_mas.node = mast->l->node; 3274 *mast->l = tmp_mas; 3275 } else { 3276 tmp_mas.node = mast->r->node; 3277 *mast->r = tmp_mas; 3278 split = slot_total - split; 3279 } 3280 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); 3281 /* Update parent slot for split calculation. */ 3282 if (left) 3283 mast->orig_l->offset += end + 1; 3284 3285 mast_split_data(mast, mas, split); 3286 mast_fill_bnode(mast, mas, 2); 3287 mas_split_final_node(mast, mas, height + 1); 3288 return true; 3289 } 3290 3291 /* 3292 * mas_split() - Split data that is too big for one node into two. 3293 * @mas: The maple state 3294 * @b_node: The maple big node 3295 */ mas_split(struct ma_state * mas,struct maple_big_node * b_node)3296 static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) 3297 { 3298 struct maple_subtree_state mast; 3299 int height = 0; 3300 unsigned char mid_split, split = 0; 3301 struct maple_enode *old; 3302 3303 /* 3304 * Splitting is handled differently from any other B-tree; the Maple 3305 * Tree splits upwards. Splitting up means that the split operation 3306 * occurs when the walk of the tree hits the leaves and not on the way 3307 * down. The reason for splitting up is that it is impossible to know 3308 * how much space will be needed until the leaf is (or leaves are) 3309 * reached. Since overwriting data is allowed and a range could 3310 * overwrite more than one range or result in changing one entry into 3 3311 * entries, it is impossible to know if a split is required until the 3312 * data is examined. 3313 * 3314 * Splitting is a balancing act between keeping allocations to a minimum 3315 * and avoiding a 'jitter' event where a tree is expanded to make room 3316 * for an entry followed by a contraction when the entry is removed. 
To 3317 * accomplish the balance, there are empty slots remaining in both left 3318 * and right nodes after a split. 3319 */ 3320 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 3321 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 3322 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3323 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3324 3325 trace_ma_op(__func__, mas); 3326 mas->depth = mas_mt_height(mas); 3327 3328 mast.l = &l_mas; 3329 mast.r = &r_mas; 3330 mast.orig_l = &prev_l_mas; 3331 mast.orig_r = &prev_r_mas; 3332 mast.bn = b_node; 3333 3334 while (height++ <= mas->depth) { 3335 if (mt_slots[b_node->type] > b_node->b_end) { 3336 mas_split_final_node(&mast, mas, height); 3337 break; 3338 } 3339 3340 l_mas = r_mas = *mas; 3341 l_mas.node = mas_new_ma_node(mas, b_node); 3342 r_mas.node = mas_new_ma_node(mas, b_node); 3343 /* 3344 * Another way that 'jitter' is avoided is to terminate a split up early if the 3345 * left or right node has space to spare. This is referred to as "pushing left" 3346 * or "pushing right" and is similar to the B* tree, except the nodes left or 3347 * right can rarely be reused due to RCU, but the ripple upwards is halted which 3348 * is a significant savings. 3349 */ 3350 /* Try to push left. */ 3351 if (mas_push_data(mas, height, &mast, true)) 3352 break; 3353 /* Try to push right. */ 3354 if (mas_push_data(mas, height, &mast, false)) 3355 break; 3356 3357 split = mab_calc_split(mas, b_node, &mid_split); 3358 mast_split_data(&mast, mas, split); 3359 /* 3360 * Usually correct, mab_mas_cp in the above call overwrites 3361 * r->max. 3362 */ 3363 mast.r->max = mas->max; 3364 mast_fill_bnode(&mast, mas, 1); 3365 prev_l_mas = *mast.l; 3366 prev_r_mas = *mast.r; 3367 } 3368 3369 /* Set the original node as dead */ 3370 old = mas->node; 3371 mas->node = l_mas.node; 3372 mas_wmb_replace(mas, old); 3373 mtree_range_walk(mas); 3374 return; 3375 } 3376 3377 /* 3378 * mas_commit_b_node() - Commit the big node into the tree. 
3379 * @wr_mas: The maple write state 3380 * @b_node: The maple big node 3381 */ mas_commit_b_node(struct ma_wr_state * wr_mas,struct maple_big_node * b_node)3382 static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, 3383 struct maple_big_node *b_node) 3384 { 3385 enum store_type type = wr_mas->mas->store_type; 3386 3387 WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store); 3388 3389 if (type == wr_rebalance) 3390 return mas_rebalance(wr_mas->mas, b_node); 3391 3392 return mas_split(wr_mas->mas, b_node); 3393 } 3394 3395 /* 3396 * mas_root_expand() - Expand a root to a node 3397 * @mas: The maple state 3398 * @entry: The entry to store into the tree 3399 */ mas_root_expand(struct ma_state * mas,void * entry)3400 static inline void mas_root_expand(struct ma_state *mas, void *entry) 3401 { 3402 void *contents = mas_root_locked(mas); 3403 enum maple_type type = maple_leaf_64; 3404 struct maple_node *node; 3405 void __rcu **slots; 3406 unsigned long *pivots; 3407 int slot = 0; 3408 3409 node = mas_pop_node(mas); 3410 pivots = ma_pivots(node, type); 3411 slots = ma_slots(node, type); 3412 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3413 mas->node = mt_mk_node(node, type); 3414 mas->status = ma_active; 3415 3416 if (mas->index) { 3417 if (contents) { 3418 rcu_assign_pointer(slots[slot], contents); 3419 if (likely(mas->index > 1)) 3420 slot++; 3421 } 3422 pivots[slot++] = mas->index - 1; 3423 } 3424 3425 rcu_assign_pointer(slots[slot], entry); 3426 mas->offset = slot; 3427 pivots[slot] = mas->last; 3428 if (mas->last != ULONG_MAX) 3429 pivots[++slot] = ULONG_MAX; 3430 3431 mas->depth = 1; 3432 mas_set_height(mas); 3433 ma_set_meta(node, maple_leaf_64, 0, slot); 3434 /* swap the new root into the tree */ 3435 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3436 return; 3437 } 3438 3439 /* 3440 * mas_store_root() - Storing value into root. 3441 * @mas: The maple state 3442 * @entry: The entry to store. 3443 * 3444 * There is no root node now and we are storing a value into the root - this 3445 * function either assigns the pointer or expands into a node. 3446 */ mas_store_root(struct ma_state * mas,void * entry)3447 static inline void mas_store_root(struct ma_state *mas, void *entry) 3448 { 3449 if (!entry) { 3450 if (!mas->index) 3451 rcu_assign_pointer(mas->tree->ma_root, NULL); 3452 } else if (likely((mas->last != 0) || (mas->index != 0))) 3453 mas_root_expand(mas, entry); 3454 else if (((unsigned long) (entry) & 3) == 2) 3455 mas_root_expand(mas, entry); 3456 else { 3457 rcu_assign_pointer(mas->tree->ma_root, entry); 3458 mas->status = ma_start; 3459 } 3460 } 3461 3462 /* 3463 * mas_is_span_wr() - Check if the write needs to be treated as a write that 3464 * spans the node. 3465 * @wr_mas: The maple write state 3466 * 3467 * Spanning writes are writes that start in one node and end in another OR if 3468 * the write of a %NULL will cause the node to end with a %NULL. 3469 * 3470 * Return: True if this is a spanning write, false otherwise. 
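 *
 * For example (illustrative), storing NULL over the range 50-100 with
 * mtree_store_range() is a spanning write if 100 happens to be the last
 * pivot of a leaf that does not end at ULONG_MAX, since that node would
 * otherwise be left ending in a NULL.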
3471 */ mas_is_span_wr(struct ma_wr_state * wr_mas)3472 static bool mas_is_span_wr(struct ma_wr_state *wr_mas) 3473 { 3474 unsigned long max = wr_mas->r_max; 3475 unsigned long last = wr_mas->mas->last; 3476 enum maple_type type = wr_mas->type; 3477 void *entry = wr_mas->entry; 3478 3479 /* Contained in this pivot, fast path */ 3480 if (last < max) 3481 return false; 3482 3483 if (ma_is_leaf(type)) { 3484 max = wr_mas->mas->max; 3485 if (last < max) 3486 return false; 3487 } 3488 3489 if (last == max) { 3490 /* 3491 * The last entry of leaf node cannot be NULL unless it is the 3492 * rightmost node (writing ULONG_MAX), otherwise it spans slots. 3493 */ 3494 if (entry || last == ULONG_MAX) 3495 return false; 3496 } 3497 3498 trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3499 return true; 3500 } 3501 mas_wr_walk_descend(struct ma_wr_state * wr_mas)3502 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) 3503 { 3504 wr_mas->type = mte_node_type(wr_mas->mas->node); 3505 mas_wr_node_walk(wr_mas); 3506 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); 3507 } 3508 mas_wr_walk_traverse(struct ma_wr_state * wr_mas)3509 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) 3510 { 3511 wr_mas->mas->max = wr_mas->r_max; 3512 wr_mas->mas->min = wr_mas->r_min; 3513 wr_mas->mas->node = wr_mas->content; 3514 wr_mas->mas->offset = 0; 3515 wr_mas->mas->depth++; 3516 } 3517 /* 3518 * mas_wr_walk() - Walk the tree for a write. 3519 * @wr_mas: The maple write state 3520 * 3521 * Uses mas_slot_locked() and does not need to worry about dead nodes. 3522 * 3523 * Return: True if it's contained in a node, false on spanning write. 3524 */ mas_wr_walk(struct ma_wr_state * wr_mas)3525 static bool mas_wr_walk(struct ma_wr_state *wr_mas) 3526 { 3527 struct ma_state *mas = wr_mas->mas; 3528 3529 while (true) { 3530 mas_wr_walk_descend(wr_mas); 3531 if (unlikely(mas_is_span_wr(wr_mas))) 3532 return false; 3533 3534 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3535 mas->offset); 3536 if (ma_is_leaf(wr_mas->type)) 3537 return true; 3538 3539 mas_wr_walk_traverse(wr_mas); 3540 } 3541 3542 return true; 3543 } 3544 mas_wr_walk_index(struct ma_wr_state * wr_mas)3545 static void mas_wr_walk_index(struct ma_wr_state *wr_mas) 3546 { 3547 struct ma_state *mas = wr_mas->mas; 3548 3549 while (true) { 3550 mas_wr_walk_descend(wr_mas); 3551 wr_mas->content = mas_slot_locked(mas, wr_mas->slots, 3552 mas->offset); 3553 if (ma_is_leaf(wr_mas->type)) 3554 return; 3555 mas_wr_walk_traverse(wr_mas); 3556 } 3557 } 3558 /* 3559 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 
3560 * @l_wr_mas: The left maple write state 3561 * @r_wr_mas: The right maple write state 3562 */ mas_extend_spanning_null(struct ma_wr_state * l_wr_mas,struct ma_wr_state * r_wr_mas)3563 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, 3564 struct ma_wr_state *r_wr_mas) 3565 { 3566 struct ma_state *r_mas = r_wr_mas->mas; 3567 struct ma_state *l_mas = l_wr_mas->mas; 3568 unsigned char l_slot; 3569 3570 l_slot = l_mas->offset; 3571 if (!l_wr_mas->content) 3572 l_mas->index = l_wr_mas->r_min; 3573 3574 if ((l_mas->index == l_wr_mas->r_min) && 3575 (l_slot && 3576 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { 3577 if (l_slot > 1) 3578 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; 3579 else 3580 l_mas->index = l_mas->min; 3581 3582 l_mas->offset = l_slot - 1; 3583 } 3584 3585 if (!r_wr_mas->content) { 3586 if (r_mas->last < r_wr_mas->r_max) 3587 r_mas->last = r_wr_mas->r_max; 3588 r_mas->offset++; 3589 } else if ((r_mas->last == r_wr_mas->r_max) && 3590 (r_mas->last < r_mas->max) && 3591 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { 3592 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, 3593 r_wr_mas->type, r_mas->offset + 1); 3594 r_mas->offset++; 3595 } 3596 } 3597 mas_state_walk(struct ma_state * mas)3598 static inline void *mas_state_walk(struct ma_state *mas) 3599 { 3600 void *entry; 3601 3602 entry = mas_start(mas); 3603 if (mas_is_none(mas)) 3604 return NULL; 3605 3606 if (mas_is_ptr(mas)) 3607 return entry; 3608 3609 return mtree_range_walk(mas); 3610 } 3611 3612 /* 3613 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up 3614 * to date. 3615 * 3616 * @mas: The maple state. 3617 * 3618 * Note: Leaves mas in undesirable state. 3619 * Return: The entry for @mas->index or %NULL on dead node. 3620 */ mtree_lookup_walk(struct ma_state * mas)3621 static inline void *mtree_lookup_walk(struct ma_state *mas) 3622 { 3623 unsigned long *pivots; 3624 unsigned char offset; 3625 struct maple_node *node; 3626 struct maple_enode *next; 3627 enum maple_type type; 3628 void __rcu **slots; 3629 unsigned char end; 3630 3631 next = mas->node; 3632 do { 3633 node = mte_to_node(next); 3634 type = mte_node_type(next); 3635 pivots = ma_pivots(node, type); 3636 end = mt_pivots[type]; 3637 offset = 0; 3638 do { 3639 if (pivots[offset] >= mas->index) 3640 break; 3641 } while (++offset < end); 3642 3643 slots = ma_slots(node, type); 3644 next = mt_slot(mas->tree, slots, offset); 3645 if (unlikely(ma_dead_node(node))) 3646 goto dead_node; 3647 } while (!ma_is_leaf(type)); 3648 3649 return (void *)next; 3650 3651 dead_node: 3652 mas_reset(mas); 3653 return NULL; 3654 } 3655 3656 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); 3657 /* 3658 * mas_new_root() - Create a new root node that only contains the entry passed 3659 * in. 3660 * @mas: The maple state 3661 * @entry: The entry to store. 
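 *
 * A non-NULL @entry is placed in a single new leaf node; a NULL @entry
 * leaves the tree empty. Any previous root node is freed via
 * mte_destroy_walk().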
3662 * 3663 * Only valid when the index == 0 and the last == ULONG_MAX 3664 */ mas_new_root(struct ma_state * mas,void * entry)3665 static inline void mas_new_root(struct ma_state *mas, void *entry) 3666 { 3667 struct maple_enode *root = mas_root_locked(mas); 3668 enum maple_type type = maple_leaf_64; 3669 struct maple_node *node; 3670 void __rcu **slots; 3671 unsigned long *pivots; 3672 3673 WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); 3674 3675 if (!entry) { 3676 mas->depth = 0; 3677 mas_set_height(mas); 3678 rcu_assign_pointer(mas->tree->ma_root, entry); 3679 mas->status = ma_start; 3680 goto done; 3681 } 3682 3683 node = mas_pop_node(mas); 3684 pivots = ma_pivots(node, type); 3685 slots = ma_slots(node, type); 3686 node->parent = ma_parent_ptr(mas_tree_parent(mas)); 3687 mas->node = mt_mk_node(node, type); 3688 mas->status = ma_active; 3689 rcu_assign_pointer(slots[0], entry); 3690 pivots[0] = mas->last; 3691 mas->depth = 1; 3692 mas_set_height(mas); 3693 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); 3694 3695 done: 3696 if (xa_is_node(root)) 3697 mte_destroy_walk(root, mas->tree); 3698 3699 return; 3700 } 3701 /* 3702 * mas_wr_spanning_store() - Create a subtree with the store operation completed 3703 * and new nodes where necessary, then place the sub-tree in the actual tree. 3704 * Note that mas is expected to point to the node which caused the store to 3705 * span. 3706 * @wr_mas: The maple write state 3707 */ mas_wr_spanning_store(struct ma_wr_state * wr_mas)3708 static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) 3709 { 3710 struct maple_subtree_state mast; 3711 struct maple_big_node b_node; 3712 struct ma_state *mas; 3713 unsigned char height; 3714 3715 /* Left and Right side of spanning store */ 3716 MA_STATE(l_mas, NULL, 0, 0); 3717 MA_STATE(r_mas, NULL, 0, 0); 3718 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); 3719 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); 3720 3721 /* 3722 * A store operation that spans multiple nodes is called a spanning 3723 * store and is handled early in the store call stack by the function 3724 * mas_is_span_wr(). When a spanning store is identified, the maple 3725 * state is duplicated. The first maple state walks the left tree path 3726 * to ``index``, the duplicate walks the right tree path to ``last``. 3727 * The data in the two nodes are combined into a single node, two nodes, 3728 * or possibly three nodes (see the 3-way split above). A ``NULL`` 3729 * written to the last entry of a node is considered a spanning store as 3730 * a rebalance is required for the operation to complete and an overflow 3731 * of data may happen. 3732 */ 3733 mas = wr_mas->mas; 3734 trace_ma_op(__func__, mas); 3735 3736 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3737 return mas_new_root(mas, wr_mas->entry); 3738 /* 3739 * Node rebalancing may occur due to this store, so there may be three new 3740 * entries per level plus a new root. 3741 */ 3742 height = mas_mt_height(mas); 3743 3744 /* 3745 * Set up right side. Need to get to the next offset after the spanning 3746 * store to ensure it's not NULL and to combine both the next node and 3747 * the node with the start together. 3748 */ 3749 r_mas = *mas; 3750 /* Avoid overflow, walk to next slot in the tree. */ 3751 if (r_mas.last + 1) 3752 r_mas.last++; 3753 3754 r_mas.index = r_mas.last; 3755 mas_wr_walk_index(&r_wr_mas); 3756 r_mas.last = r_mas.index = mas->last; 3757 3758 /* Set up left side. 
*/ 3759 l_mas = *mas; 3760 mas_wr_walk_index(&l_wr_mas); 3761 3762 if (!wr_mas->entry) { 3763 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); 3764 mas->offset = l_mas.offset; 3765 mas->index = l_mas.index; 3766 mas->last = l_mas.last = r_mas.last; 3767 } 3768 3769 /* expanding NULLs may make this cover the entire range */ 3770 if (!l_mas.index && r_mas.last == ULONG_MAX) { 3771 mas_set_range(mas, 0, ULONG_MAX); 3772 return mas_new_root(mas, wr_mas->entry); 3773 } 3774 3775 memset(&b_node, 0, sizeof(struct maple_big_node)); 3776 /* Copy l_mas and store the value in b_node. */ 3777 mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); 3778 /* Copy r_mas into b_node if there is anything to copy. */ 3779 if (r_mas.max > r_mas.last) 3780 mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, 3781 &b_node, b_node.b_end + 1); 3782 else 3783 b_node.b_end++; 3784 3785 /* Stop spanning searches by searching for just index. */ 3786 l_mas.index = l_mas.last = mas->index; 3787 3788 mast.bn = &b_node; 3789 mast.orig_l = &l_mas; 3790 mast.orig_r = &r_mas; 3791 /* Combine l_mas and r_mas and split them up evenly again. */ 3792 return mas_spanning_rebalance(mas, &mast, height + 1); 3793 } 3794 3795 /* 3796 * mas_wr_node_store() - Attempt to store the value in a node 3797 * @wr_mas: The maple write state 3798 * 3799 * Attempts to reuse the node, but may allocate. 3800 */ mas_wr_node_store(struct ma_wr_state * wr_mas,unsigned char new_end)3801 static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, 3802 unsigned char new_end) 3803 { 3804 struct ma_state *mas = wr_mas->mas; 3805 void __rcu **dst_slots; 3806 unsigned long *dst_pivots; 3807 unsigned char dst_offset, offset_end = wr_mas->offset_end; 3808 struct maple_node reuse, *newnode; 3809 unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; 3810 bool in_rcu = mt_in_rcu(mas->tree); 3811 3812 if (mas->last == wr_mas->end_piv) 3813 offset_end++; /* don't copy this offset */ 3814 else if (unlikely(wr_mas->r_max == ULONG_MAX)) 3815 mas_bulk_rebalance(mas, mas->end, wr_mas->type); 3816 3817 /* set up node. */ 3818 if (in_rcu) { 3819 newnode = mas_pop_node(mas); 3820 } else { 3821 memset(&reuse, 0, sizeof(struct maple_node)); 3822 newnode = &reuse; 3823 } 3824 3825 newnode->parent = mas_mn(mas)->parent; 3826 dst_pivots = ma_pivots(newnode, wr_mas->type); 3827 dst_slots = ma_slots(newnode, wr_mas->type); 3828 /* Copy from start to insert point */ 3829 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); 3830 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); 3831 3832 /* Handle insert of new range starting after old range */ 3833 if (wr_mas->r_min < mas->index) { 3834 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); 3835 dst_pivots[mas->offset++] = mas->index - 1; 3836 } 3837 3838 /* Store the new entry and range end. */ 3839 if (mas->offset < node_pivots) 3840 dst_pivots[mas->offset] = mas->last; 3841 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); 3842 3843 /* 3844 * this range wrote to the end of the node or it overwrote the rest of 3845 * the data 3846 */ 3847 if (offset_end > mas->end) 3848 goto done; 3849 3850 dst_offset = mas->offset + 1; 3851 /* Copy to the end of node if necessary. 
*/ 3852 copy_size = mas->end - offset_end + 1; 3853 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, 3854 sizeof(void *) * copy_size); 3855 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, 3856 sizeof(unsigned long) * (copy_size - 1)); 3857 3858 if (new_end < node_pivots) 3859 dst_pivots[new_end] = mas->max; 3860 3861 done: 3862 mas_leaf_set_meta(newnode, maple_leaf_64, new_end); 3863 if (in_rcu) { 3864 struct maple_enode *old_enode = mas->node; 3865 3866 mas->node = mt_mk_node(newnode, wr_mas->type); 3867 mas_replace_node(mas, old_enode); 3868 } else { 3869 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3870 } 3871 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3872 mas_update_gap(mas); 3873 mas->end = new_end; 3874 return; 3875 } 3876 3877 /* 3878 * mas_wr_slot_store: Attempt to store a value in a slot. 3879 * @wr_mas: the maple write state 3880 */ mas_wr_slot_store(struct ma_wr_state * wr_mas)3881 static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) 3882 { 3883 struct ma_state *mas = wr_mas->mas; 3884 unsigned char offset = mas->offset; 3885 void __rcu **slots = wr_mas->slots; 3886 bool gap = false; 3887 3888 gap |= !mt_slot_locked(mas->tree, slots, offset); 3889 gap |= !mt_slot_locked(mas->tree, slots, offset + 1); 3890 3891 if (wr_mas->offset_end - offset == 1) { 3892 if (mas->index == wr_mas->r_min) { 3893 /* Overwriting the range and a part of the next one */ 3894 rcu_assign_pointer(slots[offset], wr_mas->entry); 3895 wr_mas->pivots[offset] = mas->last; 3896 } else { 3897 /* Overwriting a part of the range and the next one */ 3898 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3899 wr_mas->pivots[offset] = mas->index - 1; 3900 mas->offset++; /* Keep mas accurate. */ 3901 } 3902 } else { 3903 WARN_ON_ONCE(mt_in_rcu(mas->tree)); 3904 /* 3905 * Expand the range, only partially overwriting the previous and 3906 * next ranges 3907 */ 3908 gap |= !mt_slot_locked(mas->tree, slots, offset + 2); 3909 rcu_assign_pointer(slots[offset + 1], wr_mas->entry); 3910 wr_mas->pivots[offset] = mas->index - 1; 3911 wr_mas->pivots[offset + 1] = mas->last; 3912 mas->offset++; /* Keep mas accurate. */ 3913 } 3914 3915 trace_ma_write(__func__, mas, 0, wr_mas->entry); 3916 /* 3917 * Only update gap when the new entry is empty or there is an empty 3918 * entry in the original two ranges. 
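 * For example (illustrative): overwriting two previously occupied ranges
 * with another non-NULL entry changes no gap, so the update below is skipped.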
3919 */ 3920 if (!wr_mas->entry || gap) 3921 mas_update_gap(mas); 3922 3923 return; 3924 } 3925 mas_wr_extend_null(struct ma_wr_state * wr_mas)3926 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) 3927 { 3928 struct ma_state *mas = wr_mas->mas; 3929 3930 if (!wr_mas->slots[wr_mas->offset_end]) { 3931 /* If this one is null, the next and prev are not */ 3932 mas->last = wr_mas->end_piv; 3933 } else { 3934 /* Check next slot(s) if we are overwriting the end */ 3935 if ((mas->last == wr_mas->end_piv) && 3936 (mas->end != wr_mas->offset_end) && 3937 !wr_mas->slots[wr_mas->offset_end + 1]) { 3938 wr_mas->offset_end++; 3939 if (wr_mas->offset_end == mas->end) 3940 mas->last = mas->max; 3941 else 3942 mas->last = wr_mas->pivots[wr_mas->offset_end]; 3943 wr_mas->end_piv = mas->last; 3944 } 3945 } 3946 3947 if (!wr_mas->content) { 3948 /* If this one is null, the next and prev are not */ 3949 mas->index = wr_mas->r_min; 3950 } else { 3951 /* Check prev slot if we are overwriting the start */ 3952 if (mas->index == wr_mas->r_min && mas->offset && 3953 !wr_mas->slots[mas->offset - 1]) { 3954 mas->offset--; 3955 wr_mas->r_min = mas->index = 3956 mas_safe_min(mas, wr_mas->pivots, mas->offset); 3957 wr_mas->r_max = wr_mas->pivots[mas->offset]; 3958 } 3959 } 3960 } 3961 mas_wr_end_piv(struct ma_wr_state * wr_mas)3962 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) 3963 { 3964 while ((wr_mas->offset_end < wr_mas->mas->end) && 3965 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) 3966 wr_mas->offset_end++; 3967 3968 if (wr_mas->offset_end < wr_mas->mas->end) 3969 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; 3970 else 3971 wr_mas->end_piv = wr_mas->mas->max; 3972 } 3973 mas_wr_new_end(struct ma_wr_state * wr_mas)3974 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) 3975 { 3976 struct ma_state *mas = wr_mas->mas; 3977 unsigned char new_end = mas->end + 2; 3978 3979 new_end -= wr_mas->offset_end - mas->offset; 3980 if (wr_mas->r_min == mas->index) 3981 new_end--; 3982 3983 if (wr_mas->end_piv == mas->last) 3984 new_end--; 3985 3986 return new_end; 3987 } 3988 3989 /* 3990 * mas_wr_append: Attempt to append 3991 * @wr_mas: the maple write state 3992 * @new_end: The end of the node after the modification 3993 * 3994 * This is currently unsafe in rcu mode since the end of the node may be cached 3995 * by readers while the node contents may be updated which could result in 3996 * inaccurate information. 3997 */ mas_wr_append(struct ma_wr_state * wr_mas,unsigned char new_end)3998 static inline void mas_wr_append(struct ma_wr_state *wr_mas, 3999 unsigned char new_end) 4000 { 4001 struct ma_state *mas = wr_mas->mas; 4002 void __rcu **slots; 4003 unsigned char end = mas->end; 4004 4005 if (new_end < mt_pivots[wr_mas->type]) { 4006 wr_mas->pivots[new_end] = wr_mas->pivots[end]; 4007 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); 4008 } 4009 4010 slots = wr_mas->slots; 4011 if (new_end == end + 1) { 4012 if (mas->last == wr_mas->r_max) { 4013 /* Append to end of range */ 4014 rcu_assign_pointer(slots[new_end], wr_mas->entry); 4015 wr_mas->pivots[end] = mas->index - 1; 4016 mas->offset = new_end; 4017 } else { 4018 /* Append to start of range */ 4019 rcu_assign_pointer(slots[new_end], wr_mas->content); 4020 wr_mas->pivots[end] = mas->last; 4021 rcu_assign_pointer(slots[end], wr_mas->entry); 4022 } 4023 } else { 4024 /* Append to the range without touching any boundaries. 
*/ 4025 rcu_assign_pointer(slots[new_end], wr_mas->content); 4026 wr_mas->pivots[end + 1] = mas->last; 4027 rcu_assign_pointer(slots[end + 1], wr_mas->entry); 4028 wr_mas->pivots[end] = mas->index - 1; 4029 mas->offset = end + 1; 4030 } 4031 4032 if (!wr_mas->content || !wr_mas->entry) 4033 mas_update_gap(mas); 4034 4035 mas->end = new_end; 4036 trace_ma_write(__func__, mas, new_end, wr_mas->entry); 4037 return; 4038 } 4039 4040 /* 4041 * mas_wr_bnode() - Slow path for a modification. 4042 * @wr_mas: The write maple state 4043 * 4044 * This is where split, rebalance end up. 4045 */ mas_wr_bnode(struct ma_wr_state * wr_mas)4046 static void mas_wr_bnode(struct ma_wr_state *wr_mas) 4047 { 4048 struct maple_big_node b_node; 4049 4050 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 4051 memset(&b_node, 0, sizeof(struct maple_big_node)); 4052 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 4053 mas_commit_b_node(wr_mas, &b_node); 4054 } 4055 4056 /* 4057 * mas_wr_store_entry() - Internal call to store a value 4058 * @wr_mas: The maple write state 4059 */ mas_wr_store_entry(struct ma_wr_state * wr_mas)4060 static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas) 4061 { 4062 struct ma_state *mas = wr_mas->mas; 4063 unsigned char new_end = mas_wr_new_end(wr_mas); 4064 4065 switch (mas->store_type) { 4066 case wr_invalid: 4067 MT_BUG_ON(mas->tree, 1); 4068 return; 4069 case wr_new_root: 4070 mas_new_root(mas, wr_mas->entry); 4071 break; 4072 case wr_store_root: 4073 mas_store_root(mas, wr_mas->entry); 4074 break; 4075 case wr_exact_fit: 4076 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); 4077 if (!!wr_mas->entry ^ !!wr_mas->content) 4078 mas_update_gap(mas); 4079 break; 4080 case wr_append: 4081 mas_wr_append(wr_mas, new_end); 4082 break; 4083 case wr_slot_store: 4084 mas_wr_slot_store(wr_mas); 4085 break; 4086 case wr_node_store: 4087 mas_wr_node_store(wr_mas, new_end); 4088 break; 4089 case wr_spanning_store: 4090 mas_wr_spanning_store(wr_mas); 4091 break; 4092 case wr_split_store: 4093 case wr_rebalance: 4094 mas_wr_bnode(wr_mas); 4095 break; 4096 } 4097 4098 return; 4099 } 4100 mas_wr_prealloc_setup(struct ma_wr_state * wr_mas)4101 static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas) 4102 { 4103 struct ma_state *mas = wr_mas->mas; 4104 4105 if (!mas_is_active(mas)) { 4106 if (mas_is_start(mas)) 4107 goto set_content; 4108 4109 if (unlikely(mas_is_paused(mas))) 4110 goto reset; 4111 4112 if (unlikely(mas_is_none(mas))) 4113 goto reset; 4114 4115 if (unlikely(mas_is_overflow(mas))) 4116 goto reset; 4117 4118 if (unlikely(mas_is_underflow(mas))) 4119 goto reset; 4120 } 4121 4122 /* 4123 * A less strict version of mas_is_span_wr() where we allow spanning 4124 * writes within this node. This is to stop partial walks in 4125 * mas_prealloc() from being reset. 4126 */ 4127 if (mas->last > mas->max) 4128 goto reset; 4129 4130 if (wr_mas->entry) 4131 goto set_content; 4132 4133 if (mte_is_leaf(mas->node) && mas->last == mas->max) 4134 goto reset; 4135 4136 goto set_content; 4137 4138 reset: 4139 mas_reset(mas); 4140 set_content: 4141 wr_mas->content = mas_start(mas); 4142 } 4143 4144 /** 4145 * mas_prealloc_calc() - Calculate number of nodes needed for a 4146 * given store oepration 4147 * @mas: The maple state 4148 * @entry: The entry to store into the tree 4149 * 4150 * Return: Number of nodes required for preallocation. 
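 *
 * Worked example (derived from the cases below, not part of the original
 * comment): on a tree of height 3, a wr_split_store reserves 3 * 2 + 1 = 7
 * nodes and a wr_spanning_store reserves 3 * 3 + 1 = 10, while wr_exact_fit,
 * wr_append and wr_slot_store need no allocation at all.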
4151 */ mas_prealloc_calc(struct ma_state * mas,void * entry)4152 static inline int mas_prealloc_calc(struct ma_state *mas, void *entry) 4153 { 4154 int ret = mas_mt_height(mas) * 3 + 1; 4155 4156 switch (mas->store_type) { 4157 case wr_invalid: 4158 WARN_ON_ONCE(1); 4159 break; 4160 case wr_new_root: 4161 ret = 1; 4162 break; 4163 case wr_store_root: 4164 if (likely((mas->last != 0) || (mas->index != 0))) 4165 ret = 1; 4166 else if (((unsigned long) (entry) & 3) == 2) 4167 ret = 1; 4168 else 4169 ret = 0; 4170 break; 4171 case wr_spanning_store: 4172 ret = mas_mt_height(mas) * 3 + 1; 4173 break; 4174 case wr_split_store: 4175 ret = mas_mt_height(mas) * 2 + 1; 4176 break; 4177 case wr_rebalance: 4178 ret = mas_mt_height(mas) * 2 - 1; 4179 break; 4180 case wr_node_store: 4181 ret = mt_in_rcu(mas->tree) ? 1 : 0; 4182 break; 4183 case wr_append: 4184 case wr_exact_fit: 4185 case wr_slot_store: 4186 ret = 0; 4187 } 4188 4189 return ret; 4190 } 4191 4192 /* 4193 * mas_wr_store_type() - Determine the store type for a given 4194 * store operation. 4195 * @wr_mas: The maple write state 4196 * 4197 * Return: the type of store needed for the operation 4198 */ mas_wr_store_type(struct ma_wr_state * wr_mas)4199 static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) 4200 { 4201 struct ma_state *mas = wr_mas->mas; 4202 unsigned char new_end; 4203 4204 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 4205 return wr_store_root; 4206 4207 if (unlikely(!mas_wr_walk(wr_mas))) 4208 return wr_spanning_store; 4209 4210 /* At this point, we are at the leaf node that needs to be altered. */ 4211 mas_wr_end_piv(wr_mas); 4212 if (!wr_mas->entry) 4213 mas_wr_extend_null(wr_mas); 4214 4215 if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) 4216 return wr_exact_fit; 4217 4218 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 4219 return wr_new_root; 4220 4221 new_end = mas_wr_new_end(wr_mas); 4222 /* Potential spanning rebalance collapsing a node */ 4223 if (new_end < mt_min_slots[wr_mas->type]) { 4224 if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) 4225 return wr_rebalance; 4226 return wr_node_store; 4227 } 4228 4229 if (new_end >= mt_slots[wr_mas->type]) 4230 return wr_split_store; 4231 4232 if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) 4233 return wr_append; 4234 4235 if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || 4236 (wr_mas->offset_end - mas->offset == 1))) 4237 return wr_slot_store; 4238 4239 return wr_node_store; 4240 } 4241 4242 /** 4243 * mas_wr_preallocate() - Preallocate enough nodes for a store operation 4244 * @wr_mas: The maple write state 4245 * @entry: The entry that will be stored 4246 * 4247 */ mas_wr_preallocate(struct ma_wr_state * wr_mas,void * entry)4248 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) 4249 { 4250 struct ma_state *mas = wr_mas->mas; 4251 int request; 4252 4253 mas_wr_prealloc_setup(wr_mas); 4254 mas->store_type = mas_wr_store_type(wr_mas); 4255 request = mas_prealloc_calc(mas, entry); 4256 if (!request) 4257 return; 4258 4259 mas_node_count(mas, request); 4260 } 4261 4262 /** 4263 * mas_insert() - Internal call to insert a value 4264 * @mas: The maple state 4265 * @entry: The entry to store 4266 * 4267 * Return: %NULL or the contents that already exists at the requested index 4268 * otherwise. The maple state needs to be checked for error conditions. 
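 *
 * Worked example (illustrative, not part of the original comment): inserting
 * the range 5-10 into a NULL gap covering 0-19 writes two pivots, 4 and 10,
 * in addition to the entry; had the gap started exactly at index 5, only the
 * end pivot 10 would be required.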
4269 */ mas_insert(struct ma_state * mas,void * entry)4270 static inline void *mas_insert(struct ma_state *mas, void *entry) 4271 { 4272 MA_WR_STATE(wr_mas, mas, entry); 4273 4274 /* 4275 * Inserting a new range inserts either 0, 1, or 2 pivots within the 4276 * tree. If the insert fits exactly into an existing gap with a value 4277 * of NULL, then the slot only needs to be written with the new value. 4278 * If the range being inserted is adjacent to another range, then only a 4279 * single pivot needs to be inserted (as well as writing the entry). If 4280 * the new range is within a gap but does not touch any other ranges, 4281 * then two pivots need to be inserted: the start - 1, and the end. As 4282 * usual, the entry must be written. Most operations require a new node 4283 * to be allocated and replace an existing node to ensure RCU safety, 4284 * when in RCU mode. The exception to requiring a newly allocated node 4285 * is when inserting at the end of a node (appending). When done 4286 * carefully, appending can reuse the node in place. 4287 */ 4288 wr_mas.content = mas_start(mas); 4289 if (wr_mas.content) 4290 goto exists; 4291 4292 mas_wr_preallocate(&wr_mas, entry); 4293 if (mas_is_err(mas)) 4294 return NULL; 4295 4296 /* spanning writes always overwrite something */ 4297 if (mas->store_type == wr_spanning_store) 4298 goto exists; 4299 4300 /* At this point, we are at the leaf node that needs to be altered. */ 4301 if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) { 4302 wr_mas.offset_end = mas->offset; 4303 wr_mas.end_piv = wr_mas.r_max; 4304 4305 if (wr_mas.content || (mas->last > wr_mas.r_max)) 4306 goto exists; 4307 } 4308 4309 mas_wr_store_entry(&wr_mas); 4310 return wr_mas.content; 4311 4312 exists: 4313 mas_set_err(mas, -EEXIST); 4314 return wr_mas.content; 4315 4316 } 4317 4318 /** 4319 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry 4320 * @mas: The maple state. 4321 * @startp: Pointer to ID. 4322 * @range_lo: Lower bound of range to search. 4323 * @range_hi: Upper bound of range to search. 4324 * @entry: The entry to store. 4325 * @next: Pointer to next ID to allocate. 4326 * @gfp: The GFP_FLAGS to use for allocations. 4327 * 4328 * Return: 0 if the allocation succeeded without wrapping, 1 if the 4329 * allocation succeeded after wrapping, or -EBUSY if there are no 4330 * free entries. 
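 *
 * Illustrative usage sketch (assumes a tree created with
 * MT_FLAGS_ALLOC_RANGE; the variables are hypothetical and not taken from
 * this file):
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	unsigned long id, next = 0;
 *	int ret;
 *
 *	mtree_lock(&tree);
 *	ret = mas_alloc_cyclic(&mas, &id, ptr, 1, 1023, &next, GFP_KERNEL);
 *	mtree_unlock(&tree);
 *	if (ret >= 0)
 *		pr_debug("allocated id %lu, next hint %lu\n", id, next);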
4331 */ mas_alloc_cyclic(struct ma_state * mas,unsigned long * startp,void * entry,unsigned long range_lo,unsigned long range_hi,unsigned long * next,gfp_t gfp)4332 int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, 4333 void *entry, unsigned long range_lo, unsigned long range_hi, 4334 unsigned long *next, gfp_t gfp) 4335 { 4336 unsigned long min = range_lo; 4337 int ret = 0; 4338 4339 range_lo = max(min, *next); 4340 ret = mas_empty_area(mas, range_lo, range_hi, 1); 4341 if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) { 4342 mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED; 4343 ret = 1; 4344 } 4345 if (ret < 0 && range_lo > min) { 4346 mas_reset(mas); 4347 ret = mas_empty_area(mas, min, range_hi, 1); 4348 if (ret == 0) 4349 ret = 1; 4350 } 4351 if (ret < 0) 4352 return ret; 4353 4354 do { 4355 mas_insert(mas, entry); 4356 } while (mas_nomem(mas, gfp)); 4357 if (mas_is_err(mas)) 4358 return xa_err(mas->node); 4359 4360 *startp = mas->index; 4361 *next = *startp + 1; 4362 if (*next == 0) 4363 mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; 4364 4365 mas_destroy(mas); 4366 return ret; 4367 } 4368 EXPORT_SYMBOL(mas_alloc_cyclic); 4369 mas_rewalk(struct ma_state * mas,unsigned long index)4370 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) 4371 { 4372 retry: 4373 mas_set(mas, index); 4374 mas_state_walk(mas); 4375 if (mas_is_start(mas)) 4376 goto retry; 4377 } 4378 mas_rewalk_if_dead(struct ma_state * mas,struct maple_node * node,const unsigned long index)4379 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, 4380 struct maple_node *node, const unsigned long index) 4381 { 4382 if (unlikely(ma_dead_node(node))) { 4383 mas_rewalk(mas, index); 4384 return true; 4385 } 4386 return false; 4387 } 4388 4389 /* 4390 * mas_prev_node() - Find the prev non-null entry at the same level in the 4391 * tree. The prev value will be mas->node[mas->offset] or the status will be 4392 * ma_none. 4393 * @mas: The maple state 4394 * @min: The lower limit to search 4395 * 4396 * The prev node value will be mas->node[mas->offset] or the status will be 4397 * ma_none. 4398 * Return: 1 if the node is dead, 0 otherwise. 4399 */ mas_prev_node(struct ma_state * mas,unsigned long min)4400 static int mas_prev_node(struct ma_state *mas, unsigned long min) 4401 { 4402 enum maple_type mt; 4403 int offset, level; 4404 void __rcu **slots; 4405 struct maple_node *node; 4406 unsigned long *pivots; 4407 unsigned long max; 4408 4409 node = mas_mn(mas); 4410 if (!mas->min) 4411 goto no_entry; 4412 4413 max = mas->min - 1; 4414 if (max < min) 4415 goto no_entry; 4416 4417 level = 0; 4418 do { 4419 if (ma_is_root(node)) 4420 goto no_entry; 4421 4422 /* Walk up. 
*/ 4423 if (unlikely(mas_ascend(mas))) 4424 return 1; 4425 offset = mas->offset; 4426 level++; 4427 node = mas_mn(mas); 4428 } while (!offset); 4429 4430 offset--; 4431 mt = mte_node_type(mas->node); 4432 while (level > 1) { 4433 level--; 4434 slots = ma_slots(node, mt); 4435 mas->node = mas_slot(mas, slots, offset); 4436 if (unlikely(ma_dead_node(node))) 4437 return 1; 4438 4439 mt = mte_node_type(mas->node); 4440 node = mas_mn(mas); 4441 pivots = ma_pivots(node, mt); 4442 offset = ma_data_end(node, mt, pivots, max); 4443 if (unlikely(ma_dead_node(node))) 4444 return 1; 4445 } 4446 4447 slots = ma_slots(node, mt); 4448 mas->node = mas_slot(mas, slots, offset); 4449 pivots = ma_pivots(node, mt); 4450 if (unlikely(ma_dead_node(node))) 4451 return 1; 4452 4453 if (likely(offset)) 4454 mas->min = pivots[offset - 1] + 1; 4455 mas->max = max; 4456 mas->offset = mas_data_end(mas); 4457 if (unlikely(mte_dead_node(mas->node))) 4458 return 1; 4459 4460 mas->end = mas->offset; 4461 return 0; 4462 4463 no_entry: 4464 if (unlikely(ma_dead_node(node))) 4465 return 1; 4466 4467 mas->status = ma_underflow; 4468 return 0; 4469 } 4470 4471 /* 4472 * mas_prev_slot() - Get the entry in the previous slot 4473 * 4474 * @mas: The maple state 4475 * @min: The minimum starting range 4476 * @empty: Can be empty 4477 * 4478 * Return: The entry in the previous slot which is possibly NULL 4479 */ mas_prev_slot(struct ma_state * mas,unsigned long min,bool empty)4480 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) 4481 { 4482 void *entry; 4483 void __rcu **slots; 4484 unsigned long pivot; 4485 enum maple_type type; 4486 unsigned long *pivots; 4487 struct maple_node *node; 4488 unsigned long save_point = mas->index; 4489 4490 retry: 4491 node = mas_mn(mas); 4492 type = mte_node_type(mas->node); 4493 pivots = ma_pivots(node, type); 4494 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4495 goto retry; 4496 4497 if (mas->min <= min) { 4498 pivot = mas_safe_min(mas, pivots, mas->offset); 4499 4500 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4501 goto retry; 4502 4503 if (pivot <= min) 4504 goto underflow; 4505 } 4506 4507 again: 4508 if (likely(mas->offset)) { 4509 mas->offset--; 4510 mas->last = mas->index - 1; 4511 mas->index = mas_safe_min(mas, pivots, mas->offset); 4512 } else { 4513 if (mas->index <= min) 4514 goto underflow; 4515 4516 if (mas_prev_node(mas, min)) { 4517 mas_rewalk(mas, save_point); 4518 goto retry; 4519 } 4520 4521 if (WARN_ON_ONCE(mas_is_underflow(mas))) 4522 return NULL; 4523 4524 mas->last = mas->max; 4525 node = mas_mn(mas); 4526 type = mte_node_type(mas->node); 4527 pivots = ma_pivots(node, type); 4528 mas->index = pivots[mas->offset - 1] + 1; 4529 } 4530 4531 slots = ma_slots(node, type); 4532 entry = mas_slot(mas, slots, mas->offset); 4533 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4534 goto retry; 4535 4536 4537 if (likely(entry)) 4538 return entry; 4539 4540 if (!empty) { 4541 if (mas->index <= min) { 4542 mas->status = ma_underflow; 4543 return NULL; 4544 } 4545 4546 goto again; 4547 } 4548 4549 return entry; 4550 4551 underflow: 4552 mas->status = ma_underflow; 4553 return NULL; 4554 } 4555 4556 /* 4557 * mas_next_node() - Get the next node at the same level in the tree. 4558 * @mas: The maple state 4559 * @node: The maple node 4560 * @max: The maximum pivot value to check. 4561 * 4562 * The next value will be mas->node[mas->offset] or the status will have 4563 * overflowed. 4564 * Return: 1 on dead node, 0 otherwise. 
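 *
 * Illustrative walk (restating the code below, not part of the original
 * comment): from the last slot of a node, ascend until an ancestor offset
 * with a sibling to the right is found, step to that sibling, then descend
 * through slot 0 until the original level is reached again.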
4565 */ mas_next_node(struct ma_state * mas,struct maple_node * node,unsigned long max)4566 static int mas_next_node(struct ma_state *mas, struct maple_node *node, 4567 unsigned long max) 4568 { 4569 unsigned long min; 4570 unsigned long *pivots; 4571 struct maple_enode *enode; 4572 struct maple_node *tmp; 4573 int level = 0; 4574 unsigned char node_end; 4575 enum maple_type mt; 4576 void __rcu **slots; 4577 4578 if (mas->max >= max) 4579 goto overflow; 4580 4581 min = mas->max + 1; 4582 level = 0; 4583 do { 4584 if (ma_is_root(node)) 4585 goto overflow; 4586 4587 /* Walk up. */ 4588 if (unlikely(mas_ascend(mas))) 4589 return 1; 4590 4591 level++; 4592 node = mas_mn(mas); 4593 mt = mte_node_type(mas->node); 4594 pivots = ma_pivots(node, mt); 4595 node_end = ma_data_end(node, mt, pivots, mas->max); 4596 if (unlikely(ma_dead_node(node))) 4597 return 1; 4598 4599 } while (unlikely(mas->offset == node_end)); 4600 4601 slots = ma_slots(node, mt); 4602 mas->offset++; 4603 enode = mas_slot(mas, slots, mas->offset); 4604 if (unlikely(ma_dead_node(node))) 4605 return 1; 4606 4607 if (level > 1) 4608 mas->offset = 0; 4609 4610 while (unlikely(level > 1)) { 4611 level--; 4612 mas->node = enode; 4613 node = mas_mn(mas); 4614 mt = mte_node_type(mas->node); 4615 slots = ma_slots(node, mt); 4616 enode = mas_slot(mas, slots, 0); 4617 if (unlikely(ma_dead_node(node))) 4618 return 1; 4619 } 4620 4621 if (!mas->offset) 4622 pivots = ma_pivots(node, mt); 4623 4624 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); 4625 tmp = mte_to_node(enode); 4626 mt = mte_node_type(enode); 4627 pivots = ma_pivots(tmp, mt); 4628 mas->end = ma_data_end(tmp, mt, pivots, mas->max); 4629 if (unlikely(ma_dead_node(node))) 4630 return 1; 4631 4632 mas->node = enode; 4633 mas->min = min; 4634 return 0; 4635 4636 overflow: 4637 if (unlikely(ma_dead_node(node))) 4638 return 1; 4639 4640 mas->status = ma_overflow; 4641 return 0; 4642 } 4643 4644 /* 4645 * mas_next_slot() - Get the entry in the next slot 4646 * 4647 * @mas: The maple state 4648 * @max: The maximum starting range 4649 * @empty: Can be empty 4650 * 4651 * Return: The entry in the next slot which is possibly NULL 4652 */ mas_next_slot(struct ma_state * mas,unsigned long max,bool empty)4653 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) 4654 { 4655 void __rcu **slots; 4656 unsigned long *pivots; 4657 unsigned long pivot; 4658 enum maple_type type; 4659 struct maple_node *node; 4660 unsigned long save_point = mas->last; 4661 void *entry; 4662 4663 retry: 4664 node = mas_mn(mas); 4665 type = mte_node_type(mas->node); 4666 pivots = ma_pivots(node, type); 4667 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4668 goto retry; 4669 4670 if (mas->max >= max) { 4671 if (likely(mas->offset < mas->end)) 4672 pivot = pivots[mas->offset]; 4673 else 4674 pivot = mas->max; 4675 4676 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4677 goto retry; 4678 4679 if (pivot >= max) { /* Was at the limit, next will extend beyond */ 4680 mas->status = ma_overflow; 4681 return NULL; 4682 } 4683 } 4684 4685 if (likely(mas->offset < mas->end)) { 4686 mas->index = pivots[mas->offset] + 1; 4687 again: 4688 mas->offset++; 4689 if (likely(mas->offset < mas->end)) 4690 mas->last = pivots[mas->offset]; 4691 else 4692 mas->last = mas->max; 4693 } else { 4694 if (mas->last >= max) { 4695 mas->status = ma_overflow; 4696 return NULL; 4697 } 4698 4699 if (mas_next_node(mas, node, max)) { 4700 mas_rewalk(mas, save_point); 4701 goto retry; 4702 } 4703 4704 
if (WARN_ON_ONCE(mas_is_overflow(mas))) 4705 return NULL; 4706 4707 mas->offset = 0; 4708 mas->index = mas->min; 4709 node = mas_mn(mas); 4710 type = mte_node_type(mas->node); 4711 pivots = ma_pivots(node, type); 4712 mas->last = pivots[0]; 4713 } 4714 4715 slots = ma_slots(node, type); 4716 entry = mt_slot(mas->tree, slots, mas->offset); 4717 if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) 4718 goto retry; 4719 4720 if (entry) 4721 return entry; 4722 4723 4724 if (!empty) { 4725 if (mas->last >= max) { 4726 mas->status = ma_overflow; 4727 return NULL; 4728 } 4729 4730 mas->index = mas->last + 1; 4731 goto again; 4732 } 4733 4734 return entry; 4735 } 4736 4737 /* 4738 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the 4739 * highest gap address of a given size in a given node and descend. 4740 * @mas: The maple state 4741 * @size: The needed size. 4742 * 4743 * Return: True if found in a leaf, false otherwise. 4744 * 4745 */ mas_rev_awalk(struct ma_state * mas,unsigned long size,unsigned long * gap_min,unsigned long * gap_max)4746 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, 4747 unsigned long *gap_min, unsigned long *gap_max) 4748 { 4749 enum maple_type type = mte_node_type(mas->node); 4750 struct maple_node *node = mas_mn(mas); 4751 unsigned long *pivots, *gaps; 4752 void __rcu **slots; 4753 unsigned long gap = 0; 4754 unsigned long max, min; 4755 unsigned char offset; 4756 4757 if (unlikely(mas_is_err(mas))) 4758 return true; 4759 4760 if (ma_is_dense(type)) { 4761 /* dense nodes. */ 4762 mas->offset = (unsigned char)(mas->index - mas->min); 4763 return true; 4764 } 4765 4766 pivots = ma_pivots(node, type); 4767 slots = ma_slots(node, type); 4768 gaps = ma_gaps(node, type); 4769 offset = mas->offset; 4770 min = mas_safe_min(mas, pivots, offset); 4771 /* Skip out of bounds. */ 4772 while (mas->last < min) 4773 min = mas_safe_min(mas, pivots, --offset); 4774 4775 max = mas_safe_pivot(mas, pivots, offset, type); 4776 while (mas->index <= max) { 4777 gap = 0; 4778 if (gaps) 4779 gap = gaps[offset]; 4780 else if (!mas_slot(mas, slots, offset)) 4781 gap = max - min + 1; 4782 4783 if (gap) { 4784 if ((size <= gap) && (size <= mas->last - min + 1)) 4785 break; 4786 4787 if (!gaps) { 4788 /* Skip the next slot, it cannot be a gap. */ 4789 if (offset < 2) 4790 goto ascend; 4791 4792 offset -= 2; 4793 max = pivots[offset]; 4794 min = mas_safe_min(mas, pivots, offset); 4795 continue; 4796 } 4797 } 4798 4799 if (!offset) 4800 goto ascend; 4801 4802 offset--; 4803 max = min - 1; 4804 min = mas_safe_min(mas, pivots, offset); 4805 } 4806 4807 if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) 4808 goto no_space; 4809 4810 if (unlikely(ma_is_leaf(type))) { 4811 mas->offset = offset; 4812 *gap_min = min; 4813 *gap_max = min + gap - 1; 4814 return true; 4815 } 4816 4817 /* descend, only happens under lock. 
*/ 4818 mas->node = mas_slot(mas, slots, offset); 4819 mas->min = min; 4820 mas->max = max; 4821 mas->offset = mas_data_end(mas); 4822 return false; 4823 4824 ascend: 4825 if (!mte_is_root(mas->node)) 4826 return false; 4827 4828 no_space: 4829 mas_set_err(mas, -EBUSY); 4830 return false; 4831 } 4832 mas_anode_descend(struct ma_state * mas,unsigned long size)4833 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) 4834 { 4835 enum maple_type type = mte_node_type(mas->node); 4836 unsigned long pivot, min, gap = 0; 4837 unsigned char offset, data_end; 4838 unsigned long *gaps, *pivots; 4839 void __rcu **slots; 4840 struct maple_node *node; 4841 bool found = false; 4842 4843 if (ma_is_dense(type)) { 4844 mas->offset = (unsigned char)(mas->index - mas->min); 4845 return true; 4846 } 4847 4848 node = mas_mn(mas); 4849 pivots = ma_pivots(node, type); 4850 slots = ma_slots(node, type); 4851 gaps = ma_gaps(node, type); 4852 offset = mas->offset; 4853 min = mas_safe_min(mas, pivots, offset); 4854 data_end = ma_data_end(node, type, pivots, mas->max); 4855 for (; offset <= data_end; offset++) { 4856 pivot = mas_safe_pivot(mas, pivots, offset, type); 4857 4858 /* Not within lower bounds */ 4859 if (mas->index > pivot) 4860 goto next_slot; 4861 4862 if (gaps) 4863 gap = gaps[offset]; 4864 else if (!mas_slot(mas, slots, offset)) 4865 gap = min(pivot, mas->last) - max(mas->index, min) + 1; 4866 else 4867 goto next_slot; 4868 4869 if (gap >= size) { 4870 if (ma_is_leaf(type)) { 4871 found = true; 4872 break; 4873 } 4874 4875 mas->node = mas_slot(mas, slots, offset); 4876 mas->min = min; 4877 mas->max = pivot; 4878 offset = 0; 4879 break; 4880 } 4881 next_slot: 4882 min = pivot + 1; 4883 if (mas->last <= pivot) { 4884 mas_set_err(mas, -EBUSY); 4885 return true; 4886 } 4887 } 4888 4889 mas->offset = offset; 4890 return found; 4891 } 4892 4893 /** 4894 * mas_walk() - Search for @mas->index in the tree. 4895 * @mas: The maple state. 4896 * 4897 * mas->index and mas->last will be set to the range if there is a value. If 4898 * mas->status is ma_none, reset to ma_start 4899 * 4900 * Return: the entry at the location or %NULL. 4901 */ mas_walk(struct ma_state * mas)4902 void *mas_walk(struct ma_state *mas) 4903 { 4904 void *entry; 4905 4906 if (!mas_is_active(mas) || !mas_is_start(mas)) 4907 mas->status = ma_start; 4908 retry: 4909 entry = mas_state_walk(mas); 4910 if (mas_is_start(mas)) { 4911 goto retry; 4912 } else if (mas_is_none(mas)) { 4913 mas->index = 0; 4914 mas->last = ULONG_MAX; 4915 } else if (mas_is_ptr(mas)) { 4916 if (!mas->index) { 4917 mas->last = 0; 4918 return entry; 4919 } 4920 4921 mas->index = 1; 4922 mas->last = ULONG_MAX; 4923 mas->status = ma_none; 4924 return NULL; 4925 } 4926 4927 return entry; 4928 } 4929 EXPORT_SYMBOL_GPL(mas_walk); 4930 mas_rewind_node(struct ma_state * mas)4931 static inline bool mas_rewind_node(struct ma_state *mas) 4932 { 4933 unsigned char slot; 4934 4935 do { 4936 if (mte_is_root(mas->node)) { 4937 slot = mas->offset; 4938 if (!slot) 4939 return false; 4940 } else { 4941 mas_ascend(mas); 4942 slot = mas->offset; 4943 } 4944 } while (!slot); 4945 4946 mas->offset = --slot; 4947 return true; 4948 } 4949 4950 /* 4951 * mas_skip_node() - Internal function. Skip over a node. 4952 * @mas: The maple state. 4953 * 4954 * Return: true if there is another node, false otherwise. 
4955 */ mas_skip_node(struct ma_state * mas)4956 static inline bool mas_skip_node(struct ma_state *mas) 4957 { 4958 if (mas_is_err(mas)) 4959 return false; 4960 4961 do { 4962 if (mte_is_root(mas->node)) { 4963 if (mas->offset >= mas_data_end(mas)) { 4964 mas_set_err(mas, -EBUSY); 4965 return false; 4966 } 4967 } else { 4968 mas_ascend(mas); 4969 } 4970 } while (mas->offset >= mas_data_end(mas)); 4971 4972 mas->offset++; 4973 return true; 4974 } 4975 4976 /* 4977 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of 4978 * @size 4979 * @mas: The maple state 4980 * @size: The size of the gap required 4981 * 4982 * Search between @mas->index and @mas->last for a gap of @size. 4983 */ mas_awalk(struct ma_state * mas,unsigned long size)4984 static inline void mas_awalk(struct ma_state *mas, unsigned long size) 4985 { 4986 struct maple_enode *last = NULL; 4987 4988 /* 4989 * There are 4 options: 4990 * go to child (descend) 4991 * go back to parent (ascend) 4992 * no gap found. (return, error == -EBUSY) 4993 * found the gap. (return) 4994 */ 4995 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { 4996 if (last == mas->node) 4997 mas_skip_node(mas); 4998 else 4999 last = mas->node; 5000 } 5001 } 5002 5003 /* 5004 * mas_sparse_area() - Internal function. Return upper or lower limit when 5005 * searching for a gap in an empty tree. 5006 * @mas: The maple state 5007 * @min: the minimum range 5008 * @max: The maximum range 5009 * @size: The size of the gap 5010 * @fwd: Searching forward or back 5011 */ mas_sparse_area(struct ma_state * mas,unsigned long min,unsigned long max,unsigned long size,bool fwd)5012 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, 5013 unsigned long max, unsigned long size, bool fwd) 5014 { 5015 if (!unlikely(mas_is_none(mas)) && min == 0) { 5016 min++; 5017 /* 5018 * At this time, min is increased, we need to recheck whether 5019 * the size is satisfied. 5020 */ 5021 if (min > max || max - min + 1 < size) 5022 return -EBUSY; 5023 } 5024 /* mas_is_ptr */ 5025 5026 if (fwd) { 5027 mas->index = min; 5028 mas->last = min + size - 1; 5029 } else { 5030 mas->last = max; 5031 mas->index = max - size + 1; 5032 } 5033 return 0; 5034 } 5035 5036 /* 5037 * mas_empty_area() - Get the lowest address within the range that is 5038 * sufficient for the size requested. 
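 * For example (illustrative sketch; the tree and bounds are hypothetical and
 * an MT_FLAGS_ALLOC_RANGE tree is assumed), finding an unused span of 16
 * indices somewhere in 0-1023:
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *
 *	mtree_lock(&tree);
 *	if (!mas_empty_area(&mas, 0, 1023, 16))
 *		pr_debug("gap starts at %lu\n", mas.index);
 *	mtree_unlock(&tree);
 *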
5039 * @mas: The maple state 5040 * @min: The lowest value of the range 5041 * @max: The highest value of the range 5042 * @size: The size needed 5043 */ mas_empty_area(struct ma_state * mas,unsigned long min,unsigned long max,unsigned long size)5044 int mas_empty_area(struct ma_state *mas, unsigned long min, 5045 unsigned long max, unsigned long size) 5046 { 5047 unsigned char offset; 5048 unsigned long *pivots; 5049 enum maple_type mt; 5050 struct maple_node *node; 5051 5052 if (min > max) 5053 return -EINVAL; 5054 5055 if (size == 0 || max - min < size - 1) 5056 return -EINVAL; 5057 5058 if (mas_is_start(mas)) 5059 mas_start(mas); 5060 else if (mas->offset >= 2) 5061 mas->offset -= 2; 5062 else if (!mas_skip_node(mas)) 5063 return -EBUSY; 5064 5065 /* Empty set */ 5066 if (mas_is_none(mas) || mas_is_ptr(mas)) 5067 return mas_sparse_area(mas, min, max, size, true); 5068 5069 /* The start of the window can only be within these values */ 5070 mas->index = min; 5071 mas->last = max; 5072 mas_awalk(mas, size); 5073 5074 if (unlikely(mas_is_err(mas))) 5075 return xa_err(mas->node); 5076 5077 offset = mas->offset; 5078 node = mas_mn(mas); 5079 mt = mte_node_type(mas->node); 5080 pivots = ma_pivots(node, mt); 5081 min = mas_safe_min(mas, pivots, offset); 5082 if (mas->index < min) 5083 mas->index = min; 5084 mas->last = mas->index + size - 1; 5085 mas->end = ma_data_end(node, mt, pivots, mas->max); 5086 return 0; 5087 } 5088 EXPORT_SYMBOL_GPL(mas_empty_area); 5089 5090 /* 5091 * mas_empty_area_rev() - Get the highest address within the range that is 5092 * sufficient for the size requested. 5093 * @mas: The maple state 5094 * @min: The lowest value of the range 5095 * @max: The highest value of the range 5096 * @size: The size needed 5097 */ mas_empty_area_rev(struct ma_state * mas,unsigned long min,unsigned long max,unsigned long size)5098 int mas_empty_area_rev(struct ma_state *mas, unsigned long min, 5099 unsigned long max, unsigned long size) 5100 { 5101 struct maple_enode *last = mas->node; 5102 5103 if (min > max) 5104 return -EINVAL; 5105 5106 if (size == 0 || max - min < size - 1) 5107 return -EINVAL; 5108 5109 if (mas_is_start(mas)) 5110 mas_start(mas); 5111 else if ((mas->offset < 2) && (!mas_rewind_node(mas))) 5112 return -EBUSY; 5113 5114 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) 5115 return mas_sparse_area(mas, min, max, size, false); 5116 else if (mas->offset >= 2) 5117 mas->offset -= 2; 5118 else 5119 mas->offset = mas_data_end(mas); 5120 5121 5122 /* The start of the window can only be within these values. */ 5123 mas->index = min; 5124 mas->last = max; 5125 5126 while (!mas_rev_awalk(mas, size, &min, &max)) { 5127 if (last == mas->node) { 5128 if (!mas_rewind_node(mas)) 5129 return -EBUSY; 5130 } else { 5131 last = mas->node; 5132 } 5133 } 5134 5135 if (mas_is_err(mas)) 5136 return xa_err(mas->node); 5137 5138 if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) 5139 return -EBUSY; 5140 5141 /* Trim the upper limit to the max. */ 5142 if (max < mas->last) 5143 mas->last = max; 5144 5145 mas->index = mas->last - size + 1; 5146 mas->end = mas_data_end(mas); 5147 return 0; 5148 } 5149 EXPORT_SYMBOL_GPL(mas_empty_area_rev); 5150 5151 /* 5152 * mte_dead_leaves() - Mark all leaves of a node as dead. 5153 * @enode: the encoded node 5154 * @mt: the maple tree 5155 * @slots: Pointer to the slot array 5156 * 5157 * Must hold the write lock. 5158 * 5159 * Return: The number of leaves marked as dead. 
5160 */ 5161 static inline mte_dead_leaves(struct maple_enode * enode,struct maple_tree * mt,void __rcu ** slots)5162 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, 5163 void __rcu **slots) 5164 { 5165 struct maple_node *node; 5166 enum maple_type type; 5167 void *entry; 5168 int offset; 5169 5170 for (offset = 0; offset < mt_slot_count(enode); offset++) { 5171 entry = mt_slot(mt, slots, offset); 5172 type = mte_node_type(entry); 5173 node = mte_to_node(entry); 5174 /* Use both node and type to catch LE & BE metadata */ 5175 if (!node || !type) 5176 break; 5177 5178 mte_set_node_dead(entry); 5179 node->type = type; 5180 rcu_assign_pointer(slots[offset], node); 5181 } 5182 5183 return offset; 5184 } 5185 5186 /** 5187 * mte_dead_walk() - Walk down a dead tree to just before the leaves 5188 * @enode: The maple encoded node 5189 * @offset: The starting offset 5190 * 5191 * Note: This can only be used from the RCU callback context. 5192 */ mte_dead_walk(struct maple_enode ** enode,unsigned char offset)5193 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) 5194 { 5195 struct maple_node *node, *next; 5196 void __rcu **slots = NULL; 5197 5198 next = mte_to_node(*enode); 5199 do { 5200 *enode = ma_enode_ptr(next); 5201 node = mte_to_node(*enode); 5202 slots = ma_slots(node, node->type); 5203 next = rcu_dereference_protected(slots[offset], 5204 lock_is_held(&rcu_callback_map)); 5205 offset = 0; 5206 } while (!ma_is_leaf(next->type)); 5207 5208 return slots; 5209 } 5210 5211 /** 5212 * mt_free_walk() - Walk & free a tree in the RCU callback context 5213 * @head: The RCU head that's within the node. 5214 * 5215 * Note: This can only be used from the RCU callback context. 5216 */ mt_free_walk(struct rcu_head * head)5217 static void mt_free_walk(struct rcu_head *head) 5218 { 5219 void __rcu **slots; 5220 struct maple_node *node, *start; 5221 struct maple_enode *enode; 5222 unsigned char offset; 5223 enum maple_type type; 5224 5225 node = container_of(head, struct maple_node, rcu); 5226 5227 if (ma_is_leaf(node->type)) 5228 goto free_leaf; 5229 5230 start = node; 5231 enode = mt_mk_node(node, node->type); 5232 slots = mte_dead_walk(&enode, 0); 5233 node = mte_to_node(enode); 5234 do { 5235 mt_free_bulk(node->slot_len, slots); 5236 offset = node->parent_slot + 1; 5237 enode = node->piv_parent; 5238 if (mte_to_node(enode) == node) 5239 goto free_leaf; 5240 5241 type = mte_node_type(enode); 5242 slots = ma_slots(mte_to_node(enode), type); 5243 if ((offset < mt_slots[type]) && 5244 rcu_dereference_protected(slots[offset], 5245 lock_is_held(&rcu_callback_map))) 5246 slots = mte_dead_walk(&enode, offset); 5247 node = mte_to_node(enode); 5248 } while ((node != start) || (node->slot_len < offset)); 5249 5250 slots = ma_slots(node, node->type); 5251 mt_free_bulk(node->slot_len, slots); 5252 5253 free_leaf: 5254 mt_free_rcu(&node->rcu); 5255 } 5256 mte_destroy_descend(struct maple_enode ** enode,struct maple_tree * mt,struct maple_enode * prev,unsigned char offset)5257 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, 5258 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) 5259 { 5260 struct maple_node *node; 5261 struct maple_enode *next = *enode; 5262 void __rcu **slots = NULL; 5263 enum maple_type type; 5264 unsigned char next_offset = 0; 5265 5266 do { 5267 *enode = next; 5268 node = mte_to_node(*enode); 5269 type = mte_node_type(*enode); 5270 slots = ma_slots(node, type); 5271 next = 
mt_slot_locked(mt, slots, next_offset); 5272 if ((mte_dead_node(next))) 5273 next = mt_slot_locked(mt, slots, ++next_offset); 5274 5275 mte_set_node_dead(*enode); 5276 node->type = type; 5277 node->piv_parent = prev; 5278 node->parent_slot = offset; 5279 offset = next_offset; 5280 next_offset = 0; 5281 prev = *enode; 5282 } while (!mte_is_leaf(next)); 5283 5284 return slots; 5285 } 5286 mt_destroy_walk(struct maple_enode * enode,struct maple_tree * mt,bool free)5287 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, 5288 bool free) 5289 { 5290 void __rcu **slots; 5291 struct maple_node *node = mte_to_node(enode); 5292 struct maple_enode *start; 5293 5294 if (mte_is_leaf(enode)) { 5295 node->type = mte_node_type(enode); 5296 goto free_leaf; 5297 } 5298 5299 start = enode; 5300 slots = mte_destroy_descend(&enode, mt, start, 0); 5301 node = mte_to_node(enode); // Updated in the above call. 5302 do { 5303 enum maple_type type; 5304 unsigned char offset; 5305 struct maple_enode *parent, *tmp; 5306 5307 node->slot_len = mte_dead_leaves(enode, mt, slots); 5308 if (free) 5309 mt_free_bulk(node->slot_len, slots); 5310 offset = node->parent_slot + 1; 5311 enode = node->piv_parent; 5312 if (mte_to_node(enode) == node) 5313 goto free_leaf; 5314 5315 type = mte_node_type(enode); 5316 slots = ma_slots(mte_to_node(enode), type); 5317 if (offset >= mt_slots[type]) 5318 goto next; 5319 5320 tmp = mt_slot_locked(mt, slots, offset); 5321 if (mte_node_type(tmp) && mte_to_node(tmp)) { 5322 parent = enode; 5323 enode = tmp; 5324 slots = mte_destroy_descend(&enode, mt, parent, offset); 5325 } 5326 next: 5327 node = mte_to_node(enode); 5328 } while (start != enode); 5329 5330 node = mte_to_node(enode); 5331 node->slot_len = mte_dead_leaves(enode, mt, slots); 5332 if (free) 5333 mt_free_bulk(node->slot_len, slots); 5334 5335 free_leaf: 5336 if (free) 5337 mt_free_rcu(&node->rcu); 5338 else 5339 mt_clear_meta(mt, node, node->type); 5340 } 5341 5342 /* 5343 * mte_destroy_walk() - Free a tree or sub-tree. 5344 * @enode: the encoded maple node (maple_enode) to start 5345 * @mt: the tree to free - needed for node types. 5346 * 5347 * Must hold the write lock. 5348 */ mte_destroy_walk(struct maple_enode * enode,struct maple_tree * mt)5349 static inline void mte_destroy_walk(struct maple_enode *enode, 5350 struct maple_tree *mt) 5351 { 5352 struct maple_node *node = mte_to_node(enode); 5353 5354 if (mt_in_rcu(mt)) { 5355 mt_destroy_walk(enode, mt, false); 5356 call_rcu(&node->rcu, mt_free_walk); 5357 } else { 5358 mt_destroy_walk(enode, mt, true); 5359 } 5360 } 5361 /* Interface */ 5362 5363 /** 5364 * mas_store() - Store an @entry. 5365 * @mas: The maple state. 5366 * @entry: The entry to store. 5367 * 5368 * The @mas->index and @mas->last is used to set the range for the @entry. 5369 * 5370 * Return: the first entry between mas->index and mas->last or %NULL. 5371 */ mas_store(struct ma_state * mas,void * entry)5372 void *mas_store(struct ma_state *mas, void *entry) 5373 { 5374 int request; 5375 MA_WR_STATE(wr_mas, mas, entry); 5376 5377 trace_ma_write(__func__, mas, 0, entry); 5378 #ifdef CONFIG_DEBUG_MAPLE_TREE 5379 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5380 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, 5381 entry); 5382 5383 if (mas->index > mas->last) { 5384 mas_set_err(mas, -EINVAL); 5385 return NULL; 5386 } 5387 5388 #endif 5389 5390 /* 5391 * Storing is the same operation as insert with the added caveat that it 5392 * can overwrite entries. 
Although this seems simple enough, one may 5393 * want to examine what happens if a single store operation was to 5394 * overwrite multiple entries within a self-balancing B-Tree. 5395 */ 5396 mas_wr_prealloc_setup(&wr_mas); 5397 mas->store_type = mas_wr_store_type(&wr_mas); 5398 if (mas->mas_flags & MA_STATE_PREALLOC) { 5399 mas_wr_store_entry(&wr_mas); 5400 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5401 return wr_mas.content; 5402 } 5403 5404 request = mas_prealloc_calc(mas, entry); 5405 if (!request) 5406 goto store; 5407 5408 mas_node_count(mas, request); 5409 if (mas_is_err(mas)) 5410 return NULL; 5411 5412 store: 5413 mas_wr_store_entry(&wr_mas); 5414 mas_destroy(mas); 5415 return wr_mas.content; 5416 } 5417 EXPORT_SYMBOL_GPL(mas_store); 5418 5419 /** 5420 * mas_store_gfp() - Store a value into the tree. 5421 * @mas: The maple state 5422 * @entry: The entry to store 5423 * @gfp: The GFP_FLAGS to use for allocations if necessary. 5424 * 5425 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 5426 * be allocated. 5427 */ mas_store_gfp(struct ma_state * mas,void * entry,gfp_t gfp)5428 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) 5429 { 5430 unsigned long index = mas->index; 5431 unsigned long last = mas->last; 5432 MA_WR_STATE(wr_mas, mas, entry); 5433 int ret = 0; 5434 5435 retry: 5436 mas_wr_preallocate(&wr_mas, entry); 5437 if (unlikely(mas_nomem(mas, gfp))) { 5438 if (!entry) 5439 __mas_set_range(mas, index, last); 5440 goto retry; 5441 } 5442 5443 if (mas_is_err(mas)) { 5444 ret = xa_err(mas->node); 5445 goto out; 5446 } 5447 5448 mas_wr_store_entry(&wr_mas); 5449 out: 5450 mas_destroy(mas); 5451 return ret; 5452 } 5453 EXPORT_SYMBOL_GPL(mas_store_gfp); 5454 5455 /** 5456 * mas_store_prealloc() - Store a value into the tree using memory 5457 * preallocated in the maple state. 5458 * @mas: The maple state 5459 * @entry: The entry to store. 5460 */ mas_store_prealloc(struct ma_state * mas,void * entry)5461 void mas_store_prealloc(struct ma_state *mas, void *entry) 5462 { 5463 MA_WR_STATE(wr_mas, mas, entry); 5464 5465 if (mas->store_type == wr_store_root) { 5466 mas_wr_prealloc_setup(&wr_mas); 5467 goto store; 5468 } 5469 5470 mas_wr_walk_descend(&wr_mas); 5471 if (mas->store_type != wr_spanning_store) { 5472 /* set wr_mas->content to current slot */ 5473 wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset); 5474 mas_wr_end_piv(&wr_mas); 5475 } 5476 5477 store: 5478 trace_ma_write(__func__, mas, 0, entry); 5479 mas_wr_store_entry(&wr_mas); 5480 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5481 mas_destroy(mas); 5482 } 5483 EXPORT_SYMBOL_GPL(mas_store_prealloc); 5484 5485 /** 5486 * mas_preallocate() - Preallocate enough nodes for a store operation 5487 * @mas: The maple state 5488 * @entry: The entry that will be stored 5489 * @gfp: The GFP_FLAGS to use for allocations. 5490 * 5491 * Return: 0 on success, -ENOMEM if memory could not be allocated. 
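 *
 * Illustrative usage sketch (a minimal pattern under assumed locking, not
 * taken from this file):
 *
 *	mas_set_range(&mas, index, last);
 *	if (mas_preallocate(&mas, entry, GFP_KERNEL))
 *		return -ENOMEM;
 *	mas_store_prealloc(&mas, entry);
 *
 * mas_store_prealloc() consumes the reservation and calls mas_destroy()
 * itself, so no separate clean-up is needed once the store has been done.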
5492 */ mas_preallocate(struct ma_state * mas,void * entry,gfp_t gfp)5493 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) 5494 { 5495 MA_WR_STATE(wr_mas, mas, entry); 5496 int ret = 0; 5497 int request; 5498 5499 mas_wr_prealloc_setup(&wr_mas); 5500 mas->store_type = mas_wr_store_type(&wr_mas); 5501 request = mas_prealloc_calc(mas, entry); 5502 if (!request) 5503 return ret; 5504 5505 mas_node_count_gfp(mas, request, gfp); 5506 if (mas_is_err(mas)) { 5507 mas_set_alloc_req(mas, 0); 5508 ret = xa_err(mas->node); 5509 mas_destroy(mas); 5510 mas_reset(mas); 5511 return ret; 5512 } 5513 5514 mas->mas_flags |= MA_STATE_PREALLOC; 5515 return ret; 5516 } 5517 EXPORT_SYMBOL_GPL(mas_preallocate); 5518 5519 /* 5520 * mas_destroy() - destroy a maple state. 5521 * @mas: The maple state 5522 * 5523 * Upon completion, check the left-most node and rebalance against the node to 5524 * the right if necessary. Frees any allocated nodes associated with this maple 5525 * state. 5526 */ mas_destroy(struct ma_state * mas)5527 void mas_destroy(struct ma_state *mas) 5528 { 5529 struct maple_alloc *node; 5530 unsigned long total; 5531 5532 /* 5533 * When using mas_for_each() to insert an expected number of elements, 5534 * it is possible that the number inserted is less than the expected 5535 * number. To fix an invalid final node, a check is performed here to 5536 * rebalance the previous node with the final node. 5537 */ 5538 if (mas->mas_flags & MA_STATE_REBALANCE) { 5539 unsigned char end; 5540 if (mas_is_err(mas)) 5541 mas_reset(mas); 5542 mas_start(mas); 5543 mtree_range_walk(mas); 5544 end = mas->end + 1; 5545 if (end < mt_min_slot_count(mas->node) - 1) 5546 mas_destroy_rebalance(mas, end); 5547 5548 mas->mas_flags &= ~MA_STATE_REBALANCE; 5549 } 5550 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); 5551 5552 total = mas_allocated(mas); 5553 while (total) { 5554 node = mas->alloc; 5555 mas->alloc = node->slot[0]; 5556 if (node->node_count > 1) { 5557 size_t count = node->node_count - 1; 5558 5559 mt_free_bulk(count, (void __rcu **)&node->slot[1]); 5560 total -= count; 5561 } 5562 mt_free_one(ma_mnode_ptr(node)); 5563 total--; 5564 } 5565 5566 mas->alloc = NULL; 5567 } 5568 EXPORT_SYMBOL_GPL(mas_destroy); 5569 5570 /* 5571 * mas_expected_entries() - Set the expected number of entries that will be inserted. 5572 * @mas: The maple state 5573 * @nr_entries: The number of expected entries. 5574 * 5575 * This will attempt to pre-allocate enough nodes to store the expected number 5576 * of entries. The allocations will occur using the bulk allocator interface 5577 * for speed. Please call mas_destroy() on the @mas after inserting the entries 5578 * to ensure any unused nodes are freed. 5579 * 5580 * Return: 0 on success, -ENOMEM if memory could not be allocated. 5581 */ mas_expected_entries(struct ma_state * mas,unsigned long nr_entries)5582 int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) 5583 { 5584 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; 5585 struct maple_enode *enode = mas->node; 5586 int nr_nodes; 5587 int ret; 5588 5589 /* 5590 * Sometimes it is necessary to duplicate a tree to a new tree, such as 5591 * forking a process and duplicating the VMAs from one tree to a new 5592 * tree. When such a situation arises, it is known that the new tree is 5593 * not going to be used until the entire tree is populated. For 5594 * performance reasons, it is best to use a bulk load with RCU disabled. 
5595 * This allows for optimistic splitting that favours the left and reuse 5596 * of nodes during the operation. 5597 */ 5598 5599 /* Optimize splitting for bulk insert in-order */ 5600 mas->mas_flags |= MA_STATE_BULK; 5601 5602 /* 5603 * Avoid overflow, assume a gap between each entry and a trailing null. 5604 * If this is wrong, it just means allocation can happen during 5605 * insertion of entries. 5606 */ 5607 nr_nodes = max(nr_entries, nr_entries * 2 + 1); 5608 if (!mt_is_alloc(mas->tree)) 5609 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; 5610 5611 /* Leaves; reduce slots to keep space for expansion */ 5612 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); 5613 /* Internal nodes */ 5614 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); 5615 /* Add working room for split (2 nodes) + new parents */ 5616 mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); 5617 5618 /* Detect if allocations run out */ 5619 mas->mas_flags |= MA_STATE_PREALLOC; 5620 5621 if (!mas_is_err(mas)) 5622 return 0; 5623 5624 ret = xa_err(mas->node); 5625 mas->node = enode; 5626 mas_destroy(mas); 5627 return ret; 5628 5629 } 5630 EXPORT_SYMBOL_GPL(mas_expected_entries); 5631 mas_next_setup(struct ma_state * mas,unsigned long max,void ** entry)5632 static bool mas_next_setup(struct ma_state *mas, unsigned long max, 5633 void **entry) 5634 { 5635 bool was_none = mas_is_none(mas); 5636 5637 if (unlikely(mas->last >= max)) { 5638 mas->status = ma_overflow; 5639 return true; 5640 } 5641 5642 switch (mas->status) { 5643 case ma_active: 5644 return false; 5645 case ma_none: 5646 fallthrough; 5647 case ma_pause: 5648 mas->status = ma_start; 5649 fallthrough; 5650 case ma_start: 5651 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ 5652 break; 5653 case ma_overflow: 5654 /* Overflowed before, but the max changed */ 5655 mas->status = ma_active; 5656 break; 5657 case ma_underflow: 5658 /* The user expects the mas to be one before where it is */ 5659 mas->status = ma_active; 5660 *entry = mas_walk(mas); 5661 if (*entry) 5662 return true; 5663 break; 5664 case ma_root: 5665 break; 5666 case ma_error: 5667 return true; 5668 } 5669 5670 if (likely(mas_is_active(mas))) /* Fast path */ 5671 return false; 5672 5673 if (mas_is_ptr(mas)) { 5674 *entry = NULL; 5675 if (was_none && mas->index == 0) { 5676 mas->index = mas->last = 0; 5677 return true; 5678 } 5679 mas->index = 1; 5680 mas->last = ULONG_MAX; 5681 mas->status = ma_none; 5682 return true; 5683 } 5684 5685 if (mas_is_none(mas)) 5686 return true; 5687 5688 return false; 5689 } 5690 5691 /** 5692 * mas_next() - Get the next entry. 5693 * @mas: The maple state 5694 * @max: The maximum index to check. 5695 * 5696 * Returns the next entry after @mas->index. 5697 * Must hold rcu_read_lock or the write lock. 5698 * Can return the zero entry. 5699 * 5700 * Return: The next entry or %NULL 5701 */ mas_next(struct ma_state * mas,unsigned long max)5702 void *mas_next(struct ma_state *mas, unsigned long max) 5703 { 5704 void *entry = NULL; 5705 5706 if (mas_next_setup(mas, max, &entry)) 5707 return entry; 5708 5709 /* Retries on dead nodes handled by mas_next_slot */ 5710 return mas_next_slot(mas, max, false); 5711 } 5712 EXPORT_SYMBOL_GPL(mas_next); 5713 5714 /** 5715 * mas_next_range() - Advance the maple state to the next range 5716 * @mas: The maple state 5717 * @max: The maximum index to check. 5718 * 5719 * Sets @mas->index and @mas->last to the range. 5720 * Must hold rcu_read_lock or the write lock. 5721 * Can return the zero entry. 
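 *
 * Forward iteration sketch (illustrative only, not from the original source;
 * the tree name and handle_entry() are placeholders):
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_next(&mas, ULONG_MAX)) != NULL)
 *		handle_entry(entry, mas.index, mas.last);
 *	rcu_read_unlock();
 *
 * mas_next_range() follows the same calling pattern when the caller wants
 * @mas->index and @mas->last set for each range visited.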
5722 * 5723 * Return: The next entry or %NULL 5724 */ mas_next_range(struct ma_state * mas,unsigned long max)5725 void *mas_next_range(struct ma_state *mas, unsigned long max) 5726 { 5727 void *entry = NULL; 5728 5729 if (mas_next_setup(mas, max, &entry)) 5730 return entry; 5731 5732 /* Retries on dead nodes handled by mas_next_slot */ 5733 return mas_next_slot(mas, max, true); 5734 } 5735 EXPORT_SYMBOL_GPL(mas_next_range); 5736 5737 /** 5738 * mt_next() - get the next value in the maple tree 5739 * @mt: The maple tree 5740 * @index: The start index 5741 * @max: The maximum index to check 5742 * 5743 * Takes RCU read lock internally to protect the search, which does not 5744 * protect the returned pointer after dropping RCU read lock. 5745 * See also: Documentation/core-api/maple_tree.rst 5746 * 5747 * Return: The entry higher than @index or %NULL if nothing is found. 5748 */ mt_next(struct maple_tree * mt,unsigned long index,unsigned long max)5749 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) 5750 { 5751 void *entry = NULL; 5752 MA_STATE(mas, mt, index, index); 5753 5754 rcu_read_lock(); 5755 entry = mas_next(&mas, max); 5756 rcu_read_unlock(); 5757 return entry; 5758 } 5759 EXPORT_SYMBOL_GPL(mt_next); 5760 mas_prev_setup(struct ma_state * mas,unsigned long min,void ** entry)5761 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) 5762 { 5763 if (unlikely(mas->index <= min)) { 5764 mas->status = ma_underflow; 5765 return true; 5766 } 5767 5768 switch (mas->status) { 5769 case ma_active: 5770 return false; 5771 case ma_start: 5772 break; 5773 case ma_none: 5774 fallthrough; 5775 case ma_pause: 5776 mas->status = ma_start; 5777 break; 5778 case ma_underflow: 5779 /* underflowed before but the min changed */ 5780 mas->status = ma_active; 5781 break; 5782 case ma_overflow: 5783 /* User expects mas to be one after where it is */ 5784 mas->status = ma_active; 5785 *entry = mas_walk(mas); 5786 if (*entry) 5787 return true; 5788 break; 5789 case ma_root: 5790 break; 5791 case ma_error: 5792 return true; 5793 } 5794 5795 if (mas_is_start(mas)) 5796 mas_walk(mas); 5797 5798 if (unlikely(mas_is_ptr(mas))) { 5799 if (!mas->index) { 5800 mas->status = ma_none; 5801 return true; 5802 } 5803 mas->index = mas->last = 0; 5804 *entry = mas_root(mas); 5805 return true; 5806 } 5807 5808 if (mas_is_none(mas)) { 5809 if (mas->index) { 5810 /* Walked to out-of-range pointer? */ 5811 mas->index = mas->last = 0; 5812 mas->status = ma_root; 5813 *entry = mas_root(mas); 5814 return true; 5815 } 5816 return true; 5817 } 5818 5819 return false; 5820 } 5821 5822 /** 5823 * mas_prev() - Get the previous entry 5824 * @mas: The maple state 5825 * @min: The minimum value to check. 5826 * 5827 * Must hold rcu_read_lock or the write lock. 5828 * Will reset mas to ma_start if the status is ma_none. Will stop on not 5829 * searchable nodes. 5830 * 5831 * Return: the previous value or %NULL. 5832 */ mas_prev(struct ma_state * mas,unsigned long min)5833 void *mas_prev(struct ma_state *mas, unsigned long min) 5834 { 5835 void *entry = NULL; 5836 5837 if (mas_prev_setup(mas, min, &entry)) 5838 return entry; 5839 5840 return mas_prev_slot(mas, min, false); 5841 } 5842 EXPORT_SYMBOL_GPL(mas_prev); 5843 5844 /** 5845 * mas_prev_range() - Advance to the previous range 5846 * @mas: The maple state 5847 * @min: The minimum value to check. 5848 * 5849 * Sets @mas->index and @mas->last to the range. 5850 * Must hold rcu_read_lock or the write lock. 
5851 * Will reset mas to ma_start if the node is ma_none. Will stop on not 5852 * searchable nodes. 5853 * 5854 * Return: the previous value or %NULL. 5855 */ mas_prev_range(struct ma_state * mas,unsigned long min)5856 void *mas_prev_range(struct ma_state *mas, unsigned long min) 5857 { 5858 void *entry = NULL; 5859 5860 if (mas_prev_setup(mas, min, &entry)) 5861 return entry; 5862 5863 return mas_prev_slot(mas, min, true); 5864 } 5865 EXPORT_SYMBOL_GPL(mas_prev_range); 5866 5867 /** 5868 * mt_prev() - get the previous value in the maple tree 5869 * @mt: The maple tree 5870 * @index: The start index 5871 * @min: The minimum index to check 5872 * 5873 * Takes RCU read lock internally to protect the search, which does not 5874 * protect the returned pointer after dropping RCU read lock. 5875 * See also: Documentation/core-api/maple_tree.rst 5876 * 5877 * Return: The entry before @index or %NULL if nothing is found. 5878 */ mt_prev(struct maple_tree * mt,unsigned long index,unsigned long min)5879 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) 5880 { 5881 void *entry = NULL; 5882 MA_STATE(mas, mt, index, index); 5883 5884 rcu_read_lock(); 5885 entry = mas_prev(&mas, min); 5886 rcu_read_unlock(); 5887 return entry; 5888 } 5889 EXPORT_SYMBOL_GPL(mt_prev); 5890 5891 /** 5892 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. 5893 * @mas: The maple state to pause 5894 * 5895 * Some users need to pause a walk and drop the lock they're holding in 5896 * order to yield to a higher priority thread or carry out an operation 5897 * on an entry. Those users should call this function before they drop 5898 * the lock. It resets the @mas to be suitable for the next iteration 5899 * of the loop after the user has reacquired the lock. If most entries 5900 * found during a walk require you to call mas_pause(), the mt_for_each() 5901 * iterator may be more appropriate. 5902 * 5903 */ mas_pause(struct ma_state * mas)5904 void mas_pause(struct ma_state *mas) 5905 { 5906 mas->status = ma_pause; 5907 mas->node = NULL; 5908 } 5909 EXPORT_SYMBOL_GPL(mas_pause); 5910 5911 /** 5912 * mas_find_setup() - Internal function to set up mas_find*(). 5913 * @mas: The maple state 5914 * @max: The maximum index 5915 * @entry: Pointer to the entry 5916 * 5917 * Returns: True if entry is the answer, false otherwise. 
5918 */ mas_find_setup(struct ma_state * mas,unsigned long max,void ** entry)5919 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) 5920 { 5921 switch (mas->status) { 5922 case ma_active: 5923 if (mas->last < max) 5924 return false; 5925 return true; 5926 case ma_start: 5927 break; 5928 case ma_pause: 5929 if (unlikely(mas->last >= max)) 5930 return true; 5931 5932 mas->index = ++mas->last; 5933 mas->status = ma_start; 5934 break; 5935 case ma_none: 5936 if (unlikely(mas->last >= max)) 5937 return true; 5938 5939 mas->index = mas->last; 5940 mas->status = ma_start; 5941 break; 5942 case ma_underflow: 5943 /* mas is pointing at entry before unable to go lower */ 5944 if (unlikely(mas->index >= max)) { 5945 mas->status = ma_overflow; 5946 return true; 5947 } 5948 5949 mas->status = ma_active; 5950 *entry = mas_walk(mas); 5951 if (*entry) 5952 return true; 5953 break; 5954 case ma_overflow: 5955 if (unlikely(mas->last >= max)) 5956 return true; 5957 5958 mas->status = ma_active; 5959 *entry = mas_walk(mas); 5960 if (*entry) 5961 return true; 5962 break; 5963 case ma_root: 5964 break; 5965 case ma_error: 5966 return true; 5967 } 5968 5969 if (mas_is_start(mas)) { 5970 /* First run or continue */ 5971 if (mas->index > max) 5972 return true; 5973 5974 *entry = mas_walk(mas); 5975 if (*entry) 5976 return true; 5977 5978 } 5979 5980 if (unlikely(mas_is_ptr(mas))) 5981 goto ptr_out_of_range; 5982 5983 if (unlikely(mas_is_none(mas))) 5984 return true; 5985 5986 if (mas->index == max) 5987 return true; 5988 5989 return false; 5990 5991 ptr_out_of_range: 5992 mas->status = ma_none; 5993 mas->index = 1; 5994 mas->last = ULONG_MAX; 5995 return true; 5996 } 5997 5998 /** 5999 * mas_find() - On the first call, find the entry at or after mas->index up to 6000 * %max. Otherwise, find the entry after mas->index. 6001 * @mas: The maple state 6002 * @max: The maximum value to check. 6003 * 6004 * Must hold rcu_read_lock or the write lock. 6005 * If an entry exists, last and index are updated accordingly. 6006 * May set @mas->status to ma_overflow. 6007 * 6008 * Return: The entry or %NULL. 6009 */ mas_find(struct ma_state * mas,unsigned long max)6010 void *mas_find(struct ma_state *mas, unsigned long max) 6011 { 6012 void *entry = NULL; 6013 6014 if (mas_find_setup(mas, max, &entry)) 6015 return entry; 6016 6017 /* Retries on dead nodes handled by mas_next_slot */ 6018 entry = mas_next_slot(mas, max, false); 6019 /* Ignore overflow */ 6020 mas->status = ma_active; 6021 return entry; 6022 } 6023 EXPORT_SYMBOL_GPL(mas_find); 6024 6025 /** 6026 * mas_find_range() - On the first call, find the entry at or after 6027 * mas->index up to %max. Otherwise, advance to the next slot mas->index. 6028 * @mas: The maple state 6029 * @max: The maximum value to check. 6030 * 6031 * Must hold rcu_read_lock or the write lock. 6032 * If an entry exists, last and index are updated accordingly. 6033 * May set @mas->status to ma_overflow. 6034 * 6035 * Return: The entry or %NULL. 
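 *
 * Typical loop (illustrative sketch, not from the original source; the tree
 * name and process_entry() are placeholders - the mas_for_each() helper in
 * the header expresses the same mas_find() loop):
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	mas_for_each(&mas, entry, ULONG_MAX)
 *		process_entry(entry);
 *	rcu_read_unlock();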
6036 */ mas_find_range(struct ma_state * mas,unsigned long max)6037 void *mas_find_range(struct ma_state *mas, unsigned long max) 6038 { 6039 void *entry = NULL; 6040 6041 if (mas_find_setup(mas, max, &entry)) 6042 return entry; 6043 6044 /* Retries on dead nodes handled by mas_next_slot */ 6045 return mas_next_slot(mas, max, true); 6046 } 6047 EXPORT_SYMBOL_GPL(mas_find_range); 6048 6049 /** 6050 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() 6051 * @mas: The maple state 6052 * @min: The minimum index 6053 * @entry: Pointer to the entry 6054 * 6055 * Returns: True if entry is the answer, false otherwise. 6056 */ mas_find_rev_setup(struct ma_state * mas,unsigned long min,void ** entry)6057 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, 6058 void **entry) 6059 { 6060 6061 switch (mas->status) { 6062 case ma_active: 6063 goto active; 6064 case ma_start: 6065 break; 6066 case ma_pause: 6067 if (unlikely(mas->index <= min)) { 6068 mas->status = ma_underflow; 6069 return true; 6070 } 6071 mas->last = --mas->index; 6072 mas->status = ma_start; 6073 break; 6074 case ma_none: 6075 if (mas->index <= min) 6076 goto none; 6077 6078 mas->last = mas->index; 6079 mas->status = ma_start; 6080 break; 6081 case ma_overflow: /* user expects the mas to be one after where it is */ 6082 if (unlikely(mas->index <= min)) { 6083 mas->status = ma_underflow; 6084 return true; 6085 } 6086 6087 mas->status = ma_active; 6088 break; 6089 case ma_underflow: /* user expects the mas to be one before where it is */ 6090 if (unlikely(mas->index <= min)) 6091 return true; 6092 6093 mas->status = ma_active; 6094 break; 6095 case ma_root: 6096 break; 6097 case ma_error: 6098 return true; 6099 } 6100 6101 if (mas_is_start(mas)) { 6102 /* First run or continue */ 6103 if (mas->index < min) 6104 return true; 6105 6106 *entry = mas_walk(mas); 6107 if (*entry) 6108 return true; 6109 } 6110 6111 if (unlikely(mas_is_ptr(mas))) 6112 goto none; 6113 6114 if (unlikely(mas_is_none(mas))) { 6115 /* 6116 * Walked to the location, and there was nothing so the previous 6117 * location is 0. 6118 */ 6119 mas->last = mas->index = 0; 6120 mas->status = ma_root; 6121 *entry = mas_root(mas); 6122 return true; 6123 } 6124 6125 active: 6126 if (mas->index < min) 6127 return true; 6128 6129 return false; 6130 6131 none: 6132 mas->status = ma_none; 6133 return true; 6134 } 6135 6136 /** 6137 * mas_find_rev: On the first call, find the first non-null entry at or below 6138 * mas->index down to %min. Otherwise find the first non-null entry below 6139 * mas->index down to %min. 6140 * @mas: The maple state 6141 * @min: The minimum value to check. 6142 * 6143 * Must hold rcu_read_lock or the write lock. 6144 * If an entry exists, last and index are updated accordingly. 6145 * May set @mas->status to ma_underflow. 6146 * 6147 * Return: The entry or %NULL. 6148 */ mas_find_rev(struct ma_state * mas,unsigned long min)6149 void *mas_find_rev(struct ma_state *mas, unsigned long min) 6150 { 6151 void *entry = NULL; 6152 6153 if (mas_find_rev_setup(mas, min, &entry)) 6154 return entry; 6155 6156 /* Retries on dead nodes handled by mas_prev_slot */ 6157 return mas_prev_slot(mas, min, false); 6158 6159 } 6160 EXPORT_SYMBOL_GPL(mas_find_rev); 6161 6162 /** 6163 * mas_find_range_rev: On the first call, find the first non-null entry at or 6164 * below mas->index down to %min. Otherwise advance to the previous slot after 6165 * mas->index down to %min. 
6166 * @mas: The maple state 6167 * @min: The minimum value to check. 6168 * 6169 * Must hold rcu_read_lock or the write lock. 6170 * If an entry exists, last and index are updated accordingly. 6171 * May set @mas->status to ma_underflow. 6172 * 6173 * Return: The entry or %NULL. 6174 */ mas_find_range_rev(struct ma_state * mas,unsigned long min)6175 void *mas_find_range_rev(struct ma_state *mas, unsigned long min) 6176 { 6177 void *entry = NULL; 6178 6179 if (mas_find_rev_setup(mas, min, &entry)) 6180 return entry; 6181 6182 /* Retries on dead nodes handled by mas_prev_slot */ 6183 return mas_prev_slot(mas, min, true); 6184 } 6185 EXPORT_SYMBOL_GPL(mas_find_range_rev); 6186 6187 /** 6188 * mas_erase() - Find the range in which index resides and erase the entire 6189 * range. 6190 * @mas: The maple state 6191 * 6192 * Must hold the write lock. 6193 * Searches for @mas->index, sets @mas->index and @mas->last to the range and 6194 * erases that range. 6195 * 6196 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. 6197 */ mas_erase(struct ma_state * mas)6198 void *mas_erase(struct ma_state *mas) 6199 { 6200 void *entry; 6201 unsigned long index = mas->index; 6202 MA_WR_STATE(wr_mas, mas, NULL); 6203 6204 if (!mas_is_active(mas) || !mas_is_start(mas)) 6205 mas->status = ma_start; 6206 6207 write_retry: 6208 entry = mas_state_walk(mas); 6209 if (!entry) 6210 return NULL; 6211 6212 /* Must reset to ensure spanning writes of last slot are detected */ 6213 mas_reset(mas); 6214 mas_wr_preallocate(&wr_mas, NULL); 6215 if (mas_nomem(mas, GFP_KERNEL)) { 6216 /* in case the range of entry changed when unlocked */ 6217 mas->index = mas->last = index; 6218 goto write_retry; 6219 } 6220 6221 if (mas_is_err(mas)) 6222 goto out; 6223 6224 mas_wr_store_entry(&wr_mas); 6225 out: 6226 mas_destroy(mas); 6227 return entry; 6228 } 6229 EXPORT_SYMBOL_GPL(mas_erase); 6230 6231 /** 6232 * mas_nomem() - Check if there was an error allocating and do the allocation 6233 * if necessary If there are allocations, then free them. 6234 * @mas: The maple state 6235 * @gfp: The GFP_FLAGS to use for allocations 6236 * Return: true on allocation, false otherwise. 
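 *
 * Caller pattern (sketch only, mirroring mas_store_gfp() above and
 * mtree_insert_range() below; the mas, entry and gfp names are placeholders):
 *
 *	retry:
 *		mas_insert(&mas, entry);
 *		if (mas_nomem(&mas, gfp))
 *			goto retry;
 *		if (mas_is_err(&mas))
 *			ret = xa_err(mas.node);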
6237 */ mas_nomem(struct ma_state * mas,gfp_t gfp)6238 bool mas_nomem(struct ma_state *mas, gfp_t gfp) 6239 __must_hold(mas->tree->ma_lock) 6240 { 6241 if (likely(mas->node != MA_ERROR(-ENOMEM))) 6242 return false; 6243 6244 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { 6245 mtree_unlock(mas->tree); 6246 mas_alloc_nodes(mas, gfp); 6247 mtree_lock(mas->tree); 6248 } else { 6249 mas_alloc_nodes(mas, gfp); 6250 } 6251 6252 if (!mas_allocated(mas)) 6253 return false; 6254 6255 mas->status = ma_start; 6256 return true; 6257 } 6258 maple_tree_init(void)6259 void __init maple_tree_init(void) 6260 { 6261 maple_node_cache = kmem_cache_create("maple_node", 6262 sizeof(struct maple_node), sizeof(struct maple_node), 6263 SLAB_PANIC, NULL); 6264 } 6265 6266 /** 6267 * mtree_load() - Load a value stored in a maple tree 6268 * @mt: The maple tree 6269 * @index: The index to load 6270 * 6271 * Return: the entry or %NULL 6272 */ mtree_load(struct maple_tree * mt,unsigned long index)6273 void *mtree_load(struct maple_tree *mt, unsigned long index) 6274 { 6275 MA_STATE(mas, mt, index, index); 6276 void *entry; 6277 6278 trace_ma_read(__func__, &mas); 6279 rcu_read_lock(); 6280 retry: 6281 entry = mas_start(&mas); 6282 if (unlikely(mas_is_none(&mas))) 6283 goto unlock; 6284 6285 if (unlikely(mas_is_ptr(&mas))) { 6286 if (index) 6287 entry = NULL; 6288 6289 goto unlock; 6290 } 6291 6292 entry = mtree_lookup_walk(&mas); 6293 if (!entry && unlikely(mas_is_start(&mas))) 6294 goto retry; 6295 unlock: 6296 rcu_read_unlock(); 6297 if (xa_is_zero(entry)) 6298 return NULL; 6299 6300 return entry; 6301 } 6302 EXPORT_SYMBOL(mtree_load); 6303 6304 /** 6305 * mtree_store_range() - Store an entry at a given range. 6306 * @mt: The maple tree 6307 * @index: The start of the range 6308 * @last: The end of the range 6309 * @entry: The entry to store 6310 * @gfp: The GFP_FLAGS to use for allocations 6311 * 6312 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6313 * be allocated. 6314 */ mtree_store_range(struct maple_tree * mt,unsigned long index,unsigned long last,void * entry,gfp_t gfp)6315 int mtree_store_range(struct maple_tree *mt, unsigned long index, 6316 unsigned long last, void *entry, gfp_t gfp) 6317 { 6318 MA_STATE(mas, mt, index, last); 6319 int ret = 0; 6320 6321 trace_ma_write(__func__, &mas, 0, entry); 6322 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6323 return -EINVAL; 6324 6325 if (index > last) 6326 return -EINVAL; 6327 6328 mtree_lock(mt); 6329 ret = mas_store_gfp(&mas, entry, gfp); 6330 mtree_unlock(mt); 6331 6332 return ret; 6333 } 6334 EXPORT_SYMBOL(mtree_store_range); 6335 6336 /** 6337 * mtree_store() - Store an entry at a given index. 6338 * @mt: The maple tree 6339 * @index: The index to store the value 6340 * @entry: The entry to store 6341 * @gfp: The GFP_FLAGS to use for allocations 6342 * 6343 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not 6344 * be allocated. 6345 */ mtree_store(struct maple_tree * mt,unsigned long index,void * entry,gfp_t gfp)6346 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, 6347 gfp_t gfp) 6348 { 6349 return mtree_store_range(mt, index, index, entry, gfp); 6350 } 6351 EXPORT_SYMBOL(mtree_store); 6352 6353 /** 6354 * mtree_insert_range() - Insert an entry at a given range if there is no value. 
6355 * @mt: The maple tree 6356 * @first: The start of the range 6357 * @last: The end of the range 6358 * @entry: The entry to store 6359 * @gfp: The GFP_FLAGS to use for allocations. 6360 * 6361 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6362 * request, -ENOMEM if memory could not be allocated. 6363 */ mtree_insert_range(struct maple_tree * mt,unsigned long first,unsigned long last,void * entry,gfp_t gfp)6364 int mtree_insert_range(struct maple_tree *mt, unsigned long first, 6365 unsigned long last, void *entry, gfp_t gfp) 6366 { 6367 MA_STATE(ms, mt, first, last); 6368 int ret = 0; 6369 6370 if (WARN_ON_ONCE(xa_is_advanced(entry))) 6371 return -EINVAL; 6372 6373 if (first > last) 6374 return -EINVAL; 6375 6376 mtree_lock(mt); 6377 retry: 6378 mas_insert(&ms, entry); 6379 if (mas_nomem(&ms, gfp)) 6380 goto retry; 6381 6382 mtree_unlock(mt); 6383 if (mas_is_err(&ms)) 6384 ret = xa_err(ms.node); 6385 6386 mas_destroy(&ms); 6387 return ret; 6388 } 6389 EXPORT_SYMBOL(mtree_insert_range); 6390 6391 /** 6392 * mtree_insert() - Insert an entry at a given index if there is no value. 6393 * @mt: The maple tree 6394 * @index : The index to store the value 6395 * @entry: The entry to store 6396 * @gfp: The GFP_FLAGS to use for allocations. 6397 * 6398 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid 6399 * request, -ENOMEM if memory could not be allocated. 6400 */ mtree_insert(struct maple_tree * mt,unsigned long index,void * entry,gfp_t gfp)6401 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, 6402 gfp_t gfp) 6403 { 6404 return mtree_insert_range(mt, index, index, entry, gfp); 6405 } 6406 EXPORT_SYMBOL(mtree_insert); 6407 mtree_alloc_range(struct maple_tree * mt,unsigned long * startp,void * entry,unsigned long size,unsigned long min,unsigned long max,gfp_t gfp)6408 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, 6409 void *entry, unsigned long size, unsigned long min, 6410 unsigned long max, gfp_t gfp) 6411 { 6412 int ret = 0; 6413 6414 MA_STATE(mas, mt, 0, 0); 6415 if (!mt_is_alloc(mt)) 6416 return -EINVAL; 6417 6418 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6419 return -EINVAL; 6420 6421 mtree_lock(mt); 6422 retry: 6423 ret = mas_empty_area(&mas, min, max, size); 6424 if (ret) 6425 goto unlock; 6426 6427 mas_insert(&mas, entry); 6428 /* 6429 * mas_nomem() may release the lock, causing the allocated area 6430 * to be unavailable, so try to allocate a free area again. 6431 */ 6432 if (mas_nomem(&mas, gfp)) 6433 goto retry; 6434 6435 if (mas_is_err(&mas)) 6436 ret = xa_err(mas.node); 6437 else 6438 *startp = mas.index; 6439 6440 unlock: 6441 mtree_unlock(mt); 6442 mas_destroy(&mas); 6443 return ret; 6444 } 6445 EXPORT_SYMBOL(mtree_alloc_range); 6446 6447 /** 6448 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree. 6449 * @mt: The maple tree. 6450 * @startp: Pointer to ID. 6451 * @range_lo: Lower bound of range to search. 6452 * @range_hi: Upper bound of range to search. 6453 * @entry: The entry to store. 6454 * @next: Pointer to next ID to allocate. 6455 * @gfp: The GFP_FLAGS to use for allocations. 6456 * 6457 * Finds an empty entry in @mt after @next, stores the new index into 6458 * the @id pointer, stores the entry at that index, then updates @next. 6459 * 6460 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag. 6461 * 6462 * Context: Any context. Takes and releases the mt.lock. May sleep if 6463 * the @gfp flags permit. 
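 *
 * Illustrative sketch (not part of the original source; the tree is assumed
 * to have been initialised with MT_FLAGS_ALLOC_RANGE and the item pointer is
 * a placeholder):
 *
 *	unsigned long id, next = 0;
 *	int ret;
 *
 *	ret = mtree_alloc_cyclic(&tree, &id, item, 1, 255, &next, GFP_KERNEL);
 *	if (ret < 0)
 *		return ret;	// -EBUSY, -ENOMEM or -EINVAL
 *	// id holds the index where item was stored; next has been advanced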
6464 * 6465 * Return: 0 if the allocation succeeded without wrapping, 1 if the 6466 * allocation succeeded after wrapping, -ENOMEM if memory could not be 6467 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no 6468 * free entries. 6469 */ mtree_alloc_cyclic(struct maple_tree * mt,unsigned long * startp,void * entry,unsigned long range_lo,unsigned long range_hi,unsigned long * next,gfp_t gfp)6470 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, 6471 void *entry, unsigned long range_lo, unsigned long range_hi, 6472 unsigned long *next, gfp_t gfp) 6473 { 6474 int ret; 6475 6476 MA_STATE(mas, mt, 0, 0); 6477 6478 if (!mt_is_alloc(mt)) 6479 return -EINVAL; 6480 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6481 return -EINVAL; 6482 mtree_lock(mt); 6483 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, 6484 next, gfp); 6485 mtree_unlock(mt); 6486 return ret; 6487 } 6488 EXPORT_SYMBOL(mtree_alloc_cyclic); 6489 mtree_alloc_rrange(struct maple_tree * mt,unsigned long * startp,void * entry,unsigned long size,unsigned long min,unsigned long max,gfp_t gfp)6490 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, 6491 void *entry, unsigned long size, unsigned long min, 6492 unsigned long max, gfp_t gfp) 6493 { 6494 int ret = 0; 6495 6496 MA_STATE(mas, mt, 0, 0); 6497 if (!mt_is_alloc(mt)) 6498 return -EINVAL; 6499 6500 if (WARN_ON_ONCE(mt_is_reserved(entry))) 6501 return -EINVAL; 6502 6503 mtree_lock(mt); 6504 retry: 6505 ret = mas_empty_area_rev(&mas, min, max, size); 6506 if (ret) 6507 goto unlock; 6508 6509 mas_insert(&mas, entry); 6510 /* 6511 * mas_nomem() may release the lock, causing the allocated area 6512 * to be unavailable, so try to allocate a free area again. 6513 */ 6514 if (mas_nomem(&mas, gfp)) 6515 goto retry; 6516 6517 if (mas_is_err(&mas)) 6518 ret = xa_err(mas.node); 6519 else 6520 *startp = mas.index; 6521 6522 unlock: 6523 mtree_unlock(mt); 6524 mas_destroy(&mas); 6525 return ret; 6526 } 6527 EXPORT_SYMBOL(mtree_alloc_rrange); 6528 6529 /** 6530 * mtree_erase() - Find an index and erase the entire range. 6531 * @mt: The maple tree 6532 * @index: The index to erase 6533 * 6534 * Erasing is the same as a walk to an entry then a store of a NULL to that 6535 * ENTIRE range. In fact, it is implemented as such using the advanced API. 6536 * 6537 * Return: The entry stored at the @index or %NULL 6538 */ mtree_erase(struct maple_tree * mt,unsigned long index)6539 void *mtree_erase(struct maple_tree *mt, unsigned long index) 6540 { 6541 void *entry = NULL; 6542 6543 MA_STATE(mas, mt, index, index); 6544 trace_ma_op(__func__, &mas); 6545 6546 mtree_lock(mt); 6547 entry = mas_erase(&mas); 6548 mtree_unlock(mt); 6549 6550 return entry; 6551 } 6552 EXPORT_SYMBOL(mtree_erase); 6553 6554 /* 6555 * mas_dup_free() - Free an incomplete duplication of a tree. 6556 * @mas: The maple state of a incomplete tree. 6557 * 6558 * The parameter @mas->node passed in indicates that the allocation failed on 6559 * this node. This function frees all nodes starting from @mas->node in the 6560 * reverse order of mas_dup_build(). There is no need to hold the source tree 6561 * lock at this time. 6562 */ mas_dup_free(struct ma_state * mas)6563 static void mas_dup_free(struct ma_state *mas) 6564 { 6565 struct maple_node *node; 6566 enum maple_type type; 6567 void __rcu **slots; 6568 unsigned char count, i; 6569 6570 /* Maybe the first node allocation failed. 
*/ 6571 if (mas_is_none(mas)) 6572 return; 6573 6574 while (!mte_is_root(mas->node)) { 6575 mas_ascend(mas); 6576 if (mas->offset) { 6577 mas->offset--; 6578 do { 6579 mas_descend(mas); 6580 mas->offset = mas_data_end(mas); 6581 } while (!mte_is_leaf(mas->node)); 6582 6583 mas_ascend(mas); 6584 } 6585 6586 node = mte_to_node(mas->node); 6587 type = mte_node_type(mas->node); 6588 slots = ma_slots(node, type); 6589 count = mas_data_end(mas) + 1; 6590 for (i = 0; i < count; i++) 6591 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; 6592 mt_free_bulk(count, slots); 6593 } 6594 6595 node = mte_to_node(mas->node); 6596 mt_free_one(node); 6597 } 6598 6599 /* 6600 * mas_copy_node() - Copy a maple node and replace the parent. 6601 * @mas: The maple state of source tree. 6602 * @new_mas: The maple state of new tree. 6603 * @parent: The parent of the new node. 6604 * 6605 * Copy @mas->node to @new_mas->node, set @parent to be the parent of 6606 * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM. 6607 */ mas_copy_node(struct ma_state * mas,struct ma_state * new_mas,struct maple_pnode * parent)6608 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, 6609 struct maple_pnode *parent) 6610 { 6611 struct maple_node *node = mte_to_node(mas->node); 6612 struct maple_node *new_node = mte_to_node(new_mas->node); 6613 unsigned long val; 6614 6615 /* Copy the node completely. */ 6616 memcpy(new_node, node, sizeof(struct maple_node)); 6617 /* Update the parent node pointer. */ 6618 val = (unsigned long)node->parent & MAPLE_NODE_MASK; 6619 new_node->parent = ma_parent_ptr(val | (unsigned long)parent); 6620 } 6621 6622 /* 6623 * mas_dup_alloc() - Allocate child nodes for a maple node. 6624 * @mas: The maple state of source tree. 6625 * @new_mas: The maple state of new tree. 6626 * @gfp: The GFP_FLAGS to use for allocations. 6627 * 6628 * This function allocates child nodes for @new_mas->node during the duplication 6629 * process. If memory allocation fails, @mas is set to -ENOMEM. 6630 */ mas_dup_alloc(struct ma_state * mas,struct ma_state * new_mas,gfp_t gfp)6631 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, 6632 gfp_t gfp) 6633 { 6634 struct maple_node *node = mte_to_node(mas->node); 6635 struct maple_node *new_node = mte_to_node(new_mas->node); 6636 enum maple_type type; 6637 unsigned char request, count, i; 6638 void __rcu **slots; 6639 void __rcu **new_slots; 6640 unsigned long val; 6641 6642 /* Allocate memory for child nodes. */ 6643 type = mte_node_type(mas->node); 6644 new_slots = ma_slots(new_node, type); 6645 request = mas_data_end(mas) + 1; 6646 count = mt_alloc_bulk(gfp, request, (void **)new_slots); 6647 if (unlikely(count < request)) { 6648 memset(new_slots, 0, request * sizeof(void *)); 6649 mas_set_err(mas, -ENOMEM); 6650 return; 6651 } 6652 6653 /* Restore node type information in slots. */ 6654 slots = ma_slots(node, type); 6655 for (i = 0; i < count; i++) { 6656 val = (unsigned long)mt_slot_locked(mas->tree, slots, i); 6657 val &= MAPLE_NODE_MASK; 6658 ((unsigned long *)new_slots)[i] |= val; 6659 } 6660 } 6661 6662 /* 6663 * mas_dup_build() - Build a new maple tree from a source tree 6664 * @mas: The maple state of source tree, need to be in MAS_START state. 6665 * @new_mas: The maple state of new tree, need to be in MAS_START state. 6666 * @gfp: The GFP_FLAGS to use for allocations. 6667 * 6668 * This function builds a new tree in DFS preorder. 
If the memory allocation 6669 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the 6670 * last node. mas_dup_free() will free the incomplete duplication of a tree. 6671 * 6672 * Note that the attributes of the two trees need to be exactly the same, and the 6673 * new tree needs to be empty, otherwise -EINVAL will be set in @mas. 6674 */ mas_dup_build(struct ma_state * mas,struct ma_state * new_mas,gfp_t gfp)6675 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, 6676 gfp_t gfp) 6677 { 6678 struct maple_node *node; 6679 struct maple_pnode *parent = NULL; 6680 struct maple_enode *root; 6681 enum maple_type type; 6682 6683 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || 6684 unlikely(!mtree_empty(new_mas->tree))) { 6685 mas_set_err(mas, -EINVAL); 6686 return; 6687 } 6688 6689 root = mas_start(mas); 6690 if (mas_is_ptr(mas) || mas_is_none(mas)) 6691 goto set_new_tree; 6692 6693 node = mt_alloc_one(gfp); 6694 if (!node) { 6695 new_mas->status = ma_none; 6696 mas_set_err(mas, -ENOMEM); 6697 return; 6698 } 6699 6700 type = mte_node_type(mas->node); 6701 root = mt_mk_node(node, type); 6702 new_mas->node = root; 6703 new_mas->min = 0; 6704 new_mas->max = ULONG_MAX; 6705 root = mte_mk_root(root); 6706 while (1) { 6707 mas_copy_node(mas, new_mas, parent); 6708 if (!mte_is_leaf(mas->node)) { 6709 /* Only allocate child nodes for non-leaf nodes. */ 6710 mas_dup_alloc(mas, new_mas, gfp); 6711 if (unlikely(mas_is_err(mas))) 6712 return; 6713 } else { 6714 /* 6715 * This is the last leaf node and duplication is 6716 * completed. 6717 */ 6718 if (mas->max == ULONG_MAX) 6719 goto done; 6720 6721 /* This is not the last leaf node and needs to go up. */ 6722 do { 6723 mas_ascend(mas); 6724 mas_ascend(new_mas); 6725 } while (mas->offset == mas_data_end(mas)); 6726 6727 /* Move to the next subtree. */ 6728 mas->offset++; 6729 new_mas->offset++; 6730 } 6731 6732 mas_descend(mas); 6733 parent = ma_parent_ptr(mte_to_node(new_mas->node)); 6734 mas_descend(new_mas); 6735 mas->offset = 0; 6736 new_mas->offset = 0; 6737 } 6738 done: 6739 /* Specially handle the parent of the root node. */ 6740 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); 6741 set_new_tree: 6742 /* Make them the same height */ 6743 new_mas->tree->ma_flags = mas->tree->ma_flags; 6744 rcu_assign_pointer(new_mas->tree->ma_root, root); 6745 } 6746 6747 /** 6748 * __mt_dup(): Duplicate an entire maple tree 6749 * @mt: The source maple tree 6750 * @new: The new maple tree 6751 * @gfp: The GFP_FLAGS to use for allocations 6752 * 6753 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6754 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6755 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6756 * source node except for all the addresses stored in it. It will be faster than 6757 * traversing all elements in the source tree and inserting them one by one into 6758 * the new tree. 6759 * The user needs to ensure that the attributes of the source tree and the new 6760 * tree are the same, and the new tree needs to be an empty tree, otherwise 6761 * -EINVAL will be returned. 6762 * Note that the user needs to manually lock the source tree and the new tree. 6763 * 6764 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6765 * the attributes of the two trees are different or the new tree is not an empty 6766 * tree. 
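 *
 * Illustrative sketch (not part of the original source; tree names are
 * placeholders, both trees are assumed to be locked by the caller, and the
 * flags passed to mt_init_flags() must match the source tree):
 *
 *	struct maple_tree new_tree;
 *	int ret;
 *
 *	mt_init_flags(&new_tree, MT_FLAGS_ALLOC_RANGE);
 *	ret = __mt_dup(&old_tree, &new_tree, GFP_KERNEL);
 *	if (ret)
 *		return ret;	// any partially built copy was already freed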
6767 */ __mt_dup(struct maple_tree * mt,struct maple_tree * new,gfp_t gfp)6768 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6769 { 6770 int ret = 0; 6771 MA_STATE(mas, mt, 0, 0); 6772 MA_STATE(new_mas, new, 0, 0); 6773 6774 mas_dup_build(&mas, &new_mas, gfp); 6775 if (unlikely(mas_is_err(&mas))) { 6776 ret = xa_err(mas.node); 6777 if (ret == -ENOMEM) 6778 mas_dup_free(&new_mas); 6779 } 6780 6781 return ret; 6782 } 6783 EXPORT_SYMBOL(__mt_dup); 6784 6785 /** 6786 * mtree_dup(): Duplicate an entire maple tree 6787 * @mt: The source maple tree 6788 * @new: The new maple tree 6789 * @gfp: The GFP_FLAGS to use for allocations 6790 * 6791 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order 6792 * traversal. It uses memcpy() to copy nodes in the source tree and allocate 6793 * new child nodes in non-leaf nodes. The new node is exactly the same as the 6794 * source node except for all the addresses stored in it. It will be faster than 6795 * traversing all elements in the source tree and inserting them one by one into 6796 * the new tree. 6797 * The user needs to ensure that the attributes of the source tree and the new 6798 * tree are the same, and the new tree needs to be an empty tree, otherwise 6799 * -EINVAL will be returned. 6800 * 6801 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If 6802 * the attributes of the two trees are different or the new tree is not an empty 6803 * tree. 6804 */ mtree_dup(struct maple_tree * mt,struct maple_tree * new,gfp_t gfp)6805 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) 6806 { 6807 int ret = 0; 6808 MA_STATE(mas, mt, 0, 0); 6809 MA_STATE(new_mas, new, 0, 0); 6810 6811 mas_lock(&new_mas); 6812 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); 6813 mas_dup_build(&mas, &new_mas, gfp); 6814 mas_unlock(&mas); 6815 if (unlikely(mas_is_err(&mas))) { 6816 ret = xa_err(mas.node); 6817 if (ret == -ENOMEM) 6818 mas_dup_free(&new_mas); 6819 } 6820 6821 mas_unlock(&new_mas); 6822 return ret; 6823 } 6824 EXPORT_SYMBOL(mtree_dup); 6825 6826 /** 6827 * __mt_destroy() - Walk and free all nodes of a locked maple tree. 6828 * @mt: The maple tree 6829 * 6830 * Note: Does not handle locking. 6831 */ __mt_destroy(struct maple_tree * mt)6832 void __mt_destroy(struct maple_tree *mt) 6833 { 6834 void *root = mt_root_locked(mt); 6835 6836 rcu_assign_pointer(mt->ma_root, NULL); 6837 if (xa_is_node(root)) 6838 mte_destroy_walk(root, mt); 6839 6840 mt->ma_flags = mt_attr(mt); 6841 } 6842 EXPORT_SYMBOL_GPL(__mt_destroy); 6843 6844 /** 6845 * mtree_destroy() - Destroy a maple tree 6846 * @mt: The maple tree 6847 * 6848 * Frees all resources used by the tree. Handles locking. 6849 */ mtree_destroy(struct maple_tree * mt)6850 void mtree_destroy(struct maple_tree *mt) 6851 { 6852 mtree_lock(mt); 6853 __mt_destroy(mt); 6854 mtree_unlock(mt); 6855 } 6856 EXPORT_SYMBOL(mtree_destroy); 6857 6858 /** 6859 * mt_find() - Search from the start up until an entry is found. 6860 * @mt: The maple tree 6861 * @index: Pointer which contains the start location of the search 6862 * @max: The maximum value of the search range 6863 * 6864 * Takes RCU read lock internally to protect the search, which does not 6865 * protect the returned pointer after dropping RCU read lock. 
6866 * See also: Documentation/core-api/maple_tree.rst 6867 * 6868 * In case that an entry is found @index is updated to point to the next 6869 * possible entry independent whether the found entry is occupying a 6870 * single index or a range if indices. 6871 * 6872 * Return: The entry at or after the @index or %NULL 6873 */ mt_find(struct maple_tree * mt,unsigned long * index,unsigned long max)6874 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) 6875 { 6876 MA_STATE(mas, mt, *index, *index); 6877 void *entry; 6878 #ifdef CONFIG_DEBUG_MAPLE_TREE 6879 unsigned long copy = *index; 6880 #endif 6881 6882 trace_ma_read(__func__, &mas); 6883 6884 if ((*index) > max) 6885 return NULL; 6886 6887 rcu_read_lock(); 6888 retry: 6889 entry = mas_state_walk(&mas); 6890 if (mas_is_start(&mas)) 6891 goto retry; 6892 6893 if (unlikely(xa_is_zero(entry))) 6894 entry = NULL; 6895 6896 if (entry) 6897 goto unlock; 6898 6899 while (mas_is_active(&mas) && (mas.last < max)) { 6900 entry = mas_next_slot(&mas, max, false); 6901 if (likely(entry && !xa_is_zero(entry))) 6902 break; 6903 } 6904 6905 if (unlikely(xa_is_zero(entry))) 6906 entry = NULL; 6907 unlock: 6908 rcu_read_unlock(); 6909 if (likely(entry)) { 6910 *index = mas.last + 1; 6911 #ifdef CONFIG_DEBUG_MAPLE_TREE 6912 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) 6913 pr_err("index not increased! %lx <= %lx\n", 6914 *index, copy); 6915 #endif 6916 } 6917 6918 return entry; 6919 } 6920 EXPORT_SYMBOL(mt_find); 6921 6922 /** 6923 * mt_find_after() - Search from the start up until an entry is found. 6924 * @mt: The maple tree 6925 * @index: Pointer which contains the start location of the search 6926 * @max: The maximum value to check 6927 * 6928 * Same as mt_find() except that it checks @index for 0 before 6929 * searching. If @index == 0, the search is aborted. This covers a wrap 6930 * around of @index to 0 in an iterator loop. 
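 *
 * Iteration sketch (illustrative only, not from the original source; the
 * tree name and handle_entry() are placeholders - the mt_for_each() helper
 * expresses this same mt_find()/mt_find_after() loop):
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	for (entry = mt_find(&tree, &index, ULONG_MAX); entry;
 *	     entry = mt_find_after(&tree, &index, ULONG_MAX))
 *		handle_entry(entry);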
6931 * 6932 * Return: The entry at or after the @index or %NULL 6933 */ mt_find_after(struct maple_tree * mt,unsigned long * index,unsigned long max)6934 void *mt_find_after(struct maple_tree *mt, unsigned long *index, 6935 unsigned long max) 6936 { 6937 if (!(*index)) 6938 return NULL; 6939 6940 return mt_find(mt, index, max); 6941 } 6942 EXPORT_SYMBOL(mt_find_after); 6943 6944 #ifdef CONFIG_DEBUG_MAPLE_TREE 6945 atomic_t maple_tree_tests_run; 6946 EXPORT_SYMBOL_GPL(maple_tree_tests_run); 6947 atomic_t maple_tree_tests_passed; 6948 EXPORT_SYMBOL_GPL(maple_tree_tests_passed); 6949 6950 #ifndef __KERNEL__ 6951 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); mt_set_non_kernel(unsigned int val)6952 void mt_set_non_kernel(unsigned int val) 6953 { 6954 kmem_cache_set_non_kernel(maple_node_cache, val); 6955 } 6956 6957 extern void kmem_cache_set_callback(struct kmem_cache *cachep, 6958 void (*callback)(void *)); mt_set_callback(void (* callback)(void *))6959 void mt_set_callback(void (*callback)(void *)) 6960 { 6961 kmem_cache_set_callback(maple_node_cache, callback); 6962 } 6963 6964 extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); mt_set_private(void * private)6965 void mt_set_private(void *private) 6966 { 6967 kmem_cache_set_private(maple_node_cache, private); 6968 } 6969 6970 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); mt_get_alloc_size(void)6971 unsigned long mt_get_alloc_size(void) 6972 { 6973 return kmem_cache_get_alloc(maple_node_cache); 6974 } 6975 6976 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); mt_zero_nr_tallocated(void)6977 void mt_zero_nr_tallocated(void) 6978 { 6979 kmem_cache_zero_nr_tallocated(maple_node_cache); 6980 } 6981 6982 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); mt_nr_tallocated(void)6983 unsigned int mt_nr_tallocated(void) 6984 { 6985 return kmem_cache_nr_tallocated(maple_node_cache); 6986 } 6987 6988 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); mt_nr_allocated(void)6989 unsigned int mt_nr_allocated(void) 6990 { 6991 return kmem_cache_nr_allocated(maple_node_cache); 6992 } 6993 mt_cache_shrink(void)6994 void mt_cache_shrink(void) 6995 { 6996 } 6997 #else 6998 /* 6999 * mt_cache_shrink() - For testing, don't use this. 7000 * 7001 * Certain testcases can trigger an OOM when combined with other memory 7002 * debugging configuration options. This function is used to reduce the 7003 * possibility of an out of memory even due to kmem_cache objects remaining 7004 * around for longer than usual. 7005 */ mt_cache_shrink(void)7006 void mt_cache_shrink(void) 7007 { 7008 kmem_cache_shrink(maple_node_cache); 7009 7010 } 7011 EXPORT_SYMBOL_GPL(mt_cache_shrink); 7012 7013 #endif /* not defined __KERNEL__ */ 7014 /* 7015 * mas_get_slot() - Get the entry in the maple state node stored at @offset. 7016 * @mas: The maple state 7017 * @offset: The offset into the slot array to fetch. 7018 * 7019 * Return: The entry stored at @offset. 
7020 */ mas_get_slot(struct ma_state * mas,unsigned char offset)7021 static inline struct maple_enode *mas_get_slot(struct ma_state *mas, 7022 unsigned char offset) 7023 { 7024 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), 7025 offset); 7026 } 7027 7028 /* Depth first search, post-order */ mas_dfs_postorder(struct ma_state * mas,unsigned long max)7029 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) 7030 { 7031 7032 struct maple_enode *p, *mn = mas->node; 7033 unsigned long p_min, p_max; 7034 7035 mas_next_node(mas, mas_mn(mas), max); 7036 if (!mas_is_overflow(mas)) 7037 return; 7038 7039 if (mte_is_root(mn)) 7040 return; 7041 7042 mas->node = mn; 7043 mas_ascend(mas); 7044 do { 7045 p = mas->node; 7046 p_min = mas->min; 7047 p_max = mas->max; 7048 mas_prev_node(mas, 0); 7049 } while (!mas_is_underflow(mas)); 7050 7051 mas->node = p; 7052 mas->max = p_max; 7053 mas->min = p_min; 7054 } 7055 7056 /* Tree validations */ 7057 static void mt_dump_node(const struct maple_tree *mt, void *entry, 7058 unsigned long min, unsigned long max, unsigned int depth, 7059 enum mt_dump_format format); mt_dump_range(unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)7060 static void mt_dump_range(unsigned long min, unsigned long max, 7061 unsigned int depth, enum mt_dump_format format) 7062 { 7063 static const char spaces[] = " "; 7064 7065 switch(format) { 7066 case mt_dump_hex: 7067 if (min == max) 7068 pr_info("%.*s%lx: ", depth * 2, spaces, min); 7069 else 7070 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); 7071 break; 7072 case mt_dump_dec: 7073 if (min == max) 7074 pr_info("%.*s%lu: ", depth * 2, spaces, min); 7075 else 7076 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); 7077 } 7078 } 7079 mt_dump_entry(void * entry,unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)7080 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, 7081 unsigned int depth, enum mt_dump_format format) 7082 { 7083 mt_dump_range(min, max, depth, format); 7084 7085 if (xa_is_value(entry)) 7086 pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), 7087 xa_to_value(entry), entry); 7088 else if (xa_is_zero(entry)) 7089 pr_cont("zero (%ld)\n", xa_to_internal(entry)); 7090 else if (mt_is_reserved(entry)) 7091 pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); 7092 else 7093 pr_cont(PTR_FMT "\n", entry); 7094 } 7095 mt_dump_range64(const struct maple_tree * mt,void * entry,unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)7096 static void mt_dump_range64(const struct maple_tree *mt, void *entry, 7097 unsigned long min, unsigned long max, unsigned int depth, 7098 enum mt_dump_format format) 7099 { 7100 struct maple_range_64 *node = &mte_to_node(entry)->mr64; 7101 bool leaf = mte_is_leaf(entry); 7102 unsigned long first = min; 7103 int i; 7104 7105 pr_cont(" contents: "); 7106 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { 7107 switch(format) { 7108 case mt_dump_hex: 7109 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 7110 break; 7111 case mt_dump_dec: 7112 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 7113 } 7114 } 7115 pr_cont(PTR_FMT "\n", node->slot[i]); 7116 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { 7117 unsigned long last = max; 7118 7119 if (i < (MAPLE_RANGE64_SLOTS - 1)) 7120 last = node->pivot[i]; 7121 else if (!node->slot[i] && max != mt_node_max(entry)) 7122 break; 7123 if (last == 0 && i > 0) 7124 break; 7125 if (leaf) 
7126 mt_dump_entry(mt_slot(mt, node->slot, i), 7127 first, last, depth + 1, format); 7128 else if (node->slot[i]) 7129 mt_dump_node(mt, mt_slot(mt, node->slot, i), 7130 first, last, depth + 1, format); 7131 7132 if (last == max) 7133 break; 7134 if (last > max) { 7135 switch(format) { 7136 case mt_dump_hex: 7137 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 7138 node, last, max, i); 7139 break; 7140 case mt_dump_dec: 7141 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 7142 node, last, max, i); 7143 } 7144 } 7145 first = last + 1; 7146 } 7147 } 7148 mt_dump_arange64(const struct maple_tree * mt,void * entry,unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)7149 static void mt_dump_arange64(const struct maple_tree *mt, void *entry, 7150 unsigned long min, unsigned long max, unsigned int depth, 7151 enum mt_dump_format format) 7152 { 7153 struct maple_arange_64 *node = &mte_to_node(entry)->ma64; 7154 unsigned long first = min; 7155 int i; 7156 7157 pr_cont(" contents: "); 7158 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 7159 switch (format) { 7160 case mt_dump_hex: 7161 pr_cont("%lx ", node->gap[i]); 7162 break; 7163 case mt_dump_dec: 7164 pr_cont("%lu ", node->gap[i]); 7165 } 7166 } 7167 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); 7168 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { 7169 switch (format) { 7170 case mt_dump_hex: 7171 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); 7172 break; 7173 case mt_dump_dec: 7174 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); 7175 } 7176 } 7177 pr_cont(PTR_FMT "\n", node->slot[i]); 7178 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { 7179 unsigned long last = max; 7180 7181 if (i < (MAPLE_ARANGE64_SLOTS - 1)) 7182 last = node->pivot[i]; 7183 else if (!node->slot[i]) 7184 break; 7185 if (last == 0 && i > 0) 7186 break; 7187 if (node->slot[i]) 7188 mt_dump_node(mt, mt_slot(mt, node->slot, i), 7189 first, last, depth + 1, format); 7190 7191 if (last == max) 7192 break; 7193 if (last > max) { 7194 switch(format) { 7195 case mt_dump_hex: 7196 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", 7197 node, last, max, i); 7198 break; 7199 case mt_dump_dec: 7200 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", 7201 node, last, max, i); 7202 } 7203 } 7204 first = last + 1; 7205 } 7206 } 7207 mt_dump_node(const struct maple_tree * mt,void * entry,unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)7208 static void mt_dump_node(const struct maple_tree *mt, void *entry, 7209 unsigned long min, unsigned long max, unsigned int depth, 7210 enum mt_dump_format format) 7211 { 7212 struct maple_node *node = mte_to_node(entry); 7213 unsigned int type = mte_node_type(entry); 7214 unsigned int i; 7215 7216 mt_dump_range(min, max, depth, format); 7217 7218 pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, 7219 depth, type, node ? 
node->parent : NULL); 7220 switch (type) { 7221 case maple_dense: 7222 pr_cont("\n"); 7223 for (i = 0; i < MAPLE_NODE_SLOTS; i++) { 7224 if (min + i > max) 7225 pr_cont("OUT OF RANGE: "); 7226 mt_dump_entry(mt_slot(mt, node->slot, i), 7227 min + i, min + i, depth, format); 7228 } 7229 break; 7230 case maple_leaf_64: 7231 case maple_range_64: 7232 mt_dump_range64(mt, entry, min, max, depth, format); 7233 break; 7234 case maple_arange_64: 7235 mt_dump_arange64(mt, entry, min, max, depth, format); 7236 break; 7237 7238 default: 7239 pr_cont(" UNKNOWN TYPE\n"); 7240 } 7241 } 7242 mt_dump(const struct maple_tree * mt,enum mt_dump_format format)7243 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) 7244 { 7245 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); 7246 7247 pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", 7248 mt, mt->ma_flags, mt_height(mt), entry); 7249 if (xa_is_node(entry)) 7250 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); 7251 else if (entry) 7252 mt_dump_entry(entry, 0, 0, 0, format); 7253 else 7254 pr_info("(empty)\n"); 7255 } 7256 EXPORT_SYMBOL_GPL(mt_dump); 7257 7258 /* 7259 * Calculate the maximum gap in a node and check if that's what is reported in 7260 * the parent (unless root). 7261 */ mas_validate_gaps(struct ma_state * mas)7262 static void mas_validate_gaps(struct ma_state *mas) 7263 { 7264 struct maple_enode *mte = mas->node; 7265 struct maple_node *p_mn, *node = mte_to_node(mte); 7266 enum maple_type mt = mte_node_type(mas->node); 7267 unsigned long gap = 0, max_gap = 0; 7268 unsigned long p_end, p_start = mas->min; 7269 unsigned char p_slot, offset; 7270 unsigned long *gaps = NULL; 7271 unsigned long *pivots = ma_pivots(node, mt); 7272 unsigned int i; 7273 7274 if (ma_is_dense(mt)) { 7275 for (i = 0; i < mt_slot_count(mte); i++) { 7276 if (mas_get_slot(mas, i)) { 7277 if (gap > max_gap) 7278 max_gap = gap; 7279 gap = 0; 7280 continue; 7281 } 7282 gap++; 7283 } 7284 goto counted; 7285 } 7286 7287 gaps = ma_gaps(node, mt); 7288 for (i = 0; i < mt_slot_count(mte); i++) { 7289 p_end = mas_safe_pivot(mas, pivots, i, mt); 7290 7291 if (!gaps) { 7292 if (!mas_get_slot(mas, i)) 7293 gap = p_end - p_start + 1; 7294 } else { 7295 void *entry = mas_get_slot(mas, i); 7296 7297 gap = gaps[i]; 7298 MT_BUG_ON(mas->tree, !entry); 7299 7300 if (gap > p_end - p_start + 1) { 7301 pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", 7302 mas_mn(mas), i, gap, p_end, p_start, 7303 p_end - p_start + 1); 7304 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); 7305 } 7306 } 7307 7308 if (gap > max_gap) 7309 max_gap = gap; 7310 7311 p_start = p_end + 1; 7312 if (p_end >= mas->max) 7313 break; 7314 } 7315 7316 counted: 7317 if (mt == maple_arange_64) { 7318 MT_BUG_ON(mas->tree, !gaps); 7319 offset = ma_meta_gap(node); 7320 if (offset > i) { 7321 pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); 7322 MT_BUG_ON(mas->tree, 1); 7323 } 7324 7325 if (gaps[offset] != max_gap) { 7326 pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", 7327 node, offset, max_gap); 7328 MT_BUG_ON(mas->tree, 1); 7329 } 7330 7331 for (i++ ; i < mt_slot_count(mte); i++) { 7332 if (gaps[i] != 0) { 7333 pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", 7334 node, i); 7335 MT_BUG_ON(mas->tree, 1); 7336 } 7337 } 7338 } 7339 7340 if (mte_is_root(mte)) 7341 return; 7342 7343 p_slot = mte_parent_slot(mas->node); 7344 p_mn = mte_parent(mte); 7345 MT_BUG_ON(mas->tree, max_gap > mas->max); 7346 if (ma_gaps(p_mn, mas_parent_type(mas, 
mte))[p_slot] != max_gap) { 7347 pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); 7348 mt_dump(mas->tree, mt_dump_hex); 7349 MT_BUG_ON(mas->tree, 1); 7350 } 7351 } 7352 mas_validate_parent_slot(struct ma_state * mas)7353 static void mas_validate_parent_slot(struct ma_state *mas) 7354 { 7355 struct maple_node *parent; 7356 struct maple_enode *node; 7357 enum maple_type p_type; 7358 unsigned char p_slot; 7359 void __rcu **slots; 7360 int i; 7361 7362 if (mte_is_root(mas->node)) 7363 return; 7364 7365 p_slot = mte_parent_slot(mas->node); 7366 p_type = mas_parent_type(mas, mas->node); 7367 parent = mte_parent(mas->node); 7368 slots = ma_slots(parent, p_type); 7369 MT_BUG_ON(mas->tree, mas_mn(mas) == parent); 7370 7371 /* Check prev/next parent slot for duplicate node entry */ 7372 7373 for (i = 0; i < mt_slots[p_type]; i++) { 7374 node = mas_slot(mas, slots, i); 7375 if (i == p_slot) { 7376 if (node != mas->node) 7377 pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", 7378 parent, i, mas_mn(mas)); 7379 MT_BUG_ON(mas->tree, node != mas->node); 7380 } else if (node == mas->node) { 7381 pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", 7382 mas_mn(mas), parent, i, p_slot); 7383 MT_BUG_ON(mas->tree, node == mas->node); 7384 } 7385 } 7386 } 7387 mas_validate_child_slot(struct ma_state * mas)7388 static void mas_validate_child_slot(struct ma_state *mas) 7389 { 7390 enum maple_type type = mte_node_type(mas->node); 7391 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7392 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); 7393 struct maple_enode *child; 7394 unsigned char i; 7395 7396 if (mte_is_leaf(mas->node)) 7397 return; 7398 7399 for (i = 0; i < mt_slots[type]; i++) { 7400 child = mas_slot(mas, slots, i); 7401 7402 if (!child) { 7403 pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", 7404 mas_mn(mas), i); 7405 MT_BUG_ON(mas->tree, 1); 7406 } 7407 7408 if (mte_parent_slot(child) != i) { 7409 pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", 7410 mas_mn(mas), i, mte_to_node(child), 7411 mte_parent_slot(child)); 7412 MT_BUG_ON(mas->tree, 1); 7413 } 7414 7415 if (mte_parent(child) != mte_to_node(mas->node)) { 7416 pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", 7417 mte_to_node(child), mte_parent(child), 7418 mte_to_node(mas->node)); 7419 MT_BUG_ON(mas->tree, 1); 7420 } 7421 7422 if (i < mt_pivots[type] && pivots[i] == mas->max) 7423 break; 7424 } 7425 } 7426 7427 /* 7428 * Validate all pivots are within mas->min and mas->max, check metadata ends 7429 * where the maximum ends and ensure there is no slots or pivots set outside of 7430 * the end of the data. 
7431 */ mas_validate_limits(struct ma_state * mas)7432 static void mas_validate_limits(struct ma_state *mas) 7433 { 7434 int i; 7435 unsigned long prev_piv = 0; 7436 enum maple_type type = mte_node_type(mas->node); 7437 void __rcu **slots = ma_slots(mte_to_node(mas->node), type); 7438 unsigned long *pivots = ma_pivots(mas_mn(mas), type); 7439 7440 for (i = 0; i < mt_slots[type]; i++) { 7441 unsigned long piv; 7442 7443 piv = mas_safe_pivot(mas, pivots, i, type); 7444 7445 if (!piv && (i != 0)) { 7446 pr_err("Missing node limit pivot at " PTR_FMT "[%u]", 7447 mas_mn(mas), i); 7448 MAS_WARN_ON(mas, 1); 7449 } 7450 7451 if (prev_piv > piv) { 7452 pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", 7453 mas_mn(mas), i, piv, prev_piv); 7454 MAS_WARN_ON(mas, piv < prev_piv); 7455 } 7456 7457 if (piv < mas->min) { 7458 pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, 7459 piv, mas->min); 7460 MAS_WARN_ON(mas, piv < mas->min); 7461 } 7462 if (piv > mas->max) { 7463 pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, 7464 piv, mas->max); 7465 MAS_WARN_ON(mas, piv > mas->max); 7466 } 7467 prev_piv = piv; 7468 if (piv == mas->max) 7469 break; 7470 } 7471 7472 if (mas_data_end(mas) != i) { 7473 pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", 7474 mas_mn(mas), mas_data_end(mas), i); 7475 MT_BUG_ON(mas->tree, 1); 7476 } 7477 7478 for (i += 1; i < mt_slots[type]; i++) { 7479 void *entry = mas_slot(mas, slots, i); 7480 7481 if (entry && (i != mt_slots[type] - 1)) { 7482 pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", 7483 mas_mn(mas), i, entry); 7484 MT_BUG_ON(mas->tree, entry != NULL); 7485 } 7486 7487 if (i < mt_pivots[type]) { 7488 unsigned long piv = pivots[i]; 7489 7490 if (!piv) 7491 continue; 7492 7493 pr_err(PTR_FMT "[%u] should not have piv %lu\n", 7494 mas_mn(mas), i, piv); 7495 MAS_WARN_ON(mas, i < mt_pivots[type] - 1); 7496 } 7497 } 7498 } 7499 mt_validate_nulls(struct maple_tree * mt)7500 static void mt_validate_nulls(struct maple_tree *mt) 7501 { 7502 void *entry, *last = (void *)1; 7503 unsigned char offset = 0; 7504 void __rcu **slots; 7505 MA_STATE(mas, mt, 0, 0); 7506 7507 mas_start(&mas); 7508 if (mas_is_none(&mas) || (mas_is_ptr(&mas))) 7509 return; 7510 7511 while (!mte_is_leaf(mas.node)) 7512 mas_descend(&mas); 7513 7514 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); 7515 do { 7516 entry = mas_slot(&mas, slots, offset); 7517 if (!last && !entry) { 7518 pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", 7519 mas_mn(&mas), offset); 7520 } 7521 MT_BUG_ON(mt, !last && !entry); 7522 last = entry; 7523 if (offset == mas_data_end(&mas)) { 7524 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); 7525 if (mas_is_overflow(&mas)) 7526 return; 7527 offset = 0; 7528 slots = ma_slots(mte_to_node(mas.node), 7529 mte_node_type(mas.node)); 7530 } else { 7531 offset++; 7532 } 7533 7534 } while (!mas_is_overflow(&mas)); 7535 } 7536 7537 /* 7538 * validate a maple tree by checking: 7539 * 1. The limits (pivots are within mas->min to mas->max) 7540 * 2. 
The gap is correctly set in the parents 7541 */ mt_validate(struct maple_tree * mt)7542 void mt_validate(struct maple_tree *mt) 7543 __must_hold(mas->tree->ma_lock) 7544 { 7545 unsigned char end; 7546 7547 MA_STATE(mas, mt, 0, 0); 7548 mas_start(&mas); 7549 if (!mas_is_active(&mas)) 7550 return; 7551 7552 while (!mte_is_leaf(mas.node)) 7553 mas_descend(&mas); 7554 7555 while (!mas_is_overflow(&mas)) { 7556 MAS_WARN_ON(&mas, mte_dead_node(mas.node)); 7557 end = mas_data_end(&mas); 7558 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && 7559 (!mte_is_root(mas.node)))) { 7560 pr_err("Invalid size %u of " PTR_FMT "\n", 7561 end, mas_mn(&mas)); 7562 } 7563 7564 mas_validate_parent_slot(&mas); 7565 mas_validate_limits(&mas); 7566 mas_validate_child_slot(&mas); 7567 if (mt_is_alloc(mt)) 7568 mas_validate_gaps(&mas); 7569 mas_dfs_postorder(&mas, ULONG_MAX); 7570 } 7571 mt_validate_nulls(mt); 7572 } 7573 EXPORT_SYMBOL_GPL(mt_validate); 7574 mas_dump(const struct ma_state * mas)7575 void mas_dump(const struct ma_state *mas) 7576 { 7577 pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", 7578 mas->tree, mas->node); 7579 switch (mas->status) { 7580 case ma_active: 7581 pr_err("(ma_active)"); 7582 break; 7583 case ma_none: 7584 pr_err("(ma_none)"); 7585 break; 7586 case ma_root: 7587 pr_err("(ma_root)"); 7588 break; 7589 case ma_start: 7590 pr_err("(ma_start) "); 7591 break; 7592 case ma_pause: 7593 pr_err("(ma_pause) "); 7594 break; 7595 case ma_overflow: 7596 pr_err("(ma_overflow) "); 7597 break; 7598 case ma_underflow: 7599 pr_err("(ma_underflow) "); 7600 break; 7601 case ma_error: 7602 pr_err("(ma_error) "); 7603 break; 7604 } 7605 7606 pr_err("Store Type: "); 7607 switch (mas->store_type) { 7608 case wr_invalid: 7609 pr_err("invalid store type\n"); 7610 break; 7611 case wr_new_root: 7612 pr_err("new_root\n"); 7613 break; 7614 case wr_store_root: 7615 pr_err("store_root\n"); 7616 break; 7617 case wr_exact_fit: 7618 pr_err("exact_fit\n"); 7619 break; 7620 case wr_split_store: 7621 pr_err("split_store\n"); 7622 break; 7623 case wr_slot_store: 7624 pr_err("slot_store\n"); 7625 break; 7626 case wr_append: 7627 pr_err("append\n"); 7628 break; 7629 case wr_node_store: 7630 pr_err("node_store\n"); 7631 break; 7632 case wr_spanning_store: 7633 pr_err("spanning_store\n"); 7634 break; 7635 case wr_rebalance: 7636 pr_err("rebalance\n"); 7637 break; 7638 } 7639 7640 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, 7641 mas->index, mas->last); 7642 pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n", 7643 mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); 7644 if (mas->index > mas->last) 7645 pr_err("Check index & last\n"); 7646 } 7647 EXPORT_SYMBOL_GPL(mas_dump); 7648 mas_wr_dump(const struct ma_wr_state * wr_mas)7649 void mas_wr_dump(const struct ma_wr_state *wr_mas) 7650 { 7651 pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", 7652 wr_mas->node, wr_mas->r_min, wr_mas->r_max); 7653 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", 7654 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, 7655 wr_mas->end_piv); 7656 } 7657 EXPORT_SYMBOL_GPL(mas_wr_dump); 7658 7659 #endif /* CONFIG_DEBUG_MAPLE_TREE */ 7660