1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Generic associative array implementation. 3 * 4 * See Documentation/core-api/assoc_array.rst for information. 5 * 6 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 7 * Written by David Howells (dhowells@redhat.com) 8 */ 9 //#define DEBUG 10 #include <linux/rcupdate.h> 11 #include <linux/slab.h> 12 #include <linux/err.h> 13 #include <linux/assoc_array_priv.h> 14 15 /* 16 * Iterate over an associative array. The caller must hold the RCU read lock 17 * or better. 18 */ 19 static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, 20 const struct assoc_array_ptr *stop, 21 int (*iterator)(const void *leaf, 22 void *iterator_data), 23 void *iterator_data) 24 { 25 const struct assoc_array_shortcut *shortcut; 26 const struct assoc_array_node *node; 27 const struct assoc_array_ptr *cursor, *ptr, *parent; 28 unsigned long has_meta; 29 int slot, ret; 30 31 cursor = root; 32 33 begin_node: 34 if (assoc_array_ptr_is_shortcut(cursor)) { 35 /* Descend through a shortcut */ 36 shortcut = assoc_array_ptr_to_shortcut(cursor); 37 cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ 38 } 39 40 node = assoc_array_ptr_to_node(cursor); 41 slot = 0; 42 43 /* We perform two passes of each node. 44 * 45 * The first pass does all the leaves in this node. This means we 46 * don't miss any leaves if the node is split up by insertion whilst 47 * we're iterating over the branches rooted here (we may, however, see 48 * some leaves twice). 49 */ 50 has_meta = 0; 51 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 52 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 53 has_meta |= (unsigned long)ptr; 54 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 55 /* We need a barrier between the read of the pointer, 56 * which is supplied by the above READ_ONCE(). 57 */ 58 /* Invoke the callback */ 59 ret = iterator(assoc_array_ptr_to_leaf(ptr), 60 iterator_data); 61 if (ret) 62 return ret; 63 } 64 } 65 66 /* The second pass attends to all the metadata pointers. If we follow 67 * one of these we may find that we don't come back here, but rather go 68 * back to a replacement node with the leaves in a different layout. 69 * 70 * We are guaranteed to make progress, however, as the slot number for 71 * a particular portion of the key space cannot change - and we 72 * continue at the back pointer + 1. 73 */ 74 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) 75 goto finished_node; 76 slot = 0; 77 78 continue_node: 79 node = assoc_array_ptr_to_node(cursor); 80 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 81 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 82 if (assoc_array_ptr_is_meta(ptr)) { 83 cursor = ptr; 84 goto begin_node; 85 } 86 } 87 88 finished_node: 89 /* Move up to the parent (may need to skip back over a shortcut) */ 90 parent = READ_ONCE(node->back_pointer); /* Address dependency. */ 91 slot = node->parent_slot; 92 if (parent == stop) 93 return 0; 94 95 if (assoc_array_ptr_is_shortcut(parent)) { 96 shortcut = assoc_array_ptr_to_shortcut(parent); 97 cursor = parent; 98 parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */ 99 slot = shortcut->parent_slot; 100 if (parent == stop) 101 return 0; 102 } 103 104 /* Ascend to next slot in parent node */ 105 cursor = parent; 106 slot++; 107 goto continue_node; 108 } 109 110 /** 111 * assoc_array_iterate - Pass all objects in the array to a callback 112 * @array: The array to iterate over. 113 * @iterator: The callback function. 114 * @iterator_data: Private data for the callback function. 115 * 116 * Iterate over all the objects in an associative array. Each one will be 117 * presented to the iterator function. 118 * 119 * If the array is being modified concurrently with the iteration then it is 120 * possible that some objects in the array will be passed to the iterator 121 * callback more than once - though every object should be passed at least 122 * once. If this is undesirable then the caller must lock against modification 123 * for the duration of this function. 124 * 125 * The function will return 0 if no objects were in the array or else it will 126 * return the result of the last iterator function called. Iteration stops 127 * immediately if any call to the iteration function results in a non-zero 128 * return. 129 * 130 * The caller should hold the RCU read lock or better if concurrent 131 * modification is possible. 132 */ 133 int assoc_array_iterate(const struct assoc_array *array, 134 int (*iterator)(const void *object, 135 void *iterator_data), 136 void *iterator_data) 137 { 138 struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ 139 140 if (!root) 141 return 0; 142 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); 143 } 144 145 enum assoc_array_walk_status { 146 assoc_array_walk_tree_empty, 147 assoc_array_walk_found_terminal_node, 148 assoc_array_walk_found_wrong_shortcut, 149 }; 150 151 struct assoc_array_walk_result { 152 struct { 153 struct assoc_array_node *node; /* Node in which leaf might be found */ 154 int level; 155 int slot; 156 } terminal_node; 157 struct { 158 struct assoc_array_shortcut *shortcut; 159 int level; 160 int sc_level; 161 unsigned long sc_segments; 162 unsigned long dissimilarity; 163 } wrong_shortcut; 164 }; 165 166 /* 167 * Navigate through the internal tree looking for the closest node to the key. 168 */ 169 static enum assoc_array_walk_status 170 assoc_array_walk(const struct assoc_array *array, 171 const struct assoc_array_ops *ops, 172 const void *index_key, 173 struct assoc_array_walk_result *result) 174 { 175 struct assoc_array_shortcut *shortcut; 176 struct assoc_array_node *node; 177 struct assoc_array_ptr *cursor, *ptr; 178 unsigned long sc_segments, dissimilarity; 179 unsigned long segments; 180 int level, sc_level, next_sc_level; 181 int slot; 182 183 pr_devel("-->%s()\n", __func__); 184 185 cursor = READ_ONCE(array->root); /* Address dependency. */ 186 if (!cursor) 187 return assoc_array_walk_tree_empty; 188 189 level = 0; 190 191 /* Use segments from the key for the new leaf to navigate through the 192 * internal tree, skipping through nodes and shortcuts that are on 193 * route to the destination. Eventually we'll come to a slot that is 194 * either empty or contains a leaf at which point we've found a node in 195 * which the leaf we're looking for might be found or into which it 196 * should be inserted. 197 */ 198 jumped: 199 segments = ops->get_key_chunk(index_key, level); 200 pr_devel("segments[%d]: %lx\n", level, segments); 201 202 if (assoc_array_ptr_is_shortcut(cursor)) 203 goto follow_shortcut; 204 205 consider_node: 206 node = assoc_array_ptr_to_node(cursor); 207 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 208 slot &= ASSOC_ARRAY_FAN_MASK; 209 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 210 211 pr_devel("consider slot %x [ix=%d type=%lu]\n", 212 slot, level, (unsigned long)ptr & 3); 213 214 if (!assoc_array_ptr_is_meta(ptr)) { 215 /* The node doesn't have a node/shortcut pointer in the slot 216 * corresponding to the index key that we have to follow. 217 */ 218 result->terminal_node.node = node; 219 result->terminal_node.level = level; 220 result->terminal_node.slot = slot; 221 pr_devel("<--%s() = terminal_node\n", __func__); 222 return assoc_array_walk_found_terminal_node; 223 } 224 225 if (assoc_array_ptr_is_node(ptr)) { 226 /* There is a pointer to a node in the slot corresponding to 227 * this index key segment, so we need to follow it. 228 */ 229 cursor = ptr; 230 level += ASSOC_ARRAY_LEVEL_STEP; 231 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) 232 goto consider_node; 233 goto jumped; 234 } 235 236 /* There is a shortcut in the slot corresponding to the index key 237 * segment. We follow the shortcut if its partial index key matches 238 * this leaf's. Otherwise we need to split the shortcut. 239 */ 240 cursor = ptr; 241 follow_shortcut: 242 shortcut = assoc_array_ptr_to_shortcut(cursor); 243 pr_devel("shortcut to %d\n", shortcut->skip_to_level); 244 sc_level = level + ASSOC_ARRAY_LEVEL_STEP; 245 BUG_ON(sc_level > shortcut->skip_to_level); 246 247 do { 248 /* Check the leaf against the shortcut's index key a word at a 249 * time, trimming the final word (the shortcut stores the index 250 * key completely from the root to the shortcut's target). 251 */ 252 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) 253 segments = ops->get_key_chunk(index_key, sc_level); 254 255 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; 256 dissimilarity = segments ^ sc_segments; 257 258 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { 259 /* Trim segments that are beyond the shortcut */ 260 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; 261 dissimilarity &= ~(ULONG_MAX << shift); 262 next_sc_level = shortcut->skip_to_level; 263 } else { 264 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; 265 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 266 } 267 268 if (dissimilarity != 0) { 269 /* This shortcut points elsewhere */ 270 result->wrong_shortcut.shortcut = shortcut; 271 result->wrong_shortcut.level = level; 272 result->wrong_shortcut.sc_level = sc_level; 273 result->wrong_shortcut.sc_segments = sc_segments; 274 result->wrong_shortcut.dissimilarity = dissimilarity; 275 return assoc_array_walk_found_wrong_shortcut; 276 } 277 278 sc_level = next_sc_level; 279 } while (sc_level < shortcut->skip_to_level); 280 281 /* The shortcut matches the leaf's index to this point. */ 282 cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ 283 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { 284 level = sc_level; 285 goto jumped; 286 } else { 287 level = sc_level; 288 goto consider_node; 289 } 290 } 291 292 /** 293 * assoc_array_find - Find an object by index key 294 * @array: The associative array to search. 295 * @ops: The operations to use. 296 * @index_key: The key to the object. 297 * 298 * Find an object in an associative array by walking through the internal tree 299 * to the node that should contain the object and then searching the leaves 300 * there. NULL is returned if the requested object was not found in the array. 301 * 302 * The caller must hold the RCU read lock or better. 303 */ 304 void *assoc_array_find(const struct assoc_array *array, 305 const struct assoc_array_ops *ops, 306 const void *index_key) 307 { 308 struct assoc_array_walk_result result; 309 const struct assoc_array_node *node; 310 const struct assoc_array_ptr *ptr; 311 const void *leaf; 312 int slot; 313 314 if (assoc_array_walk(array, ops, index_key, &result) != 315 assoc_array_walk_found_terminal_node) 316 return NULL; 317 318 node = result.terminal_node.node; 319 320 /* If the target key is available to us, it's has to be pointed to by 321 * the terminal node. 322 */ 323 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 324 ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ 325 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 326 /* We need a barrier between the read of the pointer 327 * and dereferencing the pointer - but only if we are 328 * actually going to dereference it. 329 */ 330 leaf = assoc_array_ptr_to_leaf(ptr); 331 if (ops->compare_object(leaf, index_key)) 332 return (void *)leaf; 333 } 334 } 335 336 return NULL; 337 } 338 339 /* 340 * Destructively iterate over an associative array. The caller must prevent 341 * other simultaneous accesses. 342 */ 343 static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, 344 const struct assoc_array_ops *ops) 345 { 346 struct assoc_array_shortcut *shortcut; 347 struct assoc_array_node *node; 348 struct assoc_array_ptr *cursor, *parent = NULL; 349 int slot = -1; 350 351 pr_devel("-->%s()\n", __func__); 352 353 cursor = root; 354 if (!cursor) { 355 pr_devel("empty\n"); 356 return; 357 } 358 359 move_to_meta: 360 if (assoc_array_ptr_is_shortcut(cursor)) { 361 /* Descend through a shortcut */ 362 pr_devel("[%d] shortcut\n", slot); 363 BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); 364 shortcut = assoc_array_ptr_to_shortcut(cursor); 365 BUG_ON(shortcut->back_pointer != parent); 366 BUG_ON(slot != -1 && shortcut->parent_slot != slot); 367 parent = cursor; 368 cursor = shortcut->next_node; 369 slot = -1; 370 BUG_ON(!assoc_array_ptr_is_node(cursor)); 371 } 372 373 pr_devel("[%d] node\n", slot); 374 node = assoc_array_ptr_to_node(cursor); 375 BUG_ON(node->back_pointer != parent); 376 BUG_ON(slot != -1 && node->parent_slot != slot); 377 slot = 0; 378 379 continue_node: 380 pr_devel("Node %p [back=%p]\n", node, node->back_pointer); 381 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 382 struct assoc_array_ptr *ptr = node->slots[slot]; 383 if (!ptr) 384 continue; 385 if (assoc_array_ptr_is_meta(ptr)) { 386 parent = cursor; 387 cursor = ptr; 388 goto move_to_meta; 389 } 390 391 if (ops) { 392 pr_devel("[%d] free leaf\n", slot); 393 ops->free_object(assoc_array_ptr_to_leaf(ptr)); 394 } 395 } 396 397 parent = node->back_pointer; 398 slot = node->parent_slot; 399 pr_devel("free node\n"); 400 kfree(node); 401 if (!parent) 402 return; /* Done */ 403 404 /* Move back up to the parent (may need to free a shortcut on 405 * the way up) */ 406 if (assoc_array_ptr_is_shortcut(parent)) { 407 shortcut = assoc_array_ptr_to_shortcut(parent); 408 BUG_ON(shortcut->next_node != cursor); 409 cursor = parent; 410 parent = shortcut->back_pointer; 411 slot = shortcut->parent_slot; 412 pr_devel("free shortcut\n"); 413 kfree(shortcut); 414 if (!parent) 415 return; 416 417 BUG_ON(!assoc_array_ptr_is_node(parent)); 418 } 419 420 /* Ascend to next slot in parent node */ 421 pr_devel("ascend to %p[%d]\n", parent, slot); 422 cursor = parent; 423 node = assoc_array_ptr_to_node(cursor); 424 slot++; 425 goto continue_node; 426 } 427 428 /** 429 * assoc_array_destroy - Destroy an associative array 430 * @array: The array to destroy. 431 * @ops: The operations to use. 432 * 433 * Discard all metadata and free all objects in an associative array. The 434 * array will be empty and ready to use again upon completion. This function 435 * cannot fail. 436 * 437 * The caller must prevent all other accesses whilst this takes place as no 438 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding 439 * accesses to continue. On the other hand, no memory allocation is required. 440 */ 441 void assoc_array_destroy(struct assoc_array *array, 442 const struct assoc_array_ops *ops) 443 { 444 assoc_array_destroy_subtree(array->root, ops); 445 array->root = NULL; 446 } 447 448 /* 449 * Handle insertion into an empty tree. 450 */ 451 static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) 452 { 453 struct assoc_array_node *new_n0; 454 455 pr_devel("-->%s()\n", __func__); 456 457 new_n0 = kzalloc_obj(struct assoc_array_node, GFP_KERNEL); 458 if (!new_n0) 459 return false; 460 461 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 462 edit->leaf_p = &new_n0->slots[0]; 463 edit->adjust_count_on = new_n0; 464 edit->set[0].ptr = &edit->array->root; 465 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 466 467 pr_devel("<--%s() = ok [no root]\n", __func__); 468 return true; 469 } 470 471 /* 472 * Handle insertion into a terminal node. 473 */ 474 static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, 475 const struct assoc_array_ops *ops, 476 const void *index_key, 477 struct assoc_array_walk_result *result) 478 { 479 struct assoc_array_shortcut *shortcut, *new_s0; 480 struct assoc_array_node *node, *new_n0, *new_n1, *side; 481 struct assoc_array_ptr *ptr; 482 unsigned long dissimilarity, base_seg, blank; 483 size_t keylen; 484 bool have_meta; 485 int level, diff; 486 int slot, next_slot, free_slot, i, j; 487 488 node = result->terminal_node.node; 489 level = result->terminal_node.level; 490 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; 491 492 pr_devel("-->%s()\n", __func__); 493 494 /* We arrived at a node which doesn't have an onward node or shortcut 495 * pointer that we have to follow. This means that (a) the leaf we 496 * want must go here (either by insertion or replacement) or (b) we 497 * need to split this node and insert in one of the fragments. 498 */ 499 free_slot = -1; 500 501 /* Firstly, we have to check the leaves in this node to see if there's 502 * a matching one we should replace in place. 503 */ 504 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 505 ptr = node->slots[i]; 506 if (!ptr) { 507 free_slot = i; 508 continue; 509 } 510 if (assoc_array_ptr_is_leaf(ptr) && 511 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 512 index_key)) { 513 pr_devel("replace in slot %d\n", i); 514 edit->leaf_p = &node->slots[i]; 515 edit->dead_leaf = node->slots[i]; 516 pr_devel("<--%s() = ok [replace]\n", __func__); 517 return true; 518 } 519 } 520 521 /* If there is a free slot in this node then we can just insert the 522 * leaf here. 523 */ 524 if (free_slot >= 0) { 525 pr_devel("insert in free slot %d\n", free_slot); 526 edit->leaf_p = &node->slots[free_slot]; 527 edit->adjust_count_on = node; 528 pr_devel("<--%s() = ok [insert]\n", __func__); 529 return true; 530 } 531 532 /* The node has no spare slots - so we're either going to have to split 533 * it or insert another node before it. 534 * 535 * Whatever, we're going to need at least two new nodes - so allocate 536 * those now. We may also need a new shortcut, but we deal with that 537 * when we need it. 538 */ 539 new_n0 = kzalloc_obj(struct assoc_array_node, GFP_KERNEL); 540 if (!new_n0) 541 return false; 542 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 543 new_n1 = kzalloc_obj(struct assoc_array_node, GFP_KERNEL); 544 if (!new_n1) 545 return false; 546 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); 547 548 /* We need to find out how similar the leaves are. */ 549 pr_devel("no spare slots\n"); 550 have_meta = false; 551 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 552 ptr = node->slots[i]; 553 if (assoc_array_ptr_is_meta(ptr)) { 554 edit->segment_cache[i] = 0xff; 555 have_meta = true; 556 continue; 557 } 558 base_seg = ops->get_object_key_chunk( 559 assoc_array_ptr_to_leaf(ptr), level); 560 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 561 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 562 } 563 564 if (have_meta) { 565 pr_devel("have meta\n"); 566 goto split_node; 567 } 568 569 /* The node contains only leaves */ 570 dissimilarity = 0; 571 base_seg = edit->segment_cache[0]; 572 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) 573 dissimilarity |= edit->segment_cache[i] ^ base_seg; 574 575 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); 576 577 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { 578 /* The old leaves all cluster in the same slot. We will need 579 * to insert a shortcut if the new node wants to cluster with them. 580 */ 581 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) 582 goto all_leaves_cluster_together; 583 584 /* Otherwise all the old leaves cluster in the same slot, but 585 * the new leaf wants to go into a different slot - so we 586 * create a new node (n0) to hold the new leaf and a pointer to 587 * a new node (n1) holding all the old leaves. 588 * 589 * This can be done by falling through to the node splitting 590 * path. 591 */ 592 pr_devel("present leaves cluster but not new leaf\n"); 593 } 594 595 split_node: 596 pr_devel("split node\n"); 597 598 /* We need to split the current node. The node must contain anything 599 * from a single leaf (in the one leaf case, this leaf will cluster 600 * with the new leaf) and the rest meta-pointers, to all leaves, some 601 * of which may cluster. 602 * 603 * It won't contain the case in which all the current leaves plus the 604 * new leaves want to cluster in the same slot. 605 * 606 * We need to expel at least two leaves out of a set consisting of the 607 * leaves in the node and the new leaf. The current meta pointers can 608 * just be copied as they shouldn't cluster with any of the leaves. 609 * 610 * We need a new node (n0) to replace the current one and a new node to 611 * take the expelled nodes (n1). 612 */ 613 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 614 new_n0->back_pointer = node->back_pointer; 615 new_n0->parent_slot = node->parent_slot; 616 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 617 new_n1->parent_slot = -1; /* Need to calculate this */ 618 619 do_split_node: 620 pr_devel("do_split_node\n"); 621 622 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 623 new_n1->nr_leaves_on_branch = 0; 624 625 /* Begin by finding two matching leaves. There have to be at least two 626 * that match - even if there are meta pointers - because any leaf that 627 * would match a slot with a meta pointer in it must be somewhere 628 * behind that meta pointer and cannot be here. Further, given N 629 * remaining leaf slots, we now have N+1 leaves to go in them. 630 */ 631 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 632 slot = edit->segment_cache[i]; 633 if (slot != 0xff) 634 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) 635 if (edit->segment_cache[j] == slot) 636 goto found_slot_for_multiple_occupancy; 637 } 638 found_slot_for_multiple_occupancy: 639 pr_devel("same slot: %x %x [%02x]\n", i, j, slot); 640 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); 641 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); 642 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); 643 644 new_n1->parent_slot = slot; 645 646 /* Metadata pointers cannot change slot */ 647 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 648 if (assoc_array_ptr_is_meta(node->slots[i])) 649 new_n0->slots[i] = node->slots[i]; 650 else 651 new_n0->slots[i] = NULL; 652 BUG_ON(new_n0->slots[slot] != NULL); 653 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); 654 655 /* Filter the leaf pointers between the new nodes */ 656 free_slot = -1; 657 next_slot = 0; 658 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 659 if (assoc_array_ptr_is_meta(node->slots[i])) 660 continue; 661 if (edit->segment_cache[i] == slot) { 662 new_n1->slots[next_slot++] = node->slots[i]; 663 new_n1->nr_leaves_on_branch++; 664 } else { 665 do { 666 free_slot++; 667 } while (new_n0->slots[free_slot] != NULL); 668 new_n0->slots[free_slot] = node->slots[i]; 669 } 670 } 671 672 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); 673 674 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { 675 do { 676 free_slot++; 677 } while (new_n0->slots[free_slot] != NULL); 678 edit->leaf_p = &new_n0->slots[free_slot]; 679 edit->adjust_count_on = new_n0; 680 } else { 681 edit->leaf_p = &new_n1->slots[next_slot++]; 682 edit->adjust_count_on = new_n1; 683 } 684 685 BUG_ON(next_slot <= 1); 686 687 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); 688 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 689 if (edit->segment_cache[i] == 0xff) { 690 ptr = node->slots[i]; 691 BUG_ON(assoc_array_ptr_is_leaf(ptr)); 692 if (assoc_array_ptr_is_node(ptr)) { 693 side = assoc_array_ptr_to_node(ptr); 694 edit->set_backpointers[i] = &side->back_pointer; 695 } else { 696 shortcut = assoc_array_ptr_to_shortcut(ptr); 697 edit->set_backpointers[i] = &shortcut->back_pointer; 698 } 699 } 700 } 701 702 ptr = node->back_pointer; 703 if (!ptr) 704 edit->set[0].ptr = &edit->array->root; 705 else if (assoc_array_ptr_is_node(ptr)) 706 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; 707 else 708 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; 709 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 710 pr_devel("<--%s() = ok [split node]\n", __func__); 711 return true; 712 713 all_leaves_cluster_together: 714 /* All the leaves, new and old, want to cluster together in this node 715 * in the same slot, so we have to replace this node with a shortcut to 716 * skip over the identical parts of the key and then place a pair of 717 * nodes, one inside the other, at the end of the shortcut and 718 * distribute the keys between them. 719 * 720 * Firstly we need to work out where the leaves start diverging as a 721 * bit position into their keys so that we know how big the shortcut 722 * needs to be. 723 * 724 * We only need to make a single pass of N of the N+1 leaves because if 725 * any keys differ between themselves at bit X then at least one of 726 * them must also differ with the base key at bit X or before. 727 */ 728 pr_devel("all leaves cluster together\n"); 729 diff = INT_MAX; 730 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 731 int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), 732 index_key); 733 if (x < diff) { 734 BUG_ON(x < 0); 735 diff = x; 736 } 737 } 738 BUG_ON(diff == INT_MAX); 739 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); 740 741 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 742 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 743 744 new_s0 = kzalloc_flex(*new_s0, index_key, keylen, GFP_KERNEL); 745 if (!new_s0) 746 return false; 747 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); 748 749 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 750 new_s0->back_pointer = node->back_pointer; 751 new_s0->parent_slot = node->parent_slot; 752 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 753 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 754 new_n0->parent_slot = 0; 755 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 756 new_n1->parent_slot = -1; /* Need to calculate this */ 757 758 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; 759 pr_devel("skip_to_level = %d [diff %d]\n", level, diff); 760 BUG_ON(level <= 0); 761 762 for (i = 0; i < keylen; i++) 763 new_s0->index_key[i] = 764 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); 765 766 if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { 767 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 768 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); 769 new_s0->index_key[keylen - 1] &= ~blank; 770 } 771 772 /* This now reduces to a node splitting exercise for which we'll need 773 * to regenerate the disparity table. 774 */ 775 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 776 ptr = node->slots[i]; 777 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), 778 level); 779 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 780 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 781 } 782 783 base_seg = ops->get_key_chunk(index_key, level); 784 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 785 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; 786 goto do_split_node; 787 } 788 789 /* 790 * Handle insertion into the middle of a shortcut. 791 */ 792 static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, 793 const struct assoc_array_ops *ops, 794 struct assoc_array_walk_result *result) 795 { 796 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; 797 struct assoc_array_node *node, *new_n0, *side; 798 unsigned long sc_segments, dissimilarity, blank; 799 size_t keylen; 800 int level, sc_level, diff; 801 int sc_slot; 802 803 shortcut = result->wrong_shortcut.shortcut; 804 level = result->wrong_shortcut.level; 805 sc_level = result->wrong_shortcut.sc_level; 806 sc_segments = result->wrong_shortcut.sc_segments; 807 dissimilarity = result->wrong_shortcut.dissimilarity; 808 809 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", 810 __func__, level, dissimilarity, sc_level); 811 812 /* We need to split a shortcut and insert a node between the two 813 * pieces. Zero-length pieces will be dispensed with entirely. 814 * 815 * First of all, we need to find out in which level the first 816 * difference was. 817 */ 818 diff = __ffs(dissimilarity); 819 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; 820 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; 821 pr_devel("diff=%d\n", diff); 822 823 if (!shortcut->back_pointer) { 824 edit->set[0].ptr = &edit->array->root; 825 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { 826 node = assoc_array_ptr_to_node(shortcut->back_pointer); 827 edit->set[0].ptr = &node->slots[shortcut->parent_slot]; 828 } else { 829 BUG(); 830 } 831 832 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); 833 834 /* Create a new node now since we're going to need it anyway */ 835 new_n0 = kzalloc_obj(struct assoc_array_node, GFP_KERNEL); 836 if (!new_n0) 837 return false; 838 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 839 edit->adjust_count_on = new_n0; 840 841 /* Insert a new shortcut before the new node if this segment isn't of 842 * zero length - otherwise we just connect the new node directly to the 843 * parent. 844 */ 845 level += ASSOC_ARRAY_LEVEL_STEP; 846 if (diff > level) { 847 pr_devel("pre-shortcut %d...%d\n", level, diff); 848 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 849 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 850 851 new_s0 = kzalloc_flex(*new_s0, index_key, keylen, GFP_KERNEL); 852 if (!new_s0) 853 return false; 854 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); 855 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 856 new_s0->back_pointer = shortcut->back_pointer; 857 new_s0->parent_slot = shortcut->parent_slot; 858 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 859 new_s0->skip_to_level = diff; 860 861 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 862 new_n0->parent_slot = 0; 863 864 memcpy(new_s0->index_key, shortcut->index_key, 865 flex_array_size(new_s0, index_key, keylen)); 866 867 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 868 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); 869 new_s0->index_key[keylen - 1] &= ~blank; 870 } else { 871 pr_devel("no pre-shortcut\n"); 872 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 873 new_n0->back_pointer = shortcut->back_pointer; 874 new_n0->parent_slot = shortcut->parent_slot; 875 } 876 877 side = assoc_array_ptr_to_node(shortcut->next_node); 878 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; 879 880 /* We need to know which slot in the new node is going to take a 881 * metadata pointer. 882 */ 883 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 884 sc_slot &= ASSOC_ARRAY_FAN_MASK; 885 886 pr_devel("new slot %lx >> %d -> %d\n", 887 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); 888 889 /* Determine whether we need to follow the new node with a replacement 890 * for the current shortcut. We could in theory reuse the current 891 * shortcut if its parent slot number doesn't change - but that's a 892 * 1-in-16 chance so not worth expending the code upon. 893 */ 894 level = diff + ASSOC_ARRAY_LEVEL_STEP; 895 if (level < shortcut->skip_to_level) { 896 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); 897 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 898 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 899 900 new_s1 = kzalloc_flex(*new_s1, index_key, keylen, GFP_KERNEL); 901 if (!new_s1) 902 return false; 903 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); 904 905 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); 906 new_s1->parent_slot = sc_slot; 907 new_s1->next_node = shortcut->next_node; 908 new_s1->skip_to_level = shortcut->skip_to_level; 909 910 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); 911 912 memcpy(new_s1->index_key, shortcut->index_key, 913 flex_array_size(new_s1, index_key, keylen)); 914 915 edit->set[1].ptr = &side->back_pointer; 916 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); 917 } else { 918 pr_devel("no post-shortcut\n"); 919 920 /* We don't have to replace the pointed-to node as long as we 921 * use memory barriers to make sure the parent slot number is 922 * changed before the back pointer (the parent slot number is 923 * irrelevant to the old parent shortcut). 924 */ 925 new_n0->slots[sc_slot] = shortcut->next_node; 926 edit->set_parent_slot[0].p = &side->parent_slot; 927 edit->set_parent_slot[0].to = sc_slot; 928 edit->set[1].ptr = &side->back_pointer; 929 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 930 } 931 932 /* Install the new leaf in a spare slot in the new node. */ 933 if (sc_slot == 0) 934 edit->leaf_p = &new_n0->slots[1]; 935 else 936 edit->leaf_p = &new_n0->slots[0]; 937 938 pr_devel("<--%s() = ok [split shortcut]\n", __func__); 939 return true; 940 } 941 942 /** 943 * assoc_array_insert - Script insertion of an object into an associative array 944 * @array: The array to insert into. 945 * @ops: The operations to use. 946 * @index_key: The key to insert at. 947 * @object: The object to insert. 948 * 949 * Precalculate and preallocate a script for the insertion or replacement of an 950 * object in an associative array. This results in an edit script that can 951 * either be applied or cancelled. 952 * 953 * The function returns a pointer to an edit script or -ENOMEM. 954 * 955 * The caller should lock against other modifications and must continue to hold 956 * the lock until assoc_array_apply_edit() has been called. 957 * 958 * Accesses to the tree may take place concurrently with this function, 959 * provided they hold the RCU read lock. 960 */ 961 struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, 962 const struct assoc_array_ops *ops, 963 const void *index_key, 964 void *object) 965 { 966 struct assoc_array_walk_result result; 967 struct assoc_array_edit *edit; 968 969 pr_devel("-->%s()\n", __func__); 970 971 /* The leaf pointer we're given must not have the bottom bit set as we 972 * use those for type-marking the pointer. NULL pointers are also not 973 * allowed as they indicate an empty slot but we have to allow them 974 * here as they can be updated later. 975 */ 976 BUG_ON(assoc_array_ptr_is_meta(object)); 977 978 edit = kzalloc_obj(struct assoc_array_edit, GFP_KERNEL); 979 if (!edit) 980 return ERR_PTR(-ENOMEM); 981 edit->array = array; 982 edit->ops = ops; 983 edit->leaf = assoc_array_leaf_to_ptr(object); 984 edit->adjust_count_by = 1; 985 986 switch (assoc_array_walk(array, ops, index_key, &result)) { 987 case assoc_array_walk_tree_empty: 988 /* Allocate a root node if there isn't one yet */ 989 if (!assoc_array_insert_in_empty_tree(edit)) 990 goto enomem; 991 return edit; 992 993 case assoc_array_walk_found_terminal_node: 994 /* We found a node that doesn't have a node/shortcut pointer in 995 * the slot corresponding to the index key that we have to 996 * follow. 997 */ 998 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, 999 &result)) 1000 goto enomem; 1001 return edit; 1002 1003 case assoc_array_walk_found_wrong_shortcut: 1004 /* We found a shortcut that didn't match our key in a slot we 1005 * needed to follow. 1006 */ 1007 if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) 1008 goto enomem; 1009 return edit; 1010 } 1011 1012 enomem: 1013 /* Clean up after an out of memory error */ 1014 pr_devel("enomem\n"); 1015 assoc_array_cancel_edit(edit); 1016 return ERR_PTR(-ENOMEM); 1017 } 1018 1019 /** 1020 * assoc_array_insert_set_object - Set the new object pointer in an edit script 1021 * @edit: The edit script to modify. 1022 * @object: The object pointer to set. 1023 * 1024 * Change the object to be inserted in an edit script. The object pointed to 1025 * by the old object is not freed. This must be done prior to applying the 1026 * script. 1027 */ 1028 void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) 1029 { 1030 BUG_ON(!object); 1031 edit->leaf = assoc_array_leaf_to_ptr(object); 1032 } 1033 1034 struct assoc_array_delete_collapse_context { 1035 struct assoc_array_node *node; 1036 const void *skip_leaf; 1037 int slot; 1038 }; 1039 1040 /* 1041 * Subtree collapse to node iterator. 1042 */ 1043 static int assoc_array_delete_collapse_iterator(const void *leaf, 1044 void *iterator_data) 1045 { 1046 struct assoc_array_delete_collapse_context *collapse = iterator_data; 1047 1048 if (leaf == collapse->skip_leaf) 1049 return 0; 1050 1051 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); 1052 1053 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); 1054 return 0; 1055 } 1056 1057 /** 1058 * assoc_array_delete - Script deletion of an object from an associative array 1059 * @array: The array to search. 1060 * @ops: The operations to use. 1061 * @index_key: The key to the object. 1062 * 1063 * Precalculate and preallocate a script for the deletion of an object from an 1064 * associative array. This results in an edit script that can either be 1065 * applied or cancelled. 1066 * 1067 * The function returns a pointer to an edit script if the object was found, 1068 * NULL if the object was not found or -ENOMEM. 1069 * 1070 * The caller should lock against other modifications and must continue to hold 1071 * the lock until assoc_array_apply_edit() has been called. 1072 * 1073 * Accesses to the tree may take place concurrently with this function, 1074 * provided they hold the RCU read lock. 1075 */ 1076 struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, 1077 const struct assoc_array_ops *ops, 1078 const void *index_key) 1079 { 1080 struct assoc_array_delete_collapse_context collapse; 1081 struct assoc_array_walk_result result; 1082 struct assoc_array_node *node, *new_n0; 1083 struct assoc_array_edit *edit; 1084 struct assoc_array_ptr *ptr; 1085 bool has_meta; 1086 int slot, i; 1087 1088 pr_devel("-->%s()\n", __func__); 1089 1090 edit = kzalloc_obj(struct assoc_array_edit, GFP_KERNEL); 1091 if (!edit) 1092 return ERR_PTR(-ENOMEM); 1093 edit->array = array; 1094 edit->ops = ops; 1095 edit->adjust_count_by = -1; 1096 1097 switch (assoc_array_walk(array, ops, index_key, &result)) { 1098 case assoc_array_walk_found_terminal_node: 1099 /* We found a node that should contain the leaf we've been 1100 * asked to remove - *if* it's in the tree. 1101 */ 1102 pr_devel("terminal_node\n"); 1103 node = result.terminal_node.node; 1104 1105 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1106 ptr = node->slots[slot]; 1107 if (ptr && 1108 assoc_array_ptr_is_leaf(ptr) && 1109 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 1110 index_key)) 1111 goto found_leaf; 1112 } 1113 fallthrough; 1114 case assoc_array_walk_tree_empty: 1115 case assoc_array_walk_found_wrong_shortcut: 1116 default: 1117 assoc_array_cancel_edit(edit); 1118 pr_devel("not found\n"); 1119 return NULL; 1120 } 1121 1122 found_leaf: 1123 BUG_ON(array->nr_leaves_on_tree <= 0); 1124 1125 /* In the simplest form of deletion we just clear the slot and release 1126 * the leaf after a suitable interval. 1127 */ 1128 edit->dead_leaf = node->slots[slot]; 1129 edit->set[0].ptr = &node->slots[slot]; 1130 edit->set[0].to = NULL; 1131 edit->adjust_count_on = node; 1132 1133 /* If that concludes erasure of the last leaf, then delete the entire 1134 * internal array. 1135 */ 1136 if (array->nr_leaves_on_tree == 1) { 1137 edit->set[1].ptr = &array->root; 1138 edit->set[1].to = NULL; 1139 edit->adjust_count_on = NULL; 1140 edit->excised_subtree = array->root; 1141 pr_devel("all gone\n"); 1142 return edit; 1143 } 1144 1145 /* However, we'd also like to clear up some metadata blocks if we 1146 * possibly can. 1147 * 1148 * We go for a simple algorithm of: if this node has FAN_OUT or fewer 1149 * leaves in it, then attempt to collapse it - and attempt to 1150 * recursively collapse up the tree. 1151 * 1152 * We could also try and collapse in partially filled subtrees to take 1153 * up space in this node. 1154 */ 1155 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1156 struct assoc_array_node *parent, *grandparent; 1157 struct assoc_array_ptr *ptr; 1158 1159 /* First of all, we need to know if this node has metadata so 1160 * that we don't try collapsing if all the leaves are already 1161 * here. 1162 */ 1163 has_meta = false; 1164 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1165 ptr = node->slots[i]; 1166 if (assoc_array_ptr_is_meta(ptr)) { 1167 has_meta = true; 1168 break; 1169 } 1170 } 1171 1172 pr_devel("leaves: %ld [m=%d]\n", 1173 node->nr_leaves_on_branch - 1, has_meta); 1174 1175 /* Look further up the tree to see if we can collapse this node 1176 * into a more proximal node too. 1177 */ 1178 parent = node; 1179 collapse_up: 1180 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); 1181 1182 ptr = parent->back_pointer; 1183 if (!ptr) 1184 goto do_collapse; 1185 if (assoc_array_ptr_is_shortcut(ptr)) { 1186 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); 1187 ptr = s->back_pointer; 1188 if (!ptr) 1189 goto do_collapse; 1190 } 1191 1192 grandparent = assoc_array_ptr_to_node(ptr); 1193 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1194 parent = grandparent; 1195 goto collapse_up; 1196 } 1197 1198 do_collapse: 1199 /* There's no point collapsing if the original node has no meta 1200 * pointers to discard and if we didn't merge into one of that 1201 * node's ancestry. 1202 */ 1203 if (has_meta || parent != node) { 1204 node = parent; 1205 1206 /* Create a new node to collapse into */ 1207 new_n0 = kzalloc_obj(struct assoc_array_node, 1208 GFP_KERNEL); 1209 if (!new_n0) 1210 goto enomem; 1211 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 1212 1213 new_n0->back_pointer = node->back_pointer; 1214 new_n0->parent_slot = node->parent_slot; 1215 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 1216 edit->adjust_count_on = new_n0; 1217 1218 collapse.node = new_n0; 1219 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); 1220 collapse.slot = 0; 1221 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), 1222 node->back_pointer, 1223 assoc_array_delete_collapse_iterator, 1224 &collapse); 1225 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); 1226 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); 1227 1228 if (!node->back_pointer) { 1229 edit->set[1].ptr = &array->root; 1230 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { 1231 BUG(); 1232 } else if (assoc_array_ptr_is_node(node->back_pointer)) { 1233 struct assoc_array_node *p = 1234 assoc_array_ptr_to_node(node->back_pointer); 1235 edit->set[1].ptr = &p->slots[node->parent_slot]; 1236 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { 1237 struct assoc_array_shortcut *s = 1238 assoc_array_ptr_to_shortcut(node->back_pointer); 1239 edit->set[1].ptr = &s->next_node; 1240 } 1241 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 1242 edit->excised_subtree = assoc_array_node_to_ptr(node); 1243 } 1244 } 1245 1246 return edit; 1247 1248 enomem: 1249 /* Clean up after an out of memory error */ 1250 pr_devel("enomem\n"); 1251 assoc_array_cancel_edit(edit); 1252 return ERR_PTR(-ENOMEM); 1253 } 1254 1255 /** 1256 * assoc_array_clear - Script deletion of all objects from an associative array 1257 * @array: The array to clear. 1258 * @ops: The operations to use. 1259 * 1260 * Precalculate and preallocate a script for the deletion of all the objects 1261 * from an associative array. This results in an edit script that can either 1262 * be applied or cancelled. 1263 * 1264 * The function returns a pointer to an edit script if there are objects to be 1265 * deleted, NULL if there are no objects in the array or -ENOMEM. 1266 * 1267 * The caller should lock against other modifications and must continue to hold 1268 * the lock until assoc_array_apply_edit() has been called. 1269 * 1270 * Accesses to the tree may take place concurrently with this function, 1271 * provided they hold the RCU read lock. 1272 */ 1273 struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, 1274 const struct assoc_array_ops *ops) 1275 { 1276 struct assoc_array_edit *edit; 1277 1278 pr_devel("-->%s()\n", __func__); 1279 1280 if (!array->root) 1281 return NULL; 1282 1283 edit = kzalloc_obj(struct assoc_array_edit, GFP_KERNEL); 1284 if (!edit) 1285 return ERR_PTR(-ENOMEM); 1286 edit->array = array; 1287 edit->ops = ops; 1288 edit->set[1].ptr = &array->root; 1289 edit->set[1].to = NULL; 1290 edit->excised_subtree = array->root; 1291 edit->ops_for_excised_subtree = ops; 1292 pr_devel("all gone\n"); 1293 return edit; 1294 } 1295 1296 /* 1297 * Handle the deferred destruction after an applied edit. 1298 */ 1299 static void assoc_array_rcu_cleanup(struct rcu_head *head) 1300 { 1301 struct assoc_array_edit *edit = 1302 container_of(head, struct assoc_array_edit, rcu); 1303 int i; 1304 1305 pr_devel("-->%s()\n", __func__); 1306 1307 if (edit->dead_leaf) 1308 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); 1309 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) 1310 if (edit->excised_meta[i]) 1311 kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); 1312 1313 if (edit->excised_subtree) { 1314 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); 1315 if (assoc_array_ptr_is_node(edit->excised_subtree)) { 1316 struct assoc_array_node *n = 1317 assoc_array_ptr_to_node(edit->excised_subtree); 1318 n->back_pointer = NULL; 1319 } else { 1320 struct assoc_array_shortcut *s = 1321 assoc_array_ptr_to_shortcut(edit->excised_subtree); 1322 s->back_pointer = NULL; 1323 } 1324 assoc_array_destroy_subtree(edit->excised_subtree, 1325 edit->ops_for_excised_subtree); 1326 } 1327 1328 kfree(edit); 1329 } 1330 1331 /** 1332 * assoc_array_apply_edit - Apply an edit script to an associative array 1333 * @edit: The script to apply. 1334 * 1335 * Apply an edit script to an associative array to effect an insertion, 1336 * deletion or clearance. As the edit script includes preallocated memory, 1337 * this is guaranteed not to fail. 1338 * 1339 * The edit script, dead objects and dead metadata will be scheduled for 1340 * destruction after an RCU grace period to permit those doing read-only 1341 * accesses on the array to continue to do so under the RCU read lock whilst 1342 * the edit is taking place. 1343 */ 1344 void assoc_array_apply_edit(struct assoc_array_edit *edit) 1345 { 1346 struct assoc_array_shortcut *shortcut; 1347 struct assoc_array_node *node; 1348 struct assoc_array_ptr *ptr; 1349 int i; 1350 1351 pr_devel("-->%s()\n", __func__); 1352 1353 smp_wmb(); 1354 if (edit->leaf_p) 1355 *edit->leaf_p = edit->leaf; 1356 1357 smp_wmb(); 1358 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) 1359 if (edit->set_parent_slot[i].p) 1360 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; 1361 1362 smp_wmb(); 1363 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) 1364 if (edit->set_backpointers[i]) 1365 *edit->set_backpointers[i] = edit->set_backpointers_to; 1366 1367 smp_wmb(); 1368 for (i = 0; i < ARRAY_SIZE(edit->set); i++) 1369 if (edit->set[i].ptr) 1370 *edit->set[i].ptr = edit->set[i].to; 1371 1372 if (edit->array->root == NULL) { 1373 edit->array->nr_leaves_on_tree = 0; 1374 } else if (edit->adjust_count_on) { 1375 node = edit->adjust_count_on; 1376 for (;;) { 1377 node->nr_leaves_on_branch += edit->adjust_count_by; 1378 1379 ptr = node->back_pointer; 1380 if (!ptr) 1381 break; 1382 if (assoc_array_ptr_is_shortcut(ptr)) { 1383 shortcut = assoc_array_ptr_to_shortcut(ptr); 1384 ptr = shortcut->back_pointer; 1385 if (!ptr) 1386 break; 1387 } 1388 BUG_ON(!assoc_array_ptr_is_node(ptr)); 1389 node = assoc_array_ptr_to_node(ptr); 1390 } 1391 1392 edit->array->nr_leaves_on_tree += edit->adjust_count_by; 1393 } 1394 1395 call_rcu(&edit->rcu, assoc_array_rcu_cleanup); 1396 } 1397 1398 /** 1399 * assoc_array_cancel_edit - Discard an edit script. 1400 * @edit: The script to discard. 1401 * 1402 * Free an edit script and all the preallocated data it holds without making 1403 * any changes to the associative array it was intended for. 1404 * 1405 * NOTE! In the case of an insertion script, this does _not_ release the leaf 1406 * that was to be inserted. That is left to the caller. 1407 */ 1408 void assoc_array_cancel_edit(struct assoc_array_edit *edit) 1409 { 1410 struct assoc_array_ptr *ptr; 1411 int i; 1412 1413 pr_devel("-->%s()\n", __func__); 1414 1415 /* Clean up after an out of memory error */ 1416 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { 1417 ptr = edit->new_meta[i]; 1418 if (ptr) { 1419 if (assoc_array_ptr_is_node(ptr)) 1420 kfree(assoc_array_ptr_to_node(ptr)); 1421 else 1422 kfree(assoc_array_ptr_to_shortcut(ptr)); 1423 } 1424 } 1425 kfree(edit); 1426 } 1427 1428 /** 1429 * assoc_array_gc - Garbage collect an associative array. 1430 * @array: The array to clean. 1431 * @ops: The operations to use. 1432 * @iterator: A callback function to pass judgement on each object. 1433 * @iterator_data: Private data for the callback function. 1434 * 1435 * Collect garbage from an associative array and pack down the internal tree to 1436 * save memory. 1437 * 1438 * The iterator function is asked to pass judgement upon each object in the 1439 * array. If it returns false, the object is discard and if it returns true, 1440 * the object is kept. If it returns true, it must increment the object's 1441 * usage count (or whatever it needs to do to retain it) before returning. 1442 * 1443 * This function returns 0 if successful or -ENOMEM if out of memory. In the 1444 * latter case, the array is not changed. 1445 * 1446 * The caller should lock against other modifications and must continue to hold 1447 * the lock until assoc_array_apply_edit() has been called. 1448 * 1449 * Accesses to the tree may take place concurrently with this function, 1450 * provided they hold the RCU read lock. 1451 */ 1452 int assoc_array_gc(struct assoc_array *array, 1453 const struct assoc_array_ops *ops, 1454 bool (*iterator)(void *object, void *iterator_data), 1455 void *iterator_data) 1456 { 1457 struct assoc_array_shortcut *shortcut, *new_s; 1458 struct assoc_array_node *node, *new_n; 1459 struct assoc_array_edit *edit; 1460 struct assoc_array_ptr *cursor, *ptr; 1461 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; 1462 unsigned long nr_leaves_on_tree; 1463 bool retained; 1464 int keylen, slot, nr_free, next_slot, i; 1465 1466 pr_devel("-->%s()\n", __func__); 1467 1468 if (!array->root) 1469 return 0; 1470 1471 edit = kzalloc_obj(struct assoc_array_edit, GFP_KERNEL); 1472 if (!edit) 1473 return -ENOMEM; 1474 edit->array = array; 1475 edit->ops = ops; 1476 edit->ops_for_excised_subtree = ops; 1477 edit->set[0].ptr = &array->root; 1478 edit->excised_subtree = array->root; 1479 1480 new_root = new_parent = NULL; 1481 new_ptr_pp = &new_root; 1482 cursor = array->root; 1483 1484 descend: 1485 /* If this point is a shortcut, then we need to duplicate it and 1486 * advance the target cursor. 1487 */ 1488 if (assoc_array_ptr_is_shortcut(cursor)) { 1489 shortcut = assoc_array_ptr_to_shortcut(cursor); 1490 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 1491 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 1492 new_s = kmalloc_flex(*new_s, index_key, keylen, GFP_KERNEL); 1493 if (!new_s) 1494 goto enomem; 1495 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); 1496 memcpy(new_s, shortcut, struct_size(new_s, index_key, keylen)); 1497 new_s->back_pointer = new_parent; 1498 new_s->parent_slot = shortcut->parent_slot; 1499 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); 1500 new_ptr_pp = &new_s->next_node; 1501 cursor = shortcut->next_node; 1502 } 1503 1504 /* Duplicate the node at this position */ 1505 node = assoc_array_ptr_to_node(cursor); 1506 new_n = kzalloc_obj(struct assoc_array_node, GFP_KERNEL); 1507 if (!new_n) 1508 goto enomem; 1509 pr_devel("dup node %p -> %p\n", node, new_n); 1510 new_n->back_pointer = new_parent; 1511 new_n->parent_slot = node->parent_slot; 1512 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); 1513 new_ptr_pp = NULL; 1514 slot = 0; 1515 1516 continue_node: 1517 /* Filter across any leaves and gc any subtrees */ 1518 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1519 ptr = node->slots[slot]; 1520 if (!ptr) 1521 continue; 1522 1523 if (assoc_array_ptr_is_leaf(ptr)) { 1524 if (iterator(assoc_array_ptr_to_leaf(ptr), 1525 iterator_data)) 1526 /* The iterator will have done any reference 1527 * counting on the object for us. 1528 */ 1529 new_n->slots[slot] = ptr; 1530 continue; 1531 } 1532 1533 new_ptr_pp = &new_n->slots[slot]; 1534 cursor = ptr; 1535 goto descend; 1536 } 1537 1538 retry_compress: 1539 pr_devel("-- compress node %p --\n", new_n); 1540 1541 /* Count up the number of empty slots in this node and work out the 1542 * subtree leaf count. 1543 */ 1544 new_n->nr_leaves_on_branch = 0; 1545 nr_free = 0; 1546 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1547 ptr = new_n->slots[slot]; 1548 if (!ptr) 1549 nr_free++; 1550 else if (assoc_array_ptr_is_leaf(ptr)) 1551 new_n->nr_leaves_on_branch++; 1552 } 1553 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); 1554 1555 /* See what we can fold in */ 1556 retained = false; 1557 next_slot = 0; 1558 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1559 struct assoc_array_shortcut *s; 1560 struct assoc_array_node *child; 1561 1562 ptr = new_n->slots[slot]; 1563 if (!ptr || assoc_array_ptr_is_leaf(ptr)) 1564 continue; 1565 1566 s = NULL; 1567 if (assoc_array_ptr_is_shortcut(ptr)) { 1568 s = assoc_array_ptr_to_shortcut(ptr); 1569 ptr = s->next_node; 1570 } 1571 1572 child = assoc_array_ptr_to_node(ptr); 1573 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; 1574 1575 if (child->nr_leaves_on_branch <= nr_free + 1) { 1576 /* Fold the child node into this one */ 1577 pr_devel("[%d] fold node %lu/%d [nx %d]\n", 1578 slot, child->nr_leaves_on_branch, nr_free + 1, 1579 next_slot); 1580 1581 /* We would already have reaped an intervening shortcut 1582 * on the way back up the tree. 1583 */ 1584 BUG_ON(s); 1585 1586 new_n->slots[slot] = NULL; 1587 nr_free++; 1588 if (slot < next_slot) 1589 next_slot = slot; 1590 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1591 struct assoc_array_ptr *p = child->slots[i]; 1592 if (!p) 1593 continue; 1594 BUG_ON(assoc_array_ptr_is_meta(p)); 1595 while (new_n->slots[next_slot]) 1596 next_slot++; 1597 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); 1598 new_n->slots[next_slot++] = p; 1599 nr_free--; 1600 } 1601 kfree(child); 1602 } else { 1603 pr_devel("[%d] retain node %lu/%d [nx %d]\n", 1604 slot, child->nr_leaves_on_branch, nr_free + 1, 1605 next_slot); 1606 retained = true; 1607 } 1608 } 1609 1610 if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1611 pr_devel("internal nodes remain despite enough space, retrying\n"); 1612 goto retry_compress; 1613 } 1614 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); 1615 1616 nr_leaves_on_tree = new_n->nr_leaves_on_branch; 1617 1618 /* Excise this node if it is singly occupied by a shortcut */ 1619 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { 1620 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) 1621 if ((ptr = new_n->slots[slot])) 1622 break; 1623 1624 if (assoc_array_ptr_is_meta(ptr) && 1625 assoc_array_ptr_is_shortcut(ptr)) { 1626 pr_devel("excise node %p with 1 shortcut\n", new_n); 1627 new_s = assoc_array_ptr_to_shortcut(ptr); 1628 new_parent = new_n->back_pointer; 1629 slot = new_n->parent_slot; 1630 kfree(new_n); 1631 if (!new_parent) { 1632 new_s->back_pointer = NULL; 1633 new_s->parent_slot = 0; 1634 new_root = ptr; 1635 goto gc_complete; 1636 } 1637 1638 if (assoc_array_ptr_is_shortcut(new_parent)) { 1639 /* We can discard any preceding shortcut also */ 1640 struct assoc_array_shortcut *s = 1641 assoc_array_ptr_to_shortcut(new_parent); 1642 1643 pr_devel("excise preceding shortcut\n"); 1644 1645 new_parent = new_s->back_pointer = s->back_pointer; 1646 slot = new_s->parent_slot = s->parent_slot; 1647 kfree(s); 1648 if (!new_parent) { 1649 new_s->back_pointer = NULL; 1650 new_s->parent_slot = 0; 1651 new_root = ptr; 1652 goto gc_complete; 1653 } 1654 } 1655 1656 new_s->back_pointer = new_parent; 1657 new_s->parent_slot = slot; 1658 new_n = assoc_array_ptr_to_node(new_parent); 1659 new_n->slots[slot] = ptr; 1660 goto ascend_old_tree; 1661 } 1662 } 1663 1664 /* Excise any shortcuts we might encounter that point to nodes that 1665 * only contain leaves. 1666 */ 1667 ptr = new_n->back_pointer; 1668 if (!ptr) 1669 goto gc_complete; 1670 1671 if (assoc_array_ptr_is_shortcut(ptr)) { 1672 new_s = assoc_array_ptr_to_shortcut(ptr); 1673 new_parent = new_s->back_pointer; 1674 slot = new_s->parent_slot; 1675 1676 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1677 struct assoc_array_node *n; 1678 1679 pr_devel("excise shortcut\n"); 1680 new_n->back_pointer = new_parent; 1681 new_n->parent_slot = slot; 1682 kfree(new_s); 1683 if (!new_parent) { 1684 new_root = assoc_array_node_to_ptr(new_n); 1685 goto gc_complete; 1686 } 1687 1688 n = assoc_array_ptr_to_node(new_parent); 1689 n->slots[slot] = assoc_array_node_to_ptr(new_n); 1690 } 1691 } else { 1692 new_parent = ptr; 1693 } 1694 new_n = assoc_array_ptr_to_node(new_parent); 1695 1696 ascend_old_tree: 1697 ptr = node->back_pointer; 1698 if (assoc_array_ptr_is_shortcut(ptr)) { 1699 shortcut = assoc_array_ptr_to_shortcut(ptr); 1700 slot = shortcut->parent_slot; 1701 cursor = shortcut->back_pointer; 1702 if (!cursor) 1703 goto gc_complete; 1704 } else { 1705 slot = node->parent_slot; 1706 cursor = ptr; 1707 } 1708 BUG_ON(!cursor); 1709 node = assoc_array_ptr_to_node(cursor); 1710 slot++; 1711 goto continue_node; 1712 1713 gc_complete: 1714 edit->set[0].to = new_root; 1715 assoc_array_apply_edit(edit); 1716 array->nr_leaves_on_tree = nr_leaves_on_tree; 1717 return 0; 1718 1719 enomem: 1720 pr_devel("enomem\n"); 1721 assoc_array_destroy_subtree(new_root, edit->ops); 1722 kfree(edit); 1723 return -ENOMEM; 1724 } 1725