/* Generic associative array implementation.
 *
 * See Documentation/assoc_array.txt for information.
 *
 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
//#define DEBUG
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/assoc_array_priv.h>

/*
 * Iterate over an associative array.  The caller must hold the RCU read lock
 * or better.
 */
static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
				       const struct assoc_array_ptr *stop,
				       int (*iterator)(const void *leaf,
						       void *iterator_data),
				       void *iterator_data)
{
	const struct assoc_array_shortcut *shortcut;
	const struct assoc_array_node *node;
	const struct assoc_array_ptr *cursor, *ptr, *parent;
	unsigned long has_meta;
	int slot, ret;

	cursor = root;

begin_node:
	if (assoc_array_ptr_is_shortcut(cursor)) {
		/* Descend through a shortcut */
		shortcut = assoc_array_ptr_to_shortcut(cursor);
		smp_read_barrier_depends();
		cursor = ACCESS_ONCE(shortcut->next_node);
	}

	node = assoc_array_ptr_to_node(cursor);
	smp_read_barrier_depends();
	slot = 0;

	/* We perform two passes of each node.
	 *
	 * The first pass does all the leaves in this node.  This means we
	 * don't miss any leaves if the node is split up by insertion whilst
	 * we're iterating over the branches rooted here (we may, however, see
	 * some leaves twice).
	 */
	has_meta = 0;
	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = ACCESS_ONCE(node->slots[slot]);
		has_meta |= (unsigned long)ptr;
		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
			/* We need a barrier between the read of the pointer
			 * and dereferencing the pointer - but only if we are
			 * actually going to dereference it.
			 */
			smp_read_barrier_depends();

			/* Invoke the callback */
			ret = iterator(assoc_array_ptr_to_leaf(ptr),
				       iterator_data);
			if (ret)
				return ret;
		}
	}

	/* The second pass attends to all the metadata pointers.  If we follow
	 * one of these we may find that we don't come back here, but rather go
	 * back to a replacement node with the leaves in a different layout.
	 *
	 * We are guaranteed to make progress, however, as the slot number for
	 * a particular portion of the key space cannot change - and we
	 * continue at the back pointer + 1.
	 */
	if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
		goto finished_node;
	slot = 0;

continue_node:
	node = assoc_array_ptr_to_node(cursor);
	smp_read_barrier_depends();

	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = ACCESS_ONCE(node->slots[slot]);
		if (assoc_array_ptr_is_meta(ptr)) {
			cursor = ptr;
			goto begin_node;
		}
	}

finished_node:
	/* Move up to the parent (may need to skip back over a shortcut) */
	parent = ACCESS_ONCE(node->back_pointer);
	slot = node->parent_slot;
	if (parent == stop)
		return 0;

	if (assoc_array_ptr_is_shortcut(parent)) {
		shortcut = assoc_array_ptr_to_shortcut(parent);
		smp_read_barrier_depends();
		cursor = parent;
		parent = ACCESS_ONCE(shortcut->back_pointer);
		slot = shortcut->parent_slot;
		if (parent == stop)
			return 0;
	}

	/* Ascend to next slot in parent node */
	cursor = parent;
	slot++;
	goto continue_node;
}

/**
 * assoc_array_iterate - Pass all objects in the array to a callback
 * @array: The array to iterate over.
 * @iterator: The callback function.
 * @iterator_data: Private data for the callback function.
 *
 * Iterate over all the objects in an associative array.  Each one will be
 * presented to the iterator function.
 *
 * If the array is being modified concurrently with the iteration then it is
 * possible that some objects in the array will be passed to the iterator
 * callback more than once - though every object should be passed at least
 * once.  If this is undesirable then the caller must lock against modification
 * for the duration of this function.
 *
 * The function will return 0 if no objects were in the array or else it will
 * return the result of the last iterator function called.  Iteration stops
 * immediately if any call to the iteration function results in a non-zero
 * return.
 *
 * The caller should hold the RCU read lock or better if concurrent
 * modification is possible.
 */
int assoc_array_iterate(const struct assoc_array *array,
			int (*iterator)(const void *object,
					void *iterator_data),
			void *iterator_data)
{
	struct assoc_array_ptr *root = ACCESS_ONCE(array->root);

	if (!root)
		return 0;
	return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
}
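
/* Illustrative sketch (editor's addition, not part of the original file): a
 * minimal iterator callback of the kind that might be passed to
 * assoc_array_iterate().  The callback name and the use of a counter as
 * iterator_data are assumptions made purely for the example.
 *
 *	static int example_count_object(const void *object, void *iterator_data)
 *	{
 *		size_t *count = iterator_data;
 *
 *		(*count)++;
 *		return 0;	// a non-zero return would stop the iteration
 *	}
 *
 *	size_t count = 0;
 *
 *	rcu_read_lock();
 *	assoc_array_iterate(&my_array, example_count_object, &count);
 *	rcu_read_unlock();
 */
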
enum assoc_array_walk_status {
	assoc_array_walk_tree_empty,
	assoc_array_walk_found_terminal_node,
	assoc_array_walk_found_wrong_shortcut,
};

struct assoc_array_walk_result {
	struct {
		struct assoc_array_node *node;	/* Node in which leaf might be found */
		int		level;
		int		slot;
	} terminal_node;
	struct {
		struct assoc_array_shortcut *shortcut;
		int		level;
		int		sc_level;
		unsigned long	sc_segments;
		unsigned long	dissimilarity;
	} wrong_shortcut;
};

/*
 * Navigate through the internal tree looking for the closest node to the key.
 */
static enum assoc_array_walk_status
assoc_array_walk(const struct assoc_array *array,
		 const struct assoc_array_ops *ops,
		 const void *index_key,
		 struct assoc_array_walk_result *result)
{
	struct assoc_array_shortcut *shortcut;
	struct assoc_array_node *node;
	struct assoc_array_ptr *cursor, *ptr;
	unsigned long sc_segments, dissimilarity;
	unsigned long segments;
	int level, sc_level, next_sc_level;
	int slot;

	pr_devel("-->%s()\n", __func__);

	cursor = ACCESS_ONCE(array->root);
	if (!cursor)
		return assoc_array_walk_tree_empty;

	level = 0;

	/* Use segments from the key for the new leaf to navigate through the
	 * internal tree, skipping through nodes and shortcuts that are on
	 * route to the destination.  Eventually we'll come to a slot that is
	 * either empty or contains a leaf at which point we've found a node in
	 * which the leaf we're looking for might be found or into which it
	 * should be inserted.
	 */
jumped:
	segments = ops->get_key_chunk(index_key, level);
	pr_devel("segments[%d]: %lx\n", level, segments);

	if (assoc_array_ptr_is_shortcut(cursor))
		goto follow_shortcut;

consider_node:
	node = assoc_array_ptr_to_node(cursor);
	smp_read_barrier_depends();

	slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
	slot &= ASSOC_ARRAY_FAN_MASK;
	ptr = ACCESS_ONCE(node->slots[slot]);

	pr_devel("consider slot %x [ix=%d type=%lu]\n",
		 slot, level, (unsigned long)ptr & 3);

	if (!assoc_array_ptr_is_meta(ptr)) {
		/* The node doesn't have a node/shortcut pointer in the slot
		 * corresponding to the index key that we have to follow.
		 */
		result->terminal_node.node = node;
		result->terminal_node.level = level;
		result->terminal_node.slot = slot;
		pr_devel("<--%s() = terminal_node\n", __func__);
		return assoc_array_walk_found_terminal_node;
	}

	if (assoc_array_ptr_is_node(ptr)) {
		/* There is a pointer to a node in the slot corresponding to
		 * this index key segment, so we need to follow it.
		 */
		cursor = ptr;
		level += ASSOC_ARRAY_LEVEL_STEP;
		if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
			goto consider_node;
		goto jumped;
	}

	/* There is a shortcut in the slot corresponding to the index key
	 * segment.  We follow the shortcut if its partial index key matches
	 * this leaf's.  Otherwise we need to split the shortcut.
	 */
	cursor = ptr;
follow_shortcut:
	shortcut = assoc_array_ptr_to_shortcut(cursor);
	smp_read_barrier_depends();
	pr_devel("shortcut to %d\n", shortcut->skip_to_level);
	sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
	BUG_ON(sc_level > shortcut->skip_to_level);

	do {
		/* Check the leaf against the shortcut's index key a word at a
		 * time, trimming the final word (the shortcut stores the index
		 * key completely from the root to the shortcut's target).
		 */
		if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
			segments = ops->get_key_chunk(index_key, sc_level);

		sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
		dissimilarity = segments ^ sc_segments;

		if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
			/* Trim segments that are beyond the shortcut */
			int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
			dissimilarity &= ~(ULONG_MAX << shift);
			next_sc_level = shortcut->skip_to_level;
		} else {
			next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
			next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
		}

		if (dissimilarity != 0) {
			/* This shortcut points elsewhere */
			result->wrong_shortcut.shortcut = shortcut;
			result->wrong_shortcut.level = level;
			result->wrong_shortcut.sc_level = sc_level;
			result->wrong_shortcut.sc_segments = sc_segments;
			result->wrong_shortcut.dissimilarity = dissimilarity;
			return assoc_array_walk_found_wrong_shortcut;
		}

		sc_level = next_sc_level;
	} while (sc_level < shortcut->skip_to_level);

	/* The shortcut matches the leaf's index to this point. */
	cursor = ACCESS_ONCE(shortcut->next_node);
	if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
		level = sc_level;
		goto jumped;
	} else {
		level = sc_level;
		goto consider_node;
	}
}

/**
 * assoc_array_find - Find an object by index key
 * @array: The associative array to search.
 * @ops: The operations to use.
 * @index_key: The key to the object.
 *
 * Find an object in an associative array by walking through the internal tree
 * to the node that should contain the object and then searching the leaves
 * there.  NULL is returned if the requested object was not found in the array.
 *
 * The caller must hold the RCU read lock or better.
 */
void *assoc_array_find(const struct assoc_array *array,
		       const struct assoc_array_ops *ops,
		       const void *index_key)
{
	struct assoc_array_walk_result result;
	const struct assoc_array_node *node;
	const struct assoc_array_ptr *ptr;
	const void *leaf;
	int slot;

	if (assoc_array_walk(array, ops, index_key, &result) !=
	    assoc_array_walk_found_terminal_node)
		return NULL;

	node = result.terminal_node.node;
	smp_read_barrier_depends();

	/* If the target key is available to us, it has to be pointed to by
	 * the terminal node.
	 */
	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = ACCESS_ONCE(node->slots[slot]);
		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
			/* We need a barrier between the read of the pointer
			 * and dereferencing the pointer - but only if we are
			 * actually going to dereference it.
			 */
			leaf = assoc_array_ptr_to_leaf(ptr);
			smp_read_barrier_depends();
			if (ops->compare_object(leaf, index_key))
				return (void *)leaf;
		}
	}

	return NULL;
}
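
/* Illustrative sketch (editor's addition): a lookup under the RCU read lock.
 * "my_array", "my_ops", "my_key" and the refcount helper are hypothetical;
 * the ops table is assumed to supply the get_key_chunk(),
 * get_object_key_chunk(), compare_object(), diff_objects() and free_object()
 * methods that this file invokes.  If the object is to be used after
 * rcu_read_unlock(), the caller must take its own reference first.
 *
 *	struct my_object *obj;
 *
 *	rcu_read_lock();
 *	obj = assoc_array_find(&my_array, &my_ops, &my_key);
 *	if (obj)
 *		my_object_get(obj);	// hypothetical refcount helper
 *	rcu_read_unlock();
 */
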
/*
 * Destructively iterate over an associative array.  The caller must prevent
 * other simultaneous accesses.
 */
static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
					const struct assoc_array_ops *ops)
{
	struct assoc_array_shortcut *shortcut;
	struct assoc_array_node *node;
	struct assoc_array_ptr *cursor, *parent = NULL;
	int slot = -1;

	pr_devel("-->%s()\n", __func__);

	cursor = root;
	if (!cursor) {
		pr_devel("empty\n");
		return;
	}

move_to_meta:
	if (assoc_array_ptr_is_shortcut(cursor)) {
		/* Descend through a shortcut */
		pr_devel("[%d] shortcut\n", slot);
		BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
		shortcut = assoc_array_ptr_to_shortcut(cursor);
		BUG_ON(shortcut->back_pointer != parent);
		BUG_ON(slot != -1 && shortcut->parent_slot != slot);
		parent = cursor;
		cursor = shortcut->next_node;
		slot = -1;
		BUG_ON(!assoc_array_ptr_is_node(cursor));
	}

	pr_devel("[%d] node\n", slot);
	node = assoc_array_ptr_to_node(cursor);
	BUG_ON(node->back_pointer != parent);
	BUG_ON(slot != -1 && node->parent_slot != slot);
	slot = 0;

continue_node:
	pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		struct assoc_array_ptr *ptr = node->slots[slot];
		if (!ptr)
			continue;
		if (assoc_array_ptr_is_meta(ptr)) {
			parent = cursor;
			cursor = ptr;
			goto move_to_meta;
		}

		if (ops) {
			pr_devel("[%d] free leaf\n", slot);
			ops->free_object(assoc_array_ptr_to_leaf(ptr));
		}
	}

	parent = node->back_pointer;
	slot = node->parent_slot;
	pr_devel("free node\n");
	kfree(node);
	if (!parent)
		return; /* Done */

	/* Move back up to the parent (may need to free a shortcut on
	 * the way up) */
	if (assoc_array_ptr_is_shortcut(parent)) {
		shortcut = assoc_array_ptr_to_shortcut(parent);
		BUG_ON(shortcut->next_node != cursor);
		cursor = parent;
		parent = shortcut->back_pointer;
		slot = shortcut->parent_slot;
		pr_devel("free shortcut\n");
		kfree(shortcut);
		if (!parent)
			return;

		BUG_ON(!assoc_array_ptr_is_node(parent));
	}

	/* Ascend to next slot in parent node */
	pr_devel("ascend to %p[%d]\n", parent, slot);
	cursor = parent;
	node = assoc_array_ptr_to_node(cursor);
	slot++;
	goto continue_node;
}

/**
 * assoc_array_destroy - Destroy an associative array
 * @array: The array to destroy.
 * @ops: The operations to use.
 *
 * Discard all metadata and free all objects in an associative array.  The
 * array will be empty and ready to use again upon completion.  This function
 * cannot fail.
 *
 * The caller must prevent all other accesses whilst this takes place as no
 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
 * accesses to continue.  On the other hand, no memory allocation is required.
 */
void assoc_array_destroy(struct assoc_array *array,
			 const struct assoc_array_ops *ops)
{
	assoc_array_destroy_subtree(array->root, ops);
	array->root = NULL;
}
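
/* Illustrative sketch (editor's addition): tearing down an array that is no
 * longer reachable by any reader.  "my_array" and "my_ops" are hypothetical;
 * ops->free_object() is called on every remaining object.  Unlike the edit
 * functions, this requires that even RCU readers have been excluded, e.g.
 * because the array has already been unpublished and a grace period has
 * elapsed.
 *
 *	assoc_array_destroy(&my_array, &my_ops);
 */
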
/*
 * Handle insertion into an empty tree.
 */
static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
{
	struct assoc_array_node *new_n0;

	pr_devel("-->%s()\n", __func__);

	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
	if (!new_n0)
		return false;

	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
	edit->leaf_p = &new_n0->slots[0];
	edit->adjust_count_on = new_n0;
	edit->set[0].ptr = &edit->array->root;
	edit->set[0].to = assoc_array_node_to_ptr(new_n0);

	pr_devel("<--%s() = ok [no root]\n", __func__);
	return true;
}

/*
 * Handle insertion into a terminal node.
 */
static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
						  const struct assoc_array_ops *ops,
						  const void *index_key,
						  struct assoc_array_walk_result *result)
{
	struct assoc_array_shortcut *shortcut, *new_s0;
	struct assoc_array_node *node, *new_n0, *new_n1, *side;
	struct assoc_array_ptr *ptr;
	unsigned long dissimilarity, base_seg, blank;
	size_t keylen;
	bool have_meta;
	int level, diff;
	int slot, next_slot, free_slot, i, j;

	node = result->terminal_node.node;
	level = result->terminal_node.level;
	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;

	pr_devel("-->%s()\n", __func__);

	/* We arrived at a node which doesn't have an onward node or shortcut
	 * pointer that we have to follow.  This means that (a) the leaf we
	 * want must go here (either by insertion or replacement) or (b) we
	 * need to split this node and insert in one of the fragments.
	 */
	free_slot = -1;

	/* Firstly, we have to check the leaves in this node to see if there's
	 * a matching one we should replace in place.
	 */
	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
		ptr = node->slots[i];
		if (!ptr) {
			free_slot = i;
			continue;
		}
		if (assoc_array_ptr_is_leaf(ptr) &&
		    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
					index_key)) {
			pr_devel("replace in slot %d\n", i);
			edit->leaf_p = &node->slots[i];
			edit->dead_leaf = node->slots[i];
			pr_devel("<--%s() = ok [replace]\n", __func__);
			return true;
		}
	}

	/* If there is a free slot in this node then we can just insert the
	 * leaf here.
	 */
	if (free_slot >= 0) {
		pr_devel("insert in free slot %d\n", free_slot);
		edit->leaf_p = &node->slots[free_slot];
		edit->adjust_count_on = node;
		pr_devel("<--%s() = ok [insert]\n", __func__);
		return true;
	}

	/* The node has no spare slots - so we're either going to have to split
	 * it or insert another node before it.
	 *
	 * Whatever, we're going to need at least two new nodes - so allocate
	 * those now.  We may also need a new shortcut, but we deal with that
	 * when we need it.
	 */
	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
	if (!new_n0)
		return false;
	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
	new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
	if (!new_n1)
		return false;
	edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);

	/* We need to find out how similar the leaves are. */
	pr_devel("no spare slots\n");
	have_meta = false;
	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
		ptr = node->slots[i];
		if (assoc_array_ptr_is_meta(ptr)) {
			edit->segment_cache[i] = 0xff;
			have_meta = true;
			continue;
		}
		base_seg = ops->get_object_key_chunk(
			assoc_array_ptr_to_leaf(ptr), level);
		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
	}

	if (have_meta) {
		pr_devel("have meta\n");
		goto split_node;
	}

	/* The node contains only leaves */
	dissimilarity = 0;
	base_seg = edit->segment_cache[0];
	for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
		dissimilarity |= edit->segment_cache[i] ^ base_seg;

	pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);

	if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
		/* The old leaves all cluster in the same slot.  We will need
		 * to insert a shortcut if the new node wants to cluster with them.
		 */
		if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
			goto all_leaves_cluster_together;

		/* Otherwise we can just insert a new node ahead of the old
		 * one.
		 */
		goto present_leaves_cluster_but_not_new_leaf;
	}

split_node:
	pr_devel("split node\n");

	/* We need to split the current node; we know that the node doesn't
	 * simply contain a full set of leaves that cluster together (it
	 * contains meta pointers and/or non-clustering leaves).
	 *
	 * We need to expel at least two leaves out of a set consisting of the
	 * leaves in the node and the new leaf.
	 *
	 * We need a new node (n0) to replace the current one and a new node to
	 * take the expelled nodes (n1).
	 */
	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
	new_n0->back_pointer = node->back_pointer;
	new_n0->parent_slot = node->parent_slot;
	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
	new_n1->parent_slot = -1; /* Need to calculate this */

do_split_node:
	pr_devel("do_split_node\n");

	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
	new_n1->nr_leaves_on_branch = 0;

	/* Begin by finding two matching leaves.  There have to be at least two
	 * that match - even if there are meta pointers - because any leaf that
	 * would match a slot with a meta pointer in it must be somewhere
	 * behind that meta pointer and cannot be here.  Further, given N
	 * remaining leaf slots, we now have N+1 leaves to go in them.
	 */
	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
		slot = edit->segment_cache[i];
		if (slot != 0xff)
			for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
				if (edit->segment_cache[j] == slot)
					goto found_slot_for_multiple_occupancy;
	}
found_slot_for_multiple_occupancy:
	pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
	BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
	BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
	BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);

	new_n1->parent_slot = slot;

	/* Metadata pointers cannot change slot */
	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
		if (assoc_array_ptr_is_meta(node->slots[i]))
			new_n0->slots[i] = node->slots[i];
		else
			new_n0->slots[i] = NULL;
	BUG_ON(new_n0->slots[slot] != NULL);
	new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);

	/* Filter the leaf pointers between the new nodes */
	free_slot = -1;
	next_slot = 0;
	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
		if (assoc_array_ptr_is_meta(node->slots[i]))
			continue;
		if (edit->segment_cache[i] == slot) {
			new_n1->slots[next_slot++] = node->slots[i];
			new_n1->nr_leaves_on_branch++;
		} else {
			do {
				free_slot++;
			} while (new_n0->slots[free_slot] != NULL);
			new_n0->slots[free_slot] = node->slots[i];
		}
	}

	pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);

	if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
		do {
			free_slot++;
		} while (new_n0->slots[free_slot] != NULL);
		edit->leaf_p = &new_n0->slots[free_slot];
		edit->adjust_count_on = new_n0;
	} else {
		edit->leaf_p = &new_n1->slots[next_slot++];
		edit->adjust_count_on = new_n1;
	}

	BUG_ON(next_slot <= 1);

	edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
		if (edit->segment_cache[i] == 0xff) {
			ptr = node->slots[i];
			BUG_ON(assoc_array_ptr_is_leaf(ptr));
			if (assoc_array_ptr_is_node(ptr)) {
				side = assoc_array_ptr_to_node(ptr);
				edit->set_backpointers[i] = &side->back_pointer;
			} else {
				shortcut = assoc_array_ptr_to_shortcut(ptr);
				edit->set_backpointers[i] = &shortcut->back_pointer;
			}
		}
	}

	ptr = node->back_pointer;
	if (!ptr)
		edit->set[0].ptr = &edit->array->root;
	else if (assoc_array_ptr_is_node(ptr))
		edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
	else
		edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
	pr_devel("<--%s() = ok [split node]\n", __func__);
	return true;

present_leaves_cluster_but_not_new_leaf:
	/* All the old leaves cluster in the same slot, but the new leaf wants
	 * to go into a different slot, so we create a new node to hold the new
	 * leaf and a pointer to a new node holding all the old leaves.
	 */
	pr_devel("present leaves cluster but not new leaf\n");

	new_n0->back_pointer = node->back_pointer;
	new_n0->parent_slot = node->parent_slot;
	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
	new_n1->parent_slot = edit->segment_cache[0];
	new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
	edit->adjust_count_on = new_n0;

	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
		new_n1->slots[i] = node->slots[i];

	new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n1);
	edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];

	edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
	pr_devel("<--%s() = ok [insert node before]\n", __func__);
	return true;

all_leaves_cluster_together:
	/* All the leaves, new and old, want to cluster together in this node
	 * in the same slot, so we have to replace this node with a shortcut to
	 * skip over the identical parts of the key and then place a pair of
	 * nodes, one inside the other, at the end of the shortcut and
	 * distribute the keys between them.
	 *
	 * Firstly we need to work out where the leaves start diverging as a
	 * bit position into their keys so that we know how big the shortcut
	 * needs to be.
	 *
	 * We only need to make a single pass of N of the N+1 leaves because if
	 * any keys differ between themselves at bit X then at least one of
	 * them must also differ with the base key at bit X or before.
	 */
	pr_devel("all leaves cluster together\n");
	diff = INT_MAX;
	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
		int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]),
					  index_key);
		if (x < diff) {
			BUG_ON(x < 0);
			diff = x;
		}
	}
	BUG_ON(diff == INT_MAX);
	BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);

	keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
	keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;

	new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
			 keylen * sizeof(unsigned long), GFP_KERNEL);
	if (!new_s0)
		return false;
	edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);

	edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
	new_s0->back_pointer = node->back_pointer;
	new_s0->parent_slot = node->parent_slot;
	new_s0->next_node = assoc_array_node_to_ptr(new_n0);
	new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
	new_n0->parent_slot = 0;
	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
	new_n1->parent_slot = -1; /* Need to calculate this */

	new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
	pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
	BUG_ON(level <= 0);

	for (i = 0; i < keylen; i++)
		new_s0->index_key[i] =
			ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);

	blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
	pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
	new_s0->index_key[keylen - 1] &= ~blank;

	/* This now reduces to a node splitting exercise for which we'll need
	 * to regenerate the disparity table.
	 */
	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
		ptr = node->slots[i];
		base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
						     level);
		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
	}

	base_seg = ops->get_key_chunk(index_key, level);
	base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
	goto do_split_node;
}

/*
 * Handle insertion into the middle of a shortcut.
 */
static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
					    const struct assoc_array_ops *ops,
					    struct assoc_array_walk_result *result)
{
	struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
	struct assoc_array_node *node, *new_n0, *side;
	unsigned long sc_segments, dissimilarity, blank;
	size_t keylen;
	int level, sc_level, diff;
	int sc_slot;

	shortcut = result->wrong_shortcut.shortcut;
	level = result->wrong_shortcut.level;
	sc_level = result->wrong_shortcut.sc_level;
	sc_segments = result->wrong_shortcut.sc_segments;
	dissimilarity = result->wrong_shortcut.dissimilarity;

	pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
		 __func__, level, dissimilarity, sc_level);

	/* We need to split a shortcut and insert a node between the two
	 * pieces.  Zero-length pieces will be dispensed with entirely.
	 *
	 * First of all, we need to find out in which level the first
	 * difference was.
	 */
	diff = __ffs(dissimilarity);
	diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
	diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
	pr_devel("diff=%d\n", diff);

	if (!shortcut->back_pointer) {
		edit->set[0].ptr = &edit->array->root;
	} else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
		node = assoc_array_ptr_to_node(shortcut->back_pointer);
		edit->set[0].ptr = &node->slots[shortcut->parent_slot];
	} else {
		BUG();
	}

	edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);

	/* Create a new node now since we're going to need it anyway */
	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
	if (!new_n0)
		return false;
	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
	edit->adjust_count_on = new_n0;

	/* Insert a new shortcut before the new node if this segment isn't of
	 * zero length - otherwise we just connect the new node directly to the
	 * parent.
	 */
	level += ASSOC_ARRAY_LEVEL_STEP;
	if (diff > level) {
		pr_devel("pre-shortcut %d...%d\n", level, diff);
		keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;

		new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
				 keylen * sizeof(unsigned long), GFP_KERNEL);
		if (!new_s0)
			return false;
		edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
		edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
		new_s0->back_pointer = shortcut->back_pointer;
		new_s0->parent_slot = shortcut->parent_slot;
		new_s0->next_node = assoc_array_node_to_ptr(new_n0);
		new_s0->skip_to_level = diff;

		new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
		new_n0->parent_slot = 0;

		memcpy(new_s0->index_key, shortcut->index_key,
		       keylen * sizeof(unsigned long));

		blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
		pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
		new_s0->index_key[keylen - 1] &= ~blank;
	} else {
		pr_devel("no pre-shortcut\n");
		edit->set[0].to = assoc_array_node_to_ptr(new_n0);
		new_n0->back_pointer = shortcut->back_pointer;
		new_n0->parent_slot = shortcut->parent_slot;
	}

	side = assoc_array_ptr_to_node(shortcut->next_node);
	new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;

	/* We need to know which slot in the new node is going to take a
	 * metadata pointer.
	 */
	sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
	sc_slot &= ASSOC_ARRAY_FAN_MASK;

	pr_devel("new slot %lx >> %d -> %d\n",
		 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);

	/* Determine whether we need to follow the new node with a replacement
	 * for the current shortcut.  We could in theory reuse the current
	 * shortcut if its parent slot number doesn't change - but that's a
	 * 1-in-16 chance so not worth expending the code upon.
	 */
	level = diff + ASSOC_ARRAY_LEVEL_STEP;
	if (level < shortcut->skip_to_level) {
		pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;

		new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
				 keylen * sizeof(unsigned long), GFP_KERNEL);
		if (!new_s1)
			return false;
		edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);

		new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
		new_s1->parent_slot = sc_slot;
		new_s1->next_node = shortcut->next_node;
		new_s1->skip_to_level = shortcut->skip_to_level;

		new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);

		memcpy(new_s1->index_key, shortcut->index_key,
		       keylen * sizeof(unsigned long));

		edit->set[1].ptr = &side->back_pointer;
		edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
	} else {
		pr_devel("no post-shortcut\n");

		/* We don't have to replace the pointed-to node as long as we
		 * use memory barriers to make sure the parent slot number is
		 * changed before the back pointer (the parent slot number is
		 * irrelevant to the old parent shortcut).
		 */
		new_n0->slots[sc_slot] = shortcut->next_node;
		edit->set_parent_slot[0].p = &side->parent_slot;
		edit->set_parent_slot[0].to = sc_slot;
		edit->set[1].ptr = &side->back_pointer;
		edit->set[1].to = assoc_array_node_to_ptr(new_n0);
	}

	/* Install the new leaf in a spare slot in the new node. */
	if (sc_slot == 0)
		edit->leaf_p = &new_n0->slots[1];
	else
		edit->leaf_p = &new_n0->slots[0];

	pr_devel("<--%s() = ok [split shortcut]\n", __func__);
	return true;
}

/**
 * assoc_array_insert - Script insertion of an object into an associative array
 * @array: The array to insert into.
 * @ops: The operations to use.
 * @index_key: The key to insert at.
 * @object: The object to insert.
 *
 * Precalculate and preallocate a script for the insertion or replacement of an
 * object in an associative array.  This results in an edit script that can
 * either be applied or cancelled.
 *
 * The function returns a pointer to an edit script or -ENOMEM.
 *
 * The caller should lock against other modifications and must continue to hold
 * the lock until assoc_array_apply_edit() has been called.
 *
 * Accesses to the tree may take place concurrently with this function,
 * provided they hold the RCU read lock.
 */
struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
					    const struct assoc_array_ops *ops,
					    const void *index_key,
					    void *object)
{
	struct assoc_array_walk_result result;
	struct assoc_array_edit *edit;

	pr_devel("-->%s()\n", __func__);

	/* The leaf pointer we're given must not have the bottom bit set as we
	 * use those for type-marking the pointer.  NULL pointers are also not
	 * allowed as they indicate an empty slot but we have to allow them
	 * here as they can be updated later.
	 */
	BUG_ON(assoc_array_ptr_is_meta(object));

	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
	if (!edit)
		return ERR_PTR(-ENOMEM);
	edit->array = array;
	edit->ops = ops;
	edit->leaf = assoc_array_leaf_to_ptr(object);
	edit->adjust_count_by = 1;

	switch (assoc_array_walk(array, ops, index_key, &result)) {
	case assoc_array_walk_tree_empty:
		/* Allocate a root node if there isn't one yet */
		if (!assoc_array_insert_in_empty_tree(edit))
			goto enomem;
		return edit;

	case assoc_array_walk_found_terminal_node:
		/* We found a node that doesn't have a node/shortcut pointer in
		 * the slot corresponding to the index key that we have to
		 * follow.
		 */
		if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
							   &result))
			goto enomem;
		return edit;

	case assoc_array_walk_found_wrong_shortcut:
		/* We found a shortcut that didn't match our key in a slot we
		 * needed to follow.
		 */
		if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
			goto enomem;
		return edit;
	}

enomem:
	/* Clean up after an out of memory error */
	pr_devel("enomem\n");
	assoc_array_cancel_edit(edit);
	return ERR_PTR(-ENOMEM);
}
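
/* Illustrative sketch (editor's addition): the insert/apply cycle from the
 * caller's side.  "my_array", "my_ops", "my_mutex", the key and the object
 * are hypothetical stand-ins.  The points being demonstrated are that the
 * returned script must be checked with IS_ERR(), and that the modification
 * lock is held from before assoc_array_insert() until assoc_array_apply_edit()
 * (or assoc_array_cancel_edit()) has run; readers only need rcu_read_lock().
 *
 *	struct assoc_array_edit *edit;
 *
 *	mutex_lock(&my_mutex);
 *	edit = assoc_array_insert(&my_array, &my_ops, &my_key, my_object);
 *	if (IS_ERR(edit)) {
 *		mutex_unlock(&my_mutex);
 *		return PTR_ERR(edit);
 *	}
 *	assoc_array_apply_edit(edit);
 *	mutex_unlock(&my_mutex);
 */
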
/**
 * assoc_array_insert_set_object - Set the new object pointer in an edit script
 * @edit: The edit script to modify.
 * @object: The object pointer to set.
 *
 * Change the object to be inserted in an edit script.  The old object is not
 * freed.  This must be done prior to applying the script.
 */
void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
{
	BUG_ON(!object);
	edit->leaf = assoc_array_leaf_to_ptr(object);
}

struct assoc_array_delete_collapse_context {
	struct assoc_array_node *node;
	const void *skip_leaf;
	int slot;
};

/*
 * Subtree collapse to node iterator.
 */
static int assoc_array_delete_collapse_iterator(const void *leaf,
						void *iterator_data)
{
	struct assoc_array_delete_collapse_context *collapse = iterator_data;

	if (leaf == collapse->skip_leaf)
		return 0;

	BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);

	collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
	return 0;
}

/**
 * assoc_array_delete - Script deletion of an object from an associative array
 * @array: The array to search.
 * @ops: The operations to use.
 * @index_key: The key to the object.
 *
 * Precalculate and preallocate a script for the deletion of an object from an
 * associative array.  This results in an edit script that can either be
 * applied or cancelled.
 *
 * The function returns a pointer to an edit script if the object was found,
 * NULL if the object was not found or -ENOMEM.
 *
 * The caller should lock against other modifications and must continue to hold
 * the lock until assoc_array_apply_edit() has been called.
 *
 * Accesses to the tree may take place concurrently with this function,
 * provided they hold the RCU read lock.
 */
struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
					    const struct assoc_array_ops *ops,
					    const void *index_key)
{
	struct assoc_array_delete_collapse_context collapse;
	struct assoc_array_walk_result result;
	struct assoc_array_node *node, *new_n0;
	struct assoc_array_edit *edit;
	struct assoc_array_ptr *ptr;
	bool has_meta;
	int slot, i;

	pr_devel("-->%s()\n", __func__);

	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
	if (!edit)
		return ERR_PTR(-ENOMEM);
	edit->array = array;
	edit->ops = ops;
	edit->adjust_count_by = -1;

	switch (assoc_array_walk(array, ops, index_key, &result)) {
	case assoc_array_walk_found_terminal_node:
		/* We found a node that should contain the leaf we've been
		 * asked to remove - *if* it's in the tree.
		 */
		pr_devel("terminal_node\n");
		node = result.terminal_node.node;

		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
			ptr = node->slots[slot];
			if (ptr &&
			    assoc_array_ptr_is_leaf(ptr) &&
			    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
						index_key))
				goto found_leaf;
		}
	case assoc_array_walk_tree_empty:
	case assoc_array_walk_found_wrong_shortcut:
	default:
		assoc_array_cancel_edit(edit);
		pr_devel("not found\n");
		return NULL;
	}

found_leaf:
	BUG_ON(array->nr_leaves_on_tree <= 0);

	/* In the simplest form of deletion we just clear the slot and release
	 * the leaf after a suitable interval.
	 */
	edit->dead_leaf = node->slots[slot];
	edit->set[0].ptr = &node->slots[slot];
	edit->set[0].to = NULL;
	edit->adjust_count_on = node;

	/* If that concludes erasure of the last leaf, then delete the entire
	 * internal array.
	 */
	if (array->nr_leaves_on_tree == 1) {
		edit->set[1].ptr = &array->root;
		edit->set[1].to = NULL;
		edit->adjust_count_on = NULL;
		edit->excised_subtree = array->root;
		pr_devel("all gone\n");
		return edit;
	}

	/* However, we'd also like to clear up some metadata blocks if we
	 * possibly can.
	 *
	 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
	 * leaves in it, then attempt to collapse it - and attempt to
	 * recursively collapse up the tree.
	 *
	 * We could also try and collapse in partially filled subtrees to take
	 * up space in this node.
	 */
	if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
		struct assoc_array_node *parent, *grandparent;
		struct assoc_array_ptr *ptr;

		/* First of all, we need to know if this node has metadata so
		 * that we don't try collapsing if all the leaves are already
		 * here.
		 */
		has_meta = false;
		for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
			ptr = node->slots[i];
			if (assoc_array_ptr_is_meta(ptr)) {
				has_meta = true;
				break;
			}
		}

		pr_devel("leaves: %ld [m=%d]\n",
			 node->nr_leaves_on_branch - 1, has_meta);

		/* Look further up the tree to see if we can collapse this node
		 * into a more proximal node too.
		 */
		parent = node;
	collapse_up:
		pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);

		ptr = parent->back_pointer;
		if (!ptr)
			goto do_collapse;
		if (assoc_array_ptr_is_shortcut(ptr)) {
			struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
			ptr = s->back_pointer;
			if (!ptr)
				goto do_collapse;
		}

		grandparent = assoc_array_ptr_to_node(ptr);
		if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
			parent = grandparent;
			goto collapse_up;
		}

	do_collapse:
		/* There's no point collapsing if the original node has no meta
		 * pointers to discard and if we didn't merge into one of that
		 * node's ancestry.
		 */
		if (has_meta || parent != node) {
			node = parent;

			/* Create a new node to collapse into */
			new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
			if (!new_n0)
				goto enomem;
			edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);

			new_n0->back_pointer = node->back_pointer;
			new_n0->parent_slot = node->parent_slot;
			new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
			edit->adjust_count_on = new_n0;

			collapse.node = new_n0;
			collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
			collapse.slot = 0;
			assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
						    node->back_pointer,
						    assoc_array_delete_collapse_iterator,
						    &collapse);
			pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
			BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);

			if (!node->back_pointer) {
				edit->set[1].ptr = &array->root;
			} else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
				BUG();
			} else if (assoc_array_ptr_is_node(node->back_pointer)) {
				struct assoc_array_node *p =
					assoc_array_ptr_to_node(node->back_pointer);
				edit->set[1].ptr = &p->slots[node->parent_slot];
			} else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
				struct assoc_array_shortcut *s =
					assoc_array_ptr_to_shortcut(node->back_pointer);
				edit->set[1].ptr = &s->next_node;
			}
			edit->set[1].to = assoc_array_node_to_ptr(new_n0);
			edit->excised_subtree = assoc_array_node_to_ptr(node);
		}
	}

	return edit;

enomem:
	/* Clean up after an out of memory error */
	pr_devel("enomem\n");
	assoc_array_cancel_edit(edit);
	return ERR_PTR(-ENOMEM);
}
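
/* Illustrative sketch (editor's addition): deleting by key.  "my_array",
 * "my_ops", "my_mutex" and the key are hypothetical.  Note the three-way
 * result: an edit script, NULL when the object is not present, or an
 * ERR_PTR() on allocation failure.
 *
 *	struct assoc_array_edit *edit;
 *
 *	mutex_lock(&my_mutex);
 *	edit = assoc_array_delete(&my_array, &my_ops, &my_key);
 *	if (IS_ERR(edit)) {
 *		mutex_unlock(&my_mutex);
 *		return PTR_ERR(edit);
 *	}
 *	if (edit)
 *		assoc_array_apply_edit(edit);	// object freed via ops->free_object()
 *						// after an RCU grace period
 *	mutex_unlock(&my_mutex);
 */
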
/**
 * assoc_array_clear - Script deletion of all objects from an associative array
 * @array: The array to clear.
 * @ops: The operations to use.
 *
 * Precalculate and preallocate a script for the deletion of all the objects
 * from an associative array.  This results in an edit script that can either
 * be applied or cancelled.
 *
 * The function returns a pointer to an edit script if there are objects to be
 * deleted, NULL if there are no objects in the array or -ENOMEM.
 *
 * The caller should lock against other modifications and must continue to hold
 * the lock until assoc_array_apply_edit() has been called.
 *
 * Accesses to the tree may take place concurrently with this function,
 * provided they hold the RCU read lock.
 */
struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
					   const struct assoc_array_ops *ops)
{
	struct assoc_array_edit *edit;

	pr_devel("-->%s()\n", __func__);

	if (!array->root)
		return NULL;

	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
	if (!edit)
		return ERR_PTR(-ENOMEM);
	edit->array = array;
	edit->ops = ops;
	edit->set[1].ptr = &array->root;
	edit->set[1].to = NULL;
	edit->excised_subtree = array->root;
	edit->ops_for_excised_subtree = ops;
	pr_devel("all gone\n");
	return edit;
}

/*
 * Handle the deferred destruction after an applied edit.
 */
static void assoc_array_rcu_cleanup(struct rcu_head *head)
{
	struct assoc_array_edit *edit =
		container_of(head, struct assoc_array_edit, rcu);
	int i;

	pr_devel("-->%s()\n", __func__);

	if (edit->dead_leaf)
		edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
	for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
		if (edit->excised_meta[i])
			kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));

	if (edit->excised_subtree) {
		BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
		if (assoc_array_ptr_is_node(edit->excised_subtree)) {
			struct assoc_array_node *n =
				assoc_array_ptr_to_node(edit->excised_subtree);
			n->back_pointer = NULL;
		} else {
			struct assoc_array_shortcut *s =
				assoc_array_ptr_to_shortcut(edit->excised_subtree);
			s->back_pointer = NULL;
		}
		assoc_array_destroy_subtree(edit->excised_subtree,
					    edit->ops_for_excised_subtree);
	}

	kfree(edit);
}

/**
 * assoc_array_apply_edit - Apply an edit script to an associative array
 * @edit: The script to apply.
 *
 * Apply an edit script to an associative array to effect an insertion,
 * deletion or clearance.  As the edit script includes preallocated memory,
 * this is guaranteed not to fail.
 *
 * The edit script, dead objects and dead metadata will be scheduled for
 * destruction after an RCU grace period to permit those doing read-only
 * accesses on the array to continue to do so under the RCU read lock whilst
 * the edit is taking place.
 */
void assoc_array_apply_edit(struct assoc_array_edit *edit)
{
	struct assoc_array_shortcut *shortcut;
	struct assoc_array_node *node;
	struct assoc_array_ptr *ptr;
	int i;

	pr_devel("-->%s()\n", __func__);

	smp_wmb();
	if (edit->leaf_p)
		*edit->leaf_p = edit->leaf;

	smp_wmb();
	for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
		if (edit->set_parent_slot[i].p)
			*edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;

	smp_wmb();
	for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
		if (edit->set_backpointers[i])
			*edit->set_backpointers[i] = edit->set_backpointers_to;

	smp_wmb();
	for (i = 0; i < ARRAY_SIZE(edit->set); i++)
		if (edit->set[i].ptr)
			*edit->set[i].ptr = edit->set[i].to;

	if (edit->array->root == NULL) {
		edit->array->nr_leaves_on_tree = 0;
	} else if (edit->adjust_count_on) {
		node = edit->adjust_count_on;
		for (;;) {
			node->nr_leaves_on_branch += edit->adjust_count_by;

			ptr = node->back_pointer;
			if (!ptr)
				break;
			if (assoc_array_ptr_is_shortcut(ptr)) {
				shortcut = assoc_array_ptr_to_shortcut(ptr);
				ptr = shortcut->back_pointer;
				if (!ptr)
					break;
			}
			BUG_ON(!assoc_array_ptr_is_node(ptr));
			node = assoc_array_ptr_to_node(ptr);
		}

		edit->array->nr_leaves_on_tree += edit->adjust_count_by;
	}

	call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
}
/**
 * assoc_array_cancel_edit - Discard an edit script.
 * @edit: The script to discard.
 *
 * Free an edit script and all the preallocated data it holds without making
 * any changes to the associative array it was intended for.
 *
 * NOTE!  In the case of an insertion script, this does _not_ release the leaf
 * that was to be inserted.  That is left to the caller.
 */
void assoc_array_cancel_edit(struct assoc_array_edit *edit)
{
	struct assoc_array_ptr *ptr;
	int i;

	pr_devel("-->%s()\n", __func__);

	/* Clean up after an out of memory error */
	for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
		ptr = edit->new_meta[i];
		if (ptr) {
			if (assoc_array_ptr_is_node(ptr))
				kfree(assoc_array_ptr_to_node(ptr));
			else
				kfree(assoc_array_ptr_to_shortcut(ptr));
		}
	}
	kfree(edit);
}
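
/* Illustrative sketch (editor's addition): abandoning an insertion.  Because
 * cancellation does not release the leaf, the caller disposes of its own
 * object; "my_free_object" and the other names are hypothetical.
 *
 *	edit = assoc_array_insert(&my_array, &my_ops, &my_key, my_object);
 *	if (!IS_ERR(edit) && !should_proceed) {
 *		assoc_array_cancel_edit(edit);	// the array is untouched
 *		my_free_object(my_object);	// the leaf is still ours to free
 *	}
 */
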
/**
 * assoc_array_gc - Garbage collect an associative array.
 * @array: The array to clean.
 * @ops: The operations to use.
 * @iterator: A callback function to pass judgement on each object.
 * @iterator_data: Private data for the callback function.
 *
 * Collect garbage from an associative array and pack down the internal tree to
 * save memory.
 *
 * The iterator function is asked to pass judgement upon each object in the
 * array.  If it returns false, the object is discarded and if it returns true,
 * the object is kept.  If it returns true, it must increment the object's
 * usage count (or whatever it needs to do to retain it) before returning.
 *
 * This function returns 0 if successful or -ENOMEM if out of memory.  In the
 * latter case, the array is not changed.
 *
 * The caller should lock against other modifications and must continue to hold
 * the lock until assoc_array_apply_edit() has been called.
 *
 * Accesses to the tree may take place concurrently with this function,
 * provided they hold the RCU read lock.
 */
int assoc_array_gc(struct assoc_array *array,
		   const struct assoc_array_ops *ops,
		   bool (*iterator)(void *object, void *iterator_data),
		   void *iterator_data)
{
	struct assoc_array_shortcut *shortcut, *new_s;
	struct assoc_array_node *node, *new_n;
	struct assoc_array_edit *edit;
	struct assoc_array_ptr *cursor, *ptr;
	struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
	unsigned long nr_leaves_on_tree;
	int keylen, slot, nr_free, next_slot, i;

	pr_devel("-->%s()\n", __func__);

	if (!array->root)
		return 0;

	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
	if (!edit)
		return -ENOMEM;
	edit->array = array;
	edit->ops = ops;
	edit->ops_for_excised_subtree = ops;
	edit->set[0].ptr = &array->root;
	edit->excised_subtree = array->root;

	new_root = new_parent = NULL;
	new_ptr_pp = &new_root;
	cursor = array->root;

descend:
	/* If this point is a shortcut, then we need to duplicate it and
	 * advance the target cursor.
	 */
	if (assoc_array_ptr_is_shortcut(cursor)) {
		shortcut = assoc_array_ptr_to_shortcut(cursor);
		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
		new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
				keylen * sizeof(unsigned long), GFP_KERNEL);
		if (!new_s)
			goto enomem;
		pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
		memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
					 keylen * sizeof(unsigned long)));
		new_s->back_pointer = new_parent;
		new_s->parent_slot = shortcut->parent_slot;
		*new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
		new_ptr_pp = &new_s->next_node;
		cursor = shortcut->next_node;
	}

	/* Duplicate the node at this position */
	node = assoc_array_ptr_to_node(cursor);
	new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
	if (!new_n)
		goto enomem;
	pr_devel("dup node %p -> %p\n", node, new_n);
	new_n->back_pointer = new_parent;
	new_n->parent_slot = node->parent_slot;
	*new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
	new_ptr_pp = NULL;
	slot = 0;

continue_node:
	/* Filter across any leaves and gc any subtrees */
	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = node->slots[slot];
		if (!ptr)
			continue;

		if (assoc_array_ptr_is_leaf(ptr)) {
			if (iterator(assoc_array_ptr_to_leaf(ptr),
				     iterator_data))
				/* The iterator will have done any reference
				 * counting on the object for us.
				 */
				new_n->slots[slot] = ptr;
			continue;
		}

		new_ptr_pp = &new_n->slots[slot];
		cursor = ptr;
		goto descend;
	}

	pr_devel("-- compress node %p --\n", new_n);

	/* Count up the number of empty slots in this node and work out the
	 * subtree leaf count.
	 */
	new_n->nr_leaves_on_branch = 0;
	nr_free = 0;
	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = new_n->slots[slot];
		if (!ptr)
			nr_free++;
		else if (assoc_array_ptr_is_leaf(ptr))
			new_n->nr_leaves_on_branch++;
	}
	pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);

	/* See what we can fold in */
	next_slot = 0;
	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		struct assoc_array_shortcut *s;
		struct assoc_array_node *child;

		ptr = new_n->slots[slot];
		if (!ptr || assoc_array_ptr_is_leaf(ptr))
			continue;

		s = NULL;
		if (assoc_array_ptr_is_shortcut(ptr)) {
			s = assoc_array_ptr_to_shortcut(ptr);
			ptr = s->next_node;
		}

		child = assoc_array_ptr_to_node(ptr);
		new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;

		if (child->nr_leaves_on_branch <= nr_free + 1) {
			/* Fold the child node into this one */
			pr_devel("[%d] fold node %lu/%d [nx %d]\n",
				 slot, child->nr_leaves_on_branch, nr_free + 1,
				 next_slot);

			/* We would already have reaped an intervening shortcut
			 * on the way back up the tree.
			 */
			BUG_ON(s);

			new_n->slots[slot] = NULL;
			nr_free++;
			if (slot < next_slot)
				next_slot = slot;
			for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
				struct assoc_array_ptr *p = child->slots[i];
				if (!p)
					continue;
				BUG_ON(assoc_array_ptr_is_meta(p));
				while (new_n->slots[next_slot])
					next_slot++;
				BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
				new_n->slots[next_slot++] = p;
				nr_free--;
			}
			kfree(child);
		} else {
			pr_devel("[%d] retain node %lu/%d [nx %d]\n",
				 slot, child->nr_leaves_on_branch, nr_free + 1,
				 next_slot);
		}
	}

	pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);

	nr_leaves_on_tree = new_n->nr_leaves_on_branch;

	/* Excise this node if it is singly occupied by a shortcut */
	if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
			if ((ptr = new_n->slots[slot]))
				break;

		if (assoc_array_ptr_is_meta(ptr) &&
		    assoc_array_ptr_is_shortcut(ptr)) {
			pr_devel("excise node %p with 1 shortcut\n", new_n);
			new_s = assoc_array_ptr_to_shortcut(ptr);
			new_parent = new_n->back_pointer;
			slot = new_n->parent_slot;
			kfree(new_n);
			if (!new_parent) {
				new_s->back_pointer = NULL;
				new_s->parent_slot = 0;
				new_root = ptr;
				goto gc_complete;
			}

			if (assoc_array_ptr_is_shortcut(new_parent)) {
				/* We can discard any preceding shortcut also */
				struct assoc_array_shortcut *s =
					assoc_array_ptr_to_shortcut(new_parent);

				pr_devel("excise preceding shortcut\n");

				new_parent = new_s->back_pointer = s->back_pointer;
				slot = new_s->parent_slot = s->parent_slot;
				kfree(s);
				if (!new_parent) {
					new_s->back_pointer = NULL;
					new_s->parent_slot = 0;
					new_root = ptr;
					goto gc_complete;
				}
			}

			new_s->back_pointer = new_parent;
			new_s->parent_slot = slot;
			new_n = assoc_array_ptr_to_node(new_parent);
			new_n->slots[slot] = ptr;
			goto ascend_old_tree;
		}
	}

	/* Excise any shortcuts we might encounter that point to nodes that
	 * only contain leaves.
	 */
	ptr = new_n->back_pointer;
	if (!ptr)
		goto gc_complete;

	if (assoc_array_ptr_is_shortcut(ptr)) {
		new_s = assoc_array_ptr_to_shortcut(ptr);
		new_parent = new_s->back_pointer;
		slot = new_s->parent_slot;

		if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
			struct assoc_array_node *n;

			pr_devel("excise shortcut\n");
			new_n->back_pointer = new_parent;
			new_n->parent_slot = slot;
			kfree(new_s);
			if (!new_parent) {
				new_root = assoc_array_node_to_ptr(new_n);
				goto gc_complete;
			}

			n = assoc_array_ptr_to_node(new_parent);
			n->slots[slot] = assoc_array_node_to_ptr(new_n);
		}
	} else {
		new_parent = ptr;
	}
	new_n = assoc_array_ptr_to_node(new_parent);

ascend_old_tree:
	ptr = node->back_pointer;
	if (assoc_array_ptr_is_shortcut(ptr)) {
		shortcut = assoc_array_ptr_to_shortcut(ptr);
		slot = shortcut->parent_slot;
		cursor = shortcut->back_pointer;
		if (!cursor)
			goto gc_complete;
	} else {
		slot = node->parent_slot;
		cursor = ptr;
	}
	BUG_ON(!cursor);
	node = assoc_array_ptr_to_node(cursor);
	slot++;
	goto continue_node;

gc_complete:
	edit->set[0].to = new_root;
	assoc_array_apply_edit(edit);
	array->nr_leaves_on_tree = nr_leaves_on_tree;
	return 0;

enomem:
	pr_devel("enomem\n");
	assoc_array_destroy_subtree(new_root, edit->ops);
	kfree(edit);
	return -ENOMEM;
}
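
/* Illustrative sketch (editor's addition): a garbage-collection callback of
 * the shape assoc_array_gc() expects.  The object type, its usage counter and
 * the "is dead" test are hypothetical.  Returning true both keeps the object
 * in the rebuilt tree and takes the reference that the new tree will hold;
 * returning false leaves the object only in the old tree, whose reference is
 * dropped via ops->free_object() when the excised subtree is destroyed after
 * the RCU grace period.
 *
 *	static bool example_gc_keep(void *object, void *iterator_data)
 *	{
 *		struct my_object *obj = object;
 *
 *		if (my_object_is_dead(obj))
 *			return false;		// discard from the new tree
 *		atomic_inc(&obj->usage);	// new tree takes its own ref
 *		return true;
 *	}
 *
 *	mutex_lock(&my_mutex);
 *	ret = assoc_array_gc(&my_array, &my_ops, example_gc_keep, NULL);
 *	mutex_unlock(&my_mutex);
 */
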