1 /* Generic associative array implementation. 2 * 3 * See Documentation/assoc_array.txt for information. 4 * 5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 6 * Written by David Howells (dhowells@redhat.com) 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public Licence 10 * as published by the Free Software Foundation; either version 11 * 2 of the Licence, or (at your option) any later version. 12 */ 13 //#define DEBUG 14 #include <linux/slab.h> 15 #include <linux/err.h> 16 #include <linux/assoc_array_priv.h> 17 18 /* 19 * Iterate over an associative array. The caller must hold the RCU read lock 20 * or better. 21 */ 22 static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, 23 const struct assoc_array_ptr *stop, 24 int (*iterator)(const void *leaf, 25 void *iterator_data), 26 void *iterator_data) 27 { 28 const struct assoc_array_shortcut *shortcut; 29 const struct assoc_array_node *node; 30 const struct assoc_array_ptr *cursor, *ptr, *parent; 31 unsigned long has_meta; 32 int slot, ret; 33 34 cursor = root; 35 36 begin_node: 37 if (assoc_array_ptr_is_shortcut(cursor)) { 38 /* Descend through a shortcut */ 39 shortcut = assoc_array_ptr_to_shortcut(cursor); 40 smp_read_barrier_depends(); 41 cursor = ACCESS_ONCE(shortcut->next_node); 42 } 43 44 node = assoc_array_ptr_to_node(cursor); 45 smp_read_barrier_depends(); 46 slot = 0; 47 48 /* We perform two passes of each node. 49 * 50 * The first pass does all the leaves in this node. This means we 51 * don't miss any leaves if the node is split up by insertion whilst 52 * we're iterating over the branches rooted here (we may, however, see 53 * some leaves twice). 54 */ 55 has_meta = 0; 56 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 57 ptr = ACCESS_ONCE(node->slots[slot]); 58 has_meta |= (unsigned long)ptr; 59 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 60 /* We need a barrier between the read of the pointer 61 * and dereferencing the pointer - but only if we are 62 * actually going to dereference it. 63 */ 64 smp_read_barrier_depends(); 65 66 /* Invoke the callback */ 67 ret = iterator(assoc_array_ptr_to_leaf(ptr), 68 iterator_data); 69 if (ret) 70 return ret; 71 } 72 } 73 74 /* The second pass attends to all the metadata pointers. If we follow 75 * one of these we may find that we don't come back here, but rather go 76 * back to a replacement node with the leaves in a different layout. 77 * 78 * We are guaranteed to make progress, however, as the slot number for 79 * a particular portion of the key space cannot change - and we 80 * continue at the back pointer + 1. 81 */ 82 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) 83 goto finished_node; 84 slot = 0; 85 86 continue_node: 87 node = assoc_array_ptr_to_node(cursor); 88 smp_read_barrier_depends(); 89 90 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 91 ptr = ACCESS_ONCE(node->slots[slot]); 92 if (assoc_array_ptr_is_meta(ptr)) { 93 cursor = ptr; 94 goto begin_node; 95 } 96 } 97 98 finished_node: 99 /* Move up to the parent (may need to skip back over a shortcut) */ 100 parent = ACCESS_ONCE(node->back_pointer); 101 slot = node->parent_slot; 102 if (parent == stop) 103 return 0; 104 105 if (assoc_array_ptr_is_shortcut(parent)) { 106 shortcut = assoc_array_ptr_to_shortcut(parent); 107 smp_read_barrier_depends(); 108 cursor = parent; 109 parent = ACCESS_ONCE(shortcut->back_pointer); 110 slot = shortcut->parent_slot; 111 if (parent == stop) 112 return 0; 113 } 114 115 /* Ascend to next slot in parent node */ 116 cursor = parent; 117 slot++; 118 goto continue_node; 119 } 120 121 /** 122 * assoc_array_iterate - Pass all objects in the array to a callback 123 * @array: The array to iterate over. 124 * @iterator: The callback function. 125 * @iterator_data: Private data for the callback function. 126 * 127 * Iterate over all the objects in an associative array. Each one will be 128 * presented to the iterator function. 129 * 130 * If the array is being modified concurrently with the iteration then it is 131 * possible that some objects in the array will be passed to the iterator 132 * callback more than once - though every object should be passed at least 133 * once. If this is undesirable then the caller must lock against modification 134 * for the duration of this function. 135 * 136 * The function will return 0 if no objects were in the array or else it will 137 * return the result of the last iterator function called. Iteration stops 138 * immediately if any call to the iteration function results in a non-zero 139 * return. 140 * 141 * The caller should hold the RCU read lock or better if concurrent 142 * modification is possible. 143 */ 144 int assoc_array_iterate(const struct assoc_array *array, 145 int (*iterator)(const void *object, 146 void *iterator_data), 147 void *iterator_data) 148 { 149 struct assoc_array_ptr *root = ACCESS_ONCE(array->root); 150 151 if (!root) 152 return 0; 153 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); 154 } 155 156 enum assoc_array_walk_status { 157 assoc_array_walk_tree_empty, 158 assoc_array_walk_found_terminal_node, 159 assoc_array_walk_found_wrong_shortcut, 160 }; 161 162 struct assoc_array_walk_result { 163 struct { 164 struct assoc_array_node *node; /* Node in which leaf might be found */ 165 int level; 166 int slot; 167 } terminal_node; 168 struct { 169 struct assoc_array_shortcut *shortcut; 170 int level; 171 int sc_level; 172 unsigned long sc_segments; 173 unsigned long dissimilarity; 174 } wrong_shortcut; 175 }; 176 177 /* 178 * Navigate through the internal tree looking for the closest node to the key. 179 */ 180 static enum assoc_array_walk_status 181 assoc_array_walk(const struct assoc_array *array, 182 const struct assoc_array_ops *ops, 183 const void *index_key, 184 struct assoc_array_walk_result *result) 185 { 186 struct assoc_array_shortcut *shortcut; 187 struct assoc_array_node *node; 188 struct assoc_array_ptr *cursor, *ptr; 189 unsigned long sc_segments, dissimilarity; 190 unsigned long segments; 191 int level, sc_level, next_sc_level; 192 int slot; 193 194 pr_devel("-->%s()\n", __func__); 195 196 cursor = ACCESS_ONCE(array->root); 197 if (!cursor) 198 return assoc_array_walk_tree_empty; 199 200 level = 0; 201 202 /* Use segments from the key for the new leaf to navigate through the 203 * internal tree, skipping through nodes and shortcuts that are on 204 * route to the destination. Eventually we'll come to a slot that is 205 * either empty or contains a leaf at which point we've found a node in 206 * which the leaf we're looking for might be found or into which it 207 * should be inserted. 208 */ 209 jumped: 210 segments = ops->get_key_chunk(index_key, level); 211 pr_devel("segments[%d]: %lx\n", level, segments); 212 213 if (assoc_array_ptr_is_shortcut(cursor)) 214 goto follow_shortcut; 215 216 consider_node: 217 node = assoc_array_ptr_to_node(cursor); 218 smp_read_barrier_depends(); 219 220 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 221 slot &= ASSOC_ARRAY_FAN_MASK; 222 ptr = ACCESS_ONCE(node->slots[slot]); 223 224 pr_devel("consider slot %x [ix=%d type=%lu]\n", 225 slot, level, (unsigned long)ptr & 3); 226 227 if (!assoc_array_ptr_is_meta(ptr)) { 228 /* The node doesn't have a node/shortcut pointer in the slot 229 * corresponding to the index key that we have to follow. 230 */ 231 result->terminal_node.node = node; 232 result->terminal_node.level = level; 233 result->terminal_node.slot = slot; 234 pr_devel("<--%s() = terminal_node\n", __func__); 235 return assoc_array_walk_found_terminal_node; 236 } 237 238 if (assoc_array_ptr_is_node(ptr)) { 239 /* There is a pointer to a node in the slot corresponding to 240 * this index key segment, so we need to follow it. 241 */ 242 cursor = ptr; 243 level += ASSOC_ARRAY_LEVEL_STEP; 244 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) 245 goto consider_node; 246 goto jumped; 247 } 248 249 /* There is a shortcut in the slot corresponding to the index key 250 * segment. We follow the shortcut if its partial index key matches 251 * this leaf's. Otherwise we need to split the shortcut. 252 */ 253 cursor = ptr; 254 follow_shortcut: 255 shortcut = assoc_array_ptr_to_shortcut(cursor); 256 smp_read_barrier_depends(); 257 pr_devel("shortcut to %d\n", shortcut->skip_to_level); 258 sc_level = level + ASSOC_ARRAY_LEVEL_STEP; 259 BUG_ON(sc_level > shortcut->skip_to_level); 260 261 do { 262 /* Check the leaf against the shortcut's index key a word at a 263 * time, trimming the final word (the shortcut stores the index 264 * key completely from the root to the shortcut's target). 265 */ 266 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) 267 segments = ops->get_key_chunk(index_key, sc_level); 268 269 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; 270 dissimilarity = segments ^ sc_segments; 271 272 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { 273 /* Trim segments that are beyond the shortcut */ 274 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; 275 dissimilarity &= ~(ULONG_MAX << shift); 276 next_sc_level = shortcut->skip_to_level; 277 } else { 278 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; 279 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 280 } 281 282 if (dissimilarity != 0) { 283 /* This shortcut points elsewhere */ 284 result->wrong_shortcut.shortcut = shortcut; 285 result->wrong_shortcut.level = level; 286 result->wrong_shortcut.sc_level = sc_level; 287 result->wrong_shortcut.sc_segments = sc_segments; 288 result->wrong_shortcut.dissimilarity = dissimilarity; 289 return assoc_array_walk_found_wrong_shortcut; 290 } 291 292 sc_level = next_sc_level; 293 } while (sc_level < shortcut->skip_to_level); 294 295 /* The shortcut matches the leaf's index to this point. */ 296 cursor = ACCESS_ONCE(shortcut->next_node); 297 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { 298 level = sc_level; 299 goto jumped; 300 } else { 301 level = sc_level; 302 goto consider_node; 303 } 304 } 305 306 /** 307 * assoc_array_find - Find an object by index key 308 * @array: The associative array to search. 309 * @ops: The operations to use. 310 * @index_key: The key to the object. 311 * 312 * Find an object in an associative array by walking through the internal tree 313 * to the node that should contain the object and then searching the leaves 314 * there. NULL is returned if the requested object was not found in the array. 315 * 316 * The caller must hold the RCU read lock or better. 317 */ 318 void *assoc_array_find(const struct assoc_array *array, 319 const struct assoc_array_ops *ops, 320 const void *index_key) 321 { 322 struct assoc_array_walk_result result; 323 const struct assoc_array_node *node; 324 const struct assoc_array_ptr *ptr; 325 const void *leaf; 326 int slot; 327 328 if (assoc_array_walk(array, ops, index_key, &result) != 329 assoc_array_walk_found_terminal_node) 330 return NULL; 331 332 node = result.terminal_node.node; 333 smp_read_barrier_depends(); 334 335 /* If the target key is available to us, it's has to be pointed to by 336 * the terminal node. 337 */ 338 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 339 ptr = ACCESS_ONCE(node->slots[slot]); 340 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 341 /* We need a barrier between the read of the pointer 342 * and dereferencing the pointer - but only if we are 343 * actually going to dereference it. 344 */ 345 leaf = assoc_array_ptr_to_leaf(ptr); 346 smp_read_barrier_depends(); 347 if (ops->compare_object(leaf, index_key)) 348 return (void *)leaf; 349 } 350 } 351 352 return NULL; 353 } 354 355 /* 356 * Destructively iterate over an associative array. The caller must prevent 357 * other simultaneous accesses. 358 */ 359 static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, 360 const struct assoc_array_ops *ops) 361 { 362 struct assoc_array_shortcut *shortcut; 363 struct assoc_array_node *node; 364 struct assoc_array_ptr *cursor, *parent = NULL; 365 int slot = -1; 366 367 pr_devel("-->%s()\n", __func__); 368 369 cursor = root; 370 if (!cursor) { 371 pr_devel("empty\n"); 372 return; 373 } 374 375 move_to_meta: 376 if (assoc_array_ptr_is_shortcut(cursor)) { 377 /* Descend through a shortcut */ 378 pr_devel("[%d] shortcut\n", slot); 379 BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); 380 shortcut = assoc_array_ptr_to_shortcut(cursor); 381 BUG_ON(shortcut->back_pointer != parent); 382 BUG_ON(slot != -1 && shortcut->parent_slot != slot); 383 parent = cursor; 384 cursor = shortcut->next_node; 385 slot = -1; 386 BUG_ON(!assoc_array_ptr_is_node(cursor)); 387 } 388 389 pr_devel("[%d] node\n", slot); 390 node = assoc_array_ptr_to_node(cursor); 391 BUG_ON(node->back_pointer != parent); 392 BUG_ON(slot != -1 && node->parent_slot != slot); 393 slot = 0; 394 395 continue_node: 396 pr_devel("Node %p [back=%p]\n", node, node->back_pointer); 397 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 398 struct assoc_array_ptr *ptr = node->slots[slot]; 399 if (!ptr) 400 continue; 401 if (assoc_array_ptr_is_meta(ptr)) { 402 parent = cursor; 403 cursor = ptr; 404 goto move_to_meta; 405 } 406 407 if (ops) { 408 pr_devel("[%d] free leaf\n", slot); 409 ops->free_object(assoc_array_ptr_to_leaf(ptr)); 410 } 411 } 412 413 parent = node->back_pointer; 414 slot = node->parent_slot; 415 pr_devel("free node\n"); 416 kfree(node); 417 if (!parent) 418 return; /* Done */ 419 420 /* Move back up to the parent (may need to free a shortcut on 421 * the way up) */ 422 if (assoc_array_ptr_is_shortcut(parent)) { 423 shortcut = assoc_array_ptr_to_shortcut(parent); 424 BUG_ON(shortcut->next_node != cursor); 425 cursor = parent; 426 parent = shortcut->back_pointer; 427 slot = shortcut->parent_slot; 428 pr_devel("free shortcut\n"); 429 kfree(shortcut); 430 if (!parent) 431 return; 432 433 BUG_ON(!assoc_array_ptr_is_node(parent)); 434 } 435 436 /* Ascend to next slot in parent node */ 437 pr_devel("ascend to %p[%d]\n", parent, slot); 438 cursor = parent; 439 node = assoc_array_ptr_to_node(cursor); 440 slot++; 441 goto continue_node; 442 } 443 444 /** 445 * assoc_array_destroy - Destroy an associative array 446 * @array: The array to destroy. 447 * @ops: The operations to use. 448 * 449 * Discard all metadata and free all objects in an associative array. The 450 * array will be empty and ready to use again upon completion. This function 451 * cannot fail. 452 * 453 * The caller must prevent all other accesses whilst this takes place as no 454 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding 455 * accesses to continue. On the other hand, no memory allocation is required. 456 */ 457 void assoc_array_destroy(struct assoc_array *array, 458 const struct assoc_array_ops *ops) 459 { 460 assoc_array_destroy_subtree(array->root, ops); 461 array->root = NULL; 462 } 463 464 /* 465 * Handle insertion into an empty tree. 466 */ 467 static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) 468 { 469 struct assoc_array_node *new_n0; 470 471 pr_devel("-->%s()\n", __func__); 472 473 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 474 if (!new_n0) 475 return false; 476 477 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 478 edit->leaf_p = &new_n0->slots[0]; 479 edit->adjust_count_on = new_n0; 480 edit->set[0].ptr = &edit->array->root; 481 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 482 483 pr_devel("<--%s() = ok [no root]\n", __func__); 484 return true; 485 } 486 487 /* 488 * Handle insertion into a terminal node. 489 */ 490 static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, 491 const struct assoc_array_ops *ops, 492 const void *index_key, 493 struct assoc_array_walk_result *result) 494 { 495 struct assoc_array_shortcut *shortcut, *new_s0; 496 struct assoc_array_node *node, *new_n0, *new_n1, *side; 497 struct assoc_array_ptr *ptr; 498 unsigned long dissimilarity, base_seg, blank; 499 size_t keylen; 500 bool have_meta; 501 int level, diff; 502 int slot, next_slot, free_slot, i, j; 503 504 node = result->terminal_node.node; 505 level = result->terminal_node.level; 506 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; 507 508 pr_devel("-->%s()\n", __func__); 509 510 /* We arrived at a node which doesn't have an onward node or shortcut 511 * pointer that we have to follow. This means that (a) the leaf we 512 * want must go here (either by insertion or replacement) or (b) we 513 * need to split this node and insert in one of the fragments. 514 */ 515 free_slot = -1; 516 517 /* Firstly, we have to check the leaves in this node to see if there's 518 * a matching one we should replace in place. 519 */ 520 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 521 ptr = node->slots[i]; 522 if (!ptr) { 523 free_slot = i; 524 continue; 525 } 526 if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { 527 pr_devel("replace in slot %d\n", i); 528 edit->leaf_p = &node->slots[i]; 529 edit->dead_leaf = node->slots[i]; 530 pr_devel("<--%s() = ok [replace]\n", __func__); 531 return true; 532 } 533 } 534 535 /* If there is a free slot in this node then we can just insert the 536 * leaf here. 537 */ 538 if (free_slot >= 0) { 539 pr_devel("insert in free slot %d\n", free_slot); 540 edit->leaf_p = &node->slots[free_slot]; 541 edit->adjust_count_on = node; 542 pr_devel("<--%s() = ok [insert]\n", __func__); 543 return true; 544 } 545 546 /* The node has no spare slots - so we're either going to have to split 547 * it or insert another node before it. 548 * 549 * Whatever, we're going to need at least two new nodes - so allocate 550 * those now. We may also need a new shortcut, but we deal with that 551 * when we need it. 552 */ 553 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 554 if (!new_n0) 555 return false; 556 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 557 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 558 if (!new_n1) 559 return false; 560 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); 561 562 /* We need to find out how similar the leaves are. */ 563 pr_devel("no spare slots\n"); 564 have_meta = false; 565 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 566 ptr = node->slots[i]; 567 if (assoc_array_ptr_is_meta(ptr)) { 568 edit->segment_cache[i] = 0xff; 569 have_meta = true; 570 continue; 571 } 572 base_seg = ops->get_object_key_chunk( 573 assoc_array_ptr_to_leaf(ptr), level); 574 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 575 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 576 } 577 578 if (have_meta) { 579 pr_devel("have meta\n"); 580 goto split_node; 581 } 582 583 /* The node contains only leaves */ 584 dissimilarity = 0; 585 base_seg = edit->segment_cache[0]; 586 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) 587 dissimilarity |= edit->segment_cache[i] ^ base_seg; 588 589 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); 590 591 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { 592 /* The old leaves all cluster in the same slot. We will need 593 * to insert a shortcut if the new node wants to cluster with them. 594 */ 595 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) 596 goto all_leaves_cluster_together; 597 598 /* Otherwise we can just insert a new node ahead of the old 599 * one. 600 */ 601 goto present_leaves_cluster_but_not_new_leaf; 602 } 603 604 split_node: 605 pr_devel("split node\n"); 606 607 /* We need to split the current node; we know that the node doesn't 608 * simply contain a full set of leaves that cluster together (it 609 * contains meta pointers and/or non-clustering leaves). 610 * 611 * We need to expel at least two leaves out of a set consisting of the 612 * leaves in the node and the new leaf. 613 * 614 * We need a new node (n0) to replace the current one and a new node to 615 * take the expelled nodes (n1). 616 */ 617 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 618 new_n0->back_pointer = node->back_pointer; 619 new_n0->parent_slot = node->parent_slot; 620 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 621 new_n1->parent_slot = -1; /* Need to calculate this */ 622 623 do_split_node: 624 pr_devel("do_split_node\n"); 625 626 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 627 new_n1->nr_leaves_on_branch = 0; 628 629 /* Begin by finding two matching leaves. There have to be at least two 630 * that match - even if there are meta pointers - because any leaf that 631 * would match a slot with a meta pointer in it must be somewhere 632 * behind that meta pointer and cannot be here. Further, given N 633 * remaining leaf slots, we now have N+1 leaves to go in them. 634 */ 635 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 636 slot = edit->segment_cache[i]; 637 if (slot != 0xff) 638 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) 639 if (edit->segment_cache[j] == slot) 640 goto found_slot_for_multiple_occupancy; 641 } 642 found_slot_for_multiple_occupancy: 643 pr_devel("same slot: %x %x [%02x]\n", i, j, slot); 644 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); 645 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); 646 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); 647 648 new_n1->parent_slot = slot; 649 650 /* Metadata pointers cannot change slot */ 651 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 652 if (assoc_array_ptr_is_meta(node->slots[i])) 653 new_n0->slots[i] = node->slots[i]; 654 else 655 new_n0->slots[i] = NULL; 656 BUG_ON(new_n0->slots[slot] != NULL); 657 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); 658 659 /* Filter the leaf pointers between the new nodes */ 660 free_slot = -1; 661 next_slot = 0; 662 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 663 if (assoc_array_ptr_is_meta(node->slots[i])) 664 continue; 665 if (edit->segment_cache[i] == slot) { 666 new_n1->slots[next_slot++] = node->slots[i]; 667 new_n1->nr_leaves_on_branch++; 668 } else { 669 do { 670 free_slot++; 671 } while (new_n0->slots[free_slot] != NULL); 672 new_n0->slots[free_slot] = node->slots[i]; 673 } 674 } 675 676 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); 677 678 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { 679 do { 680 free_slot++; 681 } while (new_n0->slots[free_slot] != NULL); 682 edit->leaf_p = &new_n0->slots[free_slot]; 683 edit->adjust_count_on = new_n0; 684 } else { 685 edit->leaf_p = &new_n1->slots[next_slot++]; 686 edit->adjust_count_on = new_n1; 687 } 688 689 BUG_ON(next_slot <= 1); 690 691 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); 692 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 693 if (edit->segment_cache[i] == 0xff) { 694 ptr = node->slots[i]; 695 BUG_ON(assoc_array_ptr_is_leaf(ptr)); 696 if (assoc_array_ptr_is_node(ptr)) { 697 side = assoc_array_ptr_to_node(ptr); 698 edit->set_backpointers[i] = &side->back_pointer; 699 } else { 700 shortcut = assoc_array_ptr_to_shortcut(ptr); 701 edit->set_backpointers[i] = &shortcut->back_pointer; 702 } 703 } 704 } 705 706 ptr = node->back_pointer; 707 if (!ptr) 708 edit->set[0].ptr = &edit->array->root; 709 else if (assoc_array_ptr_is_node(ptr)) 710 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; 711 else 712 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; 713 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 714 pr_devel("<--%s() = ok [split node]\n", __func__); 715 return true; 716 717 present_leaves_cluster_but_not_new_leaf: 718 /* All the old leaves cluster in the same slot, but the new leaf wants 719 * to go into a different slot, so we create a new node to hold the new 720 * leaf and a pointer to a new node holding all the old leaves. 721 */ 722 pr_devel("present leaves cluster but not new leaf\n"); 723 724 new_n0->back_pointer = node->back_pointer; 725 new_n0->parent_slot = node->parent_slot; 726 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 727 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 728 new_n1->parent_slot = edit->segment_cache[0]; 729 new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; 730 edit->adjust_count_on = new_n0; 731 732 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 733 new_n1->slots[i] = node->slots[i]; 734 735 new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); 736 edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; 737 738 edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; 739 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 740 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 741 pr_devel("<--%s() = ok [insert node before]\n", __func__); 742 return true; 743 744 all_leaves_cluster_together: 745 /* All the leaves, new and old, want to cluster together in this node 746 * in the same slot, so we have to replace this node with a shortcut to 747 * skip over the identical parts of the key and then place a pair of 748 * nodes, one inside the other, at the end of the shortcut and 749 * distribute the keys between them. 750 * 751 * Firstly we need to work out where the leaves start diverging as a 752 * bit position into their keys so that we know how big the shortcut 753 * needs to be. 754 * 755 * We only need to make a single pass of N of the N+1 leaves because if 756 * any keys differ between themselves at bit X then at least one of 757 * them must also differ with the base key at bit X or before. 758 */ 759 pr_devel("all leaves cluster together\n"); 760 diff = INT_MAX; 761 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 762 int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), 763 index_key); 764 if (x < diff) { 765 BUG_ON(x < 0); 766 diff = x; 767 } 768 } 769 BUG_ON(diff == INT_MAX); 770 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); 771 772 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 773 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 774 775 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 776 keylen * sizeof(unsigned long), GFP_KERNEL); 777 if (!new_s0) 778 return false; 779 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); 780 781 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 782 new_s0->back_pointer = node->back_pointer; 783 new_s0->parent_slot = node->parent_slot; 784 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 785 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 786 new_n0->parent_slot = 0; 787 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 788 new_n1->parent_slot = -1; /* Need to calculate this */ 789 790 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; 791 pr_devel("skip_to_level = %d [diff %d]\n", level, diff); 792 BUG_ON(level <= 0); 793 794 for (i = 0; i < keylen; i++) 795 new_s0->index_key[i] = 796 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); 797 798 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 799 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); 800 new_s0->index_key[keylen - 1] &= ~blank; 801 802 /* This now reduces to a node splitting exercise for which we'll need 803 * to regenerate the disparity table. 804 */ 805 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 806 ptr = node->slots[i]; 807 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), 808 level); 809 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 810 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 811 } 812 813 base_seg = ops->get_key_chunk(index_key, level); 814 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 815 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; 816 goto do_split_node; 817 } 818 819 /* 820 * Handle insertion into the middle of a shortcut. 821 */ 822 static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, 823 const struct assoc_array_ops *ops, 824 struct assoc_array_walk_result *result) 825 { 826 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; 827 struct assoc_array_node *node, *new_n0, *side; 828 unsigned long sc_segments, dissimilarity, blank; 829 size_t keylen; 830 int level, sc_level, diff; 831 int sc_slot; 832 833 shortcut = result->wrong_shortcut.shortcut; 834 level = result->wrong_shortcut.level; 835 sc_level = result->wrong_shortcut.sc_level; 836 sc_segments = result->wrong_shortcut.sc_segments; 837 dissimilarity = result->wrong_shortcut.dissimilarity; 838 839 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", 840 __func__, level, dissimilarity, sc_level); 841 842 /* We need to split a shortcut and insert a node between the two 843 * pieces. Zero-length pieces will be dispensed with entirely. 844 * 845 * First of all, we need to find out in which level the first 846 * difference was. 847 */ 848 diff = __ffs(dissimilarity); 849 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; 850 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; 851 pr_devel("diff=%d\n", diff); 852 853 if (!shortcut->back_pointer) { 854 edit->set[0].ptr = &edit->array->root; 855 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { 856 node = assoc_array_ptr_to_node(shortcut->back_pointer); 857 edit->set[0].ptr = &node->slots[shortcut->parent_slot]; 858 } else { 859 BUG(); 860 } 861 862 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); 863 864 /* Create a new node now since we're going to need it anyway */ 865 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 866 if (!new_n0) 867 return false; 868 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 869 edit->adjust_count_on = new_n0; 870 871 /* Insert a new shortcut before the new node if this segment isn't of 872 * zero length - otherwise we just connect the new node directly to the 873 * parent. 874 */ 875 level += ASSOC_ARRAY_LEVEL_STEP; 876 if (diff > level) { 877 pr_devel("pre-shortcut %d...%d\n", level, diff); 878 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 879 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 880 881 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 882 keylen * sizeof(unsigned long), GFP_KERNEL); 883 if (!new_s0) 884 return false; 885 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); 886 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 887 new_s0->back_pointer = shortcut->back_pointer; 888 new_s0->parent_slot = shortcut->parent_slot; 889 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 890 new_s0->skip_to_level = diff; 891 892 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 893 new_n0->parent_slot = 0; 894 895 memcpy(new_s0->index_key, shortcut->index_key, 896 keylen * sizeof(unsigned long)); 897 898 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 899 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); 900 new_s0->index_key[keylen - 1] &= ~blank; 901 } else { 902 pr_devel("no pre-shortcut\n"); 903 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 904 new_n0->back_pointer = shortcut->back_pointer; 905 new_n0->parent_slot = shortcut->parent_slot; 906 } 907 908 side = assoc_array_ptr_to_node(shortcut->next_node); 909 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; 910 911 /* We need to know which slot in the new node is going to take a 912 * metadata pointer. 913 */ 914 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 915 sc_slot &= ASSOC_ARRAY_FAN_MASK; 916 917 pr_devel("new slot %lx >> %d -> %d\n", 918 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); 919 920 /* Determine whether we need to follow the new node with a replacement 921 * for the current shortcut. We could in theory reuse the current 922 * shortcut if its parent slot number doesn't change - but that's a 923 * 1-in-16 chance so not worth expending the code upon. 924 */ 925 level = diff + ASSOC_ARRAY_LEVEL_STEP; 926 if (level < shortcut->skip_to_level) { 927 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); 928 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 929 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 930 931 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + 932 keylen * sizeof(unsigned long), GFP_KERNEL); 933 if (!new_s1) 934 return false; 935 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); 936 937 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); 938 new_s1->parent_slot = sc_slot; 939 new_s1->next_node = shortcut->next_node; 940 new_s1->skip_to_level = shortcut->skip_to_level; 941 942 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); 943 944 memcpy(new_s1->index_key, shortcut->index_key, 945 keylen * sizeof(unsigned long)); 946 947 edit->set[1].ptr = &side->back_pointer; 948 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); 949 } else { 950 pr_devel("no post-shortcut\n"); 951 952 /* We don't have to replace the pointed-to node as long as we 953 * use memory barriers to make sure the parent slot number is 954 * changed before the back pointer (the parent slot number is 955 * irrelevant to the old parent shortcut). 956 */ 957 new_n0->slots[sc_slot] = shortcut->next_node; 958 edit->set_parent_slot[0].p = &side->parent_slot; 959 edit->set_parent_slot[0].to = sc_slot; 960 edit->set[1].ptr = &side->back_pointer; 961 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 962 } 963 964 /* Install the new leaf in a spare slot in the new node. */ 965 if (sc_slot == 0) 966 edit->leaf_p = &new_n0->slots[1]; 967 else 968 edit->leaf_p = &new_n0->slots[0]; 969 970 pr_devel("<--%s() = ok [split shortcut]\n", __func__); 971 return edit; 972 } 973 974 /** 975 * assoc_array_insert - Script insertion of an object into an associative array 976 * @array: The array to insert into. 977 * @ops: The operations to use. 978 * @index_key: The key to insert at. 979 * @object: The object to insert. 980 * 981 * Precalculate and preallocate a script for the insertion or replacement of an 982 * object in an associative array. This results in an edit script that can 983 * either be applied or cancelled. 984 * 985 * The function returns a pointer to an edit script or -ENOMEM. 986 * 987 * The caller should lock against other modifications and must continue to hold 988 * the lock until assoc_array_apply_edit() has been called. 989 * 990 * Accesses to the tree may take place concurrently with this function, 991 * provided they hold the RCU read lock. 992 */ 993 struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, 994 const struct assoc_array_ops *ops, 995 const void *index_key, 996 void *object) 997 { 998 struct assoc_array_walk_result result; 999 struct assoc_array_edit *edit; 1000 1001 pr_devel("-->%s()\n", __func__); 1002 1003 /* The leaf pointer we're given must not have the bottom bit set as we 1004 * use those for type-marking the pointer. NULL pointers are also not 1005 * allowed as they indicate an empty slot but we have to allow them 1006 * here as they can be updated later. 1007 */ 1008 BUG_ON(assoc_array_ptr_is_meta(object)); 1009 1010 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1011 if (!edit) 1012 return ERR_PTR(-ENOMEM); 1013 edit->array = array; 1014 edit->ops = ops; 1015 edit->leaf = assoc_array_leaf_to_ptr(object); 1016 edit->adjust_count_by = 1; 1017 1018 switch (assoc_array_walk(array, ops, index_key, &result)) { 1019 case assoc_array_walk_tree_empty: 1020 /* Allocate a root node if there isn't one yet */ 1021 if (!assoc_array_insert_in_empty_tree(edit)) 1022 goto enomem; 1023 return edit; 1024 1025 case assoc_array_walk_found_terminal_node: 1026 /* We found a node that doesn't have a node/shortcut pointer in 1027 * the slot corresponding to the index key that we have to 1028 * follow. 1029 */ 1030 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, 1031 &result)) 1032 goto enomem; 1033 return edit; 1034 1035 case assoc_array_walk_found_wrong_shortcut: 1036 /* We found a shortcut that didn't match our key in a slot we 1037 * needed to follow. 1038 */ 1039 if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) 1040 goto enomem; 1041 return edit; 1042 } 1043 1044 enomem: 1045 /* Clean up after an out of memory error */ 1046 pr_devel("enomem\n"); 1047 assoc_array_cancel_edit(edit); 1048 return ERR_PTR(-ENOMEM); 1049 } 1050 1051 /** 1052 * assoc_array_insert_set_object - Set the new object pointer in an edit script 1053 * @edit: The edit script to modify. 1054 * @object: The object pointer to set. 1055 * 1056 * Change the object to be inserted in an edit script. The object pointed to 1057 * by the old object is not freed. This must be done prior to applying the 1058 * script. 1059 */ 1060 void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) 1061 { 1062 BUG_ON(!object); 1063 edit->leaf = assoc_array_leaf_to_ptr(object); 1064 } 1065 1066 struct assoc_array_delete_collapse_context { 1067 struct assoc_array_node *node; 1068 const void *skip_leaf; 1069 int slot; 1070 }; 1071 1072 /* 1073 * Subtree collapse to node iterator. 1074 */ 1075 static int assoc_array_delete_collapse_iterator(const void *leaf, 1076 void *iterator_data) 1077 { 1078 struct assoc_array_delete_collapse_context *collapse = iterator_data; 1079 1080 if (leaf == collapse->skip_leaf) 1081 return 0; 1082 1083 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); 1084 1085 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); 1086 return 0; 1087 } 1088 1089 /** 1090 * assoc_array_delete - Script deletion of an object from an associative array 1091 * @array: The array to search. 1092 * @ops: The operations to use. 1093 * @index_key: The key to the object. 1094 * 1095 * Precalculate and preallocate a script for the deletion of an object from an 1096 * associative array. This results in an edit script that can either be 1097 * applied or cancelled. 1098 * 1099 * The function returns a pointer to an edit script if the object was found, 1100 * NULL if the object was not found or -ENOMEM. 1101 * 1102 * The caller should lock against other modifications and must continue to hold 1103 * the lock until assoc_array_apply_edit() has been called. 1104 * 1105 * Accesses to the tree may take place concurrently with this function, 1106 * provided they hold the RCU read lock. 1107 */ 1108 struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, 1109 const struct assoc_array_ops *ops, 1110 const void *index_key) 1111 { 1112 struct assoc_array_delete_collapse_context collapse; 1113 struct assoc_array_walk_result result; 1114 struct assoc_array_node *node, *new_n0; 1115 struct assoc_array_edit *edit; 1116 struct assoc_array_ptr *ptr; 1117 bool has_meta; 1118 int slot, i; 1119 1120 pr_devel("-->%s()\n", __func__); 1121 1122 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1123 if (!edit) 1124 return ERR_PTR(-ENOMEM); 1125 edit->array = array; 1126 edit->ops = ops; 1127 edit->adjust_count_by = -1; 1128 1129 switch (assoc_array_walk(array, ops, index_key, &result)) { 1130 case assoc_array_walk_found_terminal_node: 1131 /* We found a node that should contain the leaf we've been 1132 * asked to remove - *if* it's in the tree. 1133 */ 1134 pr_devel("terminal_node\n"); 1135 node = result.terminal_node.node; 1136 1137 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1138 ptr = node->slots[slot]; 1139 if (ptr && 1140 assoc_array_ptr_is_leaf(ptr) && 1141 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 1142 index_key)) 1143 goto found_leaf; 1144 } 1145 case assoc_array_walk_tree_empty: 1146 case assoc_array_walk_found_wrong_shortcut: 1147 default: 1148 assoc_array_cancel_edit(edit); 1149 pr_devel("not found\n"); 1150 return NULL; 1151 } 1152 1153 found_leaf: 1154 BUG_ON(array->nr_leaves_on_tree <= 0); 1155 1156 /* In the simplest form of deletion we just clear the slot and release 1157 * the leaf after a suitable interval. 1158 */ 1159 edit->dead_leaf = node->slots[slot]; 1160 edit->set[0].ptr = &node->slots[slot]; 1161 edit->set[0].to = NULL; 1162 edit->adjust_count_on = node; 1163 1164 /* If that concludes erasure of the last leaf, then delete the entire 1165 * internal array. 1166 */ 1167 if (array->nr_leaves_on_tree == 1) { 1168 edit->set[1].ptr = &array->root; 1169 edit->set[1].to = NULL; 1170 edit->adjust_count_on = NULL; 1171 edit->excised_subtree = array->root; 1172 pr_devel("all gone\n"); 1173 return edit; 1174 } 1175 1176 /* However, we'd also like to clear up some metadata blocks if we 1177 * possibly can. 1178 * 1179 * We go for a simple algorithm of: if this node has FAN_OUT or fewer 1180 * leaves in it, then attempt to collapse it - and attempt to 1181 * recursively collapse up the tree. 1182 * 1183 * We could also try and collapse in partially filled subtrees to take 1184 * up space in this node. 1185 */ 1186 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1187 struct assoc_array_node *parent, *grandparent; 1188 struct assoc_array_ptr *ptr; 1189 1190 /* First of all, we need to know if this node has metadata so 1191 * that we don't try collapsing if all the leaves are already 1192 * here. 1193 */ 1194 has_meta = false; 1195 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1196 ptr = node->slots[i]; 1197 if (assoc_array_ptr_is_meta(ptr)) { 1198 has_meta = true; 1199 break; 1200 } 1201 } 1202 1203 pr_devel("leaves: %ld [m=%d]\n", 1204 node->nr_leaves_on_branch - 1, has_meta); 1205 1206 /* Look further up the tree to see if we can collapse this node 1207 * into a more proximal node too. 1208 */ 1209 parent = node; 1210 collapse_up: 1211 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); 1212 1213 ptr = parent->back_pointer; 1214 if (!ptr) 1215 goto do_collapse; 1216 if (assoc_array_ptr_is_shortcut(ptr)) { 1217 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); 1218 ptr = s->back_pointer; 1219 if (!ptr) 1220 goto do_collapse; 1221 } 1222 1223 grandparent = assoc_array_ptr_to_node(ptr); 1224 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1225 parent = grandparent; 1226 goto collapse_up; 1227 } 1228 1229 do_collapse: 1230 /* There's no point collapsing if the original node has no meta 1231 * pointers to discard and if we didn't merge into one of that 1232 * node's ancestry. 1233 */ 1234 if (has_meta || parent != node) { 1235 node = parent; 1236 1237 /* Create a new node to collapse into */ 1238 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1239 if (!new_n0) 1240 goto enomem; 1241 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 1242 1243 new_n0->back_pointer = node->back_pointer; 1244 new_n0->parent_slot = node->parent_slot; 1245 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 1246 edit->adjust_count_on = new_n0; 1247 1248 collapse.node = new_n0; 1249 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); 1250 collapse.slot = 0; 1251 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), 1252 node->back_pointer, 1253 assoc_array_delete_collapse_iterator, 1254 &collapse); 1255 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); 1256 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); 1257 1258 if (!node->back_pointer) { 1259 edit->set[1].ptr = &array->root; 1260 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { 1261 BUG(); 1262 } else if (assoc_array_ptr_is_node(node->back_pointer)) { 1263 struct assoc_array_node *p = 1264 assoc_array_ptr_to_node(node->back_pointer); 1265 edit->set[1].ptr = &p->slots[node->parent_slot]; 1266 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { 1267 struct assoc_array_shortcut *s = 1268 assoc_array_ptr_to_shortcut(node->back_pointer); 1269 edit->set[1].ptr = &s->next_node; 1270 } 1271 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 1272 edit->excised_subtree = assoc_array_node_to_ptr(node); 1273 } 1274 } 1275 1276 return edit; 1277 1278 enomem: 1279 /* Clean up after an out of memory error */ 1280 pr_devel("enomem\n"); 1281 assoc_array_cancel_edit(edit); 1282 return ERR_PTR(-ENOMEM); 1283 } 1284 1285 /** 1286 * assoc_array_clear - Script deletion of all objects from an associative array 1287 * @array: The array to clear. 1288 * @ops: The operations to use. 1289 * 1290 * Precalculate and preallocate a script for the deletion of all the objects 1291 * from an associative array. This results in an edit script that can either 1292 * be applied or cancelled. 1293 * 1294 * The function returns a pointer to an edit script if there are objects to be 1295 * deleted, NULL if there are no objects in the array or -ENOMEM. 1296 * 1297 * The caller should lock against other modifications and must continue to hold 1298 * the lock until assoc_array_apply_edit() has been called. 1299 * 1300 * Accesses to the tree may take place concurrently with this function, 1301 * provided they hold the RCU read lock. 1302 */ 1303 struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, 1304 const struct assoc_array_ops *ops) 1305 { 1306 struct assoc_array_edit *edit; 1307 1308 pr_devel("-->%s()\n", __func__); 1309 1310 if (!array->root) 1311 return NULL; 1312 1313 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1314 if (!edit) 1315 return ERR_PTR(-ENOMEM); 1316 edit->array = array; 1317 edit->ops = ops; 1318 edit->set[1].ptr = &array->root; 1319 edit->set[1].to = NULL; 1320 edit->excised_subtree = array->root; 1321 edit->ops_for_excised_subtree = ops; 1322 pr_devel("all gone\n"); 1323 return edit; 1324 } 1325 1326 /* 1327 * Handle the deferred destruction after an applied edit. 1328 */ 1329 static void assoc_array_rcu_cleanup(struct rcu_head *head) 1330 { 1331 struct assoc_array_edit *edit = 1332 container_of(head, struct assoc_array_edit, rcu); 1333 int i; 1334 1335 pr_devel("-->%s()\n", __func__); 1336 1337 if (edit->dead_leaf) 1338 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); 1339 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) 1340 if (edit->excised_meta[i]) 1341 kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); 1342 1343 if (edit->excised_subtree) { 1344 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); 1345 if (assoc_array_ptr_is_node(edit->excised_subtree)) { 1346 struct assoc_array_node *n = 1347 assoc_array_ptr_to_node(edit->excised_subtree); 1348 n->back_pointer = NULL; 1349 } else { 1350 struct assoc_array_shortcut *s = 1351 assoc_array_ptr_to_shortcut(edit->excised_subtree); 1352 s->back_pointer = NULL; 1353 } 1354 assoc_array_destroy_subtree(edit->excised_subtree, 1355 edit->ops_for_excised_subtree); 1356 } 1357 1358 kfree(edit); 1359 } 1360 1361 /** 1362 * assoc_array_apply_edit - Apply an edit script to an associative array 1363 * @edit: The script to apply. 1364 * 1365 * Apply an edit script to an associative array to effect an insertion, 1366 * deletion or clearance. As the edit script includes preallocated memory, 1367 * this is guaranteed not to fail. 1368 * 1369 * The edit script, dead objects and dead metadata will be scheduled for 1370 * destruction after an RCU grace period to permit those doing read-only 1371 * accesses on the array to continue to do so under the RCU read lock whilst 1372 * the edit is taking place. 1373 */ 1374 void assoc_array_apply_edit(struct assoc_array_edit *edit) 1375 { 1376 struct assoc_array_shortcut *shortcut; 1377 struct assoc_array_node *node; 1378 struct assoc_array_ptr *ptr; 1379 int i; 1380 1381 pr_devel("-->%s()\n", __func__); 1382 1383 smp_wmb(); 1384 if (edit->leaf_p) 1385 *edit->leaf_p = edit->leaf; 1386 1387 smp_wmb(); 1388 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) 1389 if (edit->set_parent_slot[i].p) 1390 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; 1391 1392 smp_wmb(); 1393 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) 1394 if (edit->set_backpointers[i]) 1395 *edit->set_backpointers[i] = edit->set_backpointers_to; 1396 1397 smp_wmb(); 1398 for (i = 0; i < ARRAY_SIZE(edit->set); i++) 1399 if (edit->set[i].ptr) 1400 *edit->set[i].ptr = edit->set[i].to; 1401 1402 if (edit->array->root == NULL) { 1403 edit->array->nr_leaves_on_tree = 0; 1404 } else if (edit->adjust_count_on) { 1405 node = edit->adjust_count_on; 1406 for (;;) { 1407 node->nr_leaves_on_branch += edit->adjust_count_by; 1408 1409 ptr = node->back_pointer; 1410 if (!ptr) 1411 break; 1412 if (assoc_array_ptr_is_shortcut(ptr)) { 1413 shortcut = assoc_array_ptr_to_shortcut(ptr); 1414 ptr = shortcut->back_pointer; 1415 if (!ptr) 1416 break; 1417 } 1418 BUG_ON(!assoc_array_ptr_is_node(ptr)); 1419 node = assoc_array_ptr_to_node(ptr); 1420 } 1421 1422 edit->array->nr_leaves_on_tree += edit->adjust_count_by; 1423 } 1424 1425 call_rcu(&edit->rcu, assoc_array_rcu_cleanup); 1426 } 1427 1428 /** 1429 * assoc_array_cancel_edit - Discard an edit script. 1430 * @edit: The script to discard. 1431 * 1432 * Free an edit script and all the preallocated data it holds without making 1433 * any changes to the associative array it was intended for. 1434 * 1435 * NOTE! In the case of an insertion script, this does _not_ release the leaf 1436 * that was to be inserted. That is left to the caller. 1437 */ 1438 void assoc_array_cancel_edit(struct assoc_array_edit *edit) 1439 { 1440 struct assoc_array_ptr *ptr; 1441 int i; 1442 1443 pr_devel("-->%s()\n", __func__); 1444 1445 /* Clean up after an out of memory error */ 1446 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { 1447 ptr = edit->new_meta[i]; 1448 if (ptr) { 1449 if (assoc_array_ptr_is_node(ptr)) 1450 kfree(assoc_array_ptr_to_node(ptr)); 1451 else 1452 kfree(assoc_array_ptr_to_shortcut(ptr)); 1453 } 1454 } 1455 kfree(edit); 1456 } 1457 1458 /** 1459 * assoc_array_gc - Garbage collect an associative array. 1460 * @array: The array to clean. 1461 * @ops: The operations to use. 1462 * @iterator: A callback function to pass judgement on each object. 1463 * @iterator_data: Private data for the callback function. 1464 * 1465 * Collect garbage from an associative array and pack down the internal tree to 1466 * save memory. 1467 * 1468 * The iterator function is asked to pass judgement upon each object in the 1469 * array. If it returns false, the object is discard and if it returns true, 1470 * the object is kept. If it returns true, it must increment the object's 1471 * usage count (or whatever it needs to do to retain it) before returning. 1472 * 1473 * This function returns 0 if successful or -ENOMEM if out of memory. In the 1474 * latter case, the array is not changed. 1475 * 1476 * The caller should lock against other modifications and must continue to hold 1477 * the lock until assoc_array_apply_edit() has been called. 1478 * 1479 * Accesses to the tree may take place concurrently with this function, 1480 * provided they hold the RCU read lock. 1481 */ 1482 int assoc_array_gc(struct assoc_array *array, 1483 const struct assoc_array_ops *ops, 1484 bool (*iterator)(void *object, void *iterator_data), 1485 void *iterator_data) 1486 { 1487 struct assoc_array_shortcut *shortcut, *new_s; 1488 struct assoc_array_node *node, *new_n; 1489 struct assoc_array_edit *edit; 1490 struct assoc_array_ptr *cursor, *ptr; 1491 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; 1492 unsigned long nr_leaves_on_tree; 1493 int keylen, slot, nr_free, next_slot, i; 1494 1495 pr_devel("-->%s()\n", __func__); 1496 1497 if (!array->root) 1498 return 0; 1499 1500 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1501 if (!edit) 1502 return -ENOMEM; 1503 edit->array = array; 1504 edit->ops = ops; 1505 edit->ops_for_excised_subtree = ops; 1506 edit->set[0].ptr = &array->root; 1507 edit->excised_subtree = array->root; 1508 1509 new_root = new_parent = NULL; 1510 new_ptr_pp = &new_root; 1511 cursor = array->root; 1512 1513 descend: 1514 /* If this point is a shortcut, then we need to duplicate it and 1515 * advance the target cursor. 1516 */ 1517 if (assoc_array_ptr_is_shortcut(cursor)) { 1518 shortcut = assoc_array_ptr_to_shortcut(cursor); 1519 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 1520 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 1521 new_s = kmalloc(sizeof(struct assoc_array_shortcut) + 1522 keylen * sizeof(unsigned long), GFP_KERNEL); 1523 if (!new_s) 1524 goto enomem; 1525 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); 1526 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + 1527 keylen * sizeof(unsigned long))); 1528 new_s->back_pointer = new_parent; 1529 new_s->parent_slot = shortcut->parent_slot; 1530 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); 1531 new_ptr_pp = &new_s->next_node; 1532 cursor = shortcut->next_node; 1533 } 1534 1535 /* Duplicate the node at this position */ 1536 node = assoc_array_ptr_to_node(cursor); 1537 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1538 if (!new_n) 1539 goto enomem; 1540 pr_devel("dup node %p -> %p\n", node, new_n); 1541 new_n->back_pointer = new_parent; 1542 new_n->parent_slot = node->parent_slot; 1543 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); 1544 new_ptr_pp = NULL; 1545 slot = 0; 1546 1547 continue_node: 1548 /* Filter across any leaves and gc any subtrees */ 1549 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1550 ptr = node->slots[slot]; 1551 if (!ptr) 1552 continue; 1553 1554 if (assoc_array_ptr_is_leaf(ptr)) { 1555 if (iterator(assoc_array_ptr_to_leaf(ptr), 1556 iterator_data)) 1557 /* The iterator will have done any reference 1558 * counting on the object for us. 1559 */ 1560 new_n->slots[slot] = ptr; 1561 continue; 1562 } 1563 1564 new_ptr_pp = &new_n->slots[slot]; 1565 cursor = ptr; 1566 goto descend; 1567 } 1568 1569 pr_devel("-- compress node %p --\n", new_n); 1570 1571 /* Count up the number of empty slots in this node and work out the 1572 * subtree leaf count. 1573 */ 1574 new_n->nr_leaves_on_branch = 0; 1575 nr_free = 0; 1576 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1577 ptr = new_n->slots[slot]; 1578 if (!ptr) 1579 nr_free++; 1580 else if (assoc_array_ptr_is_leaf(ptr)) 1581 new_n->nr_leaves_on_branch++; 1582 } 1583 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); 1584 1585 /* See what we can fold in */ 1586 next_slot = 0; 1587 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1588 struct assoc_array_shortcut *s; 1589 struct assoc_array_node *child; 1590 1591 ptr = new_n->slots[slot]; 1592 if (!ptr || assoc_array_ptr_is_leaf(ptr)) 1593 continue; 1594 1595 s = NULL; 1596 if (assoc_array_ptr_is_shortcut(ptr)) { 1597 s = assoc_array_ptr_to_shortcut(ptr); 1598 ptr = s->next_node; 1599 } 1600 1601 child = assoc_array_ptr_to_node(ptr); 1602 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; 1603 1604 if (child->nr_leaves_on_branch <= nr_free + 1) { 1605 /* Fold the child node into this one */ 1606 pr_devel("[%d] fold node %lu/%d [nx %d]\n", 1607 slot, child->nr_leaves_on_branch, nr_free + 1, 1608 next_slot); 1609 1610 /* We would already have reaped an intervening shortcut 1611 * on the way back up the tree. 1612 */ 1613 BUG_ON(s); 1614 1615 new_n->slots[slot] = NULL; 1616 nr_free++; 1617 if (slot < next_slot) 1618 next_slot = slot; 1619 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1620 struct assoc_array_ptr *p = child->slots[i]; 1621 if (!p) 1622 continue; 1623 BUG_ON(assoc_array_ptr_is_meta(p)); 1624 while (new_n->slots[next_slot]) 1625 next_slot++; 1626 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); 1627 new_n->slots[next_slot++] = p; 1628 nr_free--; 1629 } 1630 kfree(child); 1631 } else { 1632 pr_devel("[%d] retain node %lu/%d [nx %d]\n", 1633 slot, child->nr_leaves_on_branch, nr_free + 1, 1634 next_slot); 1635 } 1636 } 1637 1638 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); 1639 1640 nr_leaves_on_tree = new_n->nr_leaves_on_branch; 1641 1642 /* Excise this node if it is singly occupied by a shortcut */ 1643 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { 1644 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) 1645 if ((ptr = new_n->slots[slot])) 1646 break; 1647 1648 if (assoc_array_ptr_is_meta(ptr) && 1649 assoc_array_ptr_is_shortcut(ptr)) { 1650 pr_devel("excise node %p with 1 shortcut\n", new_n); 1651 new_s = assoc_array_ptr_to_shortcut(ptr); 1652 new_parent = new_n->back_pointer; 1653 slot = new_n->parent_slot; 1654 kfree(new_n); 1655 if (!new_parent) { 1656 new_s->back_pointer = NULL; 1657 new_s->parent_slot = 0; 1658 new_root = ptr; 1659 goto gc_complete; 1660 } 1661 1662 if (assoc_array_ptr_is_shortcut(new_parent)) { 1663 /* We can discard any preceding shortcut also */ 1664 struct assoc_array_shortcut *s = 1665 assoc_array_ptr_to_shortcut(new_parent); 1666 1667 pr_devel("excise preceding shortcut\n"); 1668 1669 new_parent = new_s->back_pointer = s->back_pointer; 1670 slot = new_s->parent_slot = s->parent_slot; 1671 kfree(s); 1672 if (!new_parent) { 1673 new_s->back_pointer = NULL; 1674 new_s->parent_slot = 0; 1675 new_root = ptr; 1676 goto gc_complete; 1677 } 1678 } 1679 1680 new_s->back_pointer = new_parent; 1681 new_s->parent_slot = slot; 1682 new_n = assoc_array_ptr_to_node(new_parent); 1683 new_n->slots[slot] = ptr; 1684 goto ascend_old_tree; 1685 } 1686 } 1687 1688 /* Excise any shortcuts we might encounter that point to nodes that 1689 * only contain leaves. 1690 */ 1691 ptr = new_n->back_pointer; 1692 if (!ptr) 1693 goto gc_complete; 1694 1695 if (assoc_array_ptr_is_shortcut(ptr)) { 1696 new_s = assoc_array_ptr_to_shortcut(ptr); 1697 new_parent = new_s->back_pointer; 1698 slot = new_s->parent_slot; 1699 1700 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1701 struct assoc_array_node *n; 1702 1703 pr_devel("excise shortcut\n"); 1704 new_n->back_pointer = new_parent; 1705 new_n->parent_slot = slot; 1706 kfree(new_s); 1707 if (!new_parent) { 1708 new_root = assoc_array_node_to_ptr(new_n); 1709 goto gc_complete; 1710 } 1711 1712 n = assoc_array_ptr_to_node(new_parent); 1713 n->slots[slot] = assoc_array_node_to_ptr(new_n); 1714 } 1715 } else { 1716 new_parent = ptr; 1717 } 1718 new_n = assoc_array_ptr_to_node(new_parent); 1719 1720 ascend_old_tree: 1721 ptr = node->back_pointer; 1722 if (assoc_array_ptr_is_shortcut(ptr)) { 1723 shortcut = assoc_array_ptr_to_shortcut(ptr); 1724 slot = shortcut->parent_slot; 1725 cursor = shortcut->back_pointer; 1726 if (!cursor) 1727 goto gc_complete; 1728 } else { 1729 slot = node->parent_slot; 1730 cursor = ptr; 1731 } 1732 BUG_ON(!cursor); 1733 node = assoc_array_ptr_to_node(cursor); 1734 slot++; 1735 goto continue_node; 1736 1737 gc_complete: 1738 edit->set[0].to = new_root; 1739 assoc_array_apply_edit(edit); 1740 array->nr_leaves_on_tree = nr_leaves_on_tree; 1741 return 0; 1742 1743 enomem: 1744 pr_devel("enomem\n"); 1745 assoc_array_destroy_subtree(new_root, edit->ops); 1746 kfree(edit); 1747 return -ENOMEM; 1748 } 1749