1 /* Generic associative array implementation. 2 * 3 * See Documentation/core-api/assoc_array.rst for information. 4 * 5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 6 * Written by David Howells (dhowells@redhat.com) 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public Licence 10 * as published by the Free Software Foundation; either version 11 * 2 of the Licence, or (at your option) any later version. 12 */ 13 //#define DEBUG 14 #include <linux/rcupdate.h> 15 #include <linux/slab.h> 16 #include <linux/err.h> 17 #include <linux/assoc_array_priv.h> 18 19 /* 20 * Iterate over an associative array. The caller must hold the RCU read lock 21 * or better. 22 */ 23 static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, 24 const struct assoc_array_ptr *stop, 25 int (*iterator)(const void *leaf, 26 void *iterator_data), 27 void *iterator_data) 28 { 29 const struct assoc_array_shortcut *shortcut; 30 const struct assoc_array_node *node; 31 const struct assoc_array_ptr *cursor, *ptr, *parent; 32 unsigned long has_meta; 33 int slot, ret; 34 35 cursor = root; 36 37 begin_node: 38 if (assoc_array_ptr_is_shortcut(cursor)) { 39 /* Descend through a shortcut */ 40 shortcut = assoc_array_ptr_to_shortcut(cursor); 41 smp_read_barrier_depends(); 42 cursor = READ_ONCE(shortcut->next_node); 43 } 44 45 node = assoc_array_ptr_to_node(cursor); 46 smp_read_barrier_depends(); 47 slot = 0; 48 49 /* We perform two passes of each node. 50 * 51 * The first pass does all the leaves in this node. This means we 52 * don't miss any leaves if the node is split up by insertion whilst 53 * we're iterating over the branches rooted here (we may, however, see 54 * some leaves twice). 55 */ 56 has_meta = 0; 57 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 58 ptr = READ_ONCE(node->slots[slot]); 59 has_meta |= (unsigned long)ptr; 60 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 61 /* We need a barrier between the read of the pointer 62 * and dereferencing the pointer - but only if we are 63 * actually going to dereference it. 64 */ 65 smp_read_barrier_depends(); 66 67 /* Invoke the callback */ 68 ret = iterator(assoc_array_ptr_to_leaf(ptr), 69 iterator_data); 70 if (ret) 71 return ret; 72 } 73 } 74 75 /* The second pass attends to all the metadata pointers. If we follow 76 * one of these we may find that we don't come back here, but rather go 77 * back to a replacement node with the leaves in a different layout. 78 * 79 * We are guaranteed to make progress, however, as the slot number for 80 * a particular portion of the key space cannot change - and we 81 * continue at the back pointer + 1. 82 */ 83 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) 84 goto finished_node; 85 slot = 0; 86 87 continue_node: 88 node = assoc_array_ptr_to_node(cursor); 89 smp_read_barrier_depends(); 90 91 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 92 ptr = READ_ONCE(node->slots[slot]); 93 if (assoc_array_ptr_is_meta(ptr)) { 94 cursor = ptr; 95 goto begin_node; 96 } 97 } 98 99 finished_node: 100 /* Move up to the parent (may need to skip back over a shortcut) */ 101 parent = READ_ONCE(node->back_pointer); 102 slot = node->parent_slot; 103 if (parent == stop) 104 return 0; 105 106 if (assoc_array_ptr_is_shortcut(parent)) { 107 shortcut = assoc_array_ptr_to_shortcut(parent); 108 smp_read_barrier_depends(); 109 cursor = parent; 110 parent = READ_ONCE(shortcut->back_pointer); 111 slot = shortcut->parent_slot; 112 if (parent == stop) 113 return 0; 114 } 115 116 /* Ascend to next slot in parent node */ 117 cursor = parent; 118 slot++; 119 goto continue_node; 120 } 121 122 /** 123 * assoc_array_iterate - Pass all objects in the array to a callback 124 * @array: The array to iterate over. 125 * @iterator: The callback function. 126 * @iterator_data: Private data for the callback function. 127 * 128 * Iterate over all the objects in an associative array. Each one will be 129 * presented to the iterator function. 130 * 131 * If the array is being modified concurrently with the iteration then it is 132 * possible that some objects in the array will be passed to the iterator 133 * callback more than once - though every object should be passed at least 134 * once. If this is undesirable then the caller must lock against modification 135 * for the duration of this function. 136 * 137 * The function will return 0 if no objects were in the array or else it will 138 * return the result of the last iterator function called. Iteration stops 139 * immediately if any call to the iteration function results in a non-zero 140 * return. 141 * 142 * The caller should hold the RCU read lock or better if concurrent 143 * modification is possible. 144 */ 145 int assoc_array_iterate(const struct assoc_array *array, 146 int (*iterator)(const void *object, 147 void *iterator_data), 148 void *iterator_data) 149 { 150 struct assoc_array_ptr *root = READ_ONCE(array->root); 151 152 if (!root) 153 return 0; 154 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); 155 } 156 157 enum assoc_array_walk_status { 158 assoc_array_walk_tree_empty, 159 assoc_array_walk_found_terminal_node, 160 assoc_array_walk_found_wrong_shortcut, 161 }; 162 163 struct assoc_array_walk_result { 164 struct { 165 struct assoc_array_node *node; /* Node in which leaf might be found */ 166 int level; 167 int slot; 168 } terminal_node; 169 struct { 170 struct assoc_array_shortcut *shortcut; 171 int level; 172 int sc_level; 173 unsigned long sc_segments; 174 unsigned long dissimilarity; 175 } wrong_shortcut; 176 }; 177 178 /* 179 * Navigate through the internal tree looking for the closest node to the key. 180 */ 181 static enum assoc_array_walk_status 182 assoc_array_walk(const struct assoc_array *array, 183 const struct assoc_array_ops *ops, 184 const void *index_key, 185 struct assoc_array_walk_result *result) 186 { 187 struct assoc_array_shortcut *shortcut; 188 struct assoc_array_node *node; 189 struct assoc_array_ptr *cursor, *ptr; 190 unsigned long sc_segments, dissimilarity; 191 unsigned long segments; 192 int level, sc_level, next_sc_level; 193 int slot; 194 195 pr_devel("-->%s()\n", __func__); 196 197 cursor = READ_ONCE(array->root); 198 if (!cursor) 199 return assoc_array_walk_tree_empty; 200 201 level = 0; 202 203 /* Use segments from the key for the new leaf to navigate through the 204 * internal tree, skipping through nodes and shortcuts that are on 205 * route to the destination. Eventually we'll come to a slot that is 206 * either empty or contains a leaf at which point we've found a node in 207 * which the leaf we're looking for might be found or into which it 208 * should be inserted. 209 */ 210 jumped: 211 segments = ops->get_key_chunk(index_key, level); 212 pr_devel("segments[%d]: %lx\n", level, segments); 213 214 if (assoc_array_ptr_is_shortcut(cursor)) 215 goto follow_shortcut; 216 217 consider_node: 218 node = assoc_array_ptr_to_node(cursor); 219 smp_read_barrier_depends(); 220 221 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 222 slot &= ASSOC_ARRAY_FAN_MASK; 223 ptr = READ_ONCE(node->slots[slot]); 224 225 pr_devel("consider slot %x [ix=%d type=%lu]\n", 226 slot, level, (unsigned long)ptr & 3); 227 228 if (!assoc_array_ptr_is_meta(ptr)) { 229 /* The node doesn't have a node/shortcut pointer in the slot 230 * corresponding to the index key that we have to follow. 231 */ 232 result->terminal_node.node = node; 233 result->terminal_node.level = level; 234 result->terminal_node.slot = slot; 235 pr_devel("<--%s() = terminal_node\n", __func__); 236 return assoc_array_walk_found_terminal_node; 237 } 238 239 if (assoc_array_ptr_is_node(ptr)) { 240 /* There is a pointer to a node in the slot corresponding to 241 * this index key segment, so we need to follow it. 242 */ 243 cursor = ptr; 244 level += ASSOC_ARRAY_LEVEL_STEP; 245 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) 246 goto consider_node; 247 goto jumped; 248 } 249 250 /* There is a shortcut in the slot corresponding to the index key 251 * segment. We follow the shortcut if its partial index key matches 252 * this leaf's. Otherwise we need to split the shortcut. 253 */ 254 cursor = ptr; 255 follow_shortcut: 256 shortcut = assoc_array_ptr_to_shortcut(cursor); 257 smp_read_barrier_depends(); 258 pr_devel("shortcut to %d\n", shortcut->skip_to_level); 259 sc_level = level + ASSOC_ARRAY_LEVEL_STEP; 260 BUG_ON(sc_level > shortcut->skip_to_level); 261 262 do { 263 /* Check the leaf against the shortcut's index key a word at a 264 * time, trimming the final word (the shortcut stores the index 265 * key completely from the root to the shortcut's target). 266 */ 267 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) 268 segments = ops->get_key_chunk(index_key, sc_level); 269 270 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; 271 dissimilarity = segments ^ sc_segments; 272 273 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { 274 /* Trim segments that are beyond the shortcut */ 275 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; 276 dissimilarity &= ~(ULONG_MAX << shift); 277 next_sc_level = shortcut->skip_to_level; 278 } else { 279 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; 280 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 281 } 282 283 if (dissimilarity != 0) { 284 /* This shortcut points elsewhere */ 285 result->wrong_shortcut.shortcut = shortcut; 286 result->wrong_shortcut.level = level; 287 result->wrong_shortcut.sc_level = sc_level; 288 result->wrong_shortcut.sc_segments = sc_segments; 289 result->wrong_shortcut.dissimilarity = dissimilarity; 290 return assoc_array_walk_found_wrong_shortcut; 291 } 292 293 sc_level = next_sc_level; 294 } while (sc_level < shortcut->skip_to_level); 295 296 /* The shortcut matches the leaf's index to this point. */ 297 cursor = READ_ONCE(shortcut->next_node); 298 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { 299 level = sc_level; 300 goto jumped; 301 } else { 302 level = sc_level; 303 goto consider_node; 304 } 305 } 306 307 /** 308 * assoc_array_find - Find an object by index key 309 * @array: The associative array to search. 310 * @ops: The operations to use. 311 * @index_key: The key to the object. 312 * 313 * Find an object in an associative array by walking through the internal tree 314 * to the node that should contain the object and then searching the leaves 315 * there. NULL is returned if the requested object was not found in the array. 316 * 317 * The caller must hold the RCU read lock or better. 318 */ 319 void *assoc_array_find(const struct assoc_array *array, 320 const struct assoc_array_ops *ops, 321 const void *index_key) 322 { 323 struct assoc_array_walk_result result; 324 const struct assoc_array_node *node; 325 const struct assoc_array_ptr *ptr; 326 const void *leaf; 327 int slot; 328 329 if (assoc_array_walk(array, ops, index_key, &result) != 330 assoc_array_walk_found_terminal_node) 331 return NULL; 332 333 node = result.terminal_node.node; 334 smp_read_barrier_depends(); 335 336 /* If the target key is available to us, it's has to be pointed to by 337 * the terminal node. 338 */ 339 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 340 ptr = READ_ONCE(node->slots[slot]); 341 if (ptr && assoc_array_ptr_is_leaf(ptr)) { 342 /* We need a barrier between the read of the pointer 343 * and dereferencing the pointer - but only if we are 344 * actually going to dereference it. 345 */ 346 leaf = assoc_array_ptr_to_leaf(ptr); 347 smp_read_barrier_depends(); 348 if (ops->compare_object(leaf, index_key)) 349 return (void *)leaf; 350 } 351 } 352 353 return NULL; 354 } 355 356 /* 357 * Destructively iterate over an associative array. The caller must prevent 358 * other simultaneous accesses. 359 */ 360 static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, 361 const struct assoc_array_ops *ops) 362 { 363 struct assoc_array_shortcut *shortcut; 364 struct assoc_array_node *node; 365 struct assoc_array_ptr *cursor, *parent = NULL; 366 int slot = -1; 367 368 pr_devel("-->%s()\n", __func__); 369 370 cursor = root; 371 if (!cursor) { 372 pr_devel("empty\n"); 373 return; 374 } 375 376 move_to_meta: 377 if (assoc_array_ptr_is_shortcut(cursor)) { 378 /* Descend through a shortcut */ 379 pr_devel("[%d] shortcut\n", slot); 380 BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); 381 shortcut = assoc_array_ptr_to_shortcut(cursor); 382 BUG_ON(shortcut->back_pointer != parent); 383 BUG_ON(slot != -1 && shortcut->parent_slot != slot); 384 parent = cursor; 385 cursor = shortcut->next_node; 386 slot = -1; 387 BUG_ON(!assoc_array_ptr_is_node(cursor)); 388 } 389 390 pr_devel("[%d] node\n", slot); 391 node = assoc_array_ptr_to_node(cursor); 392 BUG_ON(node->back_pointer != parent); 393 BUG_ON(slot != -1 && node->parent_slot != slot); 394 slot = 0; 395 396 continue_node: 397 pr_devel("Node %p [back=%p]\n", node, node->back_pointer); 398 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 399 struct assoc_array_ptr *ptr = node->slots[slot]; 400 if (!ptr) 401 continue; 402 if (assoc_array_ptr_is_meta(ptr)) { 403 parent = cursor; 404 cursor = ptr; 405 goto move_to_meta; 406 } 407 408 if (ops) { 409 pr_devel("[%d] free leaf\n", slot); 410 ops->free_object(assoc_array_ptr_to_leaf(ptr)); 411 } 412 } 413 414 parent = node->back_pointer; 415 slot = node->parent_slot; 416 pr_devel("free node\n"); 417 kfree(node); 418 if (!parent) 419 return; /* Done */ 420 421 /* Move back up to the parent (may need to free a shortcut on 422 * the way up) */ 423 if (assoc_array_ptr_is_shortcut(parent)) { 424 shortcut = assoc_array_ptr_to_shortcut(parent); 425 BUG_ON(shortcut->next_node != cursor); 426 cursor = parent; 427 parent = shortcut->back_pointer; 428 slot = shortcut->parent_slot; 429 pr_devel("free shortcut\n"); 430 kfree(shortcut); 431 if (!parent) 432 return; 433 434 BUG_ON(!assoc_array_ptr_is_node(parent)); 435 } 436 437 /* Ascend to next slot in parent node */ 438 pr_devel("ascend to %p[%d]\n", parent, slot); 439 cursor = parent; 440 node = assoc_array_ptr_to_node(cursor); 441 slot++; 442 goto continue_node; 443 } 444 445 /** 446 * assoc_array_destroy - Destroy an associative array 447 * @array: The array to destroy. 448 * @ops: The operations to use. 449 * 450 * Discard all metadata and free all objects in an associative array. The 451 * array will be empty and ready to use again upon completion. This function 452 * cannot fail. 453 * 454 * The caller must prevent all other accesses whilst this takes place as no 455 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding 456 * accesses to continue. On the other hand, no memory allocation is required. 457 */ 458 void assoc_array_destroy(struct assoc_array *array, 459 const struct assoc_array_ops *ops) 460 { 461 assoc_array_destroy_subtree(array->root, ops); 462 array->root = NULL; 463 } 464 465 /* 466 * Handle insertion into an empty tree. 467 */ 468 static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) 469 { 470 struct assoc_array_node *new_n0; 471 472 pr_devel("-->%s()\n", __func__); 473 474 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 475 if (!new_n0) 476 return false; 477 478 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 479 edit->leaf_p = &new_n0->slots[0]; 480 edit->adjust_count_on = new_n0; 481 edit->set[0].ptr = &edit->array->root; 482 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 483 484 pr_devel("<--%s() = ok [no root]\n", __func__); 485 return true; 486 } 487 488 /* 489 * Handle insertion into a terminal node. 490 */ 491 static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, 492 const struct assoc_array_ops *ops, 493 const void *index_key, 494 struct assoc_array_walk_result *result) 495 { 496 struct assoc_array_shortcut *shortcut, *new_s0; 497 struct assoc_array_node *node, *new_n0, *new_n1, *side; 498 struct assoc_array_ptr *ptr; 499 unsigned long dissimilarity, base_seg, blank; 500 size_t keylen; 501 bool have_meta; 502 int level, diff; 503 int slot, next_slot, free_slot, i, j; 504 505 node = result->terminal_node.node; 506 level = result->terminal_node.level; 507 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; 508 509 pr_devel("-->%s()\n", __func__); 510 511 /* We arrived at a node which doesn't have an onward node or shortcut 512 * pointer that we have to follow. This means that (a) the leaf we 513 * want must go here (either by insertion or replacement) or (b) we 514 * need to split this node and insert in one of the fragments. 515 */ 516 free_slot = -1; 517 518 /* Firstly, we have to check the leaves in this node to see if there's 519 * a matching one we should replace in place. 520 */ 521 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 522 ptr = node->slots[i]; 523 if (!ptr) { 524 free_slot = i; 525 continue; 526 } 527 if (assoc_array_ptr_is_leaf(ptr) && 528 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 529 index_key)) { 530 pr_devel("replace in slot %d\n", i); 531 edit->leaf_p = &node->slots[i]; 532 edit->dead_leaf = node->slots[i]; 533 pr_devel("<--%s() = ok [replace]\n", __func__); 534 return true; 535 } 536 } 537 538 /* If there is a free slot in this node then we can just insert the 539 * leaf here. 540 */ 541 if (free_slot >= 0) { 542 pr_devel("insert in free slot %d\n", free_slot); 543 edit->leaf_p = &node->slots[free_slot]; 544 edit->adjust_count_on = node; 545 pr_devel("<--%s() = ok [insert]\n", __func__); 546 return true; 547 } 548 549 /* The node has no spare slots - so we're either going to have to split 550 * it or insert another node before it. 551 * 552 * Whatever, we're going to need at least two new nodes - so allocate 553 * those now. We may also need a new shortcut, but we deal with that 554 * when we need it. 555 */ 556 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 557 if (!new_n0) 558 return false; 559 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 560 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 561 if (!new_n1) 562 return false; 563 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); 564 565 /* We need to find out how similar the leaves are. */ 566 pr_devel("no spare slots\n"); 567 have_meta = false; 568 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 569 ptr = node->slots[i]; 570 if (assoc_array_ptr_is_meta(ptr)) { 571 edit->segment_cache[i] = 0xff; 572 have_meta = true; 573 continue; 574 } 575 base_seg = ops->get_object_key_chunk( 576 assoc_array_ptr_to_leaf(ptr), level); 577 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 578 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 579 } 580 581 if (have_meta) { 582 pr_devel("have meta\n"); 583 goto split_node; 584 } 585 586 /* The node contains only leaves */ 587 dissimilarity = 0; 588 base_seg = edit->segment_cache[0]; 589 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) 590 dissimilarity |= edit->segment_cache[i] ^ base_seg; 591 592 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); 593 594 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { 595 /* The old leaves all cluster in the same slot. We will need 596 * to insert a shortcut if the new node wants to cluster with them. 597 */ 598 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) 599 goto all_leaves_cluster_together; 600 601 /* Otherwise all the old leaves cluster in the same slot, but 602 * the new leaf wants to go into a different slot - so we 603 * create a new node (n0) to hold the new leaf and a pointer to 604 * a new node (n1) holding all the old leaves. 605 * 606 * This can be done by falling through to the node splitting 607 * path. 608 */ 609 pr_devel("present leaves cluster but not new leaf\n"); 610 } 611 612 split_node: 613 pr_devel("split node\n"); 614 615 /* We need to split the current node. The node must contain anything 616 * from a single leaf (in the one leaf case, this leaf will cluster 617 * with the new leaf) and the rest meta-pointers, to all leaves, some 618 * of which may cluster. 619 * 620 * It won't contain the case in which all the current leaves plus the 621 * new leaves want to cluster in the same slot. 622 * 623 * We need to expel at least two leaves out of a set consisting of the 624 * leaves in the node and the new leaf. The current meta pointers can 625 * just be copied as they shouldn't cluster with any of the leaves. 626 * 627 * We need a new node (n0) to replace the current one and a new node to 628 * take the expelled nodes (n1). 629 */ 630 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 631 new_n0->back_pointer = node->back_pointer; 632 new_n0->parent_slot = node->parent_slot; 633 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 634 new_n1->parent_slot = -1; /* Need to calculate this */ 635 636 do_split_node: 637 pr_devel("do_split_node\n"); 638 639 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 640 new_n1->nr_leaves_on_branch = 0; 641 642 /* Begin by finding two matching leaves. There have to be at least two 643 * that match - even if there are meta pointers - because any leaf that 644 * would match a slot with a meta pointer in it must be somewhere 645 * behind that meta pointer and cannot be here. Further, given N 646 * remaining leaf slots, we now have N+1 leaves to go in them. 647 */ 648 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 649 slot = edit->segment_cache[i]; 650 if (slot != 0xff) 651 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) 652 if (edit->segment_cache[j] == slot) 653 goto found_slot_for_multiple_occupancy; 654 } 655 found_slot_for_multiple_occupancy: 656 pr_devel("same slot: %x %x [%02x]\n", i, j, slot); 657 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); 658 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); 659 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); 660 661 new_n1->parent_slot = slot; 662 663 /* Metadata pointers cannot change slot */ 664 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) 665 if (assoc_array_ptr_is_meta(node->slots[i])) 666 new_n0->slots[i] = node->slots[i]; 667 else 668 new_n0->slots[i] = NULL; 669 BUG_ON(new_n0->slots[slot] != NULL); 670 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); 671 672 /* Filter the leaf pointers between the new nodes */ 673 free_slot = -1; 674 next_slot = 0; 675 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 676 if (assoc_array_ptr_is_meta(node->slots[i])) 677 continue; 678 if (edit->segment_cache[i] == slot) { 679 new_n1->slots[next_slot++] = node->slots[i]; 680 new_n1->nr_leaves_on_branch++; 681 } else { 682 do { 683 free_slot++; 684 } while (new_n0->slots[free_slot] != NULL); 685 new_n0->slots[free_slot] = node->slots[i]; 686 } 687 } 688 689 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); 690 691 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { 692 do { 693 free_slot++; 694 } while (new_n0->slots[free_slot] != NULL); 695 edit->leaf_p = &new_n0->slots[free_slot]; 696 edit->adjust_count_on = new_n0; 697 } else { 698 edit->leaf_p = &new_n1->slots[next_slot++]; 699 edit->adjust_count_on = new_n1; 700 } 701 702 BUG_ON(next_slot <= 1); 703 704 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); 705 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 706 if (edit->segment_cache[i] == 0xff) { 707 ptr = node->slots[i]; 708 BUG_ON(assoc_array_ptr_is_leaf(ptr)); 709 if (assoc_array_ptr_is_node(ptr)) { 710 side = assoc_array_ptr_to_node(ptr); 711 edit->set_backpointers[i] = &side->back_pointer; 712 } else { 713 shortcut = assoc_array_ptr_to_shortcut(ptr); 714 edit->set_backpointers[i] = &shortcut->back_pointer; 715 } 716 } 717 } 718 719 ptr = node->back_pointer; 720 if (!ptr) 721 edit->set[0].ptr = &edit->array->root; 722 else if (assoc_array_ptr_is_node(ptr)) 723 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; 724 else 725 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; 726 edit->excised_meta[0] = assoc_array_node_to_ptr(node); 727 pr_devel("<--%s() = ok [split node]\n", __func__); 728 return true; 729 730 all_leaves_cluster_together: 731 /* All the leaves, new and old, want to cluster together in this node 732 * in the same slot, so we have to replace this node with a shortcut to 733 * skip over the identical parts of the key and then place a pair of 734 * nodes, one inside the other, at the end of the shortcut and 735 * distribute the keys between them. 736 * 737 * Firstly we need to work out where the leaves start diverging as a 738 * bit position into their keys so that we know how big the shortcut 739 * needs to be. 740 * 741 * We only need to make a single pass of N of the N+1 leaves because if 742 * any keys differ between themselves at bit X then at least one of 743 * them must also differ with the base key at bit X or before. 744 */ 745 pr_devel("all leaves cluster together\n"); 746 diff = INT_MAX; 747 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 748 int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), 749 index_key); 750 if (x < diff) { 751 BUG_ON(x < 0); 752 diff = x; 753 } 754 } 755 BUG_ON(diff == INT_MAX); 756 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); 757 758 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 759 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 760 761 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 762 keylen * sizeof(unsigned long), GFP_KERNEL); 763 if (!new_s0) 764 return false; 765 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); 766 767 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 768 new_s0->back_pointer = node->back_pointer; 769 new_s0->parent_slot = node->parent_slot; 770 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 771 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 772 new_n0->parent_slot = 0; 773 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); 774 new_n1->parent_slot = -1; /* Need to calculate this */ 775 776 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; 777 pr_devel("skip_to_level = %d [diff %d]\n", level, diff); 778 BUG_ON(level <= 0); 779 780 for (i = 0; i < keylen; i++) 781 new_s0->index_key[i] = 782 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); 783 784 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); 785 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); 786 new_s0->index_key[keylen - 1] &= ~blank; 787 788 /* This now reduces to a node splitting exercise for which we'll need 789 * to regenerate the disparity table. 790 */ 791 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 792 ptr = node->slots[i]; 793 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), 794 level); 795 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 796 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; 797 } 798 799 base_seg = ops->get_key_chunk(index_key, level); 800 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; 801 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; 802 goto do_split_node; 803 } 804 805 /* 806 * Handle insertion into the middle of a shortcut. 807 */ 808 static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, 809 const struct assoc_array_ops *ops, 810 struct assoc_array_walk_result *result) 811 { 812 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; 813 struct assoc_array_node *node, *new_n0, *side; 814 unsigned long sc_segments, dissimilarity, blank; 815 size_t keylen; 816 int level, sc_level, diff; 817 int sc_slot; 818 819 shortcut = result->wrong_shortcut.shortcut; 820 level = result->wrong_shortcut.level; 821 sc_level = result->wrong_shortcut.sc_level; 822 sc_segments = result->wrong_shortcut.sc_segments; 823 dissimilarity = result->wrong_shortcut.dissimilarity; 824 825 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", 826 __func__, level, dissimilarity, sc_level); 827 828 /* We need to split a shortcut and insert a node between the two 829 * pieces. Zero-length pieces will be dispensed with entirely. 830 * 831 * First of all, we need to find out in which level the first 832 * difference was. 833 */ 834 diff = __ffs(dissimilarity); 835 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; 836 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; 837 pr_devel("diff=%d\n", diff); 838 839 if (!shortcut->back_pointer) { 840 edit->set[0].ptr = &edit->array->root; 841 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { 842 node = assoc_array_ptr_to_node(shortcut->back_pointer); 843 edit->set[0].ptr = &node->slots[shortcut->parent_slot]; 844 } else { 845 BUG(); 846 } 847 848 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); 849 850 /* Create a new node now since we're going to need it anyway */ 851 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 852 if (!new_n0) 853 return false; 854 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 855 edit->adjust_count_on = new_n0; 856 857 /* Insert a new shortcut before the new node if this segment isn't of 858 * zero length - otherwise we just connect the new node directly to the 859 * parent. 860 */ 861 level += ASSOC_ARRAY_LEVEL_STEP; 862 if (diff > level) { 863 pr_devel("pre-shortcut %d...%d\n", level, diff); 864 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); 865 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 866 867 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + 868 keylen * sizeof(unsigned long), GFP_KERNEL); 869 if (!new_s0) 870 return false; 871 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); 872 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); 873 new_s0->back_pointer = shortcut->back_pointer; 874 new_s0->parent_slot = shortcut->parent_slot; 875 new_s0->next_node = assoc_array_node_to_ptr(new_n0); 876 new_s0->skip_to_level = diff; 877 878 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); 879 new_n0->parent_slot = 0; 880 881 memcpy(new_s0->index_key, shortcut->index_key, 882 keylen * sizeof(unsigned long)); 883 884 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 885 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); 886 new_s0->index_key[keylen - 1] &= ~blank; 887 } else { 888 pr_devel("no pre-shortcut\n"); 889 edit->set[0].to = assoc_array_node_to_ptr(new_n0); 890 new_n0->back_pointer = shortcut->back_pointer; 891 new_n0->parent_slot = shortcut->parent_slot; 892 } 893 894 side = assoc_array_ptr_to_node(shortcut->next_node); 895 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; 896 897 /* We need to know which slot in the new node is going to take a 898 * metadata pointer. 899 */ 900 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); 901 sc_slot &= ASSOC_ARRAY_FAN_MASK; 902 903 pr_devel("new slot %lx >> %d -> %d\n", 904 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); 905 906 /* Determine whether we need to follow the new node with a replacement 907 * for the current shortcut. We could in theory reuse the current 908 * shortcut if its parent slot number doesn't change - but that's a 909 * 1-in-16 chance so not worth expending the code upon. 910 */ 911 level = diff + ASSOC_ARRAY_LEVEL_STEP; 912 if (level < shortcut->skip_to_level) { 913 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); 914 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 915 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 916 917 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + 918 keylen * sizeof(unsigned long), GFP_KERNEL); 919 if (!new_s1) 920 return false; 921 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); 922 923 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); 924 new_s1->parent_slot = sc_slot; 925 new_s1->next_node = shortcut->next_node; 926 new_s1->skip_to_level = shortcut->skip_to_level; 927 928 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); 929 930 memcpy(new_s1->index_key, shortcut->index_key, 931 keylen * sizeof(unsigned long)); 932 933 edit->set[1].ptr = &side->back_pointer; 934 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); 935 } else { 936 pr_devel("no post-shortcut\n"); 937 938 /* We don't have to replace the pointed-to node as long as we 939 * use memory barriers to make sure the parent slot number is 940 * changed before the back pointer (the parent slot number is 941 * irrelevant to the old parent shortcut). 942 */ 943 new_n0->slots[sc_slot] = shortcut->next_node; 944 edit->set_parent_slot[0].p = &side->parent_slot; 945 edit->set_parent_slot[0].to = sc_slot; 946 edit->set[1].ptr = &side->back_pointer; 947 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 948 } 949 950 /* Install the new leaf in a spare slot in the new node. */ 951 if (sc_slot == 0) 952 edit->leaf_p = &new_n0->slots[1]; 953 else 954 edit->leaf_p = &new_n0->slots[0]; 955 956 pr_devel("<--%s() = ok [split shortcut]\n", __func__); 957 return edit; 958 } 959 960 /** 961 * assoc_array_insert - Script insertion of an object into an associative array 962 * @array: The array to insert into. 963 * @ops: The operations to use. 964 * @index_key: The key to insert at. 965 * @object: The object to insert. 966 * 967 * Precalculate and preallocate a script for the insertion or replacement of an 968 * object in an associative array. This results in an edit script that can 969 * either be applied or cancelled. 970 * 971 * The function returns a pointer to an edit script or -ENOMEM. 972 * 973 * The caller should lock against other modifications and must continue to hold 974 * the lock until assoc_array_apply_edit() has been called. 975 * 976 * Accesses to the tree may take place concurrently with this function, 977 * provided they hold the RCU read lock. 978 */ 979 struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, 980 const struct assoc_array_ops *ops, 981 const void *index_key, 982 void *object) 983 { 984 struct assoc_array_walk_result result; 985 struct assoc_array_edit *edit; 986 987 pr_devel("-->%s()\n", __func__); 988 989 /* The leaf pointer we're given must not have the bottom bit set as we 990 * use those for type-marking the pointer. NULL pointers are also not 991 * allowed as they indicate an empty slot but we have to allow them 992 * here as they can be updated later. 993 */ 994 BUG_ON(assoc_array_ptr_is_meta(object)); 995 996 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 997 if (!edit) 998 return ERR_PTR(-ENOMEM); 999 edit->array = array; 1000 edit->ops = ops; 1001 edit->leaf = assoc_array_leaf_to_ptr(object); 1002 edit->adjust_count_by = 1; 1003 1004 switch (assoc_array_walk(array, ops, index_key, &result)) { 1005 case assoc_array_walk_tree_empty: 1006 /* Allocate a root node if there isn't one yet */ 1007 if (!assoc_array_insert_in_empty_tree(edit)) 1008 goto enomem; 1009 return edit; 1010 1011 case assoc_array_walk_found_terminal_node: 1012 /* We found a node that doesn't have a node/shortcut pointer in 1013 * the slot corresponding to the index key that we have to 1014 * follow. 1015 */ 1016 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, 1017 &result)) 1018 goto enomem; 1019 return edit; 1020 1021 case assoc_array_walk_found_wrong_shortcut: 1022 /* We found a shortcut that didn't match our key in a slot we 1023 * needed to follow. 1024 */ 1025 if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) 1026 goto enomem; 1027 return edit; 1028 } 1029 1030 enomem: 1031 /* Clean up after an out of memory error */ 1032 pr_devel("enomem\n"); 1033 assoc_array_cancel_edit(edit); 1034 return ERR_PTR(-ENOMEM); 1035 } 1036 1037 /** 1038 * assoc_array_insert_set_object - Set the new object pointer in an edit script 1039 * @edit: The edit script to modify. 1040 * @object: The object pointer to set. 1041 * 1042 * Change the object to be inserted in an edit script. The object pointed to 1043 * by the old object is not freed. This must be done prior to applying the 1044 * script. 1045 */ 1046 void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) 1047 { 1048 BUG_ON(!object); 1049 edit->leaf = assoc_array_leaf_to_ptr(object); 1050 } 1051 1052 struct assoc_array_delete_collapse_context { 1053 struct assoc_array_node *node; 1054 const void *skip_leaf; 1055 int slot; 1056 }; 1057 1058 /* 1059 * Subtree collapse to node iterator. 1060 */ 1061 static int assoc_array_delete_collapse_iterator(const void *leaf, 1062 void *iterator_data) 1063 { 1064 struct assoc_array_delete_collapse_context *collapse = iterator_data; 1065 1066 if (leaf == collapse->skip_leaf) 1067 return 0; 1068 1069 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); 1070 1071 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); 1072 return 0; 1073 } 1074 1075 /** 1076 * assoc_array_delete - Script deletion of an object from an associative array 1077 * @array: The array to search. 1078 * @ops: The operations to use. 1079 * @index_key: The key to the object. 1080 * 1081 * Precalculate and preallocate a script for the deletion of an object from an 1082 * associative array. This results in an edit script that can either be 1083 * applied or cancelled. 1084 * 1085 * The function returns a pointer to an edit script if the object was found, 1086 * NULL if the object was not found or -ENOMEM. 1087 * 1088 * The caller should lock against other modifications and must continue to hold 1089 * the lock until assoc_array_apply_edit() has been called. 1090 * 1091 * Accesses to the tree may take place concurrently with this function, 1092 * provided they hold the RCU read lock. 1093 */ 1094 struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, 1095 const struct assoc_array_ops *ops, 1096 const void *index_key) 1097 { 1098 struct assoc_array_delete_collapse_context collapse; 1099 struct assoc_array_walk_result result; 1100 struct assoc_array_node *node, *new_n0; 1101 struct assoc_array_edit *edit; 1102 struct assoc_array_ptr *ptr; 1103 bool has_meta; 1104 int slot, i; 1105 1106 pr_devel("-->%s()\n", __func__); 1107 1108 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1109 if (!edit) 1110 return ERR_PTR(-ENOMEM); 1111 edit->array = array; 1112 edit->ops = ops; 1113 edit->adjust_count_by = -1; 1114 1115 switch (assoc_array_walk(array, ops, index_key, &result)) { 1116 case assoc_array_walk_found_terminal_node: 1117 /* We found a node that should contain the leaf we've been 1118 * asked to remove - *if* it's in the tree. 1119 */ 1120 pr_devel("terminal_node\n"); 1121 node = result.terminal_node.node; 1122 1123 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1124 ptr = node->slots[slot]; 1125 if (ptr && 1126 assoc_array_ptr_is_leaf(ptr) && 1127 ops->compare_object(assoc_array_ptr_to_leaf(ptr), 1128 index_key)) 1129 goto found_leaf; 1130 } 1131 case assoc_array_walk_tree_empty: 1132 case assoc_array_walk_found_wrong_shortcut: 1133 default: 1134 assoc_array_cancel_edit(edit); 1135 pr_devel("not found\n"); 1136 return NULL; 1137 } 1138 1139 found_leaf: 1140 BUG_ON(array->nr_leaves_on_tree <= 0); 1141 1142 /* In the simplest form of deletion we just clear the slot and release 1143 * the leaf after a suitable interval. 1144 */ 1145 edit->dead_leaf = node->slots[slot]; 1146 edit->set[0].ptr = &node->slots[slot]; 1147 edit->set[0].to = NULL; 1148 edit->adjust_count_on = node; 1149 1150 /* If that concludes erasure of the last leaf, then delete the entire 1151 * internal array. 1152 */ 1153 if (array->nr_leaves_on_tree == 1) { 1154 edit->set[1].ptr = &array->root; 1155 edit->set[1].to = NULL; 1156 edit->adjust_count_on = NULL; 1157 edit->excised_subtree = array->root; 1158 pr_devel("all gone\n"); 1159 return edit; 1160 } 1161 1162 /* However, we'd also like to clear up some metadata blocks if we 1163 * possibly can. 1164 * 1165 * We go for a simple algorithm of: if this node has FAN_OUT or fewer 1166 * leaves in it, then attempt to collapse it - and attempt to 1167 * recursively collapse up the tree. 1168 * 1169 * We could also try and collapse in partially filled subtrees to take 1170 * up space in this node. 1171 */ 1172 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1173 struct assoc_array_node *parent, *grandparent; 1174 struct assoc_array_ptr *ptr; 1175 1176 /* First of all, we need to know if this node has metadata so 1177 * that we don't try collapsing if all the leaves are already 1178 * here. 1179 */ 1180 has_meta = false; 1181 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1182 ptr = node->slots[i]; 1183 if (assoc_array_ptr_is_meta(ptr)) { 1184 has_meta = true; 1185 break; 1186 } 1187 } 1188 1189 pr_devel("leaves: %ld [m=%d]\n", 1190 node->nr_leaves_on_branch - 1, has_meta); 1191 1192 /* Look further up the tree to see if we can collapse this node 1193 * into a more proximal node too. 1194 */ 1195 parent = node; 1196 collapse_up: 1197 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); 1198 1199 ptr = parent->back_pointer; 1200 if (!ptr) 1201 goto do_collapse; 1202 if (assoc_array_ptr_is_shortcut(ptr)) { 1203 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); 1204 ptr = s->back_pointer; 1205 if (!ptr) 1206 goto do_collapse; 1207 } 1208 1209 grandparent = assoc_array_ptr_to_node(ptr); 1210 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { 1211 parent = grandparent; 1212 goto collapse_up; 1213 } 1214 1215 do_collapse: 1216 /* There's no point collapsing if the original node has no meta 1217 * pointers to discard and if we didn't merge into one of that 1218 * node's ancestry. 1219 */ 1220 if (has_meta || parent != node) { 1221 node = parent; 1222 1223 /* Create a new node to collapse into */ 1224 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1225 if (!new_n0) 1226 goto enomem; 1227 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); 1228 1229 new_n0->back_pointer = node->back_pointer; 1230 new_n0->parent_slot = node->parent_slot; 1231 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; 1232 edit->adjust_count_on = new_n0; 1233 1234 collapse.node = new_n0; 1235 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); 1236 collapse.slot = 0; 1237 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), 1238 node->back_pointer, 1239 assoc_array_delete_collapse_iterator, 1240 &collapse); 1241 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); 1242 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); 1243 1244 if (!node->back_pointer) { 1245 edit->set[1].ptr = &array->root; 1246 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { 1247 BUG(); 1248 } else if (assoc_array_ptr_is_node(node->back_pointer)) { 1249 struct assoc_array_node *p = 1250 assoc_array_ptr_to_node(node->back_pointer); 1251 edit->set[1].ptr = &p->slots[node->parent_slot]; 1252 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { 1253 struct assoc_array_shortcut *s = 1254 assoc_array_ptr_to_shortcut(node->back_pointer); 1255 edit->set[1].ptr = &s->next_node; 1256 } 1257 edit->set[1].to = assoc_array_node_to_ptr(new_n0); 1258 edit->excised_subtree = assoc_array_node_to_ptr(node); 1259 } 1260 } 1261 1262 return edit; 1263 1264 enomem: 1265 /* Clean up after an out of memory error */ 1266 pr_devel("enomem\n"); 1267 assoc_array_cancel_edit(edit); 1268 return ERR_PTR(-ENOMEM); 1269 } 1270 1271 /** 1272 * assoc_array_clear - Script deletion of all objects from an associative array 1273 * @array: The array to clear. 1274 * @ops: The operations to use. 1275 * 1276 * Precalculate and preallocate a script for the deletion of all the objects 1277 * from an associative array. This results in an edit script that can either 1278 * be applied or cancelled. 1279 * 1280 * The function returns a pointer to an edit script if there are objects to be 1281 * deleted, NULL if there are no objects in the array or -ENOMEM. 1282 * 1283 * The caller should lock against other modifications and must continue to hold 1284 * the lock until assoc_array_apply_edit() has been called. 1285 * 1286 * Accesses to the tree may take place concurrently with this function, 1287 * provided they hold the RCU read lock. 1288 */ 1289 struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, 1290 const struct assoc_array_ops *ops) 1291 { 1292 struct assoc_array_edit *edit; 1293 1294 pr_devel("-->%s()\n", __func__); 1295 1296 if (!array->root) 1297 return NULL; 1298 1299 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1300 if (!edit) 1301 return ERR_PTR(-ENOMEM); 1302 edit->array = array; 1303 edit->ops = ops; 1304 edit->set[1].ptr = &array->root; 1305 edit->set[1].to = NULL; 1306 edit->excised_subtree = array->root; 1307 edit->ops_for_excised_subtree = ops; 1308 pr_devel("all gone\n"); 1309 return edit; 1310 } 1311 1312 /* 1313 * Handle the deferred destruction after an applied edit. 1314 */ 1315 static void assoc_array_rcu_cleanup(struct rcu_head *head) 1316 { 1317 struct assoc_array_edit *edit = 1318 container_of(head, struct assoc_array_edit, rcu); 1319 int i; 1320 1321 pr_devel("-->%s()\n", __func__); 1322 1323 if (edit->dead_leaf) 1324 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); 1325 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) 1326 if (edit->excised_meta[i]) 1327 kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); 1328 1329 if (edit->excised_subtree) { 1330 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); 1331 if (assoc_array_ptr_is_node(edit->excised_subtree)) { 1332 struct assoc_array_node *n = 1333 assoc_array_ptr_to_node(edit->excised_subtree); 1334 n->back_pointer = NULL; 1335 } else { 1336 struct assoc_array_shortcut *s = 1337 assoc_array_ptr_to_shortcut(edit->excised_subtree); 1338 s->back_pointer = NULL; 1339 } 1340 assoc_array_destroy_subtree(edit->excised_subtree, 1341 edit->ops_for_excised_subtree); 1342 } 1343 1344 kfree(edit); 1345 } 1346 1347 /** 1348 * assoc_array_apply_edit - Apply an edit script to an associative array 1349 * @edit: The script to apply. 1350 * 1351 * Apply an edit script to an associative array to effect an insertion, 1352 * deletion or clearance. As the edit script includes preallocated memory, 1353 * this is guaranteed not to fail. 1354 * 1355 * The edit script, dead objects and dead metadata will be scheduled for 1356 * destruction after an RCU grace period to permit those doing read-only 1357 * accesses on the array to continue to do so under the RCU read lock whilst 1358 * the edit is taking place. 1359 */ 1360 void assoc_array_apply_edit(struct assoc_array_edit *edit) 1361 { 1362 struct assoc_array_shortcut *shortcut; 1363 struct assoc_array_node *node; 1364 struct assoc_array_ptr *ptr; 1365 int i; 1366 1367 pr_devel("-->%s()\n", __func__); 1368 1369 smp_wmb(); 1370 if (edit->leaf_p) 1371 *edit->leaf_p = edit->leaf; 1372 1373 smp_wmb(); 1374 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) 1375 if (edit->set_parent_slot[i].p) 1376 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; 1377 1378 smp_wmb(); 1379 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) 1380 if (edit->set_backpointers[i]) 1381 *edit->set_backpointers[i] = edit->set_backpointers_to; 1382 1383 smp_wmb(); 1384 for (i = 0; i < ARRAY_SIZE(edit->set); i++) 1385 if (edit->set[i].ptr) 1386 *edit->set[i].ptr = edit->set[i].to; 1387 1388 if (edit->array->root == NULL) { 1389 edit->array->nr_leaves_on_tree = 0; 1390 } else if (edit->adjust_count_on) { 1391 node = edit->adjust_count_on; 1392 for (;;) { 1393 node->nr_leaves_on_branch += edit->adjust_count_by; 1394 1395 ptr = node->back_pointer; 1396 if (!ptr) 1397 break; 1398 if (assoc_array_ptr_is_shortcut(ptr)) { 1399 shortcut = assoc_array_ptr_to_shortcut(ptr); 1400 ptr = shortcut->back_pointer; 1401 if (!ptr) 1402 break; 1403 } 1404 BUG_ON(!assoc_array_ptr_is_node(ptr)); 1405 node = assoc_array_ptr_to_node(ptr); 1406 } 1407 1408 edit->array->nr_leaves_on_tree += edit->adjust_count_by; 1409 } 1410 1411 call_rcu(&edit->rcu, assoc_array_rcu_cleanup); 1412 } 1413 1414 /** 1415 * assoc_array_cancel_edit - Discard an edit script. 1416 * @edit: The script to discard. 1417 * 1418 * Free an edit script and all the preallocated data it holds without making 1419 * any changes to the associative array it was intended for. 1420 * 1421 * NOTE! In the case of an insertion script, this does _not_ release the leaf 1422 * that was to be inserted. That is left to the caller. 1423 */ 1424 void assoc_array_cancel_edit(struct assoc_array_edit *edit) 1425 { 1426 struct assoc_array_ptr *ptr; 1427 int i; 1428 1429 pr_devel("-->%s()\n", __func__); 1430 1431 /* Clean up after an out of memory error */ 1432 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { 1433 ptr = edit->new_meta[i]; 1434 if (ptr) { 1435 if (assoc_array_ptr_is_node(ptr)) 1436 kfree(assoc_array_ptr_to_node(ptr)); 1437 else 1438 kfree(assoc_array_ptr_to_shortcut(ptr)); 1439 } 1440 } 1441 kfree(edit); 1442 } 1443 1444 /** 1445 * assoc_array_gc - Garbage collect an associative array. 1446 * @array: The array to clean. 1447 * @ops: The operations to use. 1448 * @iterator: A callback function to pass judgement on each object. 1449 * @iterator_data: Private data for the callback function. 1450 * 1451 * Collect garbage from an associative array and pack down the internal tree to 1452 * save memory. 1453 * 1454 * The iterator function is asked to pass judgement upon each object in the 1455 * array. If it returns false, the object is discard and if it returns true, 1456 * the object is kept. If it returns true, it must increment the object's 1457 * usage count (or whatever it needs to do to retain it) before returning. 1458 * 1459 * This function returns 0 if successful or -ENOMEM if out of memory. In the 1460 * latter case, the array is not changed. 1461 * 1462 * The caller should lock against other modifications and must continue to hold 1463 * the lock until assoc_array_apply_edit() has been called. 1464 * 1465 * Accesses to the tree may take place concurrently with this function, 1466 * provided they hold the RCU read lock. 1467 */ 1468 int assoc_array_gc(struct assoc_array *array, 1469 const struct assoc_array_ops *ops, 1470 bool (*iterator)(void *object, void *iterator_data), 1471 void *iterator_data) 1472 { 1473 struct assoc_array_shortcut *shortcut, *new_s; 1474 struct assoc_array_node *node, *new_n; 1475 struct assoc_array_edit *edit; 1476 struct assoc_array_ptr *cursor, *ptr; 1477 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; 1478 unsigned long nr_leaves_on_tree; 1479 int keylen, slot, nr_free, next_slot, i; 1480 1481 pr_devel("-->%s()\n", __func__); 1482 1483 if (!array->root) 1484 return 0; 1485 1486 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); 1487 if (!edit) 1488 return -ENOMEM; 1489 edit->array = array; 1490 edit->ops = ops; 1491 edit->ops_for_excised_subtree = ops; 1492 edit->set[0].ptr = &array->root; 1493 edit->excised_subtree = array->root; 1494 1495 new_root = new_parent = NULL; 1496 new_ptr_pp = &new_root; 1497 cursor = array->root; 1498 1499 descend: 1500 /* If this point is a shortcut, then we need to duplicate it and 1501 * advance the target cursor. 1502 */ 1503 if (assoc_array_ptr_is_shortcut(cursor)) { 1504 shortcut = assoc_array_ptr_to_shortcut(cursor); 1505 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); 1506 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; 1507 new_s = kmalloc(sizeof(struct assoc_array_shortcut) + 1508 keylen * sizeof(unsigned long), GFP_KERNEL); 1509 if (!new_s) 1510 goto enomem; 1511 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); 1512 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + 1513 keylen * sizeof(unsigned long))); 1514 new_s->back_pointer = new_parent; 1515 new_s->parent_slot = shortcut->parent_slot; 1516 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); 1517 new_ptr_pp = &new_s->next_node; 1518 cursor = shortcut->next_node; 1519 } 1520 1521 /* Duplicate the node at this position */ 1522 node = assoc_array_ptr_to_node(cursor); 1523 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); 1524 if (!new_n) 1525 goto enomem; 1526 pr_devel("dup node %p -> %p\n", node, new_n); 1527 new_n->back_pointer = new_parent; 1528 new_n->parent_slot = node->parent_slot; 1529 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); 1530 new_ptr_pp = NULL; 1531 slot = 0; 1532 1533 continue_node: 1534 /* Filter across any leaves and gc any subtrees */ 1535 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1536 ptr = node->slots[slot]; 1537 if (!ptr) 1538 continue; 1539 1540 if (assoc_array_ptr_is_leaf(ptr)) { 1541 if (iterator(assoc_array_ptr_to_leaf(ptr), 1542 iterator_data)) 1543 /* The iterator will have done any reference 1544 * counting on the object for us. 1545 */ 1546 new_n->slots[slot] = ptr; 1547 continue; 1548 } 1549 1550 new_ptr_pp = &new_n->slots[slot]; 1551 cursor = ptr; 1552 goto descend; 1553 } 1554 1555 pr_devel("-- compress node %p --\n", new_n); 1556 1557 /* Count up the number of empty slots in this node and work out the 1558 * subtree leaf count. 1559 */ 1560 new_n->nr_leaves_on_branch = 0; 1561 nr_free = 0; 1562 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1563 ptr = new_n->slots[slot]; 1564 if (!ptr) 1565 nr_free++; 1566 else if (assoc_array_ptr_is_leaf(ptr)) 1567 new_n->nr_leaves_on_branch++; 1568 } 1569 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); 1570 1571 /* See what we can fold in */ 1572 next_slot = 0; 1573 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { 1574 struct assoc_array_shortcut *s; 1575 struct assoc_array_node *child; 1576 1577 ptr = new_n->slots[slot]; 1578 if (!ptr || assoc_array_ptr_is_leaf(ptr)) 1579 continue; 1580 1581 s = NULL; 1582 if (assoc_array_ptr_is_shortcut(ptr)) { 1583 s = assoc_array_ptr_to_shortcut(ptr); 1584 ptr = s->next_node; 1585 } 1586 1587 child = assoc_array_ptr_to_node(ptr); 1588 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; 1589 1590 if (child->nr_leaves_on_branch <= nr_free + 1) { 1591 /* Fold the child node into this one */ 1592 pr_devel("[%d] fold node %lu/%d [nx %d]\n", 1593 slot, child->nr_leaves_on_branch, nr_free + 1, 1594 next_slot); 1595 1596 /* We would already have reaped an intervening shortcut 1597 * on the way back up the tree. 1598 */ 1599 BUG_ON(s); 1600 1601 new_n->slots[slot] = NULL; 1602 nr_free++; 1603 if (slot < next_slot) 1604 next_slot = slot; 1605 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { 1606 struct assoc_array_ptr *p = child->slots[i]; 1607 if (!p) 1608 continue; 1609 BUG_ON(assoc_array_ptr_is_meta(p)); 1610 while (new_n->slots[next_slot]) 1611 next_slot++; 1612 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); 1613 new_n->slots[next_slot++] = p; 1614 nr_free--; 1615 } 1616 kfree(child); 1617 } else { 1618 pr_devel("[%d] retain node %lu/%d [nx %d]\n", 1619 slot, child->nr_leaves_on_branch, nr_free + 1, 1620 next_slot); 1621 } 1622 } 1623 1624 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); 1625 1626 nr_leaves_on_tree = new_n->nr_leaves_on_branch; 1627 1628 /* Excise this node if it is singly occupied by a shortcut */ 1629 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { 1630 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) 1631 if ((ptr = new_n->slots[slot])) 1632 break; 1633 1634 if (assoc_array_ptr_is_meta(ptr) && 1635 assoc_array_ptr_is_shortcut(ptr)) { 1636 pr_devel("excise node %p with 1 shortcut\n", new_n); 1637 new_s = assoc_array_ptr_to_shortcut(ptr); 1638 new_parent = new_n->back_pointer; 1639 slot = new_n->parent_slot; 1640 kfree(new_n); 1641 if (!new_parent) { 1642 new_s->back_pointer = NULL; 1643 new_s->parent_slot = 0; 1644 new_root = ptr; 1645 goto gc_complete; 1646 } 1647 1648 if (assoc_array_ptr_is_shortcut(new_parent)) { 1649 /* We can discard any preceding shortcut also */ 1650 struct assoc_array_shortcut *s = 1651 assoc_array_ptr_to_shortcut(new_parent); 1652 1653 pr_devel("excise preceding shortcut\n"); 1654 1655 new_parent = new_s->back_pointer = s->back_pointer; 1656 slot = new_s->parent_slot = s->parent_slot; 1657 kfree(s); 1658 if (!new_parent) { 1659 new_s->back_pointer = NULL; 1660 new_s->parent_slot = 0; 1661 new_root = ptr; 1662 goto gc_complete; 1663 } 1664 } 1665 1666 new_s->back_pointer = new_parent; 1667 new_s->parent_slot = slot; 1668 new_n = assoc_array_ptr_to_node(new_parent); 1669 new_n->slots[slot] = ptr; 1670 goto ascend_old_tree; 1671 } 1672 } 1673 1674 /* Excise any shortcuts we might encounter that point to nodes that 1675 * only contain leaves. 1676 */ 1677 ptr = new_n->back_pointer; 1678 if (!ptr) 1679 goto gc_complete; 1680 1681 if (assoc_array_ptr_is_shortcut(ptr)) { 1682 new_s = assoc_array_ptr_to_shortcut(ptr); 1683 new_parent = new_s->back_pointer; 1684 slot = new_s->parent_slot; 1685 1686 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { 1687 struct assoc_array_node *n; 1688 1689 pr_devel("excise shortcut\n"); 1690 new_n->back_pointer = new_parent; 1691 new_n->parent_slot = slot; 1692 kfree(new_s); 1693 if (!new_parent) { 1694 new_root = assoc_array_node_to_ptr(new_n); 1695 goto gc_complete; 1696 } 1697 1698 n = assoc_array_ptr_to_node(new_parent); 1699 n->slots[slot] = assoc_array_node_to_ptr(new_n); 1700 } 1701 } else { 1702 new_parent = ptr; 1703 } 1704 new_n = assoc_array_ptr_to_node(new_parent); 1705 1706 ascend_old_tree: 1707 ptr = node->back_pointer; 1708 if (assoc_array_ptr_is_shortcut(ptr)) { 1709 shortcut = assoc_array_ptr_to_shortcut(ptr); 1710 slot = shortcut->parent_slot; 1711 cursor = shortcut->back_pointer; 1712 if (!cursor) 1713 goto gc_complete; 1714 } else { 1715 slot = node->parent_slot; 1716 cursor = ptr; 1717 } 1718 BUG_ON(!cursor); 1719 node = assoc_array_ptr_to_node(cursor); 1720 slot++; 1721 goto continue_node; 1722 1723 gc_complete: 1724 edit->set[0].to = new_root; 1725 assoc_array_apply_edit(edit); 1726 array->nr_leaves_on_tree = nr_leaves_on_tree; 1727 return 0; 1728 1729 enomem: 1730 pr_devel("enomem\n"); 1731 assoc_array_destroy_subtree(new_root, edit->ops); 1732 kfree(edit); 1733 return -ENOMEM; 1734 } 1735