1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/systm.h> 27 #include <sys/cmn_err.h> 28 #include <sys/kmem.h> 29 #include <sys/disp.h> 30 #include <sys/id_space.h> 31 #include <sys/atomic.h> 32 #include <rpc/rpc.h> 33 #include <nfs/nfs4.h> 34 #include <nfs/nfs4_db_impl.h> 35 36 static int rfs4_reap_interval = RFS4_REAP_INTERVAL; 37 38 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t); 39 static void rfs4_dbe_destroy(rfs4_dbe_t *); 40 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t); 41 static void rfs4_start_reaper(rfs4_table_t *); 42 43 id_t 44 rfs4_dbe_getid(rfs4_dbe_t *entry) 45 { 46 return (entry->dbe_id); 47 } 48 49 void 50 rfs4_dbe_hold(rfs4_dbe_t *entry) 51 { 52 atomic_add_32(&entry->dbe_refcnt, 1); 53 } 54 55 /* 56 * rfs4_dbe_rele_nolock only decrements the reference count of the entry. 57 */ 58 void 59 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry) 60 { 61 atomic_add_32(&entry->dbe_refcnt, -1); 62 } 63 64 65 uint32_t 66 rfs4_dbe_refcnt(rfs4_dbe_t *entry) 67 { 68 return (entry->dbe_refcnt); 69 } 70 71 /* 72 * Mark an entry such that the dbsearch will skip it. 73 * Caller does not want this entry to be found any longer 74 */ 75 void 76 rfs4_dbe_invalidate(rfs4_dbe_t *entry) 77 { 78 entry->dbe_invalid = TRUE; 79 entry->dbe_skipsearch = TRUE; 80 } 81 82 /* 83 * Is this entry invalid? 84 */ 85 bool_t 86 rfs4_dbe_is_invalid(rfs4_dbe_t *entry) 87 { 88 return (entry->dbe_invalid); 89 } 90 91 time_t 92 rfs4_dbe_get_timerele(rfs4_dbe_t *entry) 93 { 94 return (entry->dbe_time_rele); 95 } 96 97 /* 98 * Use these to temporarily hide/unhide a db entry. 99 */ 100 void 101 rfs4_dbe_hide(rfs4_dbe_t *entry) 102 { 103 rfs4_dbe_lock(entry); 104 entry->dbe_skipsearch = TRUE; 105 rfs4_dbe_unlock(entry); 106 } 107 108 void 109 rfs4_dbe_unhide(rfs4_dbe_t *entry) 110 { 111 rfs4_dbe_lock(entry); 112 entry->dbe_skipsearch = FALSE; 113 rfs4_dbe_unlock(entry); 114 } 115 116 void 117 rfs4_dbe_rele(rfs4_dbe_t *entry) 118 { 119 mutex_enter(entry->dbe_lock); 120 ASSERT(entry->dbe_refcnt > 1); 121 atomic_add_32(&entry->dbe_refcnt, -1); 122 entry->dbe_time_rele = gethrestime_sec(); 123 mutex_exit(entry->dbe_lock); 124 } 125 126 void 127 rfs4_dbe_lock(rfs4_dbe_t *entry) 128 { 129 mutex_enter(entry->dbe_lock); 130 } 131 132 void 133 rfs4_dbe_unlock(rfs4_dbe_t *entry) 134 { 135 mutex_exit(entry->dbe_lock); 136 } 137 138 bool_t 139 rfs4_dbe_islocked(rfs4_dbe_t *entry) 140 { 141 return (mutex_owned(entry->dbe_lock)); 142 } 143 144 clock_t 145 rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout) 146 { 147 return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout)); 148 } 149 150 void 151 rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry) 152 { 153 cv_broadcast(entry->dbe_cv); 154 } 155 156 /* ARGSUSED */ 157 static int 158 rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag) 159 { 160 rfs4_dbe_t *entry = obj; 161 162 mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL); 163 cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL); 164 165 return (0); 166 } 167 168 static void 169 rfs4_dbe_kmem_destructor(void *obj, void *private) 170 { 171 rfs4_dbe_t *entry = obj; 172 /*LINTED*/ 173 rfs4_table_t *table = private; 174 175 mutex_destroy(entry->dbe_lock); 176 cv_destroy(entry->dbe_cv); 177 } 178 179 rfs4_database_t * 180 rfs4_database_create(uint32_t flags) 181 { 182 rfs4_database_t *db; 183 184 db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP); 185 mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL); 186 db->db_tables = NULL; 187 db->db_debug_flags = flags; 188 db->db_shutdown_count = 0; 189 cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL); 190 return (db); 191 } 192 193 194 /* 195 * The reaper threads that have been created for the tables in this 196 * database must be stopped and the entries in the tables released. 197 * Each table will be marked as "shutdown" and the reaper threads 198 * poked and they will see that a shutdown is in progress and cleanup 199 * and exit. This function waits for all reaper threads to stop 200 * before returning to the caller. 201 */ 202 void 203 rfs4_database_shutdown(rfs4_database_t *db) 204 { 205 rfs4_table_t *table; 206 207 mutex_enter(db->db_lock); 208 for (table = db->db_tables; table; table = table->dbt_tnext) { 209 table->dbt_reaper_shutdown = TRUE; 210 mutex_enter(&table->dbt_reaper_cv_lock); 211 cv_broadcast(&table->dbt_reaper_wait); 212 db->db_shutdown_count++; 213 mutex_exit(&table->dbt_reaper_cv_lock); 214 } 215 while (db->db_shutdown_count > 0) { 216 cv_wait(&db->db_shutdown_wait, db->db_lock); 217 } 218 mutex_exit(db->db_lock); 219 } 220 221 /* 222 * Given a database that has been "shutdown" by the function above all 223 * of the table tables are destroyed and then the database itself 224 * freed. 225 */ 226 void 227 rfs4_database_destroy(rfs4_database_t *db) 228 { 229 rfs4_table_t *next, *tmp; 230 231 for (next = db->db_tables; next; ) { 232 tmp = next; 233 next = tmp->dbt_tnext; 234 rfs4_table_destroy(db, tmp); 235 } 236 237 mutex_destroy(db->db_lock); 238 kmem_free(db, sizeof (rfs4_database_t)); 239 } 240 241 rfs4_table_t * 242 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time, 243 uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *), 244 void (*destroy)(rfs4_entry_t), 245 bool_t (*expiry)(rfs4_entry_t), 246 uint32_t size, uint32_t hashsize, 247 uint32_t maxentries, id_t start) 248 { 249 rfs4_table_t *table; 250 int len; 251 char *cache_name; 252 char *id_name; 253 254 table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP); 255 table->dbt_db = db; 256 rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL); 257 mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL); 258 mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL); 259 cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL); 260 261 len = strlen(tabname); 262 table->dbt_name = kmem_alloc(len+1, KM_SLEEP); 263 cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP); 264 (void) strcpy(table->dbt_name, tabname); 265 (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name); 266 table->dbt_max_cache_time = max_cache_time; 267 table->dbt_usize = size; 268 table->dbt_len = hashsize; 269 table->dbt_count = 0; 270 table->dbt_idxcnt = 0; 271 table->dbt_ccnt = 0; 272 table->dbt_maxcnt = idxcnt; 273 table->dbt_indices = NULL; 274 table->dbt_id_space = NULL; 275 table->dbt_reaper_shutdown = FALSE; 276 277 if (start >= 0) { 278 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX) 279 maxentries = INT32_MAX - start; 280 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP); 281 (void) sprintf(id_name, "%s_id_space", table->dbt_name); 282 table->dbt_id_space = id_space_create(id_name, start, 283 maxentries + start); 284 kmem_free(id_name, len + 10); 285 } 286 table->dbt_maxentries = maxentries; 287 table->dbt_create = create; 288 table->dbt_destroy = destroy; 289 table->dbt_expiry = expiry; 290 291 table->dbt_mem_cache = kmem_cache_create(cache_name, 292 sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size, 293 0, 294 rfs4_dbe_kmem_constructor, 295 rfs4_dbe_kmem_destructor, 296 NULL, 297 table, 298 NULL, 299 0); 300 kmem_free(cache_name, len+13); 301 302 table->dbt_debug = db->db_debug_flags; 303 304 mutex_enter(db->db_lock); 305 table->dbt_tnext = db->db_tables; 306 db->db_tables = table; 307 mutex_exit(db->db_lock); 308 309 rfs4_start_reaper(table); 310 311 return (table); 312 } 313 314 void 315 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table) 316 { 317 rfs4_table_t *p; 318 rfs4_index_t *idx; 319 320 ASSERT(table->dbt_count == 0); 321 322 mutex_enter(db->db_lock); 323 if (table == db->db_tables) 324 db->db_tables = table->dbt_tnext; 325 else { 326 for (p = db->db_tables; p; p = p->dbt_tnext) 327 if (p->dbt_tnext == table) { 328 p->dbt_tnext = table->dbt_tnext; 329 table->dbt_tnext = NULL; 330 break; 331 } 332 ASSERT(p != NULL); 333 } 334 mutex_exit(db->db_lock); 335 336 /* Destroy indices */ 337 while (table->dbt_indices) { 338 idx = table->dbt_indices; 339 table->dbt_indices = idx->dbi_inext; 340 rfs4_index_destroy(idx); 341 } 342 343 rw_destroy(table->dbt_t_lock); 344 mutex_destroy(table->dbt_lock); 345 mutex_destroy(&table->dbt_reaper_cv_lock); 346 cv_destroy(&table->dbt_reaper_wait); 347 348 kmem_free(table->dbt_name, strlen(table->dbt_name) + 1); 349 if (table->dbt_id_space) 350 id_space_destroy(table->dbt_id_space); 351 kmem_cache_destroy(table->dbt_mem_cache); 352 kmem_free(table, sizeof (rfs4_table_t)); 353 } 354 355 rfs4_index_t * 356 rfs4_index_create(rfs4_table_t *table, char *keyname, 357 uint32_t (*hash)(void *), 358 bool_t (compare)(rfs4_entry_t, void *), 359 void *(*mkkey)(rfs4_entry_t), 360 bool_t createable) 361 { 362 rfs4_index_t *idx; 363 364 ASSERT(table->dbt_idxcnt < table->dbt_maxcnt); 365 366 idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP); 367 368 idx->dbi_table = table; 369 idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP); 370 (void) strcpy(idx->dbi_keyname, keyname); 371 idx->dbi_hash = hash; 372 idx->dbi_compare = compare; 373 idx->dbi_mkkey = mkkey; 374 idx->dbi_tblidx = table->dbt_idxcnt; 375 table->dbt_idxcnt++; 376 if (createable) { 377 table->dbt_ccnt++; 378 if (table->dbt_ccnt > 1) 379 panic("Table %s currently can have only have one " 380 "index that will allow creation of entries", 381 table->dbt_name); 382 idx->dbi_createable = TRUE; 383 } else { 384 idx->dbi_createable = FALSE; 385 } 386 387 idx->dbi_inext = table->dbt_indices; 388 table->dbt_indices = idx; 389 idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len, 390 KM_SLEEP); 391 392 return (idx); 393 } 394 395 void 396 rfs4_index_destroy(rfs4_index_t *idx) 397 { 398 kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1); 399 kmem_free(idx->dbi_buckets, 400 sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len); 401 kmem_free(idx, sizeof (rfs4_index_t)); 402 } 403 404 static void 405 rfs4_dbe_destroy(rfs4_dbe_t *entry) 406 { 407 rfs4_index_t *idx; 408 void *key; 409 int i; 410 rfs4_bucket_t *bp; 411 rfs4_table_t *table = entry->dbe_table; 412 rfs4_link_t *l; 413 414 NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG, 415 (CE_NOTE, "Destroying entry %p from %s", 416 (void*)entry, table->dbt_name)); 417 418 mutex_enter(entry->dbe_lock); 419 ASSERT(entry->dbe_refcnt == 0); 420 mutex_exit(entry->dbe_lock); 421 422 /* Unlink from all indices */ 423 for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) { 424 l = &entry->dbe_indices[idx->dbi_tblidx]; 425 /* check and see if we were ever linked in to the index */ 426 if (INVALID_LINK(l)) { 427 ASSERT(l->next == NULL && l->prev == NULL); 428 continue; 429 } 430 key = idx->dbi_mkkey(entry->dbe_data); 431 i = HASH(idx, key); 432 bp = &idx->dbi_buckets[i]; 433 ASSERT(bp->dbk_head != NULL); 434 DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]); 435 } 436 437 /* Destroy user data */ 438 if (table->dbt_destroy) 439 (*table->dbt_destroy)(entry->dbe_data); 440 441 if (table->dbt_id_space) 442 id_free(table->dbt_id_space, entry->dbe_id); 443 444 mutex_enter(table->dbt_lock); 445 table->dbt_count--; 446 mutex_exit(table->dbt_lock); 447 448 /* Destroy the entry itself */ 449 kmem_cache_free(table->dbt_mem_cache, entry); 450 } 451 452 453 static rfs4_dbe_t * 454 rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data) 455 { 456 rfs4_dbe_t *entry; 457 int i; 458 459 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG, 460 (CE_NOTE, "Creating entry in table %s", table->dbt_name)); 461 462 entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP); 463 464 entry->dbe_refcnt = 1; 465 entry->dbe_invalid = FALSE; 466 entry->dbe_skipsearch = FALSE; 467 entry->dbe_time_rele = 0; 468 entry->dbe_id = 0; 469 470 if (table->dbt_id_space) 471 entry->dbe_id = id; 472 entry->dbe_table = table; 473 474 for (i = 0; i < table->dbt_maxcnt; i++) { 475 entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL; 476 entry->dbe_indices[i].entry = entry; 477 /* 478 * We mark the entry as not indexed by setting the low 479 * order bit, since address are word aligned. This has 480 * the advantage of causeing a trap if the address is 481 * used. After the entry is linked in to the 482 * corresponding index the bit will be cleared. 483 */ 484 INVALIDATE_ADDR(entry->dbe_indices[i].entry); 485 } 486 487 entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt]; 488 bzero(entry->dbe_data, table->dbt_usize); 489 entry->dbe_data->dbe = entry; 490 491 if (!(*table->dbt_create)(entry->dbe_data, data)) { 492 kmem_cache_free(table->dbt_mem_cache, entry); 493 return (NULL); 494 } 495 496 mutex_enter(table->dbt_lock); 497 table->dbt_count++; 498 mutex_exit(table->dbt_lock); 499 500 return (entry); 501 } 502 503 rfs4_entry_t 504 rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg, 505 rfs4_dbsearch_type_t dbsearch_type) 506 { 507 int already_done; 508 uint32_t i; 509 rfs4_table_t *table = idx->dbi_table; 510 rfs4_index_t *ip; 511 rfs4_bucket_t *bp; 512 rfs4_link_t *l; 513 rfs4_dbe_t *entry; 514 id_t id = -1; 515 516 i = HASH(idx, key); 517 bp = &idx->dbi_buckets[i]; 518 519 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG, 520 (CE_NOTE, "Searching for key %p in table %s by %s", 521 key, table->dbt_name, idx->dbi_keyname)); 522 523 rw_enter(bp->dbk_lock, RW_READER); 524 retry: 525 for (l = bp->dbk_head; l; l = l->next) { 526 if (l->entry->dbe_refcnt > 0 && 527 (l->entry->dbe_skipsearch == FALSE || 528 (l->entry->dbe_skipsearch == TRUE && 529 dbsearch_type == RFS4_DBS_INVALID)) && 530 (*idx->dbi_compare)(l->entry->dbe_data, key)) { 531 mutex_enter(l->entry->dbe_lock); 532 if (l->entry->dbe_refcnt == 0) { 533 mutex_exit(l->entry->dbe_lock); 534 continue; 535 } 536 537 /* place an additional hold since we are returning */ 538 rfs4_dbe_hold(l->entry); 539 540 mutex_exit(l->entry->dbe_lock); 541 rw_exit(bp->dbk_lock); 542 543 *create = FALSE; 544 545 NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG), 546 (CE_NOTE, "Found entry %p for %p in table %s", 547 (void *)l->entry, key, table->dbt_name)); 548 549 if (id != -1) 550 id_free(table->dbt_id_space, id); 551 return (l->entry->dbe_data); 552 } 553 } 554 555 if (!*create || table->dbt_create == NULL || !idx->dbi_createable || 556 table->dbt_maxentries == table->dbt_count) { 557 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG, 558 (CE_NOTE, "Entry for %p in %s not found", 559 key, table->dbt_name)); 560 561 rw_exit(bp->dbk_lock); 562 if (id != -1) 563 id_free(table->dbt_id_space, id); 564 return (NULL); 565 } 566 567 if (table->dbt_id_space && id == -1) { 568 /* get an id but don't sleep for it */ 569 id = id_alloc_nosleep(table->dbt_id_space); 570 if (id == -1) { 571 rw_exit(bp->dbk_lock); 572 573 /* get an id, ok to sleep for it here */ 574 id = id_alloc(table->dbt_id_space); 575 576 rw_enter(bp->dbk_lock, RW_WRITER); 577 goto retry; 578 } 579 } 580 581 /* get an exclusive lock on the bucket */ 582 if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) { 583 NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG, 584 (CE_NOTE, "Trying to upgrade lock on " 585 "hash chain %d (%p) for %s by %s", 586 i, (void*)bp, table->dbt_name, idx->dbi_keyname)); 587 588 rw_exit(bp->dbk_lock); 589 rw_enter(bp->dbk_lock, RW_WRITER); 590 goto retry; 591 } 592 593 /* create entry */ 594 entry = rfs4_dbe_create(table, id, arg); 595 if (entry == NULL) { 596 rw_exit(bp->dbk_lock); 597 if (id != -1) 598 id_free(table->dbt_id_space, id); 599 600 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG, 601 (CE_NOTE, "Constructor for table %s failed", 602 table->dbt_name)); 603 return (NULL); 604 } 605 606 /* 607 * Add one ref for entry into table's hash - only one 608 * reference added even though there may be multiple indices 609 */ 610 rfs4_dbe_hold(entry); 611 ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]); 612 VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry); 613 614 already_done = idx->dbi_tblidx; 615 rw_exit(bp->dbk_lock); 616 617 for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) { 618 if (ip->dbi_tblidx == already_done) 619 continue; 620 l = &entry->dbe_indices[ip->dbi_tblidx]; 621 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data)); 622 ASSERT(i < ip->dbi_table->dbt_len); 623 bp = &ip->dbi_buckets[i]; 624 ENQUEUE_IDX(bp, l); 625 } 626 627 NFS4_DEBUG( 628 table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG, 629 (CE_NOTE, "Entry %p created for %s = %p in table %s", 630 (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name)); 631 632 return (entry->dbe_data); 633 } 634 635 /*ARGSUSED*/ 636 boolean_t 637 rfs4_cpr_callb(void *arg, int code) 638 { 639 rfs4_table_t *table = rfs4_client_tab; 640 rfs4_bucket_t *buckets, *bp; 641 rfs4_link_t *l; 642 rfs4_client_t *cp; 643 int i; 644 645 /* 646 * We get called for Suspend and Resume events. 647 * For the suspend case we simply don't care! Nor do we care if 648 * there are no clients. 649 */ 650 if (code == CB_CODE_CPR_CHKPT || table == NULL) { 651 return (B_TRUE); 652 } 653 654 buckets = table->dbt_indices->dbi_buckets; 655 656 /* 657 * When we get this far we are in the process of 658 * resuming the system from a previous suspend. 659 * 660 * We are going to blast through and update the 661 * last_access time for all the clients and in 662 * doing so extend them by one lease period. 663 */ 664 for (i = 0; i < table->dbt_len; i++) { 665 bp = &buckets[i]; 666 for (l = bp->dbk_head; l; l = l->next) { 667 cp = (rfs4_client_t *)l->entry->dbe_data; 668 cp->rc_last_access = gethrestime_sec(); 669 } 670 } 671 672 return (B_TRUE); 673 } 674 675 /* 676 * Given a table, lock each of the buckets and walk all entries (in 677 * turn locking those) and calling the provided "callout" function 678 * with the provided parameter. Obviously used to iterate across all 679 * entries in a particular table via the database locking hierarchy. 680 * Obviously the caller must not hold locks on any of the entries in 681 * the specified table. 682 */ 683 void 684 rfs4_dbe_walk(rfs4_table_t *table, 685 void (*callout)(rfs4_entry_t, void *), 686 void *data) 687 { 688 rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp; 689 rfs4_link_t *l; 690 rfs4_dbe_t *entry; 691 int i; 692 693 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG, 694 (CE_NOTE, "Walking entries in %s", table->dbt_name)); 695 696 /* Walk the buckets looking for entries to release/destroy */ 697 for (i = 0; i < table->dbt_len; i++) { 698 bp = &buckets[i]; 699 rw_enter(bp->dbk_lock, RW_READER); 700 for (l = bp->dbk_head; l; l = l->next) { 701 entry = l->entry; 702 mutex_enter(entry->dbe_lock); 703 (*callout)(entry->dbe_data, data); 704 mutex_exit(entry->dbe_lock); 705 } 706 rw_exit(bp->dbk_lock); 707 } 708 709 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG, 710 (CE_NOTE, "Walking entries complete %s", table->dbt_name)); 711 } 712 713 714 static void 715 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired) 716 { 717 rfs4_index_t *idx = table->dbt_indices; 718 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp; 719 rfs4_link_t *l, *t; 720 rfs4_dbe_t *entry; 721 bool_t found; 722 int i; 723 int count = 0; 724 725 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG, 726 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s", 727 desired, cache_time, table->dbt_name)); 728 729 /* Walk the buckets looking for entries to release/destroy */ 730 for (i = 0; i < table->dbt_len; i++) { 731 bp = &buckets[i]; 732 do { 733 found = FALSE; 734 rw_enter(bp->dbk_lock, RW_READER); 735 for (l = bp->dbk_head; l; l = l->next) { 736 entry = l->entry; 737 /* 738 * Examine an entry. Ref count of 1 means 739 * that the only reference is for the hash 740 * table reference. 741 */ 742 if (entry->dbe_refcnt != 1) 743 continue; 744 mutex_enter(entry->dbe_lock); 745 if ((entry->dbe_refcnt == 1) && 746 (table->dbt_reaper_shutdown || 747 table->dbt_expiry == NULL || 748 (*table->dbt_expiry)(entry->dbe_data))) { 749 entry->dbe_refcnt--; 750 count++; 751 found = TRUE; 752 } 753 mutex_exit(entry->dbe_lock); 754 } 755 if (found) { 756 if (!rw_tryupgrade(bp->dbk_lock)) { 757 rw_exit(bp->dbk_lock); 758 rw_enter(bp->dbk_lock, RW_WRITER); 759 } 760 761 l = bp->dbk_head; 762 while (l) { 763 t = l; 764 entry = t->entry; 765 l = l->next; 766 if (entry->dbe_refcnt == 0) { 767 DEQUEUE(bp->dbk_head, t); 768 t->next = NULL; 769 t->prev = NULL; 770 INVALIDATE_ADDR(t->entry); 771 rfs4_dbe_destroy(entry); 772 } 773 } 774 } 775 rw_exit(bp->dbk_lock); 776 /* 777 * delay slightly if there is more work to do 778 * with the expectation that other reaper 779 * threads are freeing data structures as well 780 * and in turn will reduce ref counts on 781 * entries in this table allowing them to be 782 * released. This is only done in the 783 * instance that the tables are being shut down. 784 */ 785 if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) 786 delay(hz/100); 787 /* 788 * If this is a table shutdown, keep going until 789 * everything is gone 790 */ 791 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL); 792 793 if (!table->dbt_reaper_shutdown && desired && count >= desired) 794 break; 795 } 796 797 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG, 798 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s", 799 count, cache_time, table->dbt_name)); 800 } 801 802 803 static void 804 reaper_thread(caddr_t *arg) 805 { 806 rfs4_table_t *table = (rfs4_table_t *)arg; 807 clock_t rc, time, wakeup; 808 809 NFS4_DEBUG(table->dbt_debug, 810 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name)); 811 812 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock, 813 callb_generic_cpr, "nfsv4Reaper"); 814 815 time = MIN(rfs4_reap_interval, table->dbt_max_cache_time); 816 wakeup = SEC_TO_TICK(time); 817 818 mutex_enter(&table->dbt_reaper_cv_lock); 819 do { 820 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info); 821 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait, 822 &table->dbt_reaper_cv_lock, wakeup, TR_CLOCK_TICK); 823 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info, 824 &table->dbt_reaper_cv_lock); 825 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0); 826 } while (rc != 0 && table->dbt_reaper_shutdown == FALSE); 827 828 CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info); 829 830 NFS4_DEBUG(table->dbt_debug, 831 (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name)); 832 833 /* Notify the database shutdown processing that the table is shutdown */ 834 mutex_enter(table->dbt_db->db_lock); 835 table->dbt_db->db_shutdown_count--; 836 cv_signal(&table->dbt_db->db_shutdown_wait); 837 mutex_exit(table->dbt_db->db_lock); 838 } 839 840 static void 841 rfs4_start_reaper(rfs4_table_t *table) 842 { 843 if (table->dbt_max_cache_time == 0) 844 return; 845 846 (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN, 847 minclsyspri); 848 } 849 850 #ifdef DEBUG 851 void 852 rfs4_dbe_debug(rfs4_dbe_t *entry) 853 { 854 cmn_err(CE_NOTE, "Entry %p from table %s", 855 (void *)entry, entry->dbe_table->dbt_name); 856 cmn_err(CE_CONT, "\trefcnt = %d id = %d", 857 entry->dbe_refcnt, entry->dbe_id); 858 } 859 #endif 860