1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2018 Nexenta Systems, Inc. 28 */ 29 30 #include <sys/systm.h> 31 #include <sys/cmn_err.h> 32 #include <sys/kmem.h> 33 #include <sys/disp.h> 34 #include <sys/id_space.h> 35 #include <sys/atomic.h> 36 #include <rpc/rpc.h> 37 #include <nfs/nfs4.h> 38 #include <nfs/nfs4_db_impl.h> 39 #include <sys/sdt.h> 40 41 static int rfs4_reap_interval = RFS4_REAP_INTERVAL; 42 43 static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t); 44 static void rfs4_dbe_destroy(rfs4_dbe_t *); 45 static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t); 46 static void rfs4_start_reaper(rfs4_table_t *); 47 48 /* 49 * t_lowat - integer percentage of table entries /etc/system only 50 * t_hiwat - integer percentage of table entries /etc/system only 51 * t_lreap - integer percentage of table reap time mdb or /etc/system 52 * t_hreap - integer percentage of table reap time mdb or /etc/system 53 */ 54 uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */ 55 uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */ 56 time_t t_lreap = 50; /* default to 50% of table's reap interval */ 57 time_t t_hreap = 10; /* default to 10% of table's reap interval */ 58 59 id_t 60 rfs4_dbe_getid(rfs4_dbe_t *entry) 61 { 62 return (entry->dbe_id); 63 } 64 65 void 66 rfs4_dbe_hold(rfs4_dbe_t *entry) 67 { 68 atomic_inc_32(&entry->dbe_refcnt); 69 } 70 71 /* 72 * rfs4_dbe_rele_nolock only decrements the reference count of the entry. 73 */ 74 void 75 rfs4_dbe_rele_nolock(rfs4_dbe_t *entry) 76 { 77 atomic_dec_32(&entry->dbe_refcnt); 78 } 79 80 81 uint32_t 82 rfs4_dbe_refcnt(rfs4_dbe_t *entry) 83 { 84 return (entry->dbe_refcnt); 85 } 86 87 /* 88 * Mark an entry such that the dbsearch will skip it. 89 * Caller does not want this entry to be found any longer 90 */ 91 void 92 rfs4_dbe_invalidate(rfs4_dbe_t *entry) 93 { 94 entry->dbe_invalid = TRUE; 95 entry->dbe_skipsearch = TRUE; 96 } 97 98 /* 99 * Is this entry invalid? 100 */ 101 bool_t 102 rfs4_dbe_is_invalid(rfs4_dbe_t *entry) 103 { 104 return (entry->dbe_invalid); 105 } 106 107 time_t 108 rfs4_dbe_get_timerele(rfs4_dbe_t *entry) 109 { 110 return (entry->dbe_time_rele); 111 } 112 113 /* 114 * Use these to temporarily hide/unhide a db entry. 115 */ 116 void 117 rfs4_dbe_hide(rfs4_dbe_t *entry) 118 { 119 rfs4_dbe_lock(entry); 120 entry->dbe_skipsearch = TRUE; 121 rfs4_dbe_unlock(entry); 122 } 123 124 void 125 rfs4_dbe_unhide(rfs4_dbe_t *entry) 126 { 127 rfs4_dbe_lock(entry); 128 entry->dbe_skipsearch = FALSE; 129 rfs4_dbe_unlock(entry); 130 } 131 132 void 133 rfs4_dbe_rele(rfs4_dbe_t *entry) 134 { 135 mutex_enter(entry->dbe_lock); 136 ASSERT(entry->dbe_refcnt > 1); 137 atomic_dec_32(&entry->dbe_refcnt); 138 entry->dbe_time_rele = gethrestime_sec(); 139 mutex_exit(entry->dbe_lock); 140 } 141 142 void 143 rfs4_dbe_lock(rfs4_dbe_t *entry) 144 { 145 mutex_enter(entry->dbe_lock); 146 } 147 148 void 149 rfs4_dbe_unlock(rfs4_dbe_t *entry) 150 { 151 mutex_exit(entry->dbe_lock); 152 } 153 154 bool_t 155 rfs4_dbe_islocked(rfs4_dbe_t *entry) 156 { 157 return (mutex_owned(entry->dbe_lock)); 158 } 159 160 clock_t 161 rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout) 162 { 163 return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout)); 164 } 165 166 void 167 rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry) 168 { 169 cv_broadcast(entry->dbe_cv); 170 } 171 172 /* ARGSUSED */ 173 static int 174 rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag) 175 { 176 rfs4_dbe_t *entry = obj; 177 178 mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL); 179 cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL); 180 181 return (0); 182 } 183 184 static void 185 rfs4_dbe_kmem_destructor(void *obj, void *private) 186 { 187 rfs4_dbe_t *entry = obj; 188 /*LINTED*/ 189 rfs4_table_t *table = private; 190 191 mutex_destroy(entry->dbe_lock); 192 cv_destroy(entry->dbe_cv); 193 } 194 195 rfs4_database_t * 196 rfs4_database_create(uint32_t flags) 197 { 198 rfs4_database_t *db; 199 200 db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP); 201 mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL); 202 db->db_tables = NULL; 203 db->db_debug_flags = flags; 204 db->db_shutdown_count = 0; 205 cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL); 206 return (db); 207 } 208 209 210 /* 211 * The reaper threads that have been created for the tables in this 212 * database must be stopped and the entries in the tables released. 213 * Each table will be marked as "shutdown" and the reaper threads 214 * poked and they will see that a shutdown is in progress and cleanup 215 * and exit. This function waits for all reaper threads to stop 216 * before returning to the caller. 217 */ 218 void 219 rfs4_database_shutdown(rfs4_database_t *db) 220 { 221 rfs4_table_t *table; 222 223 mutex_enter(db->db_lock); 224 for (table = db->db_tables; table; table = table->dbt_tnext) { 225 mutex_enter(&table->dbt_reaper_cv_lock); 226 table->dbt_reaper_shutdown = TRUE; 227 cv_broadcast(&table->dbt_reaper_wait); 228 db->db_shutdown_count++; 229 mutex_exit(&table->dbt_reaper_cv_lock); 230 } 231 while (db->db_shutdown_count > 0) { 232 cv_wait(&db->db_shutdown_wait, db->db_lock); 233 } 234 mutex_exit(db->db_lock); 235 } 236 237 /* 238 * Given a database that has been "shutdown" by the function above all 239 * of the table tables are destroyed and then the database itself 240 * freed. 241 */ 242 void 243 rfs4_database_destroy(rfs4_database_t *db) 244 { 245 rfs4_table_t *next, *tmp; 246 247 for (next = db->db_tables; next; ) { 248 tmp = next; 249 next = tmp->dbt_tnext; 250 rfs4_table_destroy(db, tmp); 251 } 252 253 mutex_destroy(db->db_lock); 254 kmem_free(db, sizeof (rfs4_database_t)); 255 } 256 257 /* 258 * Used to get the correct kmem_cache database for the state table being 259 * created. 260 * Helper function for rfs4_table_create 261 */ 262 static kmem_cache_t * 263 get_db_mem_cache(char *name) 264 { 265 int i; 266 267 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) { 268 if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0) 269 return (rfs4_db_mem_cache_table[i].r_db_mem_cache); 270 } 271 /* 272 * There is no associated kmem cache for this NFS4 server state 273 * table name 274 */ 275 return (NULL); 276 } 277 278 /* 279 * Used to initialize the global NFSv4 server state database. 280 * Helper funtion for rfs4_state_g_init and called when module is loaded. 281 */ 282 kmem_cache_t * 283 /* CSTYLED */ 284 nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx) 285 { 286 kmem_cache_t *mem_cache = kmem_cache_create(cache_name, 287 sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size, 288 0, 289 rfs4_dbe_kmem_constructor, 290 rfs4_dbe_kmem_destructor, 291 NULL, 292 NULL, 293 NULL, 294 0); 295 (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name, 296 strlen(cache_name) + 1); 297 rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache; 298 return (mem_cache); 299 } 300 301 rfs4_table_t * 302 rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time, 303 uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *), 304 void (*destroy)(rfs4_entry_t), 305 bool_t (*expiry)(rfs4_entry_t), 306 uint32_t size, uint32_t hashsize, 307 uint32_t maxentries, id_t start) 308 { 309 rfs4_table_t *table; 310 int len; 311 char *cache_name; 312 char *id_name; 313 314 table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP); 315 table->dbt_db = db; 316 rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL); 317 mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL); 318 mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL); 319 cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL); 320 321 len = strlen(tabname); 322 table->dbt_name = kmem_alloc(len+1, KM_SLEEP); 323 cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP); 324 (void) strcpy(table->dbt_name, tabname); 325 (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name); 326 table->dbt_max_cache_time = max_cache_time; 327 table->dbt_usize = size; 328 table->dbt_len = hashsize; 329 table->dbt_count = 0; 330 table->dbt_idxcnt = 0; 331 table->dbt_ccnt = 0; 332 table->dbt_maxcnt = idxcnt; 333 table->dbt_indices = NULL; 334 table->dbt_id_space = NULL; 335 table->dbt_reaper_shutdown = FALSE; 336 337 if (start >= 0) { 338 if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX) 339 maxentries = INT32_MAX - start; 340 id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP); 341 (void) sprintf(id_name, "%s_id_space", table->dbt_name); 342 table->dbt_id_space = id_space_create(id_name, start, 343 maxentries + start); 344 kmem_free(id_name, len + 10); 345 } 346 ASSERT(t_lowat != 0); 347 table->dbt_id_lwat = (maxentries * t_lowat) / 100; 348 ASSERT(t_hiwat != 0); 349 table->dbt_id_hwat = (maxentries * t_hiwat) / 100; 350 table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time); 351 table->dbt_maxentries = maxentries; 352 table->dbt_create = create; 353 table->dbt_destroy = destroy; 354 table->dbt_expiry = expiry; 355 356 /* 357 * get the correct kmem_cache for this table type based on the name. 358 */ 359 table->dbt_mem_cache = get_db_mem_cache(cache_name); 360 361 kmem_free(cache_name, len+13); 362 363 table->dbt_debug = db->db_debug_flags; 364 365 mutex_enter(db->db_lock); 366 table->dbt_tnext = db->db_tables; 367 db->db_tables = table; 368 mutex_exit(db->db_lock); 369 370 rfs4_start_reaper(table); 371 372 return (table); 373 } 374 375 void 376 rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table) 377 { 378 rfs4_table_t *p; 379 rfs4_index_t *idx; 380 381 ASSERT(table->dbt_count == 0); 382 383 mutex_enter(db->db_lock); 384 if (table == db->db_tables) 385 db->db_tables = table->dbt_tnext; 386 else { 387 for (p = db->db_tables; p; p = p->dbt_tnext) 388 if (p->dbt_tnext == table) { 389 p->dbt_tnext = table->dbt_tnext; 390 table->dbt_tnext = NULL; 391 break; 392 } 393 ASSERT(p != NULL); 394 } 395 mutex_exit(db->db_lock); 396 397 /* Destroy indices */ 398 while (table->dbt_indices) { 399 idx = table->dbt_indices; 400 table->dbt_indices = idx->dbi_inext; 401 rfs4_index_destroy(idx); 402 } 403 404 rw_destroy(table->dbt_t_lock); 405 mutex_destroy(table->dbt_lock); 406 mutex_destroy(&table->dbt_reaper_cv_lock); 407 cv_destroy(&table->dbt_reaper_wait); 408 409 kmem_free(table->dbt_name, strlen(table->dbt_name) + 1); 410 if (table->dbt_id_space) 411 id_space_destroy(table->dbt_id_space); 412 table->dbt_mem_cache = NULL; 413 kmem_free(table, sizeof (rfs4_table_t)); 414 } 415 416 rfs4_index_t * 417 rfs4_index_create(rfs4_table_t *table, char *keyname, 418 uint32_t (*hash)(void *), 419 bool_t (compare)(rfs4_entry_t, void *), 420 void *(*mkkey)(rfs4_entry_t), 421 bool_t createable) 422 { 423 rfs4_index_t *idx; 424 425 ASSERT(table->dbt_idxcnt < table->dbt_maxcnt); 426 427 idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP); 428 429 idx->dbi_table = table; 430 idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP); 431 (void) strcpy(idx->dbi_keyname, keyname); 432 idx->dbi_hash = hash; 433 idx->dbi_compare = compare; 434 idx->dbi_mkkey = mkkey; 435 idx->dbi_tblidx = table->dbt_idxcnt; 436 table->dbt_idxcnt++; 437 if (createable) { 438 table->dbt_ccnt++; 439 if (table->dbt_ccnt > 1) 440 panic("Table %s currently can have only have one " 441 "index that will allow creation of entries", 442 table->dbt_name); 443 idx->dbi_createable = TRUE; 444 } else { 445 idx->dbi_createable = FALSE; 446 } 447 448 idx->dbi_inext = table->dbt_indices; 449 table->dbt_indices = idx; 450 idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len, 451 KM_SLEEP); 452 453 return (idx); 454 } 455 456 void 457 rfs4_index_destroy(rfs4_index_t *idx) 458 { 459 kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1); 460 kmem_free(idx->dbi_buckets, 461 sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len); 462 kmem_free(idx, sizeof (rfs4_index_t)); 463 } 464 465 static void 466 rfs4_dbe_destroy(rfs4_dbe_t *entry) 467 { 468 rfs4_index_t *idx; 469 void *key; 470 int i; 471 rfs4_bucket_t *bp; 472 rfs4_table_t *table = entry->dbe_table; 473 rfs4_link_t *l; 474 475 NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG, 476 (CE_NOTE, "Destroying entry %p from %s", 477 (void*)entry, table->dbt_name)); 478 479 mutex_enter(entry->dbe_lock); 480 ASSERT(entry->dbe_refcnt == 0); 481 mutex_exit(entry->dbe_lock); 482 483 /* Unlink from all indices */ 484 for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) { 485 l = &entry->dbe_indices[idx->dbi_tblidx]; 486 /* check and see if we were ever linked in to the index */ 487 if (INVALID_LINK(l)) { 488 ASSERT(l->next == NULL && l->prev == NULL); 489 continue; 490 } 491 key = idx->dbi_mkkey(entry->dbe_data); 492 i = HASH(idx, key); 493 bp = &idx->dbi_buckets[i]; 494 ASSERT(bp->dbk_head != NULL); 495 DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]); 496 } 497 498 /* Destroy user data */ 499 if (table->dbt_destroy) 500 (*table->dbt_destroy)(entry->dbe_data); 501 502 if (table->dbt_id_space) 503 id_free(table->dbt_id_space, entry->dbe_id); 504 505 mutex_enter(table->dbt_lock); 506 table->dbt_count--; 507 mutex_exit(table->dbt_lock); 508 509 /* Destroy the entry itself */ 510 kmem_cache_free(table->dbt_mem_cache, entry); 511 } 512 513 514 static rfs4_dbe_t * 515 rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data) 516 { 517 rfs4_dbe_t *entry; 518 int i; 519 520 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG, 521 (CE_NOTE, "Creating entry in table %s", table->dbt_name)); 522 523 entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP); 524 525 entry->dbe_refcnt = 1; 526 entry->dbe_invalid = FALSE; 527 entry->dbe_skipsearch = FALSE; 528 entry->dbe_time_rele = 0; 529 entry->dbe_id = 0; 530 531 if (table->dbt_id_space) 532 entry->dbe_id = id; 533 entry->dbe_table = table; 534 535 for (i = 0; i < table->dbt_maxcnt; i++) { 536 entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL; 537 entry->dbe_indices[i].entry = entry; 538 /* 539 * We mark the entry as not indexed by setting the low 540 * order bit, since address are word aligned. This has 541 * the advantage of causeing a trap if the address is 542 * used. After the entry is linked in to the 543 * corresponding index the bit will be cleared. 544 */ 545 INVALIDATE_ADDR(entry->dbe_indices[i].entry); 546 } 547 548 entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt]; 549 bzero(entry->dbe_data, table->dbt_usize); 550 entry->dbe_data->dbe = entry; 551 552 if (!(*table->dbt_create)(entry->dbe_data, data)) { 553 kmem_cache_free(table->dbt_mem_cache, entry); 554 return (NULL); 555 } 556 557 mutex_enter(table->dbt_lock); 558 table->dbt_count++; 559 mutex_exit(table->dbt_lock); 560 561 return (entry); 562 } 563 564 static void 565 rfs4_dbe_tabreap_adjust(rfs4_table_t *table) 566 { 567 clock_t tabreap; 568 clock_t reap_int; 569 uint32_t in_use; 570 571 /* 572 * Adjust the table's reap interval based on the 573 * number of id's currently in use. Each table's 574 * default remains the same if id usage subsides. 575 */ 576 ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock)); 577 tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time); 578 579 in_use = table->dbt_count + 1; /* see rfs4_dbe_create */ 580 if (in_use >= table->dbt_id_hwat) { 581 ASSERT(t_hreap != 0); 582 reap_int = (tabreap * t_hreap) / 100; 583 } else if (in_use >= table->dbt_id_lwat) { 584 ASSERT(t_lreap != 0); 585 reap_int = (tabreap * t_lreap) / 100; 586 } else { 587 reap_int = tabreap; 588 } 589 table->dbt_id_reap = reap_int; 590 DTRACE_PROBE2(table__reap__interval, char *, 591 table->dbt_name, time_t, table->dbt_id_reap); 592 } 593 594 rfs4_entry_t 595 rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg, 596 rfs4_dbsearch_type_t dbsearch_type) 597 { 598 int already_done; 599 uint32_t i; 600 rfs4_table_t *table = idx->dbi_table; 601 rfs4_index_t *ip; 602 rfs4_bucket_t *bp; 603 rfs4_link_t *l; 604 rfs4_dbe_t *entry; 605 id_t id = -1; 606 607 i = HASH(idx, key); 608 bp = &idx->dbi_buckets[i]; 609 610 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG, 611 (CE_NOTE, "Searching for key %p in table %s by %s", 612 key, table->dbt_name, idx->dbi_keyname)); 613 614 rw_enter(bp->dbk_lock, RW_READER); 615 retry: 616 for (l = bp->dbk_head; l; l = l->next) { 617 if (l->entry->dbe_refcnt > 0 && 618 (l->entry->dbe_skipsearch == FALSE || 619 (l->entry->dbe_skipsearch == TRUE && 620 dbsearch_type == RFS4_DBS_INVALID)) && 621 (*idx->dbi_compare)(l->entry->dbe_data, key)) { 622 mutex_enter(l->entry->dbe_lock); 623 if (l->entry->dbe_refcnt == 0) { 624 mutex_exit(l->entry->dbe_lock); 625 continue; 626 } 627 628 /* place an additional hold since we are returning */ 629 rfs4_dbe_hold(l->entry); 630 631 mutex_exit(l->entry->dbe_lock); 632 rw_exit(bp->dbk_lock); 633 634 *create = FALSE; 635 636 NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG), 637 (CE_NOTE, "Found entry %p for %p in table %s", 638 (void *)l->entry, key, table->dbt_name)); 639 640 if (id != -1) 641 id_free(table->dbt_id_space, id); 642 return (l->entry->dbe_data); 643 } 644 } 645 646 if (!*create || table->dbt_create == NULL || !idx->dbi_createable || 647 table->dbt_maxentries == table->dbt_count) { 648 NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG, 649 (CE_NOTE, "Entry for %p in %s not found", 650 key, table->dbt_name)); 651 652 rw_exit(bp->dbk_lock); 653 if (id != -1) 654 id_free(table->dbt_id_space, id); 655 return (NULL); 656 } 657 658 if (table->dbt_id_space && id == -1) { 659 rw_exit(bp->dbk_lock); 660 661 /* get an id, ok to sleep for it here */ 662 id = id_alloc(table->dbt_id_space); 663 ASSERT(id != -1); 664 665 mutex_enter(&table->dbt_reaper_cv_lock); 666 rfs4_dbe_tabreap_adjust(table); 667 mutex_exit(&table->dbt_reaper_cv_lock); 668 669 rw_enter(bp->dbk_lock, RW_WRITER); 670 goto retry; 671 } 672 673 /* get an exclusive lock on the bucket */ 674 if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) { 675 NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG, 676 (CE_NOTE, "Trying to upgrade lock on " 677 "hash chain %d (%p) for %s by %s", 678 i, (void*)bp, table->dbt_name, idx->dbi_keyname)); 679 680 rw_exit(bp->dbk_lock); 681 rw_enter(bp->dbk_lock, RW_WRITER); 682 goto retry; 683 } 684 685 /* create entry */ 686 entry = rfs4_dbe_create(table, id, arg); 687 if (entry == NULL) { 688 rw_exit(bp->dbk_lock); 689 if (id != -1) 690 id_free(table->dbt_id_space, id); 691 692 NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG, 693 (CE_NOTE, "Constructor for table %s failed", 694 table->dbt_name)); 695 return (NULL); 696 } 697 698 /* 699 * Add one ref for entry into table's hash - only one 700 * reference added even though there may be multiple indices 701 */ 702 rfs4_dbe_hold(entry); 703 ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]); 704 VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry); 705 706 already_done = idx->dbi_tblidx; 707 rw_exit(bp->dbk_lock); 708 709 for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) { 710 if (ip->dbi_tblidx == already_done) 711 continue; 712 l = &entry->dbe_indices[ip->dbi_tblidx]; 713 i = HASH(ip, ip->dbi_mkkey(entry->dbe_data)); 714 ASSERT(i < ip->dbi_table->dbt_len); 715 bp = &ip->dbi_buckets[i]; 716 ENQUEUE_IDX(bp, l); 717 } 718 719 NFS4_DEBUG( 720 table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG, 721 (CE_NOTE, "Entry %p created for %s = %p in table %s", 722 (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name)); 723 724 return (entry->dbe_data); 725 } 726 727 /*ARGSUSED*/ 728 boolean_t 729 rfs4_cpr_callb(void *arg, int code) 730 { 731 rfs4_bucket_t *buckets, *bp; 732 rfs4_link_t *l; 733 rfs4_client_t *cp; 734 int i; 735 736 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 737 rfs4_table_t *table = nsrv4->rfs4_client_tab; 738 739 /* 740 * We get called for Suspend and Resume events. 741 * For the suspend case we simply don't care! Nor do we care if 742 * there are no clients. 743 */ 744 if (code == CB_CODE_CPR_CHKPT || table == NULL) { 745 return (B_TRUE); 746 } 747 748 buckets = table->dbt_indices->dbi_buckets; 749 750 /* 751 * When we get this far we are in the process of 752 * resuming the system from a previous suspend. 753 * 754 * We are going to blast through and update the 755 * last_access time for all the clients and in 756 * doing so extend them by one lease period. 757 */ 758 for (i = 0; i < table->dbt_len; i++) { 759 bp = &buckets[i]; 760 for (l = bp->dbk_head; l; l = l->next) { 761 cp = (rfs4_client_t *)l->entry->dbe_data; 762 cp->rc_last_access = gethrestime_sec(); 763 } 764 } 765 766 return (B_TRUE); 767 } 768 769 /* 770 * Given a table, lock each of the buckets and walk all entries (in 771 * turn locking those) and calling the provided "callout" function 772 * with the provided parameter. Obviously used to iterate across all 773 * entries in a particular table via the database locking hierarchy. 774 * Obviously the caller must not hold locks on any of the entries in 775 * the specified table. 776 */ 777 void 778 rfs4_dbe_walk(rfs4_table_t *table, 779 void (*callout)(rfs4_entry_t, void *), 780 void *data) 781 { 782 rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp; 783 rfs4_link_t *l; 784 rfs4_dbe_t *entry; 785 int i; 786 787 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG, 788 (CE_NOTE, "Walking entries in %s", table->dbt_name)); 789 790 /* Walk the buckets looking for entries to release/destroy */ 791 for (i = 0; i < table->dbt_len; i++) { 792 bp = &buckets[i]; 793 rw_enter(bp->dbk_lock, RW_READER); 794 for (l = bp->dbk_head; l; l = l->next) { 795 entry = l->entry; 796 mutex_enter(entry->dbe_lock); 797 (*callout)(entry->dbe_data, data); 798 mutex_exit(entry->dbe_lock); 799 } 800 rw_exit(bp->dbk_lock); 801 } 802 803 NFS4_DEBUG(table->dbt_debug & WALK_DEBUG, 804 (CE_NOTE, "Walking entries complete %s", table->dbt_name)); 805 } 806 807 808 static void 809 rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired) 810 { 811 rfs4_index_t *idx = table->dbt_indices; 812 rfs4_bucket_t *buckets = idx->dbi_buckets, *bp; 813 rfs4_link_t *l, *t; 814 rfs4_dbe_t *entry; 815 bool_t found; 816 int i; 817 int count = 0; 818 819 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG, 820 (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s", 821 desired, cache_time, table->dbt_name)); 822 823 /* Walk the buckets looking for entries to release/destroy */ 824 for (i = 0; i < table->dbt_len; i++) { 825 bp = &buckets[i]; 826 do { 827 found = FALSE; 828 rw_enter(bp->dbk_lock, RW_READER); 829 for (l = bp->dbk_head; l; l = l->next) { 830 entry = l->entry; 831 /* 832 * Examine an entry. Ref count of 1 means 833 * that the only reference is for the hash 834 * table reference. 835 */ 836 if (entry->dbe_refcnt != 1) 837 continue; 838 mutex_enter(entry->dbe_lock); 839 if ((entry->dbe_refcnt == 1) && 840 (table->dbt_reaper_shutdown || 841 table->dbt_expiry == NULL || 842 (*table->dbt_expiry)(entry->dbe_data))) { 843 entry->dbe_refcnt--; 844 count++; 845 found = TRUE; 846 } 847 mutex_exit(entry->dbe_lock); 848 } 849 if (found) { 850 if (!rw_tryupgrade(bp->dbk_lock)) { 851 rw_exit(bp->dbk_lock); 852 rw_enter(bp->dbk_lock, RW_WRITER); 853 } 854 855 l = bp->dbk_head; 856 while (l) { 857 t = l; 858 entry = t->entry; 859 l = l->next; 860 if (entry->dbe_refcnt == 0) { 861 DEQUEUE(bp->dbk_head, t); 862 t->next = NULL; 863 t->prev = NULL; 864 INVALIDATE_ADDR(t->entry); 865 rfs4_dbe_destroy(entry); 866 } 867 } 868 } 869 rw_exit(bp->dbk_lock); 870 /* 871 * delay slightly if there is more work to do 872 * with the expectation that other reaper 873 * threads are freeing data structures as well 874 * and in turn will reduce ref counts on 875 * entries in this table allowing them to be 876 * released. This is only done in the 877 * instance that the tables are being shut down. 878 */ 879 if (table->dbt_reaper_shutdown && bp->dbk_head != NULL) 880 delay(hz/100); 881 /* 882 * If this is a table shutdown, keep going until 883 * everything is gone 884 */ 885 } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL); 886 887 if (!table->dbt_reaper_shutdown && desired && count >= desired) 888 break; 889 } 890 891 NFS4_DEBUG(table->dbt_debug & REAP_DEBUG, 892 (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s", 893 count, cache_time, table->dbt_name)); 894 } 895 896 static void 897 reaper_thread(caddr_t *arg) 898 { 899 rfs4_table_t *table = (rfs4_table_t *)arg; 900 clock_t rc; 901 902 NFS4_DEBUG(table->dbt_debug, 903 (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name)); 904 905 CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock, 906 callb_generic_cpr, "nfsv4Reaper"); 907 908 mutex_enter(&table->dbt_reaper_cv_lock); 909 do { 910 CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info); 911 rc = cv_reltimedwait_sig(&table->dbt_reaper_wait, 912 &table->dbt_reaper_cv_lock, 913 SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK); 914 CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info, 915 &table->dbt_reaper_cv_lock); 916 rfs4_dbe_reap(table, table->dbt_max_cache_time, 0); 917 } while (rc != 0 && table->dbt_reaper_shutdown == FALSE); 918 919 CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info); 920 921 NFS4_DEBUG(table->dbt_debug, 922 (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name)); 923 924 /* Notify the database shutdown processing that the table is shutdown */ 925 mutex_enter(table->dbt_db->db_lock); 926 table->dbt_db->db_shutdown_count--; 927 cv_signal(&table->dbt_db->db_shutdown_wait); 928 mutex_exit(table->dbt_db->db_lock); 929 zthread_exit(); 930 } 931 932 static void 933 rfs4_start_reaper(rfs4_table_t *table) 934 { 935 if (table->dbt_max_cache_time == 0) 936 return; 937 938 (void) zthread_create(NULL, 0, reaper_thread, table, 0, 939 minclsyspri); 940 } 941 942 #ifdef DEBUG 943 void 944 rfs4_dbe_debug(rfs4_dbe_t *entry) 945 { 946 cmn_err(CE_NOTE, "Entry %p from table %s", 947 (void *)entry, entry->dbe_table->dbt_name); 948 cmn_err(CE_CONT, "\trefcnt = %d id = %d", 949 entry->dbe_refcnt, entry->dbe_id); 950 } 951 #endif 952