xref: /titanic_51/usr/src/uts/common/fs/nfs/nfs4_db.c (revision 1a5e258f5471356ca102c7176637cdce45bac147)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5d216dff5SRobert Mastors  * Common Development and Distribution License (the "License").
6d216dff5SRobert Mastors  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22422d9515SGerald Thornbrugh  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate #include <sys/systm.h>
267c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
277c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
287c478bd9Sstevel@tonic-gate #include <sys/disp.h>
297c478bd9Sstevel@tonic-gate #include <sys/id_space.h>
307c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
317c478bd9Sstevel@tonic-gate #include <rpc/rpc.h>
327c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h>
337c478bd9Sstevel@tonic-gate #include <nfs/nfs4_db_impl.h>
34f6cf9e50SRick Mesta #include <sys/sdt.h>
357c478bd9Sstevel@tonic-gate 
367c478bd9Sstevel@tonic-gate static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
377c478bd9Sstevel@tonic-gate 
387c478bd9Sstevel@tonic-gate static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
397c478bd9Sstevel@tonic-gate static void rfs4_dbe_destroy(rfs4_dbe_t *);
40d216dff5SRobert Mastors static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
417c478bd9Sstevel@tonic-gate static void rfs4_start_reaper(rfs4_table_t *);
427c478bd9Sstevel@tonic-gate 
43f6cf9e50SRick Mesta /*
44f6cf9e50SRick Mesta  * t_lowat - integer percentage of table entries	/etc/system only
45f6cf9e50SRick Mesta  * t_hiwat - integer percentage of table entries	/etc/system only
46f6cf9e50SRick Mesta  * t_lreap - integer percentage of table reap time	mdb or /etc/system
47f6cf9e50SRick Mesta  * t_hreap - integer percentage of table reap time	mdb or /etc/system
48f6cf9e50SRick Mesta  */
49f6cf9e50SRick Mesta uint32_t	t_lowat = 50;	/* reap at t_lreap when id's in use hit 50% */
50f6cf9e50SRick Mesta uint32_t	t_hiwat = 75;	/* reap at t_hreap when id's in use hit 75% */
51f6cf9e50SRick Mesta time_t		t_lreap = 50;	/* default to 50% of table's reap interval */
52f6cf9e50SRick Mesta time_t		t_hreap = 10;	/* default to 10% of table's reap interval */
53f6cf9e50SRick Mesta 
547c478bd9Sstevel@tonic-gate id_t
55d216dff5SRobert Mastors rfs4_dbe_getid(rfs4_dbe_t *entry)
567c478bd9Sstevel@tonic-gate {
57d216dff5SRobert Mastors 	return (entry->dbe_id);
587c478bd9Sstevel@tonic-gate }
597c478bd9Sstevel@tonic-gate 
607c478bd9Sstevel@tonic-gate void
61d216dff5SRobert Mastors rfs4_dbe_hold(rfs4_dbe_t *entry)
627c478bd9Sstevel@tonic-gate {
63*1a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&entry->dbe_refcnt);
647c478bd9Sstevel@tonic-gate }
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate /*
677c478bd9Sstevel@tonic-gate  * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
687c478bd9Sstevel@tonic-gate  */
697c478bd9Sstevel@tonic-gate void
70d216dff5SRobert Mastors rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
717c478bd9Sstevel@tonic-gate {
72*1a5e258fSJosef 'Jeff' Sipek 	atomic_dec_32(&entry->dbe_refcnt);
737c478bd9Sstevel@tonic-gate }
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate 
767c478bd9Sstevel@tonic-gate uint32_t
77d216dff5SRobert Mastors rfs4_dbe_refcnt(rfs4_dbe_t *entry)
787c478bd9Sstevel@tonic-gate {
79d216dff5SRobert Mastors 	return (entry->dbe_refcnt);
807c478bd9Sstevel@tonic-gate }
817c478bd9Sstevel@tonic-gate 
827c478bd9Sstevel@tonic-gate /*
837c478bd9Sstevel@tonic-gate  * Mark an entry such that the dbsearch will skip it.
847c478bd9Sstevel@tonic-gate  * Caller does not want this entry to be found any longer
857c478bd9Sstevel@tonic-gate  */
867c478bd9Sstevel@tonic-gate void
87d216dff5SRobert Mastors rfs4_dbe_invalidate(rfs4_dbe_t *entry)
887c478bd9Sstevel@tonic-gate {
89d216dff5SRobert Mastors 	entry->dbe_invalid = TRUE;
90d216dff5SRobert Mastors 	entry->dbe_skipsearch = TRUE;
917c478bd9Sstevel@tonic-gate }
927c478bd9Sstevel@tonic-gate 
937c478bd9Sstevel@tonic-gate /*
947c478bd9Sstevel@tonic-gate  * Is this entry invalid?
957c478bd9Sstevel@tonic-gate  */
967c478bd9Sstevel@tonic-gate bool_t
97d216dff5SRobert Mastors rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
987c478bd9Sstevel@tonic-gate {
99d216dff5SRobert Mastors 	return (entry->dbe_invalid);
1007c478bd9Sstevel@tonic-gate }
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate time_t
103d216dff5SRobert Mastors rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
1047c478bd9Sstevel@tonic-gate {
105d216dff5SRobert Mastors 	return (entry->dbe_time_rele);
1067c478bd9Sstevel@tonic-gate }
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate /*
1097c478bd9Sstevel@tonic-gate  * Use these to temporarily hide/unhide a db entry.
1107c478bd9Sstevel@tonic-gate  */
1117c478bd9Sstevel@tonic-gate void
112d216dff5SRobert Mastors rfs4_dbe_hide(rfs4_dbe_t *entry)
1137c478bd9Sstevel@tonic-gate {
114d216dff5SRobert Mastors 	rfs4_dbe_lock(entry);
115d216dff5SRobert Mastors 	entry->dbe_skipsearch = TRUE;
116d216dff5SRobert Mastors 	rfs4_dbe_unlock(entry);
1177c478bd9Sstevel@tonic-gate }
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate void
120d216dff5SRobert Mastors rfs4_dbe_unhide(rfs4_dbe_t *entry)
1217c478bd9Sstevel@tonic-gate {
122d216dff5SRobert Mastors 	rfs4_dbe_lock(entry);
123d216dff5SRobert Mastors 	entry->dbe_skipsearch = FALSE;
124d216dff5SRobert Mastors 	rfs4_dbe_unlock(entry);
1257c478bd9Sstevel@tonic-gate }
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate void
128d216dff5SRobert Mastors rfs4_dbe_rele(rfs4_dbe_t *entry)
1297c478bd9Sstevel@tonic-gate {
130d216dff5SRobert Mastors 	mutex_enter(entry->dbe_lock);
131d216dff5SRobert Mastors 	ASSERT(entry->dbe_refcnt > 1);
132*1a5e258fSJosef 'Jeff' Sipek 	atomic_dec_32(&entry->dbe_refcnt);
133d216dff5SRobert Mastors 	entry->dbe_time_rele = gethrestime_sec();
134d216dff5SRobert Mastors 	mutex_exit(entry->dbe_lock);
1357c478bd9Sstevel@tonic-gate }
1367c478bd9Sstevel@tonic-gate 
1377c478bd9Sstevel@tonic-gate void
138d216dff5SRobert Mastors rfs4_dbe_lock(rfs4_dbe_t *entry)
1397c478bd9Sstevel@tonic-gate {
140d216dff5SRobert Mastors 	mutex_enter(entry->dbe_lock);
1417c478bd9Sstevel@tonic-gate }
1427c478bd9Sstevel@tonic-gate 
1437c478bd9Sstevel@tonic-gate void
144d216dff5SRobert Mastors rfs4_dbe_unlock(rfs4_dbe_t *entry)
1457c478bd9Sstevel@tonic-gate {
146d216dff5SRobert Mastors 	mutex_exit(entry->dbe_lock);
1477c478bd9Sstevel@tonic-gate }
1487c478bd9Sstevel@tonic-gate 
1497c478bd9Sstevel@tonic-gate bool_t
150d216dff5SRobert Mastors rfs4_dbe_islocked(rfs4_dbe_t *entry)
1517c478bd9Sstevel@tonic-gate {
152d216dff5SRobert Mastors 	return (mutex_owned(entry->dbe_lock));
1537c478bd9Sstevel@tonic-gate }
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate clock_t
156d216dff5SRobert Mastors rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout)
1577c478bd9Sstevel@tonic-gate {
158d216dff5SRobert Mastors 	return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout));
1597c478bd9Sstevel@tonic-gate }
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate void
162d216dff5SRobert Mastors rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry)
1637c478bd9Sstevel@tonic-gate {
164d216dff5SRobert Mastors 	cv_broadcast(entry->dbe_cv);
1657c478bd9Sstevel@tonic-gate }
1667c478bd9Sstevel@tonic-gate 
1677c478bd9Sstevel@tonic-gate /* ARGSUSED */
1687c478bd9Sstevel@tonic-gate static int
1697c478bd9Sstevel@tonic-gate rfs4_dbe_kmem_constructor(void *obj, void *private, int kmflag)
1707c478bd9Sstevel@tonic-gate {
1717c478bd9Sstevel@tonic-gate 	rfs4_dbe_t *entry = obj;
1727c478bd9Sstevel@tonic-gate 
173d216dff5SRobert Mastors 	mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL);
174d216dff5SRobert Mastors 	cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL);
1757c478bd9Sstevel@tonic-gate 
1767c478bd9Sstevel@tonic-gate 	return (0);
1777c478bd9Sstevel@tonic-gate }
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate static void
1807c478bd9Sstevel@tonic-gate rfs4_dbe_kmem_destructor(void *obj, void *private)
1817c478bd9Sstevel@tonic-gate {
1827c478bd9Sstevel@tonic-gate 	rfs4_dbe_t *entry = obj;
1837c478bd9Sstevel@tonic-gate 	/*LINTED*/
1847c478bd9Sstevel@tonic-gate 	rfs4_table_t *table = private;
1857c478bd9Sstevel@tonic-gate 
186d216dff5SRobert Mastors 	mutex_destroy(entry->dbe_lock);
187d216dff5SRobert Mastors 	cv_destroy(entry->dbe_cv);
1887c478bd9Sstevel@tonic-gate }
1897c478bd9Sstevel@tonic-gate 
1907c478bd9Sstevel@tonic-gate rfs4_database_t *
1917c478bd9Sstevel@tonic-gate rfs4_database_create(uint32_t flags)
1927c478bd9Sstevel@tonic-gate {
1937c478bd9Sstevel@tonic-gate 	rfs4_database_t *db;
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP);
196d216dff5SRobert Mastors 	mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL);
197d216dff5SRobert Mastors 	db->db_tables = NULL;
198d216dff5SRobert Mastors 	db->db_debug_flags = flags;
199d216dff5SRobert Mastors 	db->db_shutdown_count = 0;
200d216dff5SRobert Mastors 	cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL);
2017c478bd9Sstevel@tonic-gate 	return (db);
2027c478bd9Sstevel@tonic-gate }
2037c478bd9Sstevel@tonic-gate 
2047c478bd9Sstevel@tonic-gate 
2057c478bd9Sstevel@tonic-gate /*
2067c478bd9Sstevel@tonic-gate  * The reaper threads that have been created for the tables in this
2077c478bd9Sstevel@tonic-gate  * database must be stopped and the entries in the tables released.
2087c478bd9Sstevel@tonic-gate  * Each table will be marked as "shutdown" and the reaper threads
2097c478bd9Sstevel@tonic-gate  * poked and they will see that a shutdown is in progress and cleanup
2107c478bd9Sstevel@tonic-gate  * and exit.  This function waits for all reaper threads to stop
2117c478bd9Sstevel@tonic-gate  * before returning to the caller.
2127c478bd9Sstevel@tonic-gate  */
2137c478bd9Sstevel@tonic-gate void
2147c478bd9Sstevel@tonic-gate rfs4_database_shutdown(rfs4_database_t *db)
2157c478bd9Sstevel@tonic-gate {
2167c478bd9Sstevel@tonic-gate 	rfs4_table_t *table;
2177c478bd9Sstevel@tonic-gate 
218d216dff5SRobert Mastors 	mutex_enter(db->db_lock);
219d216dff5SRobert Mastors 	for (table = db->db_tables; table; table = table->dbt_tnext) {
220d216dff5SRobert Mastors 		mutex_enter(&table->dbt_reaper_cv_lock);
221422d9515SGerald Thornbrugh 		table->dbt_reaper_shutdown = TRUE;
222d216dff5SRobert Mastors 		cv_broadcast(&table->dbt_reaper_wait);
223d216dff5SRobert Mastors 		db->db_shutdown_count++;
224d216dff5SRobert Mastors 		mutex_exit(&table->dbt_reaper_cv_lock);
2257c478bd9Sstevel@tonic-gate 	}
226d216dff5SRobert Mastors 	while (db->db_shutdown_count > 0) {
227d216dff5SRobert Mastors 		cv_wait(&db->db_shutdown_wait, db->db_lock);
2287c478bd9Sstevel@tonic-gate 	}
229d216dff5SRobert Mastors 	mutex_exit(db->db_lock);
2307c478bd9Sstevel@tonic-gate }
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate /*
2337c478bd9Sstevel@tonic-gate  * Given a database that has been "shutdown" by the function above all
2347c478bd9Sstevel@tonic-gate  * of the table tables are destroyed and then the database itself
2357c478bd9Sstevel@tonic-gate  * freed.
2367c478bd9Sstevel@tonic-gate  */
2377c478bd9Sstevel@tonic-gate void
2387c478bd9Sstevel@tonic-gate rfs4_database_destroy(rfs4_database_t *db)
2397c478bd9Sstevel@tonic-gate {
2407c478bd9Sstevel@tonic-gate 	rfs4_table_t *next, *tmp;
2417c478bd9Sstevel@tonic-gate 
242d216dff5SRobert Mastors 	for (next = db->db_tables; next; ) {
2437c478bd9Sstevel@tonic-gate 		tmp = next;
244d216dff5SRobert Mastors 		next = tmp->dbt_tnext;
2457c478bd9Sstevel@tonic-gate 		rfs4_table_destroy(db, tmp);
2467c478bd9Sstevel@tonic-gate 	}
2477c478bd9Sstevel@tonic-gate 
248d216dff5SRobert Mastors 	mutex_destroy(db->db_lock);
2497c478bd9Sstevel@tonic-gate 	kmem_free(db, sizeof (rfs4_database_t));
2507c478bd9Sstevel@tonic-gate }
2517c478bd9Sstevel@tonic-gate 
2527c478bd9Sstevel@tonic-gate rfs4_table_t *
253d216dff5SRobert Mastors rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
2547c478bd9Sstevel@tonic-gate     uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
2557c478bd9Sstevel@tonic-gate     void (*destroy)(rfs4_entry_t),
2567c478bd9Sstevel@tonic-gate     bool_t (*expiry)(rfs4_entry_t),
2577c478bd9Sstevel@tonic-gate     uint32_t size, uint32_t hashsize,
2587c478bd9Sstevel@tonic-gate     uint32_t maxentries, id_t start)
2597c478bd9Sstevel@tonic-gate {
2607c478bd9Sstevel@tonic-gate 	rfs4_table_t	*table;
2617c478bd9Sstevel@tonic-gate 	int		 len;
2627c478bd9Sstevel@tonic-gate 	char		*cache_name;
2637c478bd9Sstevel@tonic-gate 	char		*id_name;
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate 	table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
266d216dff5SRobert Mastors 	table->dbt_db = db;
267d216dff5SRobert Mastors 	rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
268d216dff5SRobert Mastors 	mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
269d216dff5SRobert Mastors 	mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
270d216dff5SRobert Mastors 	cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate 	len = strlen(tabname);
273d216dff5SRobert Mastors 	table->dbt_name = kmem_alloc(len+1, KM_SLEEP);
2747c478bd9Sstevel@tonic-gate 	cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP);
275d216dff5SRobert Mastors 	(void) strcpy(table->dbt_name, tabname);
276d216dff5SRobert Mastors 	(void) sprintf(cache_name, "%s_entry_cache", table->dbt_name);
277d216dff5SRobert Mastors 	table->dbt_max_cache_time = max_cache_time;
278d216dff5SRobert Mastors 	table->dbt_usize = size;
279d216dff5SRobert Mastors 	table->dbt_len = hashsize;
280d216dff5SRobert Mastors 	table->dbt_count = 0;
281d216dff5SRobert Mastors 	table->dbt_idxcnt = 0;
282d216dff5SRobert Mastors 	table->dbt_ccnt = 0;
283d216dff5SRobert Mastors 	table->dbt_maxcnt = idxcnt;
284d216dff5SRobert Mastors 	table->dbt_indices = NULL;
285d216dff5SRobert Mastors 	table->dbt_id_space = NULL;
286d216dff5SRobert Mastors 	table->dbt_reaper_shutdown = FALSE;
2877c478bd9Sstevel@tonic-gate 
2887c478bd9Sstevel@tonic-gate 	if (start >= 0) {
2897c478bd9Sstevel@tonic-gate 		if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
2907c478bd9Sstevel@tonic-gate 			maxentries = INT32_MAX - start;
2917c478bd9Sstevel@tonic-gate 		id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
292d216dff5SRobert Mastors 		(void) sprintf(id_name, "%s_id_space", table->dbt_name);
293d216dff5SRobert Mastors 		table->dbt_id_space = id_space_create(id_name, start,
2947c478bd9Sstevel@tonic-gate 		    maxentries + start);
2957c478bd9Sstevel@tonic-gate 		kmem_free(id_name, len + 10);
2967c478bd9Sstevel@tonic-gate 	}
297f6cf9e50SRick Mesta 	ASSERT(t_lowat != 0);
298f6cf9e50SRick Mesta 	table->dbt_id_lwat = (maxentries * t_lowat) / 100;
299f6cf9e50SRick Mesta 	ASSERT(t_hiwat != 0);
300f6cf9e50SRick Mesta 	table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
301f6cf9e50SRick Mesta 	table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
302d216dff5SRobert Mastors 	table->dbt_maxentries = maxentries;
303d216dff5SRobert Mastors 	table->dbt_create = create;
304d216dff5SRobert Mastors 	table->dbt_destroy = destroy;
305d216dff5SRobert Mastors 	table->dbt_expiry = expiry;
3067c478bd9Sstevel@tonic-gate 
307d216dff5SRobert Mastors 	table->dbt_mem_cache = kmem_cache_create(cache_name,
308d216dff5SRobert Mastors 	    sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
3097c478bd9Sstevel@tonic-gate 	    0,
3107c478bd9Sstevel@tonic-gate 	    rfs4_dbe_kmem_constructor,
3117c478bd9Sstevel@tonic-gate 	    rfs4_dbe_kmem_destructor,
3127c478bd9Sstevel@tonic-gate 	    NULL,
3137c478bd9Sstevel@tonic-gate 	    table,
3147c478bd9Sstevel@tonic-gate 	    NULL,
3157c478bd9Sstevel@tonic-gate 	    0);
3167c478bd9Sstevel@tonic-gate 	kmem_free(cache_name, len+13);
3177c478bd9Sstevel@tonic-gate 
318d216dff5SRobert Mastors 	table->dbt_debug = db->db_debug_flags;
3197c478bd9Sstevel@tonic-gate 
320d216dff5SRobert Mastors 	mutex_enter(db->db_lock);
321d216dff5SRobert Mastors 	table->dbt_tnext = db->db_tables;
322d216dff5SRobert Mastors 	db->db_tables = table;
323d216dff5SRobert Mastors 	mutex_exit(db->db_lock);
3247c478bd9Sstevel@tonic-gate 
3257c478bd9Sstevel@tonic-gate 	rfs4_start_reaper(table);
3267c478bd9Sstevel@tonic-gate 
3277c478bd9Sstevel@tonic-gate 	return (table);
3287c478bd9Sstevel@tonic-gate }
3297c478bd9Sstevel@tonic-gate 
3307c478bd9Sstevel@tonic-gate void
331d216dff5SRobert Mastors rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
3327c478bd9Sstevel@tonic-gate {
3337c478bd9Sstevel@tonic-gate 	rfs4_table_t *p;
334d216dff5SRobert Mastors 	rfs4_index_t *idx;
3357c478bd9Sstevel@tonic-gate 
336d216dff5SRobert Mastors 	ASSERT(table->dbt_count == 0);
3377c478bd9Sstevel@tonic-gate 
338d216dff5SRobert Mastors 	mutex_enter(db->db_lock);
339d216dff5SRobert Mastors 	if (table == db->db_tables)
340d216dff5SRobert Mastors 		db->db_tables = table->dbt_tnext;
3417c478bd9Sstevel@tonic-gate 	else {
342d216dff5SRobert Mastors 		for (p = db->db_tables; p; p = p->dbt_tnext)
343d216dff5SRobert Mastors 			if (p->dbt_tnext == table) {
344d216dff5SRobert Mastors 				p->dbt_tnext = table->dbt_tnext;
345d216dff5SRobert Mastors 				table->dbt_tnext = NULL;
3467c478bd9Sstevel@tonic-gate 				break;
3477c478bd9Sstevel@tonic-gate 			}
3487c478bd9Sstevel@tonic-gate 		ASSERT(p != NULL);
3497c478bd9Sstevel@tonic-gate 	}
350d216dff5SRobert Mastors 	mutex_exit(db->db_lock);
3517c478bd9Sstevel@tonic-gate 
3527c478bd9Sstevel@tonic-gate 	/* Destroy indices */
353d216dff5SRobert Mastors 	while (table->dbt_indices) {
354d216dff5SRobert Mastors 		idx = table->dbt_indices;
355d216dff5SRobert Mastors 		table->dbt_indices = idx->dbi_inext;
356d216dff5SRobert Mastors 		rfs4_index_destroy(idx);
3577c478bd9Sstevel@tonic-gate 	}
3587c478bd9Sstevel@tonic-gate 
359d216dff5SRobert Mastors 	rw_destroy(table->dbt_t_lock);
360d216dff5SRobert Mastors 	mutex_destroy(table->dbt_lock);
361d216dff5SRobert Mastors 	mutex_destroy(&table->dbt_reaper_cv_lock);
362d216dff5SRobert Mastors 	cv_destroy(&table->dbt_reaper_wait);
3637c478bd9Sstevel@tonic-gate 
364d216dff5SRobert Mastors 	kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
365d216dff5SRobert Mastors 	if (table->dbt_id_space)
366d216dff5SRobert Mastors 		id_space_destroy(table->dbt_id_space);
367d216dff5SRobert Mastors 	kmem_cache_destroy(table->dbt_mem_cache);
3687c478bd9Sstevel@tonic-gate 	kmem_free(table, sizeof (rfs4_table_t));
3697c478bd9Sstevel@tonic-gate }
3707c478bd9Sstevel@tonic-gate 
3717c478bd9Sstevel@tonic-gate rfs4_index_t *
3727c478bd9Sstevel@tonic-gate rfs4_index_create(rfs4_table_t *table, char *keyname,
3737c478bd9Sstevel@tonic-gate     uint32_t (*hash)(void *),
3747c478bd9Sstevel@tonic-gate     bool_t (compare)(rfs4_entry_t, void *),
3757c478bd9Sstevel@tonic-gate     void *(*mkkey)(rfs4_entry_t),
3767c478bd9Sstevel@tonic-gate     bool_t createable)
3777c478bd9Sstevel@tonic-gate {
3787c478bd9Sstevel@tonic-gate 	rfs4_index_t *idx;
3797c478bd9Sstevel@tonic-gate 
380d216dff5SRobert Mastors 	ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
3817c478bd9Sstevel@tonic-gate 
3827c478bd9Sstevel@tonic-gate 	idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
3837c478bd9Sstevel@tonic-gate 
384d216dff5SRobert Mastors 	idx->dbi_table = table;
385d216dff5SRobert Mastors 	idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
386d216dff5SRobert Mastors 	(void) strcpy(idx->dbi_keyname, keyname);
387d216dff5SRobert Mastors 	idx->dbi_hash = hash;
388d216dff5SRobert Mastors 	idx->dbi_compare = compare;
389d216dff5SRobert Mastors 	idx->dbi_mkkey = mkkey;
390d216dff5SRobert Mastors 	idx->dbi_tblidx = table->dbt_idxcnt;
391d216dff5SRobert Mastors 	table->dbt_idxcnt++;
3927c478bd9Sstevel@tonic-gate 	if (createable) {
393d216dff5SRobert Mastors 		table->dbt_ccnt++;
394d216dff5SRobert Mastors 		if (table->dbt_ccnt > 1)
3957c478bd9Sstevel@tonic-gate 			panic("Table %s currently can have only have one "
3967c478bd9Sstevel@tonic-gate 			    "index that will allow creation of entries",
397d216dff5SRobert Mastors 			    table->dbt_name);
398d216dff5SRobert Mastors 		idx->dbi_createable = TRUE;
3997c478bd9Sstevel@tonic-gate 	} else {
400d216dff5SRobert Mastors 		idx->dbi_createable = FALSE;
4017c478bd9Sstevel@tonic-gate 	}
4027c478bd9Sstevel@tonic-gate 
403d216dff5SRobert Mastors 	idx->dbi_inext = table->dbt_indices;
404d216dff5SRobert Mastors 	table->dbt_indices = idx;
405d216dff5SRobert Mastors 	idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len,
406d216dff5SRobert Mastors 	    KM_SLEEP);
4077c478bd9Sstevel@tonic-gate 
4087c478bd9Sstevel@tonic-gate 	return (idx);
4097c478bd9Sstevel@tonic-gate }
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate void
4127c478bd9Sstevel@tonic-gate rfs4_index_destroy(rfs4_index_t *idx)
4137c478bd9Sstevel@tonic-gate {
414d216dff5SRobert Mastors 	kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1);
415d216dff5SRobert Mastors 	kmem_free(idx->dbi_buckets,
416d216dff5SRobert Mastors 	    sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len);
4177c478bd9Sstevel@tonic-gate 	kmem_free(idx, sizeof (rfs4_index_t));
4187c478bd9Sstevel@tonic-gate }
4197c478bd9Sstevel@tonic-gate 
4207c478bd9Sstevel@tonic-gate static void
4217c478bd9Sstevel@tonic-gate rfs4_dbe_destroy(rfs4_dbe_t *entry)
4227c478bd9Sstevel@tonic-gate {
423d216dff5SRobert Mastors 	rfs4_index_t *idx;
4247c478bd9Sstevel@tonic-gate 	void *key;
4257c478bd9Sstevel@tonic-gate 	int i;
426d216dff5SRobert Mastors 	rfs4_bucket_t *bp;
427d216dff5SRobert Mastors 	rfs4_table_t *table = entry->dbe_table;
428d216dff5SRobert Mastors 	rfs4_link_t *l;
4297c478bd9Sstevel@tonic-gate 
430d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG,
4317c478bd9Sstevel@tonic-gate 	    (CE_NOTE, "Destroying entry %p from %s",
432d216dff5SRobert Mastors 	    (void*)entry, table->dbt_name));
4337c478bd9Sstevel@tonic-gate 
434d216dff5SRobert Mastors 	mutex_enter(entry->dbe_lock);
435d216dff5SRobert Mastors 	ASSERT(entry->dbe_refcnt == 0);
436d216dff5SRobert Mastors 	mutex_exit(entry->dbe_lock);
4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate 	/* Unlink from all indices */
439d216dff5SRobert Mastors 	for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) {
440d216dff5SRobert Mastors 		l = &entry->dbe_indices[idx->dbi_tblidx];
4417c478bd9Sstevel@tonic-gate 		/* check and see if we were ever linked in to the index */
4427c478bd9Sstevel@tonic-gate 		if (INVALID_LINK(l)) {
4437c478bd9Sstevel@tonic-gate 			ASSERT(l->next == NULL && l->prev == NULL);
4447c478bd9Sstevel@tonic-gate 			continue;
4457c478bd9Sstevel@tonic-gate 		}
446d216dff5SRobert Mastors 		key = idx->dbi_mkkey(entry->dbe_data);
447d216dff5SRobert Mastors 		i = HASH(idx, key);
448d216dff5SRobert Mastors 		bp = &idx->dbi_buckets[i];
449d216dff5SRobert Mastors 		ASSERT(bp->dbk_head != NULL);
450d216dff5SRobert Mastors 		DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]);
4517c478bd9Sstevel@tonic-gate 	}
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate 	/* Destroy user data */
454d216dff5SRobert Mastors 	if (table->dbt_destroy)
455d216dff5SRobert Mastors 		(*table->dbt_destroy)(entry->dbe_data);
4567c478bd9Sstevel@tonic-gate 
457d216dff5SRobert Mastors 	if (table->dbt_id_space)
458d216dff5SRobert Mastors 		id_free(table->dbt_id_space, entry->dbe_id);
4597c478bd9Sstevel@tonic-gate 
460d216dff5SRobert Mastors 	mutex_enter(table->dbt_lock);
461d216dff5SRobert Mastors 	table->dbt_count--;
462d216dff5SRobert Mastors 	mutex_exit(table->dbt_lock);
4637c478bd9Sstevel@tonic-gate 
4647c478bd9Sstevel@tonic-gate 	/* Destroy the entry itself */
465d216dff5SRobert Mastors 	kmem_cache_free(table->dbt_mem_cache, entry);
4667c478bd9Sstevel@tonic-gate }
4677c478bd9Sstevel@tonic-gate 
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate static rfs4_dbe_t *
470d216dff5SRobert Mastors rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data)
4717c478bd9Sstevel@tonic-gate {
4727c478bd9Sstevel@tonic-gate 	rfs4_dbe_t *entry;
4737c478bd9Sstevel@tonic-gate 	int i;
4747c478bd9Sstevel@tonic-gate 
475d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
476d216dff5SRobert Mastors 	    (CE_NOTE, "Creating entry in table %s", table->dbt_name));
4777c478bd9Sstevel@tonic-gate 
478d216dff5SRobert Mastors 	entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP);
4797c478bd9Sstevel@tonic-gate 
480d216dff5SRobert Mastors 	entry->dbe_refcnt = 1;
481d216dff5SRobert Mastors 	entry->dbe_invalid = FALSE;
482d216dff5SRobert Mastors 	entry->dbe_skipsearch = FALSE;
483d216dff5SRobert Mastors 	entry->dbe_time_rele = 0;
484d216dff5SRobert Mastors 	entry->dbe_id = 0;
4857c478bd9Sstevel@tonic-gate 
486d216dff5SRobert Mastors 	if (table->dbt_id_space)
487d216dff5SRobert Mastors 		entry->dbe_id = id;
488d216dff5SRobert Mastors 	entry->dbe_table = table;
4897c478bd9Sstevel@tonic-gate 
490d216dff5SRobert Mastors 	for (i = 0; i < table->dbt_maxcnt; i++) {
491d216dff5SRobert Mastors 		entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL;
492d216dff5SRobert Mastors 		entry->dbe_indices[i].entry = entry;
4937c478bd9Sstevel@tonic-gate 		/*
4947c478bd9Sstevel@tonic-gate 		 * We mark the entry as not indexed by setting the low
4957c478bd9Sstevel@tonic-gate 		 * order bit, since address are word aligned. This has
4967c478bd9Sstevel@tonic-gate 		 * the advantage of causeing a trap if the address is
4977c478bd9Sstevel@tonic-gate 		 * used. After the entry is linked in to the
4987c478bd9Sstevel@tonic-gate 		 * corresponding index the bit will be cleared.
4997c478bd9Sstevel@tonic-gate 		 */
500d216dff5SRobert Mastors 		INVALIDATE_ADDR(entry->dbe_indices[i].entry);
5017c478bd9Sstevel@tonic-gate 	}
5027c478bd9Sstevel@tonic-gate 
503d216dff5SRobert Mastors 	entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt];
504d216dff5SRobert Mastors 	bzero(entry->dbe_data, table->dbt_usize);
505d216dff5SRobert Mastors 	entry->dbe_data->dbe = entry;
5067c478bd9Sstevel@tonic-gate 
507d216dff5SRobert Mastors 	if (!(*table->dbt_create)(entry->dbe_data, data)) {
508d216dff5SRobert Mastors 		kmem_cache_free(table->dbt_mem_cache, entry);
5097c478bd9Sstevel@tonic-gate 		return (NULL);
5107c478bd9Sstevel@tonic-gate 	}
5117c478bd9Sstevel@tonic-gate 
512d216dff5SRobert Mastors 	mutex_enter(table->dbt_lock);
513d216dff5SRobert Mastors 	table->dbt_count++;
514d216dff5SRobert Mastors 	mutex_exit(table->dbt_lock);
5157c478bd9Sstevel@tonic-gate 
5167c478bd9Sstevel@tonic-gate 	return (entry);
5177c478bd9Sstevel@tonic-gate }
5187c478bd9Sstevel@tonic-gate 
519f6cf9e50SRick Mesta static void
520f6cf9e50SRick Mesta rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
521f6cf9e50SRick Mesta {
522f6cf9e50SRick Mesta 	clock_t		tabreap;
523f6cf9e50SRick Mesta 	clock_t		reap_int;
524f6cf9e50SRick Mesta 	uint32_t	in_use;
525f6cf9e50SRick Mesta 
526f6cf9e50SRick Mesta 	/*
527f6cf9e50SRick Mesta 	 * Adjust the table's reap interval based on the
528f6cf9e50SRick Mesta 	 * number of id's currently in use. Each table's
529f6cf9e50SRick Mesta 	 * default remains the same if id usage subsides.
530f6cf9e50SRick Mesta 	 */
531f6cf9e50SRick Mesta 	ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
532f6cf9e50SRick Mesta 	tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
533f6cf9e50SRick Mesta 
534f6cf9e50SRick Mesta 	in_use = table->dbt_count + 1;	/* see rfs4_dbe_create */
535f6cf9e50SRick Mesta 	if (in_use >= table->dbt_id_hwat) {
536f6cf9e50SRick Mesta 		ASSERT(t_hreap != 0);
537f6cf9e50SRick Mesta 		reap_int = (tabreap * t_hreap) / 100;
538f6cf9e50SRick Mesta 	} else if (in_use >= table->dbt_id_lwat) {
539f6cf9e50SRick Mesta 		ASSERT(t_lreap != 0);
540f6cf9e50SRick Mesta 		reap_int = (tabreap * t_lreap) / 100;
541f6cf9e50SRick Mesta 	} else {
542f6cf9e50SRick Mesta 		reap_int = tabreap;
543f6cf9e50SRick Mesta 	}
544f6cf9e50SRick Mesta 	table->dbt_id_reap = reap_int;
545f6cf9e50SRick Mesta 	DTRACE_PROBE2(table__reap__interval, char *,
546f6cf9e50SRick Mesta 	    table->dbt_name, time_t, table->dbt_id_reap);
547f6cf9e50SRick Mesta }
548f6cf9e50SRick Mesta 
5497c478bd9Sstevel@tonic-gate rfs4_entry_t
5507c478bd9Sstevel@tonic-gate rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
5517c478bd9Sstevel@tonic-gate     rfs4_dbsearch_type_t dbsearch_type)
5527c478bd9Sstevel@tonic-gate {
5537c478bd9Sstevel@tonic-gate 	int		 already_done;
5547c478bd9Sstevel@tonic-gate 	uint32_t	 i;
555d216dff5SRobert Mastors 	rfs4_table_t	*table = idx->dbi_table;
5567c478bd9Sstevel@tonic-gate 	rfs4_index_t	*ip;
557d216dff5SRobert Mastors 	rfs4_bucket_t	*bp;
558d216dff5SRobert Mastors 	rfs4_link_t	*l;
559d216dff5SRobert Mastors 	rfs4_dbe_t	*entry;
560d216dff5SRobert Mastors 	id_t		 id = -1;
5617c478bd9Sstevel@tonic-gate 
5627c478bd9Sstevel@tonic-gate 	i = HASH(idx, key);
563d216dff5SRobert Mastors 	bp = &idx->dbi_buckets[i];
5647c478bd9Sstevel@tonic-gate 
565d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
5667c478bd9Sstevel@tonic-gate 	    (CE_NOTE, "Searching for key %p in table %s by %s",
567d216dff5SRobert Mastors 	    key, table->dbt_name, idx->dbi_keyname));
5687c478bd9Sstevel@tonic-gate 
569d216dff5SRobert Mastors 	rw_enter(bp->dbk_lock, RW_READER);
5707c478bd9Sstevel@tonic-gate retry:
571d216dff5SRobert Mastors 	for (l = bp->dbk_head; l; l = l->next) {
572d216dff5SRobert Mastors 		if (l->entry->dbe_refcnt > 0 &&
573d216dff5SRobert Mastors 		    (l->entry->dbe_skipsearch == FALSE ||
574d216dff5SRobert Mastors 		    (l->entry->dbe_skipsearch == TRUE &&
5757c478bd9Sstevel@tonic-gate 		    dbsearch_type == RFS4_DBS_INVALID)) &&
576d216dff5SRobert Mastors 		    (*idx->dbi_compare)(l->entry->dbe_data, key)) {
577d216dff5SRobert Mastors 			mutex_enter(l->entry->dbe_lock);
578d216dff5SRobert Mastors 			if (l->entry->dbe_refcnt == 0) {
579d216dff5SRobert Mastors 				mutex_exit(l->entry->dbe_lock);
5807c478bd9Sstevel@tonic-gate 				continue;
5817c478bd9Sstevel@tonic-gate 			}
5827c478bd9Sstevel@tonic-gate 
5837c478bd9Sstevel@tonic-gate 			/* place an additional hold since we are returning */
5847c478bd9Sstevel@tonic-gate 			rfs4_dbe_hold(l->entry);
5857c478bd9Sstevel@tonic-gate 
586d216dff5SRobert Mastors 			mutex_exit(l->entry->dbe_lock);
587d216dff5SRobert Mastors 			rw_exit(bp->dbk_lock);
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 			*create = FALSE;
5907c478bd9Sstevel@tonic-gate 
591d216dff5SRobert Mastors 			NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG),
5927c478bd9Sstevel@tonic-gate 			    (CE_NOTE, "Found entry %p for %p in table %s",
593d216dff5SRobert Mastors 			    (void *)l->entry, key, table->dbt_name));
5947c478bd9Sstevel@tonic-gate 
595d216dff5SRobert Mastors 			if (id != -1)
596d216dff5SRobert Mastors 				id_free(table->dbt_id_space, id);
597d216dff5SRobert Mastors 			return (l->entry->dbe_data);
5987c478bd9Sstevel@tonic-gate 		}
5997c478bd9Sstevel@tonic-gate 	}
6007c478bd9Sstevel@tonic-gate 
601d216dff5SRobert Mastors 	if (!*create || table->dbt_create == NULL || !idx->dbi_createable ||
602d216dff5SRobert Mastors 	    table->dbt_maxentries == table->dbt_count) {
603d216dff5SRobert Mastors 		NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
6047c478bd9Sstevel@tonic-gate 		    (CE_NOTE, "Entry for %p in %s not found",
605d216dff5SRobert Mastors 		    key, table->dbt_name));
6067c478bd9Sstevel@tonic-gate 
607d216dff5SRobert Mastors 		rw_exit(bp->dbk_lock);
608d216dff5SRobert Mastors 		if (id != -1)
609d216dff5SRobert Mastors 			id_free(table->dbt_id_space, id);
6107c478bd9Sstevel@tonic-gate 		return (NULL);
6117c478bd9Sstevel@tonic-gate 	}
6127c478bd9Sstevel@tonic-gate 
613d216dff5SRobert Mastors 	if (table->dbt_id_space && id == -1) {
614d216dff5SRobert Mastors 		rw_exit(bp->dbk_lock);
6157c478bd9Sstevel@tonic-gate 
616d216dff5SRobert Mastors 		/* get an id, ok to sleep for it here */
617d216dff5SRobert Mastors 		id = id_alloc(table->dbt_id_space);
618f6cf9e50SRick Mesta 		ASSERT(id != -1);
619f6cf9e50SRick Mesta 
620f6cf9e50SRick Mesta 		mutex_enter(&table->dbt_reaper_cv_lock);
621f6cf9e50SRick Mesta 		rfs4_dbe_tabreap_adjust(table);
622f6cf9e50SRick Mesta 		mutex_exit(&table->dbt_reaper_cv_lock);
6237c478bd9Sstevel@tonic-gate 
624d216dff5SRobert Mastors 		rw_enter(bp->dbk_lock, RW_WRITER);
6257c478bd9Sstevel@tonic-gate 		goto retry;
6267c478bd9Sstevel@tonic-gate 	}
627d216dff5SRobert Mastors 
628d216dff5SRobert Mastors 	/* get an exclusive lock on the bucket */
629d216dff5SRobert Mastors 	if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) {
630d216dff5SRobert Mastors 		NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG,
631d216dff5SRobert Mastors 		    (CE_NOTE, "Trying to upgrade lock on "
632d216dff5SRobert Mastors 		    "hash chain %d (%p) for  %s by %s",
633d216dff5SRobert Mastors 		    i, (void*)bp, table->dbt_name, idx->dbi_keyname));
634d216dff5SRobert Mastors 
635d216dff5SRobert Mastors 		rw_exit(bp->dbk_lock);
636d216dff5SRobert Mastors 		rw_enter(bp->dbk_lock, RW_WRITER);
637d216dff5SRobert Mastors 		goto retry;
638d216dff5SRobert Mastors 	}
639d216dff5SRobert Mastors 
640d216dff5SRobert Mastors 	/* create entry */
641d216dff5SRobert Mastors 	entry = rfs4_dbe_create(table, id, arg);
642d216dff5SRobert Mastors 	if (entry == NULL) {
643d216dff5SRobert Mastors 		rw_exit(bp->dbk_lock);
644d216dff5SRobert Mastors 		if (id != -1)
645d216dff5SRobert Mastors 			id_free(table->dbt_id_space, id);
646d216dff5SRobert Mastors 
647d216dff5SRobert Mastors 		NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
648d216dff5SRobert Mastors 		    (CE_NOTE, "Constructor for table %s failed",
649d216dff5SRobert Mastors 		    table->dbt_name));
650d216dff5SRobert Mastors 		return (NULL);
651d216dff5SRobert Mastors 	}
6527c478bd9Sstevel@tonic-gate 
6537c478bd9Sstevel@tonic-gate 	/*
6547c478bd9Sstevel@tonic-gate 	 * Add one ref for entry into table's hash - only one
655d216dff5SRobert Mastors 	 * reference added even though there may be multiple indices
6567c478bd9Sstevel@tonic-gate 	 */
6577c478bd9Sstevel@tonic-gate 	rfs4_dbe_hold(entry);
658d216dff5SRobert Mastors 	ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]);
659d216dff5SRobert Mastors 	VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry);
6607c478bd9Sstevel@tonic-gate 
661d216dff5SRobert Mastors 	already_done = idx->dbi_tblidx;
662d216dff5SRobert Mastors 	rw_exit(bp->dbk_lock);
6637c478bd9Sstevel@tonic-gate 
664d216dff5SRobert Mastors 	for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) {
665d216dff5SRobert Mastors 		if (ip->dbi_tblidx == already_done)
6667c478bd9Sstevel@tonic-gate 			continue;
667d216dff5SRobert Mastors 		l = &entry->dbe_indices[ip->dbi_tblidx];
668d216dff5SRobert Mastors 		i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
669d216dff5SRobert Mastors 		ASSERT(i < ip->dbi_table->dbt_len);
670d216dff5SRobert Mastors 		bp = &ip->dbi_buckets[i];
6717c478bd9Sstevel@tonic-gate 		ENQUEUE_IDX(bp, l);
6727c478bd9Sstevel@tonic-gate 	}
6737c478bd9Sstevel@tonic-gate 
674d216dff5SRobert Mastors 	NFS4_DEBUG(
675d216dff5SRobert Mastors 	    table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
6767c478bd9Sstevel@tonic-gate 	    (CE_NOTE, "Entry %p created for %s = %p in table %s",
677d216dff5SRobert Mastors 	    (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
6787c478bd9Sstevel@tonic-gate 
679d216dff5SRobert Mastors 	return (entry->dbe_data);
6807c478bd9Sstevel@tonic-gate }
6817c478bd9Sstevel@tonic-gate 
6827c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6837c478bd9Sstevel@tonic-gate boolean_t
6847c478bd9Sstevel@tonic-gate rfs4_cpr_callb(void *arg, int code)
6857c478bd9Sstevel@tonic-gate {
686d216dff5SRobert Mastors 	rfs4_table_t *table = rfs4_client_tab;
687d216dff5SRobert Mastors 	rfs4_bucket_t *buckets, *bp;
688d216dff5SRobert Mastors 	rfs4_link_t *l;
689d216dff5SRobert Mastors 	rfs4_client_t *cp;
6907c478bd9Sstevel@tonic-gate 	int i;
6917c478bd9Sstevel@tonic-gate 
6927c478bd9Sstevel@tonic-gate 	/*
6937c478bd9Sstevel@tonic-gate 	 * We get called for Suspend and Resume events.
6947c478bd9Sstevel@tonic-gate 	 * For the suspend case we simply don't care!  Nor do we care if
6957c478bd9Sstevel@tonic-gate 	 * there are no clients.
6967c478bd9Sstevel@tonic-gate 	 */
697d216dff5SRobert Mastors 	if (code == CB_CODE_CPR_CHKPT || table == NULL) {
6987c478bd9Sstevel@tonic-gate 		return (B_TRUE);
6997c478bd9Sstevel@tonic-gate 	}
7007c478bd9Sstevel@tonic-gate 
701d216dff5SRobert Mastors 	buckets = table->dbt_indices->dbi_buckets;
7027c478bd9Sstevel@tonic-gate 
7037c478bd9Sstevel@tonic-gate 	/*
7047c478bd9Sstevel@tonic-gate 	 * When we get this far we are in the process of
7057c478bd9Sstevel@tonic-gate 	 * resuming the system from a previous suspend.
7067c478bd9Sstevel@tonic-gate 	 *
7077c478bd9Sstevel@tonic-gate 	 * We are going to blast through and update the
7087c478bd9Sstevel@tonic-gate 	 * last_access time for all the clients and in
7097c478bd9Sstevel@tonic-gate 	 * doing so extend them by one lease period.
7107c478bd9Sstevel@tonic-gate 	 */
711d216dff5SRobert Mastors 	for (i = 0; i < table->dbt_len; i++) {
7127c478bd9Sstevel@tonic-gate 		bp = &buckets[i];
713d216dff5SRobert Mastors 		for (l = bp->dbk_head; l; l = l->next) {
714d216dff5SRobert Mastors 			cp = (rfs4_client_t *)l->entry->dbe_data;
715d216dff5SRobert Mastors 			cp->rc_last_access = gethrestime_sec();
7167c478bd9Sstevel@tonic-gate 		}
7177c478bd9Sstevel@tonic-gate 	}
7187c478bd9Sstevel@tonic-gate 
7197c478bd9Sstevel@tonic-gate 	return (B_TRUE);
7207c478bd9Sstevel@tonic-gate }
7217c478bd9Sstevel@tonic-gate 
7227c478bd9Sstevel@tonic-gate /*
7237c478bd9Sstevel@tonic-gate  * Given a table, lock each of the buckets and walk all entries (in
7247c478bd9Sstevel@tonic-gate  * turn locking those) and calling the provided "callout" function
7257c478bd9Sstevel@tonic-gate  * with the provided parameter.  Obviously used to iterate across all
7267c478bd9Sstevel@tonic-gate  * entries in a particular table via the database locking hierarchy.
7277c478bd9Sstevel@tonic-gate  * Obviously the caller must not hold locks on any of the entries in
7287c478bd9Sstevel@tonic-gate  * the specified table.
7297c478bd9Sstevel@tonic-gate  */
7307c478bd9Sstevel@tonic-gate void
7317c478bd9Sstevel@tonic-gate rfs4_dbe_walk(rfs4_table_t *table,
7327c478bd9Sstevel@tonic-gate     void (*callout)(rfs4_entry_t, void *),
7337c478bd9Sstevel@tonic-gate     void *data)
7347c478bd9Sstevel@tonic-gate {
735d216dff5SRobert Mastors 	rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp;
736d216dff5SRobert Mastors 	rfs4_link_t *l;
737d216dff5SRobert Mastors 	rfs4_dbe_t *entry;
7387c478bd9Sstevel@tonic-gate 	int i;
7397c478bd9Sstevel@tonic-gate 
740d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
741d216dff5SRobert Mastors 	    (CE_NOTE, "Walking entries in %s", table->dbt_name));
7427c478bd9Sstevel@tonic-gate 
7437c478bd9Sstevel@tonic-gate 	/* Walk the buckets looking for entries to release/destroy */
744d216dff5SRobert Mastors 	for (i = 0; i < table->dbt_len; i++) {
7457c478bd9Sstevel@tonic-gate 		bp = &buckets[i];
746d216dff5SRobert Mastors 		rw_enter(bp->dbk_lock, RW_READER);
747d216dff5SRobert Mastors 		for (l = bp->dbk_head; l; l = l->next) {
748d216dff5SRobert Mastors 			entry = l->entry;
749d216dff5SRobert Mastors 			mutex_enter(entry->dbe_lock);
750d216dff5SRobert Mastors 			(*callout)(entry->dbe_data, data);
751d216dff5SRobert Mastors 			mutex_exit(entry->dbe_lock);
7527c478bd9Sstevel@tonic-gate 		}
753d216dff5SRobert Mastors 		rw_exit(bp->dbk_lock);
7547c478bd9Sstevel@tonic-gate 	}
7557c478bd9Sstevel@tonic-gate 
756d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
757d216dff5SRobert Mastors 	    (CE_NOTE, "Walking entries complete %s", table->dbt_name));
7587c478bd9Sstevel@tonic-gate }
7597c478bd9Sstevel@tonic-gate 
7607c478bd9Sstevel@tonic-gate 
7617c478bd9Sstevel@tonic-gate static void
7627c478bd9Sstevel@tonic-gate rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
7637c478bd9Sstevel@tonic-gate {
764d216dff5SRobert Mastors 	rfs4_index_t *idx = table->dbt_indices;
765d216dff5SRobert Mastors 	rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
766d216dff5SRobert Mastors 	rfs4_link_t *l, *t;
767d216dff5SRobert Mastors 	rfs4_dbe_t *entry;
7687c478bd9Sstevel@tonic-gate 	bool_t found;
7697c478bd9Sstevel@tonic-gate 	int i;
7707c478bd9Sstevel@tonic-gate 	int count = 0;
7717c478bd9Sstevel@tonic-gate 
772d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
773d216dff5SRobert Mastors 	    (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
774d216dff5SRobert Mastors 	    desired, cache_time, table->dbt_name));
7757c478bd9Sstevel@tonic-gate 
7767c478bd9Sstevel@tonic-gate 	/* Walk the buckets looking for entries to release/destroy */
777d216dff5SRobert Mastors 	for (i = 0; i < table->dbt_len; i++) {
7787c478bd9Sstevel@tonic-gate 		bp = &buckets[i];
7797c478bd9Sstevel@tonic-gate 		do {
7807c478bd9Sstevel@tonic-gate 			found = FALSE;
781d216dff5SRobert Mastors 			rw_enter(bp->dbk_lock, RW_READER);
782d216dff5SRobert Mastors 			for (l = bp->dbk_head; l; l = l->next) {
783d216dff5SRobert Mastors 				entry = l->entry;
7847c478bd9Sstevel@tonic-gate 				/*
7857c478bd9Sstevel@tonic-gate 				 * Examine an entry.  Ref count of 1 means
7867c478bd9Sstevel@tonic-gate 				 * that the only reference is for the hash
7877c478bd9Sstevel@tonic-gate 				 * table reference.
7887c478bd9Sstevel@tonic-gate 				 */
789d216dff5SRobert Mastors 				if (entry->dbe_refcnt != 1)
790d216dff5SRobert Mastors 					continue;
791d216dff5SRobert Mastors 				mutex_enter(entry->dbe_lock);
792d216dff5SRobert Mastors 				if ((entry->dbe_refcnt == 1) &&
793d216dff5SRobert Mastors 				    (table->dbt_reaper_shutdown ||
794d216dff5SRobert Mastors 				    table->dbt_expiry == NULL ||
795d216dff5SRobert Mastors 				    (*table->dbt_expiry)(entry->dbe_data))) {
796d216dff5SRobert Mastors 					entry->dbe_refcnt--;
7977c478bd9Sstevel@tonic-gate 					count++;
7987c478bd9Sstevel@tonic-gate 					found = TRUE;
7997c478bd9Sstevel@tonic-gate 				}
800d216dff5SRobert Mastors 				mutex_exit(entry->dbe_lock);
8017c478bd9Sstevel@tonic-gate 			}
8027c478bd9Sstevel@tonic-gate 			if (found) {
803d216dff5SRobert Mastors 				if (!rw_tryupgrade(bp->dbk_lock)) {
804d216dff5SRobert Mastors 					rw_exit(bp->dbk_lock);
805d216dff5SRobert Mastors 					rw_enter(bp->dbk_lock, RW_WRITER);
8067c478bd9Sstevel@tonic-gate 				}
8077c478bd9Sstevel@tonic-gate 
808d216dff5SRobert Mastors 				l = bp->dbk_head;
8097c478bd9Sstevel@tonic-gate 				while (l) {
8107c478bd9Sstevel@tonic-gate 					t = l;
811d216dff5SRobert Mastors 					entry = t->entry;
8127c478bd9Sstevel@tonic-gate 					l = l->next;
813d216dff5SRobert Mastors 					if (entry->dbe_refcnt == 0) {
814d216dff5SRobert Mastors 						DEQUEUE(bp->dbk_head, t);
8157c478bd9Sstevel@tonic-gate 						t->next = NULL;
8167c478bd9Sstevel@tonic-gate 						t->prev = NULL;
8177c478bd9Sstevel@tonic-gate 						INVALIDATE_ADDR(t->entry);
818d216dff5SRobert Mastors 						rfs4_dbe_destroy(entry);
8197c478bd9Sstevel@tonic-gate 					}
8207c478bd9Sstevel@tonic-gate 				}
8217c478bd9Sstevel@tonic-gate 			}
822d216dff5SRobert Mastors 			rw_exit(bp->dbk_lock);
8237c478bd9Sstevel@tonic-gate 			/*
8247c478bd9Sstevel@tonic-gate 			 * delay slightly if there is more work to do
8257c478bd9Sstevel@tonic-gate 			 * with the expectation that other reaper
8267c478bd9Sstevel@tonic-gate 			 * threads are freeing data structures as well
8277c478bd9Sstevel@tonic-gate 			 * and in turn will reduce ref counts on
8287c478bd9Sstevel@tonic-gate 			 * entries in this table allowing them to be
8297c478bd9Sstevel@tonic-gate 			 * released.  This is only done in the
8307c478bd9Sstevel@tonic-gate 			 * instance that the tables are being shut down.
8317c478bd9Sstevel@tonic-gate 			 */
832d216dff5SRobert Mastors 			if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
8337c478bd9Sstevel@tonic-gate 				delay(hz/100);
8347c478bd9Sstevel@tonic-gate 		/*
8357c478bd9Sstevel@tonic-gate 		 * If this is a table shutdown, keep going until
8367c478bd9Sstevel@tonic-gate 		 * everything is gone
8377c478bd9Sstevel@tonic-gate 		 */
838d216dff5SRobert Mastors 		} while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
8397c478bd9Sstevel@tonic-gate 
840d216dff5SRobert Mastors 		if (!table->dbt_reaper_shutdown && desired && count >= desired)
8417c478bd9Sstevel@tonic-gate 			break;
8427c478bd9Sstevel@tonic-gate 	}
8437c478bd9Sstevel@tonic-gate 
844d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
845d216dff5SRobert Mastors 	    (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
846d216dff5SRobert Mastors 	    count, cache_time, table->dbt_name));
8477c478bd9Sstevel@tonic-gate }
8487c478bd9Sstevel@tonic-gate 
8497c478bd9Sstevel@tonic-gate static void
8507c478bd9Sstevel@tonic-gate reaper_thread(caddr_t *arg)
8517c478bd9Sstevel@tonic-gate {
8527c478bd9Sstevel@tonic-gate 	rfs4_table_t	*table = (rfs4_table_t *)arg;
853f6cf9e50SRick Mesta 	clock_t		 rc;
8547c478bd9Sstevel@tonic-gate 
855d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug,
856d216dff5SRobert Mastors 	    (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
8577c478bd9Sstevel@tonic-gate 
858d216dff5SRobert Mastors 	CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
8597c478bd9Sstevel@tonic-gate 	    callb_generic_cpr, "nfsv4Reaper");
8607c478bd9Sstevel@tonic-gate 
861d216dff5SRobert Mastors 	mutex_enter(&table->dbt_reaper_cv_lock);
8627c478bd9Sstevel@tonic-gate 	do {
863d216dff5SRobert Mastors 		CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
864d3d50737SRafael Vanoni 		rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
865f6cf9e50SRick Mesta 		    &table->dbt_reaper_cv_lock,
866f6cf9e50SRick Mesta 		    SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
867d216dff5SRobert Mastors 		CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
868d216dff5SRobert Mastors 		    &table->dbt_reaper_cv_lock);
869d216dff5SRobert Mastors 		rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
870d216dff5SRobert Mastors 	} while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
8717c478bd9Sstevel@tonic-gate 
872d216dff5SRobert Mastors 	CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
8737c478bd9Sstevel@tonic-gate 
874d216dff5SRobert Mastors 	NFS4_DEBUG(table->dbt_debug,
875d216dff5SRobert Mastors 	    (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
8767c478bd9Sstevel@tonic-gate 
8777c478bd9Sstevel@tonic-gate 	/* Notify the database shutdown processing that the table is shutdown */
878d216dff5SRobert Mastors 	mutex_enter(table->dbt_db->db_lock);
879d216dff5SRobert Mastors 	table->dbt_db->db_shutdown_count--;
880d216dff5SRobert Mastors 	cv_signal(&table->dbt_db->db_shutdown_wait);
881d216dff5SRobert Mastors 	mutex_exit(table->dbt_db->db_lock);
8827c478bd9Sstevel@tonic-gate }
8837c478bd9Sstevel@tonic-gate 
8847c478bd9Sstevel@tonic-gate static void
8857c478bd9Sstevel@tonic-gate rfs4_start_reaper(rfs4_table_t *table)
8867c478bd9Sstevel@tonic-gate {
887d216dff5SRobert Mastors 	if (table->dbt_max_cache_time == 0)
8887c478bd9Sstevel@tonic-gate 		return;
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate 	(void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
8917c478bd9Sstevel@tonic-gate 	    minclsyspri);
8927c478bd9Sstevel@tonic-gate }
8937c478bd9Sstevel@tonic-gate 
#ifdef DEBUG
/*
 * Debug aid: log an entry's address, the name of its owning table, its
 * reference count and its id via cmn_err().
 */
void
rfs4_dbe_debug(rfs4_dbe_t *entry)
{
	const char *tabname = entry->dbe_table->dbt_name;

	cmn_err(CE_NOTE, "Entry %p from table %s", (void *)entry, tabname);
	cmn_err(CE_CONT, "\trefcnt = %d id = %d",
	    entry->dbe_refcnt, entry->dbe_id);
}
#endif
904