xref: /titanic_53/usr/src/uts/common/os/devcache.c (revision 83c4dfe9546fd839e7a52bca7e9920da918f916e)
1*83c4dfe9Sjg /*
2*83c4dfe9Sjg  * CDDL HEADER START
3*83c4dfe9Sjg  *
4*83c4dfe9Sjg  * The contents of this file are subject to the terms of the
5*83c4dfe9Sjg  * Common Development and Distribution License (the "License").
6*83c4dfe9Sjg  * You may not use this file except in compliance with the License.
7*83c4dfe9Sjg  *
8*83c4dfe9Sjg  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*83c4dfe9Sjg  * or http://www.opensolaris.org/os/licensing.
10*83c4dfe9Sjg  * See the License for the specific language governing permissions
11*83c4dfe9Sjg  * and limitations under the License.
12*83c4dfe9Sjg  *
13*83c4dfe9Sjg  * When distributing Covered Code, include this CDDL HEADER in each
14*83c4dfe9Sjg  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*83c4dfe9Sjg  * If applicable, add the following below this CDDL HEADER, with the
16*83c4dfe9Sjg  * fields enclosed by brackets "[]" replaced with your own identifying
17*83c4dfe9Sjg  * information: Portions Copyright [yyyy] [name of copyright owner]
18*83c4dfe9Sjg  *
19*83c4dfe9Sjg  * CDDL HEADER END
20*83c4dfe9Sjg  */
21*83c4dfe9Sjg /*
22*83c4dfe9Sjg  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23*83c4dfe9Sjg  * Use is subject to license terms.
24*83c4dfe9Sjg  */
25*83c4dfe9Sjg 
26*83c4dfe9Sjg #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*83c4dfe9Sjg 
28*83c4dfe9Sjg #include <sys/note.h>
29*83c4dfe9Sjg #include <sys/t_lock.h>
30*83c4dfe9Sjg #include <sys/cmn_err.h>
31*83c4dfe9Sjg #include <sys/instance.h>
32*83c4dfe9Sjg #include <sys/conf.h>
33*83c4dfe9Sjg #include <sys/stat.h>
34*83c4dfe9Sjg #include <sys/ddi.h>
35*83c4dfe9Sjg #include <sys/hwconf.h>
36*83c4dfe9Sjg #include <sys/sunddi.h>
37*83c4dfe9Sjg #include <sys/sunndi.h>
38*83c4dfe9Sjg #include <sys/ddi_impldefs.h>
39*83c4dfe9Sjg #include <sys/ndi_impldefs.h>
40*83c4dfe9Sjg #include <sys/modctl.h>
41*83c4dfe9Sjg #include <sys/dacf.h>
42*83c4dfe9Sjg #include <sys/promif.h>
43*83c4dfe9Sjg #include <sys/cpuvar.h>
44*83c4dfe9Sjg #include <sys/pathname.h>
45*83c4dfe9Sjg #include <sys/kobj.h>
46*83c4dfe9Sjg #include <sys/devcache.h>
47*83c4dfe9Sjg #include <sys/devcache_impl.h>
48*83c4dfe9Sjg #include <sys/sysmacros.h>
49*83c4dfe9Sjg #include <sys/varargs.h>
50*83c4dfe9Sjg #include <sys/callb.h>
51*83c4dfe9Sjg 
52*83c4dfe9Sjg /*
53*83c4dfe9Sjg  * This facility provides interfaces to clients to register,
54*83c4dfe9Sjg  * read and update cache data in persisted backing store files,
55*83c4dfe9Sjg  * usually in /etc/devices.  The data persisted through this
56*83c4dfe9Sjg  * mechanism should be stateless data, functioning in the sense
57*83c4dfe9Sjg  * of a cache.  Writes are performed by a background daemon
58*83c4dfe9Sjg  * thread, permitting a client to schedule an update without
59*83c4dfe9Sjg  * blocking, then continue updating the data state in
60*83c4dfe9Sjg  * parallel.  The data is only locked by the daemon thread
61*83c4dfe9Sjg  * to pack the data in preparation for the write.
62*83c4dfe9Sjg  *
63*83c4dfe9Sjg  * Data persisted through this mechanism should be capable
64*83c4dfe9Sjg  * of being regenerated through normal system operation,
65*83c4dfe9Sjg  * for example attaching all disk devices would cause all
66*83c4dfe9Sjg  * devids to be registered for those devices.  By caching
67*83c4dfe9Sjg  * a devid-device tuple, the system can operate in a
68*83c4dfe9Sjg  * more optimal way, directly attaching the device mapped
69*83c4dfe9Sjg  * to a devid, rather than burdensomely driving attach of
70*83c4dfe9Sjg  * the entire device tree to discover a single device.
71*83c4dfe9Sjg  *
72*83c4dfe9Sjg  * Note that a client should only need to include
73*83c4dfe9Sjg  * <sys/devcache.h> for the supported interfaces.
74*83c4dfe9Sjg  *
75*83c4dfe9Sjg  * The data per client is entirely within the control of
76*83c4dfe9Sjg  * the client.  When reading, data unpacked from the backing
77*83c4dfe9Sjg  * store should be inserted in the list.  The pointer to
78*83c4dfe9Sjg  * the list can be retrieved via nvf_list().  When writing,
79*83c4dfe9Sjg  * the data on the list is to be packed and returned to the
80*83c4dfe9Sjg  * nvpdaemon as an nvlist.
81*83c4dfe9Sjg  *
82*83c4dfe9Sjg  * Obvious restrictions are imposed by the limits of the
83*83c4dfe9Sjg  * nvlist format.  The data cannot be read or written
84*83c4dfe9Sjg  * piecemeal, and large amounts of data aren't recommended.
85*83c4dfe9Sjg  * However, nvlists do allow that data be named and typed
86*83c4dfe9Sjg  * and can be size-of-int invariant, and the cached data
87*83c4dfe9Sjg  * can be versioned conveniently.
88*83c4dfe9Sjg  *
89*83c4dfe9Sjg  * The registration involves two steps: a handle is
90*83c4dfe9Sjg  * allocated by calling the registration function.
91*83c4dfe9Sjg  * This sets up the data referenced by the handle and
92*83c4dfe9Sjg  * initializes the lock.  Following registration, the
93*83c4dfe9Sjg  * client must initialize the data list.  The list
94*83c4dfe9Sjg  * interfaces require that the list element with offset
95*83c4dfe9Sjg  * to the node link be provided.  The format of the
96*83c4dfe9Sjg  * list element is under the control of the client.
97*83c4dfe9Sjg  *
98*83c4dfe9Sjg  * Locking: the address of the data list r/w lock provided
99*83c4dfe9Sjg  * can be accessed with nvf_lock().  The lock must be held
100*83c4dfe9Sjg  * as reader when traversing the list or checking state,
101*83c4dfe9Sjg  * such as nvf_is_dirty().  The lock must be held as
102*83c4dfe9Sjg  * writer when updating the list or marking it dirty.
103*83c4dfe9Sjg  * The lock must not be held when waking the daemon.
104*83c4dfe9Sjg  *
105*83c4dfe9Sjg  * The data r/w lock is held as writer when the pack,
106*83c4dfe9Sjg  * unpack and free list handlers are called.  The
107*83c4dfe9Sjg  * lock should not be dropped and must be still held
108*83c4dfe9Sjg  * upon return.  The client should also hold the lock
109*83c4dfe9Sjg  * as reader when checking if the list is dirty, and
110*83c4dfe9Sjg  * as writer when marking the list dirty or initiating
111*83c4dfe9Sjg  * a read.
112*83c4dfe9Sjg  *
113*83c4dfe9Sjg  * The asynchronous nature of updates allows for the
114*83c4dfe9Sjg  * possibility that the data may continue to be updated
115*83c4dfe9Sjg  * once the daemon has been notified that an update is
116*83c4dfe9Sjg  * desired.  The data only needs to be locked against
117*83c4dfe9Sjg  * updates when packing the data into the form to be
118*83c4dfe9Sjg  * written.  When the write of the packed data has
119*83c4dfe9Sjg  * completed, the daemon will automatically reschedule
120*83c4dfe9Sjg  * an update if the data was marked dirty after the
121*83c4dfe9Sjg  * point at which it was packed.  Before beginning an
122*83c4dfe9Sjg  * update, the daemon attempts to lock the data as
123*83c4dfe9Sjg  * writer; if the writer lock is already held, it
124*83c4dfe9Sjg  * backs off and retries later.  The model is to give
125*83c4dfe9Sjg  * priority to the kernel processes generating the
126*83c4dfe9Sjg  * data, and that the nature of the data is that
127*83c4dfe9Sjg  * it does not change often, can be re-generated when
128*83c4dfe9Sjg  * needed, so updates should not happen often and
129*83c4dfe9Sjg  * can be delayed until the data stops changing.
130*83c4dfe9Sjg  * The client may update the list or mark it dirty
131*83c4dfe9Sjg  * any time it is able to acquire the lock as
132*83c4dfe9Sjg  * writer first.
133*83c4dfe9Sjg  *
134*83c4dfe9Sjg  * A failed write will be retried after some delay,
135*83c4dfe9Sjg  * in the hope that the cause of the error will be
136*83c4dfe9Sjg  * transient, for example a filesystem with no space
137*83c4dfe9Sjg  * available.  An update on a read-only filesystem
138*83c4dfe9Sjg  * is failed silently and not retried; this would be
139*83c4dfe9Sjg  * the case when booted off install media.
140*83c4dfe9Sjg  *
141*83c4dfe9Sjg  * There is no unregister mechanism as of yet, as it
142*83c4dfe9Sjg  * hasn't been needed so far.
143*83c4dfe9Sjg  */
144*83c4dfe9Sjg 
145*83c4dfe9Sjg /*
146*83c4dfe9Sjg  * Global list of files registered and updated by the nvpflush
147*83c4dfe9Sjg  * daemon, protected by the nvf_cache_mutex.  While an
148*83c4dfe9Sjg  * update is taking place, a file is temporarily moved to
149*83c4dfe9Sjg  * the dirty list to avoid locking the primary list for
150*83c4dfe9Sjg  * the duration of the update.
151*83c4dfe9Sjg  */
152*83c4dfe9Sjg list_t		nvf_cache_files;
153*83c4dfe9Sjg list_t		nvf_dirty_files;
154*83c4dfe9Sjg kmutex_t	nvf_cache_mutex;
155*83c4dfe9Sjg 
156*83c4dfe9Sjg 
157*83c4dfe9Sjg /*
158*83c4dfe9Sjg  * Allow some delay from an update of the data before flushing
159*83c4dfe9Sjg  * to permit simultaneous updates of multiple changes.
160*83c4dfe9Sjg  * Changes in the data are expected to be bursty, i.e.
161*83c4dfe9Sjg  * reconfig or hot-plug of a new adapter.
162*83c4dfe9Sjg  *
163*83c4dfe9Sjg  * kfio_report_error (default 0)
164*83c4dfe9Sjg  *	Set to 1 to enable some error messages related to low-level
165*83c4dfe9Sjg  *	kernel file i/o operations.
166*83c4dfe9Sjg  *
167*83c4dfe9Sjg  * nvpflush_delay (default 10)
168*83c4dfe9Sjg  *	The number of seconds after data is marked dirty before the
169*83c4dfe9Sjg  *	flush daemon is triggered to flush the data.  A longer period
170*83c4dfe9Sjg  *	of time permits more data updates per write.  Note that
171*83c4dfe9Sjg  *	every update resets the timer so no repository write will
172*83c4dfe9Sjg  *	occur while data is being updated continuously.
173*83c4dfe9Sjg  *
174*83c4dfe9Sjg  * nvpdaemon_idle_time (default 60)
175*83c4dfe9Sjg  *	The number of seconds the daemon will sleep idle before exiting.
176*83c4dfe9Sjg  *
177*83c4dfe9Sjg  */
178*83c4dfe9Sjg #define	NVPFLUSH_DELAY		10
179*83c4dfe9Sjg #define	NVPDAEMON_IDLE_TIME	60
180*83c4dfe9Sjg 
181*83c4dfe9Sjg #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
182*83c4dfe9Sjg 
183*83c4dfe9Sjg /*
184*83c4dfe9Sjg  * Tunables
185*83c4dfe9Sjg  */
186*83c4dfe9Sjg int kfio_report_error = 0;		/* kernel file i/o operations */
187*83c4dfe9Sjg int kfio_disable_read = 0;		/* disable all reads */
188*83c4dfe9Sjg int kfio_disable_write = 0;		/* disable all writes */
189*83c4dfe9Sjg 
190*83c4dfe9Sjg int nvpflush_delay	= NVPFLUSH_DELAY;
191*83c4dfe9Sjg int nvpdaemon_idle_time	= NVPDAEMON_IDLE_TIME;
192*83c4dfe9Sjg 
193*83c4dfe9Sjg static timeout_id_t	nvpflush_id = 0;
194*83c4dfe9Sjg static int		nvpflush_timer_busy = 0;
195*83c4dfe9Sjg static int		nvpflush_daemon_active = 0;
196*83c4dfe9Sjg static kthread_t	*nvpflush_thr_id = 0;
197*83c4dfe9Sjg 
198*83c4dfe9Sjg static int		do_nvpflush = 0;
199*83c4dfe9Sjg static int		nvpbusy = 0;
200*83c4dfe9Sjg static kmutex_t		nvpflush_lock;
201*83c4dfe9Sjg static kcondvar_t	nvpflush_cv;
202*83c4dfe9Sjg static kthread_id_t	nvpflush_thread;
203*83c4dfe9Sjg static clock_t		nvpticks;
204*83c4dfe9Sjg 
205*83c4dfe9Sjg static void nvpflush_daemon(void);
206*83c4dfe9Sjg 
207*83c4dfe9Sjg #ifdef	DEBUG
208*83c4dfe9Sjg int nvpdaemon_debug = 0;
209*83c4dfe9Sjg int kfio_debug = 0;
210*83c4dfe9Sjg #endif	/* DEBUG */
211*83c4dfe9Sjg 
212*83c4dfe9Sjg extern int modrootloaded;
213*83c4dfe9Sjg extern void mdi_read_devices_files(void);
214*83c4dfe9Sjg extern void mdi_clean_vhcache(void);
215*83c4dfe9Sjg 
216*83c4dfe9Sjg /*
217*83c4dfe9Sjg  * Initialize the overall cache file management
218*83c4dfe9Sjg  */
219*83c4dfe9Sjg void
220*83c4dfe9Sjg i_ddi_devices_init(void)
221*83c4dfe9Sjg {
222*83c4dfe9Sjg 	list_create(&nvf_cache_files, sizeof (nvfd_t),
223*83c4dfe9Sjg 	    offsetof(nvfd_t, nvf_link));
224*83c4dfe9Sjg 	list_create(&nvf_dirty_files, sizeof (nvfd_t),
225*83c4dfe9Sjg 	    offsetof(nvfd_t, nvf_link));
226*83c4dfe9Sjg 	mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL);
227*83c4dfe9Sjg 	devid_cache_init();
228*83c4dfe9Sjg }
229*83c4dfe9Sjg 
230*83c4dfe9Sjg /*
231*83c4dfe9Sjg  * Read cache files
232*83c4dfe9Sjg  * The files read here should be restricted to those
233*83c4dfe9Sjg  * that may be required to mount root.
234*83c4dfe9Sjg  */
235*83c4dfe9Sjg void
236*83c4dfe9Sjg i_ddi_read_devices_files(void)
237*83c4dfe9Sjg {
238*83c4dfe9Sjg 	if (!kfio_disable_read) {
239*83c4dfe9Sjg 		mdi_read_devices_files();
240*83c4dfe9Sjg 		devid_cache_read();
241*83c4dfe9Sjg 	}
242*83c4dfe9Sjg }
243*83c4dfe9Sjg 
244*83c4dfe9Sjg void
245*83c4dfe9Sjg i_ddi_start_flush_daemon(void)
246*83c4dfe9Sjg {
247*83c4dfe9Sjg 	nvfd_t	*nvfdp;
248*83c4dfe9Sjg 
249*83c4dfe9Sjg 	ASSERT(i_ddi_io_initialized());
250*83c4dfe9Sjg 
251*83c4dfe9Sjg 	mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL);
252*83c4dfe9Sjg 	cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL);
253*83c4dfe9Sjg 
254*83c4dfe9Sjg 	mutex_enter(&nvf_cache_mutex);
255*83c4dfe9Sjg 	for (nvfdp = list_head(&nvf_cache_files); nvfdp;
256*83c4dfe9Sjg 	    nvfdp = list_next(&nvf_cache_files, nvfdp)) {
257*83c4dfe9Sjg 		if (NVF_IS_DIRTY(nvfdp)) {
258*83c4dfe9Sjg 			nvf_wake_daemon();
259*83c4dfe9Sjg 			break;
260*83c4dfe9Sjg 		}
261*83c4dfe9Sjg 	}
262*83c4dfe9Sjg 	mutex_exit(&nvf_cache_mutex);
263*83c4dfe9Sjg }
264*83c4dfe9Sjg 
/*
 * Run the cleanup entry points of the cache clients: first
 * devid_cache_cleanup(), then mdi_clean_vhcache().
 */
void
i_ddi_clean_devices_files(void)
{
	devid_cache_cleanup();
	mdi_clean_vhcache();
}
271*83c4dfe9Sjg 
272*83c4dfe9Sjg /*
273*83c4dfe9Sjg  * Register a cache file to be managed and updated by the nvpflush daemon.
274*83c4dfe9Sjg  * All operations are performed through the returned handle.
275*83c4dfe9Sjg  * There is no unregister mechanism for now.
276*83c4dfe9Sjg  */
277*83c4dfe9Sjg nvf_handle_t
278*83c4dfe9Sjg nvf_register_file(nvf_ops_t *ops)
279*83c4dfe9Sjg {
280*83c4dfe9Sjg 	nvfd_t *nvfdp;
281*83c4dfe9Sjg 
282*83c4dfe9Sjg 	nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP);
283*83c4dfe9Sjg 
284*83c4dfe9Sjg 	nvfdp->nvf_ops = ops;
285*83c4dfe9Sjg 	nvfdp->nvf_flags = 0;
286*83c4dfe9Sjg 	rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL);
287*83c4dfe9Sjg 
288*83c4dfe9Sjg 	mutex_enter(&nvf_cache_mutex);
289*83c4dfe9Sjg 	list_insert_tail(&nvf_cache_files, nvfdp);
290*83c4dfe9Sjg 	mutex_exit(&nvf_cache_mutex);
291*83c4dfe9Sjg 
292*83c4dfe9Sjg 	return ((nvf_handle_t)nvfdp);
293*83c4dfe9Sjg }
294*83c4dfe9Sjg 
295*83c4dfe9Sjg /*PRINTFLIKE1*/
296*83c4dfe9Sjg void
297*83c4dfe9Sjg nvf_error(const char *fmt, ...)
298*83c4dfe9Sjg {
299*83c4dfe9Sjg 	va_list ap;
300*83c4dfe9Sjg 
301*83c4dfe9Sjg 	if (kfio_report_error) {
302*83c4dfe9Sjg 		va_start(ap, fmt);
303*83c4dfe9Sjg 		vcmn_err(CE_NOTE, fmt, ap);
304*83c4dfe9Sjg 		va_end(ap);
305*83c4dfe9Sjg 	}
306*83c4dfe9Sjg }
307*83c4dfe9Sjg 
308*83c4dfe9Sjg /*
309*83c4dfe9Sjg  * Some operations clients may use to manage the data
310*83c4dfe9Sjg  * to be persisted in a cache file.
311*83c4dfe9Sjg  */
312*83c4dfe9Sjg char *
313*83c4dfe9Sjg nvf_cache_name(nvf_handle_t handle)
314*83c4dfe9Sjg {
315*83c4dfe9Sjg 	return (((nvfd_t *)handle)->nvf_cache_path);
316*83c4dfe9Sjg }
317*83c4dfe9Sjg 
318*83c4dfe9Sjg krwlock_t *
319*83c4dfe9Sjg nvf_lock(nvf_handle_t handle)
320*83c4dfe9Sjg {
321*83c4dfe9Sjg 	return (&(((nvfd_t *)handle)->nvf_lock));
322*83c4dfe9Sjg }
323*83c4dfe9Sjg 
324*83c4dfe9Sjg list_t *
325*83c4dfe9Sjg nvf_list(nvf_handle_t handle)
326*83c4dfe9Sjg {
327*83c4dfe9Sjg 	return (&(((nvfd_t *)handle)->nvf_data_list));
328*83c4dfe9Sjg }
329*83c4dfe9Sjg 
330*83c4dfe9Sjg void
331*83c4dfe9Sjg nvf_mark_dirty(nvf_handle_t handle)
332*83c4dfe9Sjg {
333*83c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock)));
334*83c4dfe9Sjg 	NVF_MARK_DIRTY((nvfd_t *)handle);
335*83c4dfe9Sjg }
336*83c4dfe9Sjg 
337*83c4dfe9Sjg int
338*83c4dfe9Sjg nvf_is_dirty(nvf_handle_t handle)
339*83c4dfe9Sjg {
340*83c4dfe9Sjg 	ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock)));
341*83c4dfe9Sjg 	return (NVF_IS_DIRTY((nvfd_t *)handle));
342*83c4dfe9Sjg }
343*83c4dfe9Sjg 
/*
 * Compute a 16-bit XOR checksum over buflen bytes at buf.
 *
 * The buffer is consumed as native-order 16-bit words.  When buflen is
 * odd, the last byte is taken on its own as the initial checksum value
 * and the remaining even-length prefix is XORed in word by word.
 * buf is read through a uint16_t pointer, so it must be suitably
 * aligned for that access.
 */
static uint16_t
nvp_cksum(uchar_t *buf, int64_t buflen)
{
	uint16_t	*words = (uint16_t *)buf;
	uint16_t	sum = 0;
	int64_t		nwords;
	int64_t		i;

	if (buflen & 0x01) {
		/* odd length: seed with the trailing byte */
		sum = buf[--buflen];
	}

	nwords = buflen / 2;
	for (i = 0; i < nwords; i++)
		sum ^= words[i];

	return (sum);
}
360*83c4dfe9Sjg 
361*83c4dfe9Sjg int
362*83c4dfe9Sjg fread_nvlist(char *filename, nvlist_t **ret_nvlist)
363*83c4dfe9Sjg {
364*83c4dfe9Sjg 	struct _buf	*file;
365*83c4dfe9Sjg 	nvpf_hdr_t	hdr;
366*83c4dfe9Sjg 	char		*buf;
367*83c4dfe9Sjg 	nvlist_t	*nvl;
368*83c4dfe9Sjg 	int		rval;
369*83c4dfe9Sjg 	uint_t		offset;
370*83c4dfe9Sjg 	int		n;
371*83c4dfe9Sjg 	char		c;
372*83c4dfe9Sjg 	uint16_t	cksum, hdrsum;
373*83c4dfe9Sjg 
374*83c4dfe9Sjg 	*ret_nvlist = NULL;
375*83c4dfe9Sjg 
376*83c4dfe9Sjg 	file = kobj_open_file(filename);
377*83c4dfe9Sjg 	if (file == (struct _buf *)-1) {
378*83c4dfe9Sjg 		KFDEBUG((CE_CONT, "cannot open file: %s\n", filename));
379*83c4dfe9Sjg 		return (ENOENT);
380*83c4dfe9Sjg 	}
381*83c4dfe9Sjg 
382*83c4dfe9Sjg 	offset = 0;
383*83c4dfe9Sjg 	n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset);
384*83c4dfe9Sjg 	if (n != sizeof (hdr)) {
385*83c4dfe9Sjg 		kobj_close_file(file);
386*83c4dfe9Sjg 		if (n < 0) {
387*83c4dfe9Sjg 			nvf_error("error reading header: %s\n", filename);
388*83c4dfe9Sjg 			return (EIO);
389*83c4dfe9Sjg 		} else if (n == 0) {
390*83c4dfe9Sjg 			KFDEBUG((CE_CONT, "file empty: %s\n", filename));
391*83c4dfe9Sjg 		} else {
392*83c4dfe9Sjg 			nvf_error("header size incorrect: %s\n", filename);
393*83c4dfe9Sjg 		}
394*83c4dfe9Sjg 		return (EINVAL);
395*83c4dfe9Sjg 	}
396*83c4dfe9Sjg 	offset += n;
397*83c4dfe9Sjg 
398*83c4dfe9Sjg 	KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic));
399*83c4dfe9Sjg 	KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version));
400*83c4dfe9Sjg 	KFDEBUG2((CE_CONT, "nvpf_size: %lld\n",
401*83c4dfe9Sjg 		(longlong_t)hdr.nvpf_size));
402*83c4dfe9Sjg 	KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n",
403*83c4dfe9Sjg 		hdr.nvpf_hdr_chksum));
404*83c4dfe9Sjg 	KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum));
405*83c4dfe9Sjg 
406*83c4dfe9Sjg 	cksum = hdr.nvpf_hdr_chksum;
407*83c4dfe9Sjg 	hdr.nvpf_hdr_chksum = 0;
408*83c4dfe9Sjg 	hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr));
409*83c4dfe9Sjg 
410*83c4dfe9Sjg 	if (hdr.nvpf_magic != NVPF_HDR_MAGIC ||
411*83c4dfe9Sjg 	    hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) {
412*83c4dfe9Sjg 		kobj_close_file(file);
413*83c4dfe9Sjg 		if (hdrsum != cksum) {
414*83c4dfe9Sjg 			nvf_error("%s: checksum error "
415*83c4dfe9Sjg 			    "(actual 0x%x, expected 0x%x)\n",
416*83c4dfe9Sjg 			    filename, hdrsum, cksum);
417*83c4dfe9Sjg 		}
418*83c4dfe9Sjg 		nvf_error("%s: header information incorrect", filename);
419*83c4dfe9Sjg 		return (EINVAL);
420*83c4dfe9Sjg 	}
421*83c4dfe9Sjg 
422*83c4dfe9Sjg 	ASSERT(hdr.nvpf_size >= 0);
423*83c4dfe9Sjg 
424*83c4dfe9Sjg 	buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP);
425*83c4dfe9Sjg 	n = kobj_read_file(file, buf, hdr.nvpf_size, offset);
426*83c4dfe9Sjg 	if (n != hdr.nvpf_size) {
427*83c4dfe9Sjg 		kmem_free(buf, hdr.nvpf_size);
428*83c4dfe9Sjg 		kobj_close_file(file);
429*83c4dfe9Sjg 		if (n < 0) {
430*83c4dfe9Sjg 			nvf_error("%s: read error %d", filename, n);
431*83c4dfe9Sjg 		} else {
432*83c4dfe9Sjg 			nvf_error("%s: incomplete read %d/%lld",
433*83c4dfe9Sjg 				filename, n, (longlong_t)hdr.nvpf_size);
434*83c4dfe9Sjg 		}
435*83c4dfe9Sjg 		return (EINVAL);
436*83c4dfe9Sjg 	}
437*83c4dfe9Sjg 	offset += n;
438*83c4dfe9Sjg 
439*83c4dfe9Sjg 	rval = kobj_read_file(file, &c, 1, offset);
440*83c4dfe9Sjg 	kobj_close_file(file);
441*83c4dfe9Sjg 	if (rval > 0) {
442*83c4dfe9Sjg 		nvf_error("%s is larger than %lld\n",
443*83c4dfe9Sjg 			filename, (longlong_t)hdr.nvpf_size);
444*83c4dfe9Sjg 		kmem_free(buf, hdr.nvpf_size);
445*83c4dfe9Sjg 		return (EINVAL);
446*83c4dfe9Sjg 	}
447*83c4dfe9Sjg 
448*83c4dfe9Sjg 	cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size);
449*83c4dfe9Sjg 	if (hdr.nvpf_chksum != cksum) {
450*83c4dfe9Sjg 		nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n",
451*83c4dfe9Sjg 		    filename, hdr.nvpf_chksum, cksum);
452*83c4dfe9Sjg 		kmem_free(buf, hdr.nvpf_size);
453*83c4dfe9Sjg 		return (EINVAL);
454*83c4dfe9Sjg 	}
455*83c4dfe9Sjg 
456*83c4dfe9Sjg 	nvl = NULL;
457*83c4dfe9Sjg 	rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0);
458*83c4dfe9Sjg 	if (rval != 0) {
459*83c4dfe9Sjg 		nvf_error("%s: error %d unpacking nvlist\n",
460*83c4dfe9Sjg 			filename, rval);
461*83c4dfe9Sjg 		kmem_free(buf, hdr.nvpf_size);
462*83c4dfe9Sjg 		return (EINVAL);
463*83c4dfe9Sjg 	}
464*83c4dfe9Sjg 
465*83c4dfe9Sjg 	kmem_free(buf, hdr.nvpf_size);
466*83c4dfe9Sjg 	*ret_nvlist = nvl;
467*83c4dfe9Sjg 	return (0);
468*83c4dfe9Sjg }
469*83c4dfe9Sjg 
470*83c4dfe9Sjg static int
471*83c4dfe9Sjg kfcreate(char *filename, kfile_t **kfilep)
472*83c4dfe9Sjg {
473*83c4dfe9Sjg 	kfile_t	*fp;
474*83c4dfe9Sjg 	int	rval;
475*83c4dfe9Sjg 
476*83c4dfe9Sjg 	ASSERT(modrootloaded);
477*83c4dfe9Sjg 
478*83c4dfe9Sjg 	fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP);
479*83c4dfe9Sjg 
480*83c4dfe9Sjg 	fp->kf_vnflags = FCREAT | FWRITE | FTRUNC;
481*83c4dfe9Sjg 	fp->kf_fname = filename;
482*83c4dfe9Sjg 	fp->kf_fpos = 0;
483*83c4dfe9Sjg 	fp->kf_state = 0;
484*83c4dfe9Sjg 
485*83c4dfe9Sjg 	KFDEBUG((CE_CONT, "create: %s flags 0x%x\n",
486*83c4dfe9Sjg 		filename, fp->kf_vnflags));
487*83c4dfe9Sjg 	rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags,
488*83c4dfe9Sjg 	    0444, &fp->kf_vp, CRCREAT, 0);
489*83c4dfe9Sjg 	if (rval != 0) {
490*83c4dfe9Sjg 		kmem_free(fp, sizeof (kfile_t));
491*83c4dfe9Sjg 		KFDEBUG((CE_CONT, "%s: create error %d\n",
492*83c4dfe9Sjg 			filename, rval));
493*83c4dfe9Sjg 		return (rval);
494*83c4dfe9Sjg 	}
495*83c4dfe9Sjg 
496*83c4dfe9Sjg 	*kfilep = fp;
497*83c4dfe9Sjg 	return (0);
498*83c4dfe9Sjg }
499*83c4dfe9Sjg 
500*83c4dfe9Sjg static int
501*83c4dfe9Sjg kfremove(char *filename)
502*83c4dfe9Sjg {
503*83c4dfe9Sjg 	int rval;
504*83c4dfe9Sjg 
505*83c4dfe9Sjg 	KFDEBUG((CE_CONT, "remove: %s\n", filename));
506*83c4dfe9Sjg 	rval = vn_remove(filename, UIO_SYSSPACE, RMFILE);
507*83c4dfe9Sjg 	if (rval != 0) {
508*83c4dfe9Sjg 		KFDEBUG((CE_CONT, "%s: remove error %d\n",
509*83c4dfe9Sjg 			filename, rval));
510*83c4dfe9Sjg 	}
511*83c4dfe9Sjg 	return (rval);
512*83c4dfe9Sjg }
513*83c4dfe9Sjg 
514*83c4dfe9Sjg static int
515*83c4dfe9Sjg kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
516*83c4dfe9Sjg {
517*83c4dfe9Sjg 	ssize_t		resid;
518*83c4dfe9Sjg 	int		err;
519*83c4dfe9Sjg 	ssize_t		n;
520*83c4dfe9Sjg 
521*83c4dfe9Sjg 	ASSERT(modrootloaded);
522*83c4dfe9Sjg 
523*83c4dfe9Sjg 	if (fp->kf_state != 0)
524*83c4dfe9Sjg 		return (fp->kf_state);
525*83c4dfe9Sjg 
526*83c4dfe9Sjg 	err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos,
527*83c4dfe9Sjg 		UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid);
528*83c4dfe9Sjg 	if (err != 0) {
529*83c4dfe9Sjg 		KFDEBUG((CE_CONT, "%s: read error %d\n",
530*83c4dfe9Sjg 			fp->kf_fname, err));
531*83c4dfe9Sjg 		fp->kf_state = err;
532*83c4dfe9Sjg 		return (err);
533*83c4dfe9Sjg 	}
534*83c4dfe9Sjg 
535*83c4dfe9Sjg 	ASSERT(resid >= 0 && resid <= bufsiz);
536*83c4dfe9Sjg 	n = bufsiz - resid;
537*83c4dfe9Sjg 
538*83c4dfe9Sjg 	KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n",
539*83c4dfe9Sjg 		fp->kf_fname, n, bufsiz, resid));
540*83c4dfe9Sjg 
541*83c4dfe9Sjg 	fp->kf_fpos += n;
542*83c4dfe9Sjg 	*ret_n = n;
543*83c4dfe9Sjg 	return (0);
544*83c4dfe9Sjg }
545*83c4dfe9Sjg 
546*83c4dfe9Sjg static int
547*83c4dfe9Sjg kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
548*83c4dfe9Sjg {
549*83c4dfe9Sjg 	rlim64_t	rlimit;
550*83c4dfe9Sjg 	ssize_t		resid;
551*83c4dfe9Sjg 	int		err;
552*83c4dfe9Sjg 	ssize_t		len;
553*83c4dfe9Sjg 	ssize_t		n = 0;
554*83c4dfe9Sjg 
555*83c4dfe9Sjg 	ASSERT(modrootloaded);
556*83c4dfe9Sjg 
557*83c4dfe9Sjg 	if (fp->kf_state != 0)
558*83c4dfe9Sjg 		return (fp->kf_state);
559*83c4dfe9Sjg 
560*83c4dfe9Sjg 	len = bufsiz;
561*83c4dfe9Sjg 	rlimit = bufsiz + 1;
562*83c4dfe9Sjg 	for (;;) {
563*83c4dfe9Sjg 		err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos,
564*83c4dfe9Sjg 			UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
565*83c4dfe9Sjg 		if (err) {
566*83c4dfe9Sjg 			KFDEBUG((CE_CONT, "%s: write error %d\n",
567*83c4dfe9Sjg 				fp->kf_fname, err));
568*83c4dfe9Sjg 			fp->kf_state = err;
569*83c4dfe9Sjg 			return (err);
570*83c4dfe9Sjg 		}
571*83c4dfe9Sjg 
572*83c4dfe9Sjg 		KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n",
573*83c4dfe9Sjg 			fp->kf_fname, len-resid, resid));
574*83c4dfe9Sjg 
575*83c4dfe9Sjg 		ASSERT(resid >= 0 && resid <= len);
576*83c4dfe9Sjg 
577*83c4dfe9Sjg 		n += (len - resid);
578*83c4dfe9Sjg 		if (resid == 0)
579*83c4dfe9Sjg 			break;
580*83c4dfe9Sjg 
581*83c4dfe9Sjg 		if (resid == len) {
582*83c4dfe9Sjg 			KFDEBUG((CE_CONT, "%s: filesystem full?\n",
583*83c4dfe9Sjg 				fp->kf_fname));
584*83c4dfe9Sjg 			fp->kf_state = ENOSPC;
585*83c4dfe9Sjg 			return (ENOSPC);
586*83c4dfe9Sjg 		}
587*83c4dfe9Sjg 
588*83c4dfe9Sjg 		len -= resid;
589*83c4dfe9Sjg 		buf += len;
590*83c4dfe9Sjg 		fp->kf_fpos += len;
591*83c4dfe9Sjg 		len = resid;
592*83c4dfe9Sjg 	}
593*83c4dfe9Sjg 
594*83c4dfe9Sjg 	ASSERT(n == bufsiz);
595*83c4dfe9Sjg 	KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n));
596*83c4dfe9Sjg 
597*83c4dfe9Sjg 	*ret_n = n;
598*83c4dfe9Sjg 	return (0);
599*83c4dfe9Sjg }
600*83c4dfe9Sjg 
601*83c4dfe9Sjg 
602*83c4dfe9Sjg static int
603*83c4dfe9Sjg kfclose(kfile_t *fp)
604*83c4dfe9Sjg {
605*83c4dfe9Sjg 	int		rval;
606*83c4dfe9Sjg 
607*83c4dfe9Sjg 	KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname));
608*83c4dfe9Sjg 
609*83c4dfe9Sjg 	if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) {
610*83c4dfe9Sjg 		rval = VOP_FSYNC(fp->kf_vp, FSYNC,  kcred);
611*83c4dfe9Sjg 		if (rval != 0) {
612*83c4dfe9Sjg 			nvf_error("%s: sync error %d\n",
613*83c4dfe9Sjg 				fp->kf_fname, rval);
614*83c4dfe9Sjg 		}
615*83c4dfe9Sjg 		KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname));
616*83c4dfe9Sjg 	}
617*83c4dfe9Sjg 
618*83c4dfe9Sjg 	rval = VOP_CLOSE(fp->kf_vp, fp->kf_vnflags, 1, (offset_t)0, kcred);
619*83c4dfe9Sjg 	if (rval != 0) {
620*83c4dfe9Sjg 		if (fp->kf_state == 0) {
621*83c4dfe9Sjg 			nvf_error("%s: close error %d\n",
622*83c4dfe9Sjg 				fp->kf_fname, rval);
623*83c4dfe9Sjg 		}
624*83c4dfe9Sjg 	} else {
625*83c4dfe9Sjg 		if (fp->kf_state == 0)
626*83c4dfe9Sjg 			KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname));
627*83c4dfe9Sjg 	}
628*83c4dfe9Sjg 
629*83c4dfe9Sjg 	VN_RELE(fp->kf_vp);
630*83c4dfe9Sjg 	kmem_free(fp, sizeof (kfile_t));
631*83c4dfe9Sjg 	return (rval);
632*83c4dfe9Sjg }
633*83c4dfe9Sjg 
634*83c4dfe9Sjg static int
635*83c4dfe9Sjg kfrename(char *oldname, char *newname)
636*83c4dfe9Sjg {
637*83c4dfe9Sjg 	int rval;
638*83c4dfe9Sjg 
639*83c4dfe9Sjg 	ASSERT(modrootloaded);
640*83c4dfe9Sjg 
641*83c4dfe9Sjg 	KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname));
642*83c4dfe9Sjg 
643*83c4dfe9Sjg 	if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) {
644*83c4dfe9Sjg 		KFDEBUG((CE_CONT, "rename %s to %s: %d\n",
645*83c4dfe9Sjg 			oldname, newname, rval));
646*83c4dfe9Sjg 	}
647*83c4dfe9Sjg 
648*83c4dfe9Sjg 	return (rval);
649*83c4dfe9Sjg }
650*83c4dfe9Sjg 
651*83c4dfe9Sjg int
652*83c4dfe9Sjg fwrite_nvlist(char *filename, nvlist_t *nvl)
653*83c4dfe9Sjg {
654*83c4dfe9Sjg 	char	*buf;
655*83c4dfe9Sjg 	char	*nvbuf;
656*83c4dfe9Sjg 	kfile_t	*fp;
657*83c4dfe9Sjg 	char	*newname;
658*83c4dfe9Sjg 	int	len, err, err1;
659*83c4dfe9Sjg 	size_t	buflen;
660*83c4dfe9Sjg 	ssize_t	n;
661*83c4dfe9Sjg 
662*83c4dfe9Sjg 	ASSERT(modrootloaded);
663*83c4dfe9Sjg 
664*83c4dfe9Sjg 	nvbuf = NULL;
665*83c4dfe9Sjg 	err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0);
666*83c4dfe9Sjg 	if (err != 0) {
667*83c4dfe9Sjg 		nvf_error("%s: error %d packing nvlist\n",
668*83c4dfe9Sjg 			filename, err);
669*83c4dfe9Sjg 		return (err);
670*83c4dfe9Sjg 	}
671*83c4dfe9Sjg 
672*83c4dfe9Sjg 	buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP);
673*83c4dfe9Sjg 	bzero(buf, sizeof (nvpf_hdr_t));
674*83c4dfe9Sjg 
675*83c4dfe9Sjg 	((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC;
676*83c4dfe9Sjg 	((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION;
677*83c4dfe9Sjg 	((nvpf_hdr_t *)buf)->nvpf_size = buflen;
678*83c4dfe9Sjg 	((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen);
679*83c4dfe9Sjg 	((nvpf_hdr_t *)buf)->nvpf_hdr_chksum =
680*83c4dfe9Sjg 		nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t));
681*83c4dfe9Sjg 
682*83c4dfe9Sjg 	bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen);
683*83c4dfe9Sjg 	kmem_free(nvbuf, buflen);
684*83c4dfe9Sjg 	buflen += sizeof (nvpf_hdr_t);
685*83c4dfe9Sjg 
686*83c4dfe9Sjg 	len = strlen(filename) + MAX_SUFFIX_LEN + 2;
687*83c4dfe9Sjg 	newname = kmem_alloc(len, KM_SLEEP);
688*83c4dfe9Sjg 
689*83c4dfe9Sjg 
690*83c4dfe9Sjg 	(void) sprintf(newname, "%s.%s",
691*83c4dfe9Sjg 		filename, NEW_FILENAME_SUFFIX);
692*83c4dfe9Sjg 
693*83c4dfe9Sjg 	/*
694*83c4dfe9Sjg 	 * To make it unlikely we suffer data loss, write
695*83c4dfe9Sjg 	 * data to the new temporary file.  Once successful
696*83c4dfe9Sjg 	 * complete the transaction by renaming the new file
697*83c4dfe9Sjg 	 * to replace the previous.
698*83c4dfe9Sjg 	 */
699*83c4dfe9Sjg 
700*83c4dfe9Sjg 	if ((err = kfcreate(newname, &fp)) == 0) {
701*83c4dfe9Sjg 		err = kfwrite(fp, buf, buflen, &n);
702*83c4dfe9Sjg 		if (err) {
703*83c4dfe9Sjg 			nvf_error("%s: write error - %d\n",
704*83c4dfe9Sjg 				newname, err);
705*83c4dfe9Sjg 		} else {
706*83c4dfe9Sjg 			if (n != buflen) {
707*83c4dfe9Sjg 				nvf_error(
708*83c4dfe9Sjg 				    "%s: partial write %ld of %ld bytes\n",
709*83c4dfe9Sjg 				    newname, n, buflen);
710*83c4dfe9Sjg 				nvf_error("%s: filesystem may be full?\n",
711*83c4dfe9Sjg 				    newname);
712*83c4dfe9Sjg 				err = EIO;
713*83c4dfe9Sjg 			}
714*83c4dfe9Sjg 		}
715*83c4dfe9Sjg 		if ((err1 = kfclose(fp)) != 0) {
716*83c4dfe9Sjg 			nvf_error("%s: close error\n", newname);
717*83c4dfe9Sjg 			if (err == 0)
718*83c4dfe9Sjg 				err = err1;
719*83c4dfe9Sjg 		}
720*83c4dfe9Sjg 		if (err != 0) {
721*83c4dfe9Sjg 			if (kfremove(newname) != 0) {
722*83c4dfe9Sjg 				nvf_error("%s: remove failed\n",
723*83c4dfe9Sjg 				    newname);
724*83c4dfe9Sjg 			}
725*83c4dfe9Sjg 		}
726*83c4dfe9Sjg 	} else {
727*83c4dfe9Sjg 		nvf_error("%s: create failed - %d\n", filename, err);
728*83c4dfe9Sjg 	}
729*83c4dfe9Sjg 
730*83c4dfe9Sjg 	if (err == 0) {
731*83c4dfe9Sjg 		if ((err = kfrename(newname, filename)) != 0) {
732*83c4dfe9Sjg 			nvf_error("%s: rename from %s failed\n",
733*83c4dfe9Sjg 				newname, filename);
734*83c4dfe9Sjg 		}
735*83c4dfe9Sjg 	}
736*83c4dfe9Sjg 
737*83c4dfe9Sjg 	kmem_free(newname, len);
738*83c4dfe9Sjg 	kmem_free(buf, buflen);
739*83c4dfe9Sjg 
740*83c4dfe9Sjg 	return (err);
741*83c4dfe9Sjg }
742*83c4dfe9Sjg 
743*83c4dfe9Sjg static int
744*83c4dfe9Sjg e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl)
745*83c4dfe9Sjg {
746*83c4dfe9Sjg 	int err;
747*83c4dfe9Sjg 
748*83c4dfe9Sjg 	if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0)
749*83c4dfe9Sjg 		return (DDI_SUCCESS);
750*83c4dfe9Sjg 	else {
751*83c4dfe9Sjg 		if (err == EROFS)
752*83c4dfe9Sjg 			NVF_MARK_READONLY(nvfd);
753*83c4dfe9Sjg 		return (DDI_FAILURE);
754*83c4dfe9Sjg 	}
755*83c4dfe9Sjg }
756*83c4dfe9Sjg 
757*83c4dfe9Sjg static void
758*83c4dfe9Sjg nvp_list_free(nvfd_t *nvf)
759*83c4dfe9Sjg {
760*83c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
761*83c4dfe9Sjg 	(nvf->nvf_list_free)((nvf_handle_t)nvf);
762*83c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
763*83c4dfe9Sjg }
764*83c4dfe9Sjg 
765*83c4dfe9Sjg /*
766*83c4dfe9Sjg  * Read a file in the nvlist format
767*83c4dfe9Sjg  *	EIO - i/o error during read
768*83c4dfe9Sjg  *	ENOENT - file not found
769*83c4dfe9Sjg  *	EINVAL - file contents corrupted
770*83c4dfe9Sjg  */
771*83c4dfe9Sjg static int
772*83c4dfe9Sjg fread_nvp_list(nvfd_t *nvfd)
773*83c4dfe9Sjg {
774*83c4dfe9Sjg 	nvlist_t	*nvl;
775*83c4dfe9Sjg 	nvpair_t	*nvp;
776*83c4dfe9Sjg 	char		*name;
777*83c4dfe9Sjg 	nvlist_t	*sublist;
778*83c4dfe9Sjg 	int		rval;
779*83c4dfe9Sjg 	int		rv;
780*83c4dfe9Sjg 
781*83c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
782*83c4dfe9Sjg 
783*83c4dfe9Sjg 	rval = fread_nvlist(nvfd->nvf_cache_path, &nvl);
784*83c4dfe9Sjg 	if (rval != 0)
785*83c4dfe9Sjg 		return (rval);
786*83c4dfe9Sjg 	ASSERT(nvl != NULL);
787*83c4dfe9Sjg 
788*83c4dfe9Sjg 	nvp = NULL;
789*83c4dfe9Sjg 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
790*83c4dfe9Sjg 		name = nvpair_name(nvp);
791*83c4dfe9Sjg 		ASSERT(strlen(name) > 0);
792*83c4dfe9Sjg 
793*83c4dfe9Sjg 		switch (nvpair_type(nvp)) {
794*83c4dfe9Sjg 		case DATA_TYPE_NVLIST:
795*83c4dfe9Sjg 			rval = nvpair_value_nvlist(nvp, &sublist);
796*83c4dfe9Sjg 			if (rval != 0) {
797*83c4dfe9Sjg 				nvf_error(
798*83c4dfe9Sjg 				    "nvpair_value_nvlist error %s %d\n",
799*83c4dfe9Sjg 				    name, rval);
800*83c4dfe9Sjg 				goto error;
801*83c4dfe9Sjg 			}
802*83c4dfe9Sjg 
803*83c4dfe9Sjg 			/*
804*83c4dfe9Sjg 			 * unpack nvlist for this device and
805*83c4dfe9Sjg 			 * add elements to data list.
806*83c4dfe9Sjg 			 */
807*83c4dfe9Sjg 			ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
808*83c4dfe9Sjg 			rv = (nvfd->nvf_unpack_nvlist)
809*83c4dfe9Sjg 			    ((nvf_handle_t)nvfd, sublist, name);
810*83c4dfe9Sjg 			ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
811*83c4dfe9Sjg 			if (rv != 0) {
812*83c4dfe9Sjg 				nvf_error(
813*83c4dfe9Sjg 				    "%s: %s invalid list element\n",
814*83c4dfe9Sjg 				    nvfd->nvf_cache_path, name);
815*83c4dfe9Sjg 				rval = EINVAL;
816*83c4dfe9Sjg 				goto error;
817*83c4dfe9Sjg 			}
818*83c4dfe9Sjg 			break;
819*83c4dfe9Sjg 
820*83c4dfe9Sjg 		default:
821*83c4dfe9Sjg 			nvf_error("%s: %s unsupported data type %d\n",
822*83c4dfe9Sjg 				nvfd->nvf_cache_path, name, nvpair_type(nvp));
823*83c4dfe9Sjg 			rval = EINVAL;
824*83c4dfe9Sjg 			goto error;
825*83c4dfe9Sjg 		}
826*83c4dfe9Sjg 	}
827*83c4dfe9Sjg 
828*83c4dfe9Sjg 	nvlist_free(nvl);
829*83c4dfe9Sjg 
830*83c4dfe9Sjg 	return (0);
831*83c4dfe9Sjg 
832*83c4dfe9Sjg error:
833*83c4dfe9Sjg 	nvlist_free(nvl);
834*83c4dfe9Sjg 	nvp_list_free(nvfd);
835*83c4dfe9Sjg 	return (rval);
836*83c4dfe9Sjg }
837*83c4dfe9Sjg 
838*83c4dfe9Sjg 
839*83c4dfe9Sjg int
840*83c4dfe9Sjg nvf_read_file(nvf_handle_t nvf_handle)
841*83c4dfe9Sjg {
842*83c4dfe9Sjg 	nvfd_t *nvfd = (nvfd_t *)nvf_handle;
843*83c4dfe9Sjg 	int rval;
844*83c4dfe9Sjg 
845*83c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
846*83c4dfe9Sjg 
847*83c4dfe9Sjg 	if (kfio_disable_read)
848*83c4dfe9Sjg 		return (0);
849*83c4dfe9Sjg 
850*83c4dfe9Sjg 	KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path));
851*83c4dfe9Sjg 
852*83c4dfe9Sjg 	rval = fread_nvp_list(nvfd);
853*83c4dfe9Sjg 	if (rval) {
854*83c4dfe9Sjg 		switch (rval) {
855*83c4dfe9Sjg 		case EIO:
856*83c4dfe9Sjg 			nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
857*83c4dfe9Sjg 			cmn_err(CE_WARN, "%s: I/O error",
858*83c4dfe9Sjg 				nvfd->nvf_cache_path);
859*83c4dfe9Sjg 			break;
860*83c4dfe9Sjg 		case ENOENT:
861*83c4dfe9Sjg 			nvfd->nvf_flags |= NVF_F_CREATE_MSG;
862*83c4dfe9Sjg 			nvf_error("%s: not found\n",
863*83c4dfe9Sjg 				nvfd->nvf_cache_path);
864*83c4dfe9Sjg 			break;
865*83c4dfe9Sjg 		case EINVAL:
866*83c4dfe9Sjg 		default:
867*83c4dfe9Sjg 			nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
868*83c4dfe9Sjg 			cmn_err(CE_WARN, "%s: data file corrupted",
869*83c4dfe9Sjg 				nvfd->nvf_cache_path);
870*83c4dfe9Sjg 			break;
871*83c4dfe9Sjg 		}
872*83c4dfe9Sjg 	}
873*83c4dfe9Sjg 	return (rval);
874*83c4dfe9Sjg }
875*83c4dfe9Sjg 
876*83c4dfe9Sjg static void
877*83c4dfe9Sjg nvf_write_is_complete(nvfd_t *fd)
878*83c4dfe9Sjg {
879*83c4dfe9Sjg 	if (fd->nvf_write_complete) {
880*83c4dfe9Sjg 		(fd->nvf_write_complete)((nvf_handle_t)fd);
881*83c4dfe9Sjg 	}
882*83c4dfe9Sjg }
883*83c4dfe9Sjg 
884*83c4dfe9Sjg /*ARGSUSED*/
885*83c4dfe9Sjg static void
886*83c4dfe9Sjg nvpflush_timeout(void *arg)
887*83c4dfe9Sjg {
888*83c4dfe9Sjg 	clock_t nticks;
889*83c4dfe9Sjg 
890*83c4dfe9Sjg 	mutex_enter(&nvpflush_lock);
891*83c4dfe9Sjg 	nticks = nvpticks - ddi_get_lbolt();
892*83c4dfe9Sjg 	if (nticks > 4) {
893*83c4dfe9Sjg 		nvpflush_timer_busy = 1;
894*83c4dfe9Sjg 		mutex_exit(&nvpflush_lock);
895*83c4dfe9Sjg 		nvpflush_id = timeout(nvpflush_timeout, NULL, nticks);
896*83c4dfe9Sjg 	} else {
897*83c4dfe9Sjg 		do_nvpflush = 1;
898*83c4dfe9Sjg 		NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n"));
899*83c4dfe9Sjg 		cv_signal(&nvpflush_cv);
900*83c4dfe9Sjg 		nvpflush_id = 0;
901*83c4dfe9Sjg 		nvpflush_timer_busy = 0;
902*83c4dfe9Sjg 		mutex_exit(&nvpflush_lock);
903*83c4dfe9Sjg 	}
904*83c4dfe9Sjg }
905*83c4dfe9Sjg 
906*83c4dfe9Sjg /*
907*83c4dfe9Sjg  * After marking a list as dirty, wake the nvpflush daemon
908*83c4dfe9Sjg  * to perform the update.
909*83c4dfe9Sjg  */
910*83c4dfe9Sjg void
911*83c4dfe9Sjg nvf_wake_daemon(void)
912*83c4dfe9Sjg {
913*83c4dfe9Sjg 	clock_t nticks;
914*83c4dfe9Sjg 
915*83c4dfe9Sjg 	/*
916*83c4dfe9Sjg 	 * If the system isn't up yet
917*83c4dfe9Sjg 	 * don't even think about starting a flush.
918*83c4dfe9Sjg 	 */
919*83c4dfe9Sjg 	if (!i_ddi_io_initialized())
920*83c4dfe9Sjg 		return;
921*83c4dfe9Sjg 
922*83c4dfe9Sjg 	mutex_enter(&nvpflush_lock);
923*83c4dfe9Sjg 
924*83c4dfe9Sjg 	if (nvpflush_daemon_active == 0) {
925*83c4dfe9Sjg 		nvpflush_daemon_active = 1;
926*83c4dfe9Sjg 		mutex_exit(&nvpflush_lock);
927*83c4dfe9Sjg 		NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n"));
928*83c4dfe9Sjg 		nvpflush_thr_id = thread_create(NULL, 0,
929*83c4dfe9Sjg 		    (void (*)())nvpflush_daemon,
930*83c4dfe9Sjg 		    NULL, 0, &p0, TS_RUN, minclsyspri);
931*83c4dfe9Sjg 		mutex_enter(&nvpflush_lock);
932*83c4dfe9Sjg 	}
933*83c4dfe9Sjg 
934*83c4dfe9Sjg 	nticks = nvpflush_delay * TICKS_PER_SECOND;
935*83c4dfe9Sjg 	nvpticks = ddi_get_lbolt() + nticks;
936*83c4dfe9Sjg 	if (nvpflush_timer_busy == 0) {
937*83c4dfe9Sjg 		nvpflush_timer_busy = 1;
938*83c4dfe9Sjg 		mutex_exit(&nvpflush_lock);
939*83c4dfe9Sjg 		nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4);
940*83c4dfe9Sjg 	} else
941*83c4dfe9Sjg 		mutex_exit(&nvpflush_lock);
942*83c4dfe9Sjg }
943*83c4dfe9Sjg 
944*83c4dfe9Sjg static int
945*83c4dfe9Sjg nvpflush_one(nvfd_t *nvfd)
946*83c4dfe9Sjg {
947*83c4dfe9Sjg 	int rval = DDI_SUCCESS;
948*83c4dfe9Sjg 	nvlist_t *nvl;
949*83c4dfe9Sjg 
950*83c4dfe9Sjg 	rw_enter(&nvfd->nvf_lock, RW_READER);
951*83c4dfe9Sjg 
952*83c4dfe9Sjg 	ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0);
953*83c4dfe9Sjg 
954*83c4dfe9Sjg 	if (!NVF_IS_DIRTY(nvfd) ||
955*83c4dfe9Sjg 	    NVF_IS_READONLY(nvfd) || kfio_disable_write) {
956*83c4dfe9Sjg 		NVF_CLEAR_DIRTY(nvfd);
957*83c4dfe9Sjg 		rw_exit(&nvfd->nvf_lock);
958*83c4dfe9Sjg 		return (DDI_SUCCESS);
959*83c4dfe9Sjg 	}
960*83c4dfe9Sjg 
961*83c4dfe9Sjg 	if (rw_tryupgrade(&nvfd->nvf_lock) == 0) {
962*83c4dfe9Sjg 		nvf_error("nvpflush: "
963*83c4dfe9Sjg 		    "%s rw upgrade failed\n", nvfd->nvf_cache_path);
964*83c4dfe9Sjg 		rw_exit(&nvfd->nvf_lock);
965*83c4dfe9Sjg 		return (DDI_FAILURE);
966*83c4dfe9Sjg 	}
967*83c4dfe9Sjg 	if (((nvfd->nvf_pack_list)
968*83c4dfe9Sjg 	    ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) {
969*83c4dfe9Sjg 		nvf_error("nvpflush: "
970*83c4dfe9Sjg 		    "%s nvlist construction failed\n", nvfd->nvf_cache_path);
971*83c4dfe9Sjg 		ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
972*83c4dfe9Sjg 		rw_exit(&nvfd->nvf_lock);
973*83c4dfe9Sjg 		return (DDI_FAILURE);
974*83c4dfe9Sjg 	}
975*83c4dfe9Sjg 	ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
976*83c4dfe9Sjg 
977*83c4dfe9Sjg 	NVF_CLEAR_DIRTY(nvfd);
978*83c4dfe9Sjg 	nvfd->nvf_flags |= NVF_F_FLUSHING;
979*83c4dfe9Sjg 	rw_exit(&nvfd->nvf_lock);
980*83c4dfe9Sjg 
981*83c4dfe9Sjg 	rval = e_fwrite_nvlist(nvfd, nvl);
982*83c4dfe9Sjg 	nvlist_free(nvl);
983*83c4dfe9Sjg 
984*83c4dfe9Sjg 	rw_enter(&nvfd->nvf_lock, RW_WRITER);
985*83c4dfe9Sjg 	nvfd->nvf_flags &= ~NVF_F_FLUSHING;
986*83c4dfe9Sjg 	if (rval == DDI_FAILURE) {
987*83c4dfe9Sjg 		if (NVF_IS_READONLY(nvfd)) {
988*83c4dfe9Sjg 			rval = DDI_SUCCESS;
989*83c4dfe9Sjg 			nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY);
990*83c4dfe9Sjg 		} else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) {
991*83c4dfe9Sjg 			cmn_err(CE_CONT,
992*83c4dfe9Sjg 			    "%s: updated failed\n", nvfd->nvf_cache_path);
993*83c4dfe9Sjg 			nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY;
994*83c4dfe9Sjg 		}
995*83c4dfe9Sjg 	} else {
996*83c4dfe9Sjg 		if (nvfd->nvf_flags & NVF_F_CREATE_MSG) {
997*83c4dfe9Sjg 			cmn_err(CE_CONT,
998*83c4dfe9Sjg 			    "!Creating %s\n", nvfd->nvf_cache_path);
999*83c4dfe9Sjg 			nvfd->nvf_flags &= ~NVF_F_CREATE_MSG;
1000*83c4dfe9Sjg 		}
1001*83c4dfe9Sjg 		if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) {
1002*83c4dfe9Sjg 			cmn_err(CE_CONT,
1003*83c4dfe9Sjg 			    "!Rebuilding %s\n", nvfd->nvf_cache_path);
1004*83c4dfe9Sjg 			nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG;
1005*83c4dfe9Sjg 		}
1006*83c4dfe9Sjg 		if (nvfd->nvf_flags & NVF_F_ERROR) {
1007*83c4dfe9Sjg 			cmn_err(CE_CONT,
1008*83c4dfe9Sjg 			    "%s: update now ok\n", nvfd->nvf_cache_path);
1009*83c4dfe9Sjg 			nvfd->nvf_flags &= ~NVF_F_ERROR;
1010*83c4dfe9Sjg 		}
1011*83c4dfe9Sjg 		/*
1012*83c4dfe9Sjg 		 * The file may need to be flushed again if the cached
1013*83c4dfe9Sjg 		 * data was touched while writing the earlier contents.
1014*83c4dfe9Sjg 		 */
1015*83c4dfe9Sjg 		if (NVF_IS_DIRTY(nvfd))
1016*83c4dfe9Sjg 			rval = DDI_FAILURE;
1017*83c4dfe9Sjg 	}
1018*83c4dfe9Sjg 
1019*83c4dfe9Sjg 	rw_exit(&nvfd->nvf_lock);
1020*83c4dfe9Sjg 	return (rval);
1021*83c4dfe9Sjg }
1022*83c4dfe9Sjg 
1023*83c4dfe9Sjg 
/*
 * Body of the nvpflush daemon thread.
 *
 * The daemon sleeps on nvpflush_cv until do_nvpflush is set, then
 * flushes every dirty cache file with nvpflush_one().  If any file
 * could not be flushed, or is dirty again afterwards, another flush
 * is scheduled through nvf_wake_daemon().  When an idle wait of
 * nvpdaemon_idle_time seconds ends with no flush requested and no
 * flush timer pending, the thread exits.
 */
static void
nvpflush_daemon(void)
{
	callb_cpr_t cprinfo;
	nvfd_t *nvfdp, *nextfdp;
	clock_t clk;
	int rval;
	int want_wakeup;
	int is_now_clean;

	ASSERT(modrootloaded);

	nvpflush_thread = curthread;
	NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n"));

	CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp");
	mutex_enter(&nvpflush_lock);
	for (;;) {

		/* nvpflush_lock is held at the top of every iteration */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		while (do_nvpflush == 0) {
			clk = cv_timedwait(&nvpflush_cv, &nvpflush_lock,
			    ddi_get_lbolt() +
				(nvpdaemon_idle_time * TICKS_PER_SECOND));
			/*
			 * A return of -1 from cv_timedwait means the
			 * wait timed out.  Exit only if nothing has
			 * been requested and no timer is pending.
			 */
			if (clk == -1 &&
			    do_nvpflush == 0 && nvpflush_timer_busy == 0) {
				/*
				 * Note that CALLB_CPR_EXIT calls mutex_exit()
				 * on the lock passed in to CALLB_CPR_INIT,
				 * so the lock must be held when invoking it.
				 */
				CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);
				NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n"));
				ASSERT(mutex_owned(&nvpflush_lock));
				nvpflush_thr_id = NULL;
				nvpflush_daemon_active = 0;
				CALLB_CPR_EXIT(&cprinfo);
				thread_exit();
			}
		}
		CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);

		/*
		 * Consume the flush request and drop nvpflush_lock
		 * for the duration of the flush.
		 */
		nvpbusy = 1;
		want_wakeup = 0;
		do_nvpflush = 0;
		mutex_exit(&nvpflush_lock);

		/*
		 * Try flushing what's dirty, reschedule if there's
		 * a failure or data gets marked as dirty again.
		 * First move each file marked dirty to the dirty
		 * list to avoid locking the list across the write.
		 */
		mutex_enter(&nvf_cache_mutex);
		for (nvfdp = list_head(&nvf_cache_files);
		    nvfdp; nvfdp = nextfdp) {
			/* fetch the successor first; nvfdp may be unlinked */
			nextfdp = list_next(&nvf_cache_files, nvfdp);
			rw_enter(&nvfdp->nvf_lock, RW_READER);
			if (NVF_IS_DIRTY(nvfdp)) {
				list_remove(&nvf_cache_files, nvfdp);
				list_insert_tail(&nvf_dirty_files, nvfdp);
				rw_exit(&nvfdp->nvf_lock);
			} else {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: not dirty %s\n",
				    nvfdp->nvf_cache_path));
				rw_exit(&nvfdp->nvf_lock);
			}
		}
		mutex_exit(&nvf_cache_mutex);

		/*
		 * Now go through the dirty list.  Files that end up
		 * clean are moved back to the cache file list; the
		 * others stay on the dirty list for a later pass.
		 */
		for (nvfdp = list_head(&nvf_dirty_files);
		    nvfdp; nvfdp = nextfdp) {
			/* fetch the successor first; nvfdp may be unlinked */
			nextfdp = list_next(&nvf_dirty_files, nvfdp);

			is_now_clean = 0;
			rw_enter(&nvfdp->nvf_lock, RW_READER);
			if (NVF_IS_DIRTY(nvfdp)) {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: flush %s\n",
				    nvfdp->nvf_cache_path));
				/* nvpflush_one() takes nvf_lock itself */
				rw_exit(&nvfdp->nvf_lock);
				rval = nvpflush_one(nvfdp);
				rw_enter(&nvfdp->nvf_lock, RW_READER);
				if (rval != DDI_SUCCESS ||
				    NVF_IS_DIRTY(nvfdp)) {
					rw_exit(&nvfdp->nvf_lock);
					NVPDAEMON_DEBUG((CE_CONT,
					    "nvpdaemon: %s dirty again\n",
					    nvfdp->nvf_cache_path));
					want_wakeup = 1;
				} else {
					/* written and still clean: tell the client */
					rw_exit(&nvfdp->nvf_lock);
					nvf_write_is_complete(nvfdp);
					is_now_clean = 1;
				}
			} else {
				NVPDAEMON_DEBUG((CE_CONT,
				    "nvpdaemon: not dirty %s\n",
				    nvfdp->nvf_cache_path));
				rw_exit(&nvfdp->nvf_lock);
				is_now_clean = 1;
			}

			if (is_now_clean) {
				mutex_enter(&nvf_cache_mutex);
				list_remove(&nvf_dirty_files, nvfdp);
				list_insert_tail(&nvf_cache_files,
				    nvfdp);
				mutex_exit(&nvf_cache_mutex);
			}
		}

		/* something is still dirty: schedule another flush */
		if (want_wakeup)
			nvf_wake_daemon();

		/* retake the lock for the next trip around the loop */
		mutex_enter(&nvpflush_lock);
		nvpbusy = 0;
	}
}
1147