1*83c4dfe9Sjg /* 2*83c4dfe9Sjg * CDDL HEADER START 3*83c4dfe9Sjg * 4*83c4dfe9Sjg * The contents of this file are subject to the terms of the 5*83c4dfe9Sjg * Common Development and Distribution License (the "License"). 6*83c4dfe9Sjg * You may not use this file except in compliance with the License. 7*83c4dfe9Sjg * 8*83c4dfe9Sjg * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*83c4dfe9Sjg * or http://www.opensolaris.org/os/licensing. 10*83c4dfe9Sjg * See the License for the specific language governing permissions 11*83c4dfe9Sjg * and limitations under the License. 12*83c4dfe9Sjg * 13*83c4dfe9Sjg * When distributing Covered Code, include this CDDL HEADER in each 14*83c4dfe9Sjg * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*83c4dfe9Sjg * If applicable, add the following below this CDDL HEADER, with the 16*83c4dfe9Sjg * fields enclosed by brackets "[]" replaced with your own identifying 17*83c4dfe9Sjg * information: Portions Copyright [yyyy] [name of copyright owner] 18*83c4dfe9Sjg * 19*83c4dfe9Sjg * CDDL HEADER END 20*83c4dfe9Sjg */ 21*83c4dfe9Sjg /* 22*83c4dfe9Sjg * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23*83c4dfe9Sjg * Use is subject to license terms. 
24*83c4dfe9Sjg */ 25*83c4dfe9Sjg 26*83c4dfe9Sjg #pragma ident "%Z%%M% %I% %E% SMI" 27*83c4dfe9Sjg 28*83c4dfe9Sjg #include <sys/note.h> 29*83c4dfe9Sjg #include <sys/t_lock.h> 30*83c4dfe9Sjg #include <sys/cmn_err.h> 31*83c4dfe9Sjg #include <sys/instance.h> 32*83c4dfe9Sjg #include <sys/conf.h> 33*83c4dfe9Sjg #include <sys/stat.h> 34*83c4dfe9Sjg #include <sys/ddi.h> 35*83c4dfe9Sjg #include <sys/hwconf.h> 36*83c4dfe9Sjg #include <sys/sunddi.h> 37*83c4dfe9Sjg #include <sys/sunndi.h> 38*83c4dfe9Sjg #include <sys/ddi_impldefs.h> 39*83c4dfe9Sjg #include <sys/ndi_impldefs.h> 40*83c4dfe9Sjg #include <sys/modctl.h> 41*83c4dfe9Sjg #include <sys/dacf.h> 42*83c4dfe9Sjg #include <sys/promif.h> 43*83c4dfe9Sjg #include <sys/cpuvar.h> 44*83c4dfe9Sjg #include <sys/pathname.h> 45*83c4dfe9Sjg #include <sys/kobj.h> 46*83c4dfe9Sjg #include <sys/devcache.h> 47*83c4dfe9Sjg #include <sys/devcache_impl.h> 48*83c4dfe9Sjg #include <sys/sysmacros.h> 49*83c4dfe9Sjg #include <sys/varargs.h> 50*83c4dfe9Sjg #include <sys/callb.h> 51*83c4dfe9Sjg 52*83c4dfe9Sjg /* 53*83c4dfe9Sjg * This facility provides interfaces to clients to register, 54*83c4dfe9Sjg * read and update cache data in persisted backing store files, 55*83c4dfe9Sjg * usually in /etc/devices. The data persisted through this 56*83c4dfe9Sjg * mechanism should be stateless data, functioning in the sense 57*83c4dfe9Sjg * of a cache. Writes are performed by a background daemon 58*83c4dfe9Sjg * thread, permitting a client to schedule an update without 59*83c4dfe9Sjg * blocking, then continue updating the data state in 60*83c4dfe9Sjg * parallel. The data is only locked by the daemon thread 61*83c4dfe9Sjg * to pack the data in preparation for the write. 62*83c4dfe9Sjg * 63*83c4dfe9Sjg * Data persisted through this mechanism should be capable 64*83c4dfe9Sjg * of being regenerated through normal system operation, 65*83c4dfe9Sjg * for example attaching all disk devices would cause all 66*83c4dfe9Sjg * devids to be registered for those devices. 
By caching 67*83c4dfe9Sjg * a devid-device tuple, the system can operate in a 68*83c4dfe9Sjg * more optimal way, directly attaching the device mapped 69*83c4dfe9Sjg * to a devid, rather than burdensomely driving attach of 70*83c4dfe9Sjg * the entire device tree to discover a single device. 71*83c4dfe9Sjg * 72*83c4dfe9Sjg * Note that a client should only need to include 73*83c4dfe9Sjg * <sys/devcache.h> for the supported interfaces. 74*83c4dfe9Sjg * 75*83c4dfe9Sjg * The data per client is entirely within the control of 76*83c4dfe9Sjg * the client. When reading, data unpacked from the backing 77*83c4dfe9Sjg * store should be inserted in the list. The pointer to 78*83c4dfe9Sjg * the list can be retrieved via nvf_list(). When writing, 79*83c4dfe9Sjg * the data on the list is to be packed and returned to the 80*83c4dfe9Sjg * nvpdaemon as an nvlist. 81*83c4dfe9Sjg * 82*83c4dfe9Sjg * Obvious restrictions are imposed by the limits of the 83*83c4dfe9Sjg * nvlist format. The data cannot be read or written 84*83c4dfe9Sjg * piecemeal, and large amounts of data aren't recommended. 85*83c4dfe9Sjg * However, nvlists do allow that data be named and typed 86*83c4dfe9Sjg * and can be size-of-int invariant, and the cached data 87*83c4dfe9Sjg * can be versioned conveniently. 88*83c4dfe9Sjg * 89*83c4dfe9Sjg * The registration involves two steps: a handle is 90*83c4dfe9Sjg * allocated by calling the registration function. 91*83c4dfe9Sjg * This sets up the data referenced by the handle and 92*83c4dfe9Sjg * initializes the lock. Following registration, the 93*83c4dfe9Sjg * client must initialize the data list. The list 94*83c4dfe9Sjg * interfaces require that the list element with offset 95*83c4dfe9Sjg * to the node link be provided. The format of the 96*83c4dfe9Sjg * list element is under the control of the client. 97*83c4dfe9Sjg * 98*83c4dfe9Sjg * Locking: the address of the data list r/w lock provided 99*83c4dfe9Sjg * can be accessed with nvf_lock(). 
The lock must be held 100*83c4dfe9Sjg * as reader when traversing the list or checking state, 101*83c4dfe9Sjg * such as nvf_is_dirty(). The lock must be held as 102*83c4dfe9Sjg * writer when updating the list or marking it dirty. 103*83c4dfe9Sjg * The lock must not be held when waking the daemon. 104*83c4dfe9Sjg * 105*83c4dfe9Sjg * The data r/w lock is held as writer when the pack, 106*83c4dfe9Sjg * unpack and free list handlers are called. The 107*83c4dfe9Sjg * lock should not be dropped and must be still held 108*83c4dfe9Sjg * upon return. The client should also hold the lock 109*83c4dfe9Sjg * as reader when checking if the list is dirty, and 110*83c4dfe9Sjg * as writer when marking the list dirty or initiating 111*83c4dfe9Sjg * a read. 112*83c4dfe9Sjg * 113*83c4dfe9Sjg * The asynchronous nature of updates allows for the 114*83c4dfe9Sjg * possibility that the data may continue to be updated 115*83c4dfe9Sjg * once the daemon has been notified that an update is 116*83c4dfe9Sjg * desired. The data only needs to be locked against 117*83c4dfe9Sjg * updates when packing the data into the form to be 118*83c4dfe9Sjg * written. When the write of the packed data has 119*83c4dfe9Sjg * completed, the daemon will automatically reschedule 120*83c4dfe9Sjg * an update if the data was marked dirty after the 121*83c4dfe9Sjg * point at which it was packed. Before beginning an 122*83c4dfe9Sjg * update, the daemon attempts to lock the data as 123*83c4dfe9Sjg * writer; if the writer lock is already held, it 124*83c4dfe9Sjg * backs off and retries later. The model is to give 125*83c4dfe9Sjg * priority to the kernel processes generating the 126*83c4dfe9Sjg * data, and that the nature of the data is that 127*83c4dfe9Sjg * it does not change often, can be re-generated when 128*83c4dfe9Sjg * needed, so updates should not happen often and 129*83c4dfe9Sjg * can be delayed until the data stops changing. 
130*83c4dfe9Sjg * The client may update the list or mark it dirty 131*83c4dfe9Sjg * any time it is able to acquire the lock as 132*83c4dfe9Sjg * writer first. 133*83c4dfe9Sjg * 134*83c4dfe9Sjg * A failed write will be retried after some delay, 135*83c4dfe9Sjg * in the hope that the cause of the error will be 136*83c4dfe9Sjg * transient, for example a filesystem with no space 137*83c4dfe9Sjg * available. An update on a read-only filesystem 138*83c4dfe9Sjg * is failed silently and not retried; this would be 139*83c4dfe9Sjg * the case when booted off install media. 140*83c4dfe9Sjg * 141*83c4dfe9Sjg * There is no unregister mechanism as of yet, as it 142*83c4dfe9Sjg * hasn't been needed so far. 143*83c4dfe9Sjg */ 144*83c4dfe9Sjg 145*83c4dfe9Sjg /* 146*83c4dfe9Sjg * Global list of files registered and updated by the nvpflush 147*83c4dfe9Sjg * daemon, protected by the nvf_cache_mutex. While an 148*83c4dfe9Sjg * update is taking place, a file is temporarily moved to 149*83c4dfe9Sjg * the dirty list to avoid locking the primary list for 150*83c4dfe9Sjg * the duration of the update. 151*83c4dfe9Sjg */ 152*83c4dfe9Sjg list_t nvf_cache_files; 153*83c4dfe9Sjg list_t nvf_dirty_files; 154*83c4dfe9Sjg kmutex_t nvf_cache_mutex; 155*83c4dfe9Sjg 156*83c4dfe9Sjg 157*83c4dfe9Sjg /* 158*83c4dfe9Sjg * Allow some delay from an update of the data before flushing 159*83c4dfe9Sjg * to permit simultaneous updates of multiple changes. 160*83c4dfe9Sjg * Changes in the data are expected to be bursty, ie 161*83c4dfe9Sjg * reconfig or hot-plug of a new adapter. 162*83c4dfe9Sjg * 163*83c4dfe9Sjg * kfio_report_error (default 0) 164*83c4dfe9Sjg * Set to 1 to enable some error messages related to low-level 165*83c4dfe9Sjg * kernel file i/o operations. 166*83c4dfe9Sjg * 167*83c4dfe9Sjg * nvpflush_delay (default 10) 168*83c4dfe9Sjg * The number of seconds after data is marked dirty before the 169*83c4dfe9Sjg * flush daemon is triggered to flush the data. 
A longer period 170*83c4dfe9Sjg * of time permits more data updates per write. Note that 171*83c4dfe9Sjg * every update resets the timer so no repository write will 172*83c4dfe9Sjg * occur while data is being updated continuously. 173*83c4dfe9Sjg * 174*83c4dfe9Sjg * nvpdaemon_idle_time (default 60) 175*83c4dfe9Sjg * The number of seconds the daemon will sleep idle before exiting. 176*83c4dfe9Sjg * 177*83c4dfe9Sjg */ 178*83c4dfe9Sjg #define NVPFLUSH_DELAY 10 179*83c4dfe9Sjg #define NVPDAEMON_IDLE_TIME 60 180*83c4dfe9Sjg 181*83c4dfe9Sjg #define TICKS_PER_SECOND (drv_usectohz(1000000)) 182*83c4dfe9Sjg 183*83c4dfe9Sjg /* 184*83c4dfe9Sjg * Tunables 185*83c4dfe9Sjg */ 186*83c4dfe9Sjg int kfio_report_error = 0; /* kernel file i/o operations */ 187*83c4dfe9Sjg int kfio_disable_read = 0; /* disable all reads */ 188*83c4dfe9Sjg int kfio_disable_write = 0; /* disable all writes */ 189*83c4dfe9Sjg 190*83c4dfe9Sjg int nvpflush_delay = NVPFLUSH_DELAY; 191*83c4dfe9Sjg int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME; 192*83c4dfe9Sjg 193*83c4dfe9Sjg static timeout_id_t nvpflush_id = 0; 194*83c4dfe9Sjg static int nvpflush_timer_busy = 0; 195*83c4dfe9Sjg static int nvpflush_daemon_active = 0; 196*83c4dfe9Sjg static kthread_t *nvpflush_thr_id = 0; 197*83c4dfe9Sjg 198*83c4dfe9Sjg static int do_nvpflush = 0; 199*83c4dfe9Sjg static int nvpbusy = 0; 200*83c4dfe9Sjg static kmutex_t nvpflush_lock; 201*83c4dfe9Sjg static kcondvar_t nvpflush_cv; 202*83c4dfe9Sjg static kthread_id_t nvpflush_thread; 203*83c4dfe9Sjg static clock_t nvpticks; 204*83c4dfe9Sjg 205*83c4dfe9Sjg static void nvpflush_daemon(void); 206*83c4dfe9Sjg 207*83c4dfe9Sjg #ifdef DEBUG 208*83c4dfe9Sjg int nvpdaemon_debug = 0; 209*83c4dfe9Sjg int kfio_debug = 0; 210*83c4dfe9Sjg #endif /* DEBUG */ 211*83c4dfe9Sjg 212*83c4dfe9Sjg extern int modrootloaded; 213*83c4dfe9Sjg extern void mdi_read_devices_files(void); 214*83c4dfe9Sjg extern void mdi_clean_vhcache(void); 215*83c4dfe9Sjg 216*83c4dfe9Sjg /* 217*83c4dfe9Sjg * Initialize the 
overall cache file management 218*83c4dfe9Sjg */ 219*83c4dfe9Sjg void 220*83c4dfe9Sjg i_ddi_devices_init(void) 221*83c4dfe9Sjg { 222*83c4dfe9Sjg list_create(&nvf_cache_files, sizeof (nvfd_t), 223*83c4dfe9Sjg offsetof(nvfd_t, nvf_link)); 224*83c4dfe9Sjg list_create(&nvf_dirty_files, sizeof (nvfd_t), 225*83c4dfe9Sjg offsetof(nvfd_t, nvf_link)); 226*83c4dfe9Sjg mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL); 227*83c4dfe9Sjg devid_cache_init(); 228*83c4dfe9Sjg } 229*83c4dfe9Sjg 230*83c4dfe9Sjg /* 231*83c4dfe9Sjg * Read cache files 232*83c4dfe9Sjg * The files read here should be restricted to those 233*83c4dfe9Sjg * that may be required to mount root. 234*83c4dfe9Sjg */ 235*83c4dfe9Sjg void 236*83c4dfe9Sjg i_ddi_read_devices_files(void) 237*83c4dfe9Sjg { 238*83c4dfe9Sjg if (!kfio_disable_read) { 239*83c4dfe9Sjg mdi_read_devices_files(); 240*83c4dfe9Sjg devid_cache_read(); 241*83c4dfe9Sjg } 242*83c4dfe9Sjg } 243*83c4dfe9Sjg 244*83c4dfe9Sjg void 245*83c4dfe9Sjg i_ddi_start_flush_daemon(void) 246*83c4dfe9Sjg { 247*83c4dfe9Sjg nvfd_t *nvfdp; 248*83c4dfe9Sjg 249*83c4dfe9Sjg ASSERT(i_ddi_io_initialized()); 250*83c4dfe9Sjg 251*83c4dfe9Sjg mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL); 252*83c4dfe9Sjg cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL); 253*83c4dfe9Sjg 254*83c4dfe9Sjg mutex_enter(&nvf_cache_mutex); 255*83c4dfe9Sjg for (nvfdp = list_head(&nvf_cache_files); nvfdp; 256*83c4dfe9Sjg nvfdp = list_next(&nvf_cache_files, nvfdp)) { 257*83c4dfe9Sjg if (NVF_IS_DIRTY(nvfdp)) { 258*83c4dfe9Sjg nvf_wake_daemon(); 259*83c4dfe9Sjg break; 260*83c4dfe9Sjg } 261*83c4dfe9Sjg } 262*83c4dfe9Sjg mutex_exit(&nvf_cache_mutex); 263*83c4dfe9Sjg } 264*83c4dfe9Sjg 265*83c4dfe9Sjg void 266*83c4dfe9Sjg i_ddi_clean_devices_files(void) 267*83c4dfe9Sjg { 268*83c4dfe9Sjg devid_cache_cleanup(); 269*83c4dfe9Sjg mdi_clean_vhcache(); 270*83c4dfe9Sjg } 271*83c4dfe9Sjg 272*83c4dfe9Sjg /* 273*83c4dfe9Sjg * Register a cache file to be managed and updated by the nvpflush daemon. 
274*83c4dfe9Sjg * All operations are performed through the returned handle. 275*83c4dfe9Sjg * There is no unregister mechanism for now. 276*83c4dfe9Sjg */ 277*83c4dfe9Sjg nvf_handle_t 278*83c4dfe9Sjg nvf_register_file(nvf_ops_t *ops) 279*83c4dfe9Sjg { 280*83c4dfe9Sjg nvfd_t *nvfdp; 281*83c4dfe9Sjg 282*83c4dfe9Sjg nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP); 283*83c4dfe9Sjg 284*83c4dfe9Sjg nvfdp->nvf_ops = ops; 285*83c4dfe9Sjg nvfdp->nvf_flags = 0; 286*83c4dfe9Sjg rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL); 287*83c4dfe9Sjg 288*83c4dfe9Sjg mutex_enter(&nvf_cache_mutex); 289*83c4dfe9Sjg list_insert_tail(&nvf_cache_files, nvfdp); 290*83c4dfe9Sjg mutex_exit(&nvf_cache_mutex); 291*83c4dfe9Sjg 292*83c4dfe9Sjg return ((nvf_handle_t)nvfdp); 293*83c4dfe9Sjg } 294*83c4dfe9Sjg 295*83c4dfe9Sjg /*PRINTFLIKE1*/ 296*83c4dfe9Sjg void 297*83c4dfe9Sjg nvf_error(const char *fmt, ...) 298*83c4dfe9Sjg { 299*83c4dfe9Sjg va_list ap; 300*83c4dfe9Sjg 301*83c4dfe9Sjg if (kfio_report_error) { 302*83c4dfe9Sjg va_start(ap, fmt); 303*83c4dfe9Sjg vcmn_err(CE_NOTE, fmt, ap); 304*83c4dfe9Sjg va_end(ap); 305*83c4dfe9Sjg } 306*83c4dfe9Sjg } 307*83c4dfe9Sjg 308*83c4dfe9Sjg /* 309*83c4dfe9Sjg * Some operations clients may use to manage the data 310*83c4dfe9Sjg * to be persisted in a cache file. 
311*83c4dfe9Sjg */ 312*83c4dfe9Sjg char * 313*83c4dfe9Sjg nvf_cache_name(nvf_handle_t handle) 314*83c4dfe9Sjg { 315*83c4dfe9Sjg return (((nvfd_t *)handle)->nvf_cache_path); 316*83c4dfe9Sjg } 317*83c4dfe9Sjg 318*83c4dfe9Sjg krwlock_t * 319*83c4dfe9Sjg nvf_lock(nvf_handle_t handle) 320*83c4dfe9Sjg { 321*83c4dfe9Sjg return (&(((nvfd_t *)handle)->nvf_lock)); 322*83c4dfe9Sjg } 323*83c4dfe9Sjg 324*83c4dfe9Sjg list_t * 325*83c4dfe9Sjg nvf_list(nvf_handle_t handle) 326*83c4dfe9Sjg { 327*83c4dfe9Sjg return (&(((nvfd_t *)handle)->nvf_data_list)); 328*83c4dfe9Sjg } 329*83c4dfe9Sjg 330*83c4dfe9Sjg void 331*83c4dfe9Sjg nvf_mark_dirty(nvf_handle_t handle) 332*83c4dfe9Sjg { 333*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock))); 334*83c4dfe9Sjg NVF_MARK_DIRTY((nvfd_t *)handle); 335*83c4dfe9Sjg } 336*83c4dfe9Sjg 337*83c4dfe9Sjg int 338*83c4dfe9Sjg nvf_is_dirty(nvf_handle_t handle) 339*83c4dfe9Sjg { 340*83c4dfe9Sjg ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock))); 341*83c4dfe9Sjg return (NVF_IS_DIRTY((nvfd_t *)handle)); 342*83c4dfe9Sjg } 343*83c4dfe9Sjg 344*83c4dfe9Sjg static uint16_t 345*83c4dfe9Sjg nvp_cksum(uchar_t *buf, int64_t buflen) 346*83c4dfe9Sjg { 347*83c4dfe9Sjg uint16_t cksum = 0; 348*83c4dfe9Sjg uint16_t *p = (uint16_t *)buf; 349*83c4dfe9Sjg int64_t n; 350*83c4dfe9Sjg 351*83c4dfe9Sjg if ((buflen & 0x01) != 0) { 352*83c4dfe9Sjg buflen--; 353*83c4dfe9Sjg cksum = buf[buflen]; 354*83c4dfe9Sjg } 355*83c4dfe9Sjg n = buflen / 2; 356*83c4dfe9Sjg while (n-- > 0) 357*83c4dfe9Sjg cksum ^= *p++; 358*83c4dfe9Sjg return (cksum); 359*83c4dfe9Sjg } 360*83c4dfe9Sjg 361*83c4dfe9Sjg int 362*83c4dfe9Sjg fread_nvlist(char *filename, nvlist_t **ret_nvlist) 363*83c4dfe9Sjg { 364*83c4dfe9Sjg struct _buf *file; 365*83c4dfe9Sjg nvpf_hdr_t hdr; 366*83c4dfe9Sjg char *buf; 367*83c4dfe9Sjg nvlist_t *nvl; 368*83c4dfe9Sjg int rval; 369*83c4dfe9Sjg uint_t offset; 370*83c4dfe9Sjg int n; 371*83c4dfe9Sjg char c; 372*83c4dfe9Sjg uint16_t cksum, hdrsum; 373*83c4dfe9Sjg 374*83c4dfe9Sjg 
*ret_nvlist = NULL; 375*83c4dfe9Sjg 376*83c4dfe9Sjg file = kobj_open_file(filename); 377*83c4dfe9Sjg if (file == (struct _buf *)-1) { 378*83c4dfe9Sjg KFDEBUG((CE_CONT, "cannot open file: %s\n", filename)); 379*83c4dfe9Sjg return (ENOENT); 380*83c4dfe9Sjg } 381*83c4dfe9Sjg 382*83c4dfe9Sjg offset = 0; 383*83c4dfe9Sjg n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset); 384*83c4dfe9Sjg if (n != sizeof (hdr)) { 385*83c4dfe9Sjg kobj_close_file(file); 386*83c4dfe9Sjg if (n < 0) { 387*83c4dfe9Sjg nvf_error("error reading header: %s\n", filename); 388*83c4dfe9Sjg return (EIO); 389*83c4dfe9Sjg } else if (n == 0) { 390*83c4dfe9Sjg KFDEBUG((CE_CONT, "file empty: %s\n", filename)); 391*83c4dfe9Sjg } else { 392*83c4dfe9Sjg nvf_error("header size incorrect: %s\n", filename); 393*83c4dfe9Sjg } 394*83c4dfe9Sjg return (EINVAL); 395*83c4dfe9Sjg } 396*83c4dfe9Sjg offset += n; 397*83c4dfe9Sjg 398*83c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic)); 399*83c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version)); 400*83c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_size: %lld\n", 401*83c4dfe9Sjg (longlong_t)hdr.nvpf_size)); 402*83c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n", 403*83c4dfe9Sjg hdr.nvpf_hdr_chksum)); 404*83c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum)); 405*83c4dfe9Sjg 406*83c4dfe9Sjg cksum = hdr.nvpf_hdr_chksum; 407*83c4dfe9Sjg hdr.nvpf_hdr_chksum = 0; 408*83c4dfe9Sjg hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr)); 409*83c4dfe9Sjg 410*83c4dfe9Sjg if (hdr.nvpf_magic != NVPF_HDR_MAGIC || 411*83c4dfe9Sjg hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) { 412*83c4dfe9Sjg kobj_close_file(file); 413*83c4dfe9Sjg if (hdrsum != cksum) { 414*83c4dfe9Sjg nvf_error("%s: checksum error " 415*83c4dfe9Sjg "(actual 0x%x, expected 0x%x)\n", 416*83c4dfe9Sjg filename, hdrsum, cksum); 417*83c4dfe9Sjg } 418*83c4dfe9Sjg nvf_error("%s: header information incorrect", filename); 419*83c4dfe9Sjg return (EINVAL); 420*83c4dfe9Sjg 
} 421*83c4dfe9Sjg 422*83c4dfe9Sjg ASSERT(hdr.nvpf_size >= 0); 423*83c4dfe9Sjg 424*83c4dfe9Sjg buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP); 425*83c4dfe9Sjg n = kobj_read_file(file, buf, hdr.nvpf_size, offset); 426*83c4dfe9Sjg if (n != hdr.nvpf_size) { 427*83c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 428*83c4dfe9Sjg kobj_close_file(file); 429*83c4dfe9Sjg if (n < 0) { 430*83c4dfe9Sjg nvf_error("%s: read error %d", filename, n); 431*83c4dfe9Sjg } else { 432*83c4dfe9Sjg nvf_error("%s: incomplete read %d/%lld", 433*83c4dfe9Sjg filename, n, (longlong_t)hdr.nvpf_size); 434*83c4dfe9Sjg } 435*83c4dfe9Sjg return (EINVAL); 436*83c4dfe9Sjg } 437*83c4dfe9Sjg offset += n; 438*83c4dfe9Sjg 439*83c4dfe9Sjg rval = kobj_read_file(file, &c, 1, offset); 440*83c4dfe9Sjg kobj_close_file(file); 441*83c4dfe9Sjg if (rval > 0) { 442*83c4dfe9Sjg nvf_error("%s is larger than %lld\n", 443*83c4dfe9Sjg filename, (longlong_t)hdr.nvpf_size); 444*83c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 445*83c4dfe9Sjg return (EINVAL); 446*83c4dfe9Sjg } 447*83c4dfe9Sjg 448*83c4dfe9Sjg cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size); 449*83c4dfe9Sjg if (hdr.nvpf_chksum != cksum) { 450*83c4dfe9Sjg nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n", 451*83c4dfe9Sjg filename, hdr.nvpf_chksum, cksum); 452*83c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 453*83c4dfe9Sjg return (EINVAL); 454*83c4dfe9Sjg } 455*83c4dfe9Sjg 456*83c4dfe9Sjg nvl = NULL; 457*83c4dfe9Sjg rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0); 458*83c4dfe9Sjg if (rval != 0) { 459*83c4dfe9Sjg nvf_error("%s: error %d unpacking nvlist\n", 460*83c4dfe9Sjg filename, rval); 461*83c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 462*83c4dfe9Sjg return (EINVAL); 463*83c4dfe9Sjg } 464*83c4dfe9Sjg 465*83c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 466*83c4dfe9Sjg *ret_nvlist = nvl; 467*83c4dfe9Sjg return (0); 468*83c4dfe9Sjg } 469*83c4dfe9Sjg 470*83c4dfe9Sjg static int 471*83c4dfe9Sjg kfcreate(char *filename, kfile_t **kfilep) 472*83c4dfe9Sjg { 473*83c4dfe9Sjg kfile_t 
*fp; 474*83c4dfe9Sjg int rval; 475*83c4dfe9Sjg 476*83c4dfe9Sjg ASSERT(modrootloaded); 477*83c4dfe9Sjg 478*83c4dfe9Sjg fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP); 479*83c4dfe9Sjg 480*83c4dfe9Sjg fp->kf_vnflags = FCREAT | FWRITE | FTRUNC; 481*83c4dfe9Sjg fp->kf_fname = filename; 482*83c4dfe9Sjg fp->kf_fpos = 0; 483*83c4dfe9Sjg fp->kf_state = 0; 484*83c4dfe9Sjg 485*83c4dfe9Sjg KFDEBUG((CE_CONT, "create: %s flags 0x%x\n", 486*83c4dfe9Sjg filename, fp->kf_vnflags)); 487*83c4dfe9Sjg rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags, 488*83c4dfe9Sjg 0444, &fp->kf_vp, CRCREAT, 0); 489*83c4dfe9Sjg if (rval != 0) { 490*83c4dfe9Sjg kmem_free(fp, sizeof (kfile_t)); 491*83c4dfe9Sjg KFDEBUG((CE_CONT, "%s: create error %d\n", 492*83c4dfe9Sjg filename, rval)); 493*83c4dfe9Sjg return (rval); 494*83c4dfe9Sjg } 495*83c4dfe9Sjg 496*83c4dfe9Sjg *kfilep = fp; 497*83c4dfe9Sjg return (0); 498*83c4dfe9Sjg } 499*83c4dfe9Sjg 500*83c4dfe9Sjg static int 501*83c4dfe9Sjg kfremove(char *filename) 502*83c4dfe9Sjg { 503*83c4dfe9Sjg int rval; 504*83c4dfe9Sjg 505*83c4dfe9Sjg KFDEBUG((CE_CONT, "remove: %s\n", filename)); 506*83c4dfe9Sjg rval = vn_remove(filename, UIO_SYSSPACE, RMFILE); 507*83c4dfe9Sjg if (rval != 0) { 508*83c4dfe9Sjg KFDEBUG((CE_CONT, "%s: remove error %d\n", 509*83c4dfe9Sjg filename, rval)); 510*83c4dfe9Sjg } 511*83c4dfe9Sjg return (rval); 512*83c4dfe9Sjg } 513*83c4dfe9Sjg 514*83c4dfe9Sjg static int 515*83c4dfe9Sjg kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 516*83c4dfe9Sjg { 517*83c4dfe9Sjg ssize_t resid; 518*83c4dfe9Sjg int err; 519*83c4dfe9Sjg ssize_t n; 520*83c4dfe9Sjg 521*83c4dfe9Sjg ASSERT(modrootloaded); 522*83c4dfe9Sjg 523*83c4dfe9Sjg if (fp->kf_state != 0) 524*83c4dfe9Sjg return (fp->kf_state); 525*83c4dfe9Sjg 526*83c4dfe9Sjg err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos, 527*83c4dfe9Sjg UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid); 528*83c4dfe9Sjg if (err != 0) { 529*83c4dfe9Sjg KFDEBUG((CE_CONT, "%s: read error %d\n", 530*83c4dfe9Sjg 
fp->kf_fname, err)); 531*83c4dfe9Sjg fp->kf_state = err; 532*83c4dfe9Sjg return (err); 533*83c4dfe9Sjg } 534*83c4dfe9Sjg 535*83c4dfe9Sjg ASSERT(resid >= 0 && resid <= bufsiz); 536*83c4dfe9Sjg n = bufsiz - resid; 537*83c4dfe9Sjg 538*83c4dfe9Sjg KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n", 539*83c4dfe9Sjg fp->kf_fname, n, bufsiz, resid)); 540*83c4dfe9Sjg 541*83c4dfe9Sjg fp->kf_fpos += n; 542*83c4dfe9Sjg *ret_n = n; 543*83c4dfe9Sjg return (0); 544*83c4dfe9Sjg } 545*83c4dfe9Sjg 546*83c4dfe9Sjg static int 547*83c4dfe9Sjg kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 548*83c4dfe9Sjg { 549*83c4dfe9Sjg rlim64_t rlimit; 550*83c4dfe9Sjg ssize_t resid; 551*83c4dfe9Sjg int err; 552*83c4dfe9Sjg ssize_t len; 553*83c4dfe9Sjg ssize_t n = 0; 554*83c4dfe9Sjg 555*83c4dfe9Sjg ASSERT(modrootloaded); 556*83c4dfe9Sjg 557*83c4dfe9Sjg if (fp->kf_state != 0) 558*83c4dfe9Sjg return (fp->kf_state); 559*83c4dfe9Sjg 560*83c4dfe9Sjg len = bufsiz; 561*83c4dfe9Sjg rlimit = bufsiz + 1; 562*83c4dfe9Sjg for (;;) { 563*83c4dfe9Sjg err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos, 564*83c4dfe9Sjg UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 565*83c4dfe9Sjg if (err) { 566*83c4dfe9Sjg KFDEBUG((CE_CONT, "%s: write error %d\n", 567*83c4dfe9Sjg fp->kf_fname, err)); 568*83c4dfe9Sjg fp->kf_state = err; 569*83c4dfe9Sjg return (err); 570*83c4dfe9Sjg } 571*83c4dfe9Sjg 572*83c4dfe9Sjg KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n", 573*83c4dfe9Sjg fp->kf_fname, len-resid, resid)); 574*83c4dfe9Sjg 575*83c4dfe9Sjg ASSERT(resid >= 0 && resid <= len); 576*83c4dfe9Sjg 577*83c4dfe9Sjg n += (len - resid); 578*83c4dfe9Sjg if (resid == 0) 579*83c4dfe9Sjg break; 580*83c4dfe9Sjg 581*83c4dfe9Sjg if (resid == len) { 582*83c4dfe9Sjg KFDEBUG((CE_CONT, "%s: filesystem full?\n", 583*83c4dfe9Sjg fp->kf_fname)); 584*83c4dfe9Sjg fp->kf_state = ENOSPC; 585*83c4dfe9Sjg return (ENOSPC); 586*83c4dfe9Sjg } 587*83c4dfe9Sjg 588*83c4dfe9Sjg len -= resid; 589*83c4dfe9Sjg buf += len; 
590*83c4dfe9Sjg fp->kf_fpos += len; 591*83c4dfe9Sjg len = resid; 592*83c4dfe9Sjg } 593*83c4dfe9Sjg 594*83c4dfe9Sjg ASSERT(n == bufsiz); 595*83c4dfe9Sjg KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n)); 596*83c4dfe9Sjg 597*83c4dfe9Sjg *ret_n = n; 598*83c4dfe9Sjg return (0); 599*83c4dfe9Sjg } 600*83c4dfe9Sjg 601*83c4dfe9Sjg 602*83c4dfe9Sjg static int 603*83c4dfe9Sjg kfclose(kfile_t *fp) 604*83c4dfe9Sjg { 605*83c4dfe9Sjg int rval; 606*83c4dfe9Sjg 607*83c4dfe9Sjg KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname)); 608*83c4dfe9Sjg 609*83c4dfe9Sjg if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) { 610*83c4dfe9Sjg rval = VOP_FSYNC(fp->kf_vp, FSYNC, kcred); 611*83c4dfe9Sjg if (rval != 0) { 612*83c4dfe9Sjg nvf_error("%s: sync error %d\n", 613*83c4dfe9Sjg fp->kf_fname, rval); 614*83c4dfe9Sjg } 615*83c4dfe9Sjg KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname)); 616*83c4dfe9Sjg } 617*83c4dfe9Sjg 618*83c4dfe9Sjg rval = VOP_CLOSE(fp->kf_vp, fp->kf_vnflags, 1, (offset_t)0, kcred); 619*83c4dfe9Sjg if (rval != 0) { 620*83c4dfe9Sjg if (fp->kf_state == 0) { 621*83c4dfe9Sjg nvf_error("%s: close error %d\n", 622*83c4dfe9Sjg fp->kf_fname, rval); 623*83c4dfe9Sjg } 624*83c4dfe9Sjg } else { 625*83c4dfe9Sjg if (fp->kf_state == 0) 626*83c4dfe9Sjg KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname)); 627*83c4dfe9Sjg } 628*83c4dfe9Sjg 629*83c4dfe9Sjg VN_RELE(fp->kf_vp); 630*83c4dfe9Sjg kmem_free(fp, sizeof (kfile_t)); 631*83c4dfe9Sjg return (rval); 632*83c4dfe9Sjg } 633*83c4dfe9Sjg 634*83c4dfe9Sjg static int 635*83c4dfe9Sjg kfrename(char *oldname, char *newname) 636*83c4dfe9Sjg { 637*83c4dfe9Sjg int rval; 638*83c4dfe9Sjg 639*83c4dfe9Sjg ASSERT(modrootloaded); 640*83c4dfe9Sjg 641*83c4dfe9Sjg KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname)); 642*83c4dfe9Sjg 643*83c4dfe9Sjg if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) { 644*83c4dfe9Sjg KFDEBUG((CE_CONT, "rename %s to %s: %d\n", 645*83c4dfe9Sjg oldname, newname, rval)); 646*83c4dfe9Sjg } 647*83c4dfe9Sjg 
648*83c4dfe9Sjg return (rval); 649*83c4dfe9Sjg } 650*83c4dfe9Sjg 651*83c4dfe9Sjg int 652*83c4dfe9Sjg fwrite_nvlist(char *filename, nvlist_t *nvl) 653*83c4dfe9Sjg { 654*83c4dfe9Sjg char *buf; 655*83c4dfe9Sjg char *nvbuf; 656*83c4dfe9Sjg kfile_t *fp; 657*83c4dfe9Sjg char *newname; 658*83c4dfe9Sjg int len, err, err1; 659*83c4dfe9Sjg size_t buflen; 660*83c4dfe9Sjg ssize_t n; 661*83c4dfe9Sjg 662*83c4dfe9Sjg ASSERT(modrootloaded); 663*83c4dfe9Sjg 664*83c4dfe9Sjg nvbuf = NULL; 665*83c4dfe9Sjg err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0); 666*83c4dfe9Sjg if (err != 0) { 667*83c4dfe9Sjg nvf_error("%s: error %d packing nvlist\n", 668*83c4dfe9Sjg filename, err); 669*83c4dfe9Sjg return (err); 670*83c4dfe9Sjg } 671*83c4dfe9Sjg 672*83c4dfe9Sjg buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP); 673*83c4dfe9Sjg bzero(buf, sizeof (nvpf_hdr_t)); 674*83c4dfe9Sjg 675*83c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC; 676*83c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION; 677*83c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_size = buflen; 678*83c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen); 679*83c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_hdr_chksum = 680*83c4dfe9Sjg nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t)); 681*83c4dfe9Sjg 682*83c4dfe9Sjg bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen); 683*83c4dfe9Sjg kmem_free(nvbuf, buflen); 684*83c4dfe9Sjg buflen += sizeof (nvpf_hdr_t); 685*83c4dfe9Sjg 686*83c4dfe9Sjg len = strlen(filename) + MAX_SUFFIX_LEN + 2; 687*83c4dfe9Sjg newname = kmem_alloc(len, KM_SLEEP); 688*83c4dfe9Sjg 689*83c4dfe9Sjg 690*83c4dfe9Sjg (void) sprintf(newname, "%s.%s", 691*83c4dfe9Sjg filename, NEW_FILENAME_SUFFIX); 692*83c4dfe9Sjg 693*83c4dfe9Sjg /* 694*83c4dfe9Sjg * To make it unlikely we suffer data loss, write 695*83c4dfe9Sjg * data to the new temporary file. Once successful 696*83c4dfe9Sjg * complete the transaction by renaming the new file 697*83c4dfe9Sjg * to replace the previous. 
698*83c4dfe9Sjg */ 699*83c4dfe9Sjg 700*83c4dfe9Sjg if ((err = kfcreate(newname, &fp)) == 0) { 701*83c4dfe9Sjg err = kfwrite(fp, buf, buflen, &n); 702*83c4dfe9Sjg if (err) { 703*83c4dfe9Sjg nvf_error("%s: write error - %d\n", 704*83c4dfe9Sjg newname, err); 705*83c4dfe9Sjg } else { 706*83c4dfe9Sjg if (n != buflen) { 707*83c4dfe9Sjg nvf_error( 708*83c4dfe9Sjg "%s: partial write %ld of %ld bytes\n", 709*83c4dfe9Sjg newname, n, buflen); 710*83c4dfe9Sjg nvf_error("%s: filesystem may be full?\n", 711*83c4dfe9Sjg newname); 712*83c4dfe9Sjg err = EIO; 713*83c4dfe9Sjg } 714*83c4dfe9Sjg } 715*83c4dfe9Sjg if ((err1 = kfclose(fp)) != 0) { 716*83c4dfe9Sjg nvf_error("%s: close error\n", newname); 717*83c4dfe9Sjg if (err == 0) 718*83c4dfe9Sjg err = err1; 719*83c4dfe9Sjg } 720*83c4dfe9Sjg if (err != 0) { 721*83c4dfe9Sjg if (kfremove(newname) != 0) { 722*83c4dfe9Sjg nvf_error("%s: remove failed\n", 723*83c4dfe9Sjg newname); 724*83c4dfe9Sjg } 725*83c4dfe9Sjg } 726*83c4dfe9Sjg } else { 727*83c4dfe9Sjg nvf_error("%s: create failed - %d\n", filename, err); 728*83c4dfe9Sjg } 729*83c4dfe9Sjg 730*83c4dfe9Sjg if (err == 0) { 731*83c4dfe9Sjg if ((err = kfrename(newname, filename)) != 0) { 732*83c4dfe9Sjg nvf_error("%s: rename from %s failed\n", 733*83c4dfe9Sjg newname, filename); 734*83c4dfe9Sjg } 735*83c4dfe9Sjg } 736*83c4dfe9Sjg 737*83c4dfe9Sjg kmem_free(newname, len); 738*83c4dfe9Sjg kmem_free(buf, buflen); 739*83c4dfe9Sjg 740*83c4dfe9Sjg return (err); 741*83c4dfe9Sjg } 742*83c4dfe9Sjg 743*83c4dfe9Sjg static int 744*83c4dfe9Sjg e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl) 745*83c4dfe9Sjg { 746*83c4dfe9Sjg int err; 747*83c4dfe9Sjg 748*83c4dfe9Sjg if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0) 749*83c4dfe9Sjg return (DDI_SUCCESS); 750*83c4dfe9Sjg else { 751*83c4dfe9Sjg if (err == EROFS) 752*83c4dfe9Sjg NVF_MARK_READONLY(nvfd); 753*83c4dfe9Sjg return (DDI_FAILURE); 754*83c4dfe9Sjg } 755*83c4dfe9Sjg } 756*83c4dfe9Sjg 757*83c4dfe9Sjg static void 758*83c4dfe9Sjg nvp_list_free(nvfd_t 
*nvf) 759*83c4dfe9Sjg { 760*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 761*83c4dfe9Sjg (nvf->nvf_list_free)((nvf_handle_t)nvf); 762*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 763*83c4dfe9Sjg } 764*83c4dfe9Sjg 765*83c4dfe9Sjg /* 766*83c4dfe9Sjg * Read a file in the nvlist format 767*83c4dfe9Sjg * EIO - i/o error during read 768*83c4dfe9Sjg * ENOENT - file not found 769*83c4dfe9Sjg * EINVAL - file contents corrupted 770*83c4dfe9Sjg */ 771*83c4dfe9Sjg static int 772*83c4dfe9Sjg fread_nvp_list(nvfd_t *nvfd) 773*83c4dfe9Sjg { 774*83c4dfe9Sjg nvlist_t *nvl; 775*83c4dfe9Sjg nvpair_t *nvp; 776*83c4dfe9Sjg char *name; 777*83c4dfe9Sjg nvlist_t *sublist; 778*83c4dfe9Sjg int rval; 779*83c4dfe9Sjg int rv; 780*83c4dfe9Sjg 781*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 782*83c4dfe9Sjg 783*83c4dfe9Sjg rval = fread_nvlist(nvfd->nvf_cache_path, &nvl); 784*83c4dfe9Sjg if (rval != 0) 785*83c4dfe9Sjg return (rval); 786*83c4dfe9Sjg ASSERT(nvl != NULL); 787*83c4dfe9Sjg 788*83c4dfe9Sjg nvp = NULL; 789*83c4dfe9Sjg while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 790*83c4dfe9Sjg name = nvpair_name(nvp); 791*83c4dfe9Sjg ASSERT(strlen(name) > 0); 792*83c4dfe9Sjg 793*83c4dfe9Sjg switch (nvpair_type(nvp)) { 794*83c4dfe9Sjg case DATA_TYPE_NVLIST: 795*83c4dfe9Sjg rval = nvpair_value_nvlist(nvp, &sublist); 796*83c4dfe9Sjg if (rval != 0) { 797*83c4dfe9Sjg nvf_error( 798*83c4dfe9Sjg "nvpair_value_nvlist error %s %d\n", 799*83c4dfe9Sjg name, rval); 800*83c4dfe9Sjg goto error; 801*83c4dfe9Sjg } 802*83c4dfe9Sjg 803*83c4dfe9Sjg /* 804*83c4dfe9Sjg * unpack nvlist for this device and 805*83c4dfe9Sjg * add elements to data list. 
806*83c4dfe9Sjg */ 807*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 808*83c4dfe9Sjg rv = (nvfd->nvf_unpack_nvlist) 809*83c4dfe9Sjg ((nvf_handle_t)nvfd, sublist, name); 810*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 811*83c4dfe9Sjg if (rv != 0) { 812*83c4dfe9Sjg nvf_error( 813*83c4dfe9Sjg "%s: %s invalid list element\n", 814*83c4dfe9Sjg nvfd->nvf_cache_path, name); 815*83c4dfe9Sjg rval = EINVAL; 816*83c4dfe9Sjg goto error; 817*83c4dfe9Sjg } 818*83c4dfe9Sjg break; 819*83c4dfe9Sjg 820*83c4dfe9Sjg default: 821*83c4dfe9Sjg nvf_error("%s: %s unsupported data type %d\n", 822*83c4dfe9Sjg nvfd->nvf_cache_path, name, nvpair_type(nvp)); 823*83c4dfe9Sjg rval = EINVAL; 824*83c4dfe9Sjg goto error; 825*83c4dfe9Sjg } 826*83c4dfe9Sjg } 827*83c4dfe9Sjg 828*83c4dfe9Sjg nvlist_free(nvl); 829*83c4dfe9Sjg 830*83c4dfe9Sjg return (0); 831*83c4dfe9Sjg 832*83c4dfe9Sjg error: 833*83c4dfe9Sjg nvlist_free(nvl); 834*83c4dfe9Sjg nvp_list_free(nvfd); 835*83c4dfe9Sjg return (rval); 836*83c4dfe9Sjg } 837*83c4dfe9Sjg 838*83c4dfe9Sjg 839*83c4dfe9Sjg int 840*83c4dfe9Sjg nvf_read_file(nvf_handle_t nvf_handle) 841*83c4dfe9Sjg { 842*83c4dfe9Sjg nvfd_t *nvfd = (nvfd_t *)nvf_handle; 843*83c4dfe9Sjg int rval; 844*83c4dfe9Sjg 845*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 846*83c4dfe9Sjg 847*83c4dfe9Sjg if (kfio_disable_read) 848*83c4dfe9Sjg return (0); 849*83c4dfe9Sjg 850*83c4dfe9Sjg KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path)); 851*83c4dfe9Sjg 852*83c4dfe9Sjg rval = fread_nvp_list(nvfd); 853*83c4dfe9Sjg if (rval) { 854*83c4dfe9Sjg switch (rval) { 855*83c4dfe9Sjg case EIO: 856*83c4dfe9Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 857*83c4dfe9Sjg cmn_err(CE_WARN, "%s: I/O error", 858*83c4dfe9Sjg nvfd->nvf_cache_path); 859*83c4dfe9Sjg break; 860*83c4dfe9Sjg case ENOENT: 861*83c4dfe9Sjg nvfd->nvf_flags |= NVF_F_CREATE_MSG; 862*83c4dfe9Sjg nvf_error("%s: not found\n", 863*83c4dfe9Sjg nvfd->nvf_cache_path); 864*83c4dfe9Sjg break; 865*83c4dfe9Sjg case EINVAL: 866*83c4dfe9Sjg 
default: 867*83c4dfe9Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 868*83c4dfe9Sjg cmn_err(CE_WARN, "%s: data file corrupted", 869*83c4dfe9Sjg nvfd->nvf_cache_path); 870*83c4dfe9Sjg break; 871*83c4dfe9Sjg } 872*83c4dfe9Sjg } 873*83c4dfe9Sjg return (rval); 874*83c4dfe9Sjg } 875*83c4dfe9Sjg 876*83c4dfe9Sjg static void 877*83c4dfe9Sjg nvf_write_is_complete(nvfd_t *fd) 878*83c4dfe9Sjg { 879*83c4dfe9Sjg if (fd->nvf_write_complete) { 880*83c4dfe9Sjg (fd->nvf_write_complete)((nvf_handle_t)fd); 881*83c4dfe9Sjg } 882*83c4dfe9Sjg } 883*83c4dfe9Sjg 884*83c4dfe9Sjg /*ARGSUSED*/ 885*83c4dfe9Sjg static void 886*83c4dfe9Sjg nvpflush_timeout(void *arg) 887*83c4dfe9Sjg { 888*83c4dfe9Sjg clock_t nticks; 889*83c4dfe9Sjg 890*83c4dfe9Sjg mutex_enter(&nvpflush_lock); 891*83c4dfe9Sjg nticks = nvpticks - ddi_get_lbolt(); 892*83c4dfe9Sjg if (nticks > 4) { 893*83c4dfe9Sjg nvpflush_timer_busy = 1; 894*83c4dfe9Sjg mutex_exit(&nvpflush_lock); 895*83c4dfe9Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks); 896*83c4dfe9Sjg } else { 897*83c4dfe9Sjg do_nvpflush = 1; 898*83c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n")); 899*83c4dfe9Sjg cv_signal(&nvpflush_cv); 900*83c4dfe9Sjg nvpflush_id = 0; 901*83c4dfe9Sjg nvpflush_timer_busy = 0; 902*83c4dfe9Sjg mutex_exit(&nvpflush_lock); 903*83c4dfe9Sjg } 904*83c4dfe9Sjg } 905*83c4dfe9Sjg 906*83c4dfe9Sjg /* 907*83c4dfe9Sjg * After marking a list as dirty, wake the nvpflush daemon 908*83c4dfe9Sjg * to perform the update. 909*83c4dfe9Sjg */ 910*83c4dfe9Sjg void 911*83c4dfe9Sjg nvf_wake_daemon(void) 912*83c4dfe9Sjg { 913*83c4dfe9Sjg clock_t nticks; 914*83c4dfe9Sjg 915*83c4dfe9Sjg /* 916*83c4dfe9Sjg * If the system isn't up yet 917*83c4dfe9Sjg * don't even think about starting a flush. 
918*83c4dfe9Sjg */ 919*83c4dfe9Sjg if (!i_ddi_io_initialized()) 920*83c4dfe9Sjg return; 921*83c4dfe9Sjg 922*83c4dfe9Sjg mutex_enter(&nvpflush_lock); 923*83c4dfe9Sjg 924*83c4dfe9Sjg if (nvpflush_daemon_active == 0) { 925*83c4dfe9Sjg nvpflush_daemon_active = 1; 926*83c4dfe9Sjg mutex_exit(&nvpflush_lock); 927*83c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n")); 928*83c4dfe9Sjg nvpflush_thr_id = thread_create(NULL, 0, 929*83c4dfe9Sjg (void (*)())nvpflush_daemon, 930*83c4dfe9Sjg NULL, 0, &p0, TS_RUN, minclsyspri); 931*83c4dfe9Sjg mutex_enter(&nvpflush_lock); 932*83c4dfe9Sjg } 933*83c4dfe9Sjg 934*83c4dfe9Sjg nticks = nvpflush_delay * TICKS_PER_SECOND; 935*83c4dfe9Sjg nvpticks = ddi_get_lbolt() + nticks; 936*83c4dfe9Sjg if (nvpflush_timer_busy == 0) { 937*83c4dfe9Sjg nvpflush_timer_busy = 1; 938*83c4dfe9Sjg mutex_exit(&nvpflush_lock); 939*83c4dfe9Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4); 940*83c4dfe9Sjg } else 941*83c4dfe9Sjg mutex_exit(&nvpflush_lock); 942*83c4dfe9Sjg } 943*83c4dfe9Sjg 944*83c4dfe9Sjg static int 945*83c4dfe9Sjg nvpflush_one(nvfd_t *nvfd) 946*83c4dfe9Sjg { 947*83c4dfe9Sjg int rval = DDI_SUCCESS; 948*83c4dfe9Sjg nvlist_t *nvl; 949*83c4dfe9Sjg 950*83c4dfe9Sjg rw_enter(&nvfd->nvf_lock, RW_READER); 951*83c4dfe9Sjg 952*83c4dfe9Sjg ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0); 953*83c4dfe9Sjg 954*83c4dfe9Sjg if (!NVF_IS_DIRTY(nvfd) || 955*83c4dfe9Sjg NVF_IS_READONLY(nvfd) || kfio_disable_write) { 956*83c4dfe9Sjg NVF_CLEAR_DIRTY(nvfd); 957*83c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 958*83c4dfe9Sjg return (DDI_SUCCESS); 959*83c4dfe9Sjg } 960*83c4dfe9Sjg 961*83c4dfe9Sjg if (rw_tryupgrade(&nvfd->nvf_lock) == 0) { 962*83c4dfe9Sjg nvf_error("nvpflush: " 963*83c4dfe9Sjg "%s rw upgrade failed\n", nvfd->nvf_cache_path); 964*83c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 965*83c4dfe9Sjg return (DDI_FAILURE); 966*83c4dfe9Sjg } 967*83c4dfe9Sjg if (((nvfd->nvf_pack_list) 968*83c4dfe9Sjg ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) { 
969*83c4dfe9Sjg nvf_error("nvpflush: " 970*83c4dfe9Sjg "%s nvlist construction failed\n", nvfd->nvf_cache_path); 971*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 972*83c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 973*83c4dfe9Sjg return (DDI_FAILURE); 974*83c4dfe9Sjg } 975*83c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 976*83c4dfe9Sjg 977*83c4dfe9Sjg NVF_CLEAR_DIRTY(nvfd); 978*83c4dfe9Sjg nvfd->nvf_flags |= NVF_F_FLUSHING; 979*83c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 980*83c4dfe9Sjg 981*83c4dfe9Sjg rval = e_fwrite_nvlist(nvfd, nvl); 982*83c4dfe9Sjg nvlist_free(nvl); 983*83c4dfe9Sjg 984*83c4dfe9Sjg rw_enter(&nvfd->nvf_lock, RW_WRITER); 985*83c4dfe9Sjg nvfd->nvf_flags &= ~NVF_F_FLUSHING; 986*83c4dfe9Sjg if (rval == DDI_FAILURE) { 987*83c4dfe9Sjg if (NVF_IS_READONLY(nvfd)) { 988*83c4dfe9Sjg rval = DDI_SUCCESS; 989*83c4dfe9Sjg nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY); 990*83c4dfe9Sjg } else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) { 991*83c4dfe9Sjg cmn_err(CE_CONT, 992*83c4dfe9Sjg "%s: updated failed\n", nvfd->nvf_cache_path); 993*83c4dfe9Sjg nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY; 994*83c4dfe9Sjg } 995*83c4dfe9Sjg } else { 996*83c4dfe9Sjg if (nvfd->nvf_flags & NVF_F_CREATE_MSG) { 997*83c4dfe9Sjg cmn_err(CE_CONT, 998*83c4dfe9Sjg "!Creating %s\n", nvfd->nvf_cache_path); 999*83c4dfe9Sjg nvfd->nvf_flags &= ~NVF_F_CREATE_MSG; 1000*83c4dfe9Sjg } 1001*83c4dfe9Sjg if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) { 1002*83c4dfe9Sjg cmn_err(CE_CONT, 1003*83c4dfe9Sjg "!Rebuilding %s\n", nvfd->nvf_cache_path); 1004*83c4dfe9Sjg nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG; 1005*83c4dfe9Sjg } 1006*83c4dfe9Sjg if (nvfd->nvf_flags & NVF_F_ERROR) { 1007*83c4dfe9Sjg cmn_err(CE_CONT, 1008*83c4dfe9Sjg "%s: update now ok\n", nvfd->nvf_cache_path); 1009*83c4dfe9Sjg nvfd->nvf_flags &= ~NVF_F_ERROR; 1010*83c4dfe9Sjg } 1011*83c4dfe9Sjg /* 1012*83c4dfe9Sjg * The file may need to be flushed again if the cached 1013*83c4dfe9Sjg * data was touched while writing the earlier contents. 
1014*83c4dfe9Sjg */ 1015*83c4dfe9Sjg if (NVF_IS_DIRTY(nvfd)) 1016*83c4dfe9Sjg rval = DDI_FAILURE; 1017*83c4dfe9Sjg } 1018*83c4dfe9Sjg 1019*83c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 1020*83c4dfe9Sjg return (rval); 1021*83c4dfe9Sjg } 1022*83c4dfe9Sjg 1023*83c4dfe9Sjg 1024*83c4dfe9Sjg static void 1025*83c4dfe9Sjg nvpflush_daemon(void) 1026*83c4dfe9Sjg { 1027*83c4dfe9Sjg callb_cpr_t cprinfo; 1028*83c4dfe9Sjg nvfd_t *nvfdp, *nextfdp; 1029*83c4dfe9Sjg clock_t clk; 1030*83c4dfe9Sjg int rval; 1031*83c4dfe9Sjg int want_wakeup; 1032*83c4dfe9Sjg int is_now_clean; 1033*83c4dfe9Sjg 1034*83c4dfe9Sjg ASSERT(modrootloaded); 1035*83c4dfe9Sjg 1036*83c4dfe9Sjg nvpflush_thread = curthread; 1037*83c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n")); 1038*83c4dfe9Sjg 1039*83c4dfe9Sjg CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp"); 1040*83c4dfe9Sjg mutex_enter(&nvpflush_lock); 1041*83c4dfe9Sjg for (;;) { 1042*83c4dfe9Sjg 1043*83c4dfe9Sjg CALLB_CPR_SAFE_BEGIN(&cprinfo); 1044*83c4dfe9Sjg while (do_nvpflush == 0) { 1045*83c4dfe9Sjg clk = cv_timedwait(&nvpflush_cv, &nvpflush_lock, 1046*83c4dfe9Sjg ddi_get_lbolt() + 1047*83c4dfe9Sjg (nvpdaemon_idle_time * TICKS_PER_SECOND)); 1048*83c4dfe9Sjg if (clk == -1 && 1049*83c4dfe9Sjg do_nvpflush == 0 && nvpflush_timer_busy == 0) { 1050*83c4dfe9Sjg /* 1051*83c4dfe9Sjg * Note that CALLB_CPR_EXIT calls mutex_exit() 1052*83c4dfe9Sjg * on the lock passed in to CALLB_CPR_INIT, 1053*83c4dfe9Sjg * so the lock must be held when invoking it. 
1054*83c4dfe9Sjg */ 1055*83c4dfe9Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 1056*83c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n")); 1057*83c4dfe9Sjg ASSERT(mutex_owned(&nvpflush_lock)); 1058*83c4dfe9Sjg nvpflush_thr_id = NULL; 1059*83c4dfe9Sjg nvpflush_daemon_active = 0; 1060*83c4dfe9Sjg CALLB_CPR_EXIT(&cprinfo); 1061*83c4dfe9Sjg thread_exit(); 1062*83c4dfe9Sjg } 1063*83c4dfe9Sjg } 1064*83c4dfe9Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 1065*83c4dfe9Sjg 1066*83c4dfe9Sjg nvpbusy = 1; 1067*83c4dfe9Sjg want_wakeup = 0; 1068*83c4dfe9Sjg do_nvpflush = 0; 1069*83c4dfe9Sjg mutex_exit(&nvpflush_lock); 1070*83c4dfe9Sjg 1071*83c4dfe9Sjg /* 1072*83c4dfe9Sjg * Try flushing what's dirty, reschedule if there's 1073*83c4dfe9Sjg * a failure or data gets marked as dirty again. 1074*83c4dfe9Sjg * First move each file marked dirty to the dirty 1075*83c4dfe9Sjg * list to avoid locking the list across the write. 1076*83c4dfe9Sjg */ 1077*83c4dfe9Sjg mutex_enter(&nvf_cache_mutex); 1078*83c4dfe9Sjg for (nvfdp = list_head(&nvf_cache_files); 1079*83c4dfe9Sjg nvfdp; nvfdp = nextfdp) { 1080*83c4dfe9Sjg nextfdp = list_next(&nvf_cache_files, nvfdp); 1081*83c4dfe9Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 1082*83c4dfe9Sjg if (NVF_IS_DIRTY(nvfdp)) { 1083*83c4dfe9Sjg list_remove(&nvf_cache_files, nvfdp); 1084*83c4dfe9Sjg list_insert_tail(&nvf_dirty_files, nvfdp); 1085*83c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 1086*83c4dfe9Sjg } else { 1087*83c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, 1088*83c4dfe9Sjg "nvpdaemon: not dirty %s\n", 1089*83c4dfe9Sjg nvfdp->nvf_cache_path)); 1090*83c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 1091*83c4dfe9Sjg } 1092*83c4dfe9Sjg } 1093*83c4dfe9Sjg mutex_exit(&nvf_cache_mutex); 1094*83c4dfe9Sjg 1095*83c4dfe9Sjg /* 1096*83c4dfe9Sjg * Now go through the dirty list 1097*83c4dfe9Sjg */ 1098*83c4dfe9Sjg for (nvfdp = list_head(&nvf_dirty_files); 1099*83c4dfe9Sjg nvfdp; nvfdp = nextfdp) { 1100*83c4dfe9Sjg nextfdp = list_next(&nvf_dirty_files, nvfdp); 1101*83c4dfe9Sjg 
1102*83c4dfe9Sjg is_now_clean = 0; 1103*83c4dfe9Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 1104*83c4dfe9Sjg if (NVF_IS_DIRTY(nvfdp)) { 1105*83c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, 1106*83c4dfe9Sjg "nvpdaemon: flush %s\n", 1107*83c4dfe9Sjg nvfdp->nvf_cache_path)); 1108*83c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 1109*83c4dfe9Sjg rval = nvpflush_one(nvfdp); 1110*83c4dfe9Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 1111*83c4dfe9Sjg if (rval != DDI_SUCCESS || 1112*83c4dfe9Sjg NVF_IS_DIRTY(nvfdp)) { 1113*83c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 1114*83c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, 1115*83c4dfe9Sjg "nvpdaemon: %s dirty again\n", 1116*83c4dfe9Sjg nvfdp->nvf_cache_path)); 1117*83c4dfe9Sjg want_wakeup = 1; 1118*83c4dfe9Sjg } else { 1119*83c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 1120*83c4dfe9Sjg nvf_write_is_complete(nvfdp); 1121*83c4dfe9Sjg is_now_clean = 1; 1122*83c4dfe9Sjg } 1123*83c4dfe9Sjg } else { 1124*83c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, 1125*83c4dfe9Sjg "nvpdaemon: not dirty %s\n", 1126*83c4dfe9Sjg nvfdp->nvf_cache_path)); 1127*83c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 1128*83c4dfe9Sjg is_now_clean = 1; 1129*83c4dfe9Sjg } 1130*83c4dfe9Sjg 1131*83c4dfe9Sjg if (is_now_clean) { 1132*83c4dfe9Sjg mutex_enter(&nvf_cache_mutex); 1133*83c4dfe9Sjg list_remove(&nvf_dirty_files, nvfdp); 1134*83c4dfe9Sjg list_insert_tail(&nvf_cache_files, 1135*83c4dfe9Sjg nvfdp); 1136*83c4dfe9Sjg mutex_exit(&nvf_cache_mutex); 1137*83c4dfe9Sjg } 1138*83c4dfe9Sjg } 1139*83c4dfe9Sjg 1140*83c4dfe9Sjg if (want_wakeup) 1141*83c4dfe9Sjg nvf_wake_daemon(); 1142*83c4dfe9Sjg 1143*83c4dfe9Sjg mutex_enter(&nvpflush_lock); 1144*83c4dfe9Sjg nvpbusy = 0; 1145*83c4dfe9Sjg } 1146*83c4dfe9Sjg } 1147