183c4dfe9Sjg /* 283c4dfe9Sjg * CDDL HEADER START 383c4dfe9Sjg * 483c4dfe9Sjg * The contents of this file are subject to the terms of the 583c4dfe9Sjg * Common Development and Distribution License (the "License"). 683c4dfe9Sjg * You may not use this file except in compliance with the License. 783c4dfe9Sjg * 883c4dfe9Sjg * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 983c4dfe9Sjg * or http://www.opensolaris.org/os/licensing. 1083c4dfe9Sjg * See the License for the specific language governing permissions 1183c4dfe9Sjg * and limitations under the License. 1283c4dfe9Sjg * 1383c4dfe9Sjg * When distributing Covered Code, include this CDDL HEADER in each 1483c4dfe9Sjg * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1583c4dfe9Sjg * If applicable, add the following below this CDDL HEADER, with the 1683c4dfe9Sjg * fields enclosed by brackets "[]" replaced with your own identifying 1783c4dfe9Sjg * information: Portions Copyright [yyyy] [name of copyright owner] 1883c4dfe9Sjg * 1983c4dfe9Sjg * CDDL HEADER END 2083c4dfe9Sjg */ 2183c4dfe9Sjg /* 22*d3d50737SRafael Vanoni * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 2383c4dfe9Sjg * Use is subject to license terms. 
2483c4dfe9Sjg */ 2583c4dfe9Sjg 2683c4dfe9Sjg #include <sys/note.h> 2783c4dfe9Sjg #include <sys/t_lock.h> 2883c4dfe9Sjg #include <sys/cmn_err.h> 2983c4dfe9Sjg #include <sys/instance.h> 3083c4dfe9Sjg #include <sys/conf.h> 3183c4dfe9Sjg #include <sys/stat.h> 3283c4dfe9Sjg #include <sys/ddi.h> 3383c4dfe9Sjg #include <sys/hwconf.h> 3483c4dfe9Sjg #include <sys/sunddi.h> 3583c4dfe9Sjg #include <sys/sunndi.h> 3683c4dfe9Sjg #include <sys/ddi_impldefs.h> 3783c4dfe9Sjg #include <sys/ndi_impldefs.h> 3883c4dfe9Sjg #include <sys/modctl.h> 3983c4dfe9Sjg #include <sys/dacf.h> 4083c4dfe9Sjg #include <sys/promif.h> 4183c4dfe9Sjg #include <sys/cpuvar.h> 4283c4dfe9Sjg #include <sys/pathname.h> 4383c4dfe9Sjg #include <sys/kobj.h> 4483c4dfe9Sjg #include <sys/devcache.h> 4583c4dfe9Sjg #include <sys/devcache_impl.h> 4683c4dfe9Sjg #include <sys/sysmacros.h> 4783c4dfe9Sjg #include <sys/varargs.h> 4883c4dfe9Sjg #include <sys/callb.h> 4983c4dfe9Sjg 5083c4dfe9Sjg /* 5183c4dfe9Sjg * This facility provides interfaces to clients to register, 5283c4dfe9Sjg * read and update cache data in persisted backing store files, 5383c4dfe9Sjg * usually in /etc/devices. The data persisted through this 5483c4dfe9Sjg * mechanism should be stateless data, functioning in the sense 5583c4dfe9Sjg * of a cache. Writes are performed by a background daemon 5683c4dfe9Sjg * thread, permitting a client to schedule an update without 5783c4dfe9Sjg * blocking, then continue updating the data state in 5883c4dfe9Sjg * parallel. The data is only locked by the daemon thread 5983c4dfe9Sjg * to pack the data in preparation for the write. 6083c4dfe9Sjg * 6183c4dfe9Sjg * Data persisted through this mechanism should be capable 6283c4dfe9Sjg * of being regenerated through normal system operation, 6383c4dfe9Sjg * for example attaching all disk devices would cause all 6483c4dfe9Sjg * devids to be registered for those devices. 
By caching 6583c4dfe9Sjg * a devid-device tuple, the system can operate in a 6683c4dfe9Sjg * more optimal way, directly attaching the device mapped 6783c4dfe9Sjg * to a devid, rather than burdensomely driving attach of 6883c4dfe9Sjg * the entire device tree to discover a single device. 6983c4dfe9Sjg * 7083c4dfe9Sjg * Note that a client should only need to include 7183c4dfe9Sjg * <sys/devcache.h> for the supported interfaces. 7283c4dfe9Sjg * 7383c4dfe9Sjg * The data per client is entirely within the control of 7483c4dfe9Sjg * the client. When reading, data unpacked from the backing 7583c4dfe9Sjg * store should be inserted in the list. The pointer to 76da6c28aaSamw * the list can be retrieved via nvf_list(). When writing, 7783c4dfe9Sjg * the data on the list is to be packed and returned to the 7883c4dfe9Sjg * nvpdaemon as an nvlist. 7983c4dfe9Sjg * 8083c4dfe9Sjg * Obvious restrictions are imposed by the limits of the 8183c4dfe9Sjg * nvlist format. The data cannot be read or written 8283c4dfe9Sjg * piecemeal, and large amounts of data aren't recommended. 8383c4dfe9Sjg * However, nvlists do allow that data be named and typed 8483c4dfe9Sjg * and can be size-of-int invariant, and the cached data 8583c4dfe9Sjg * can be versioned conveniently. 8683c4dfe9Sjg * 8783c4dfe9Sjg * The registration involves two steps: a handle is 8883c4dfe9Sjg * allocated by calling the registration function. 8983c4dfe9Sjg * This sets up the data referenced by the handle and 9083c4dfe9Sjg * initializes the lock. Following registration, the 9183c4dfe9Sjg * client must initialize the data list. The list 9283c4dfe9Sjg * interfaces require that the list element with offset 9383c4dfe9Sjg * to the node link be provided. The format of the 9483c4dfe9Sjg * list element is under the control of the client. 9583c4dfe9Sjg * 9683c4dfe9Sjg * Locking: the address of the data list r/w lock provided 9783c4dfe9Sjg * can be accessed with nvf_lock(). 
The lock must be held 9883c4dfe9Sjg * as reader when traversing the list or checking state, 9983c4dfe9Sjg * such as nvf_is_dirty(). The lock must be held as 10083c4dfe9Sjg * writer when updating the list or marking it dirty. 10183c4dfe9Sjg * The lock must not be held when waking the daemon. 10283c4dfe9Sjg * 10383c4dfe9Sjg * The data r/w lock is held as writer when the pack, 10483c4dfe9Sjg * unpack and free list handlers are called. The 10583c4dfe9Sjg * lock should not be dropped and must be still held 10683c4dfe9Sjg * upon return. The client should also hold the lock 10783c4dfe9Sjg * as reader when checking if the list is dirty, and 10883c4dfe9Sjg * as writer when marking the list dirty or initiating 10983c4dfe9Sjg * a read. 11083c4dfe9Sjg * 11183c4dfe9Sjg * The asynchronous nature of updates allows for the 11283c4dfe9Sjg * possibility that the data may continue to be updated 11383c4dfe9Sjg * once the daemon has been notified that an update is 11483c4dfe9Sjg * desired. The data only needs to be locked against 11583c4dfe9Sjg * updates when packing the data into the form to be 11683c4dfe9Sjg * written. When the write of the packed data has 11783c4dfe9Sjg * completed, the daemon will automatically reschedule 11883c4dfe9Sjg * an update if the data was marked dirty after the 11983c4dfe9Sjg * point at which it was packed. Before beginning an 12083c4dfe9Sjg * update, the daemon attempts to lock the data as 12183c4dfe9Sjg * writer; if the writer lock is already held, it 12283c4dfe9Sjg * backs off and retries later. The model is to give 12383c4dfe9Sjg * priority to the kernel processes generating the 12483c4dfe9Sjg * data, and that the nature of the data is that 12583c4dfe9Sjg * it does not change often, can be re-generated when 12683c4dfe9Sjg * needed, so updates should not happen often and 12783c4dfe9Sjg * can be delayed until the data stops changing. 
12883c4dfe9Sjg * The client may update the list or mark it dirty 12983c4dfe9Sjg * any time it is able to acquire the lock as 13083c4dfe9Sjg * writer first. 13183c4dfe9Sjg * 13283c4dfe9Sjg * A failed write will be retried after some delay, 13383c4dfe9Sjg * in the hope that the cause of the error will be 13483c4dfe9Sjg * transient, for example a filesystem with no space 13583c4dfe9Sjg * available. An update on a read-only filesystem 13683c4dfe9Sjg * is failed silently and not retried; this would be 13783c4dfe9Sjg * the case when booted off install media. 13883c4dfe9Sjg * 13983c4dfe9Sjg * There is no unregister mechanism as of yet, as it 14083c4dfe9Sjg * hasn't been needed so far. 14183c4dfe9Sjg */ 14283c4dfe9Sjg 14383c4dfe9Sjg /* 14483c4dfe9Sjg * Global list of files registered and updated by the nvpflush 14583c4dfe9Sjg * daemon, protected by the nvf_cache_mutex. While an 14683c4dfe9Sjg * update is taking place, a file is temporarily moved to 14783c4dfe9Sjg * the dirty list to avoid locking the primary list for 14883c4dfe9Sjg * the duration of the update. 14983c4dfe9Sjg */ 15083c4dfe9Sjg list_t nvf_cache_files; 15183c4dfe9Sjg list_t nvf_dirty_files; 15283c4dfe9Sjg kmutex_t nvf_cache_mutex; 15383c4dfe9Sjg 15483c4dfe9Sjg 15583c4dfe9Sjg /* 15683c4dfe9Sjg * Allow some delay from an update of the data before flushing 15783c4dfe9Sjg * to permit simultaneous updates of multiple changes. 15883c4dfe9Sjg * Changes in the data are expected to be bursty, ie 15983c4dfe9Sjg * reconfig or hot-plug of a new adapter. 16083c4dfe9Sjg * 16183c4dfe9Sjg * kfio_report_error (default 0) 16283c4dfe9Sjg * Set to 1 to enable some error messages related to low-level 16383c4dfe9Sjg * kernel file i/o operations. 16483c4dfe9Sjg * 16583c4dfe9Sjg * nvpflush_delay (default 10) 16683c4dfe9Sjg * The number of seconds after data is marked dirty before the 16783c4dfe9Sjg * flush daemon is triggered to flush the data. A longer period 16883c4dfe9Sjg * of time permits more data updates per write. 
Note that 16983c4dfe9Sjg * every update resets the timer so no repository write will 17083c4dfe9Sjg * occur while data is being updated continuously. 17183c4dfe9Sjg * 17283c4dfe9Sjg * nvpdaemon_idle_time (default 60) 17383c4dfe9Sjg * The number of seconds the daemon will sleep idle before exiting. 17483c4dfe9Sjg * 17583c4dfe9Sjg */ 17683c4dfe9Sjg #define NVPFLUSH_DELAY 10 17783c4dfe9Sjg #define NVPDAEMON_IDLE_TIME 60 17883c4dfe9Sjg 17983c4dfe9Sjg #define TICKS_PER_SECOND (drv_usectohz(1000000)) 18083c4dfe9Sjg 18183c4dfe9Sjg /* 18283c4dfe9Sjg * Tunables 18383c4dfe9Sjg */ 18483c4dfe9Sjg int kfio_report_error = 0; /* kernel file i/o operations */ 18583c4dfe9Sjg int kfio_disable_read = 0; /* disable all reads */ 18683c4dfe9Sjg int kfio_disable_write = 0; /* disable all writes */ 18783c4dfe9Sjg 18883c4dfe9Sjg int nvpflush_delay = NVPFLUSH_DELAY; 18983c4dfe9Sjg int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME; 19083c4dfe9Sjg 19183c4dfe9Sjg static timeout_id_t nvpflush_id = 0; 19283c4dfe9Sjg static int nvpflush_timer_busy = 0; 19383c4dfe9Sjg static int nvpflush_daemon_active = 0; 19483c4dfe9Sjg static kthread_t *nvpflush_thr_id = 0; 19583c4dfe9Sjg 19683c4dfe9Sjg static int do_nvpflush = 0; 19783c4dfe9Sjg static int nvpbusy = 0; 19883c4dfe9Sjg static kmutex_t nvpflush_lock; 19983c4dfe9Sjg static kcondvar_t nvpflush_cv; 20083c4dfe9Sjg static kthread_id_t nvpflush_thread; 20183c4dfe9Sjg static clock_t nvpticks; 20283c4dfe9Sjg 20383c4dfe9Sjg static void nvpflush_daemon(void); 20483c4dfe9Sjg 20583c4dfe9Sjg #ifdef DEBUG 20683c4dfe9Sjg int nvpdaemon_debug = 0; 20783c4dfe9Sjg int kfio_debug = 0; 20883c4dfe9Sjg #endif /* DEBUG */ 20983c4dfe9Sjg 21083c4dfe9Sjg extern int modrootloaded; 21183c4dfe9Sjg extern void mdi_read_devices_files(void); 21283c4dfe9Sjg extern void mdi_clean_vhcache(void); 213c3b4ae18SJerry Gilliam extern int sys_shutdown; 21483c4dfe9Sjg 21583c4dfe9Sjg /* 21683c4dfe9Sjg * Initialize the overall cache file management 21783c4dfe9Sjg */ 21883c4dfe9Sjg void 
21983c4dfe9Sjg i_ddi_devices_init(void) 22083c4dfe9Sjg { 22183c4dfe9Sjg list_create(&nvf_cache_files, sizeof (nvfd_t), 22283c4dfe9Sjg offsetof(nvfd_t, nvf_link)); 22383c4dfe9Sjg list_create(&nvf_dirty_files, sizeof (nvfd_t), 22483c4dfe9Sjg offsetof(nvfd_t, nvf_link)); 22583c4dfe9Sjg mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL); 22625e8c5aaSvikram retire_store_init(); 22783c4dfe9Sjg devid_cache_init(); 22883c4dfe9Sjg } 22983c4dfe9Sjg 23083c4dfe9Sjg /* 23183c4dfe9Sjg * Read cache files 23283c4dfe9Sjg * The files read here should be restricted to those 23383c4dfe9Sjg * that may be required to mount root. 23483c4dfe9Sjg */ 23583c4dfe9Sjg void 23683c4dfe9Sjg i_ddi_read_devices_files(void) 23783c4dfe9Sjg { 23825e8c5aaSvikram /* 23925e8c5aaSvikram * The retire store should be the first file read as it 24025e8c5aaSvikram * may need to offline devices. kfio_disable_read is not 24125e8c5aaSvikram * used for retire. For the rationale see the tunable 24225e8c5aaSvikram * ddi_retire_store_bypass and comments in: 24325e8c5aaSvikram * uts/common/os/retire_store.c 24425e8c5aaSvikram */ 24525e8c5aaSvikram 24625e8c5aaSvikram retire_store_read(); 24725e8c5aaSvikram 24883c4dfe9Sjg if (!kfio_disable_read) { 24983c4dfe9Sjg mdi_read_devices_files(); 25083c4dfe9Sjg devid_cache_read(); 25183c4dfe9Sjg } 25283c4dfe9Sjg } 25383c4dfe9Sjg 25483c4dfe9Sjg void 25583c4dfe9Sjg i_ddi_start_flush_daemon(void) 25683c4dfe9Sjg { 25783c4dfe9Sjg nvfd_t *nvfdp; 25883c4dfe9Sjg 25983c4dfe9Sjg ASSERT(i_ddi_io_initialized()); 26083c4dfe9Sjg 26183c4dfe9Sjg mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL); 26283c4dfe9Sjg cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL); 26383c4dfe9Sjg 26483c4dfe9Sjg mutex_enter(&nvf_cache_mutex); 26583c4dfe9Sjg for (nvfdp = list_head(&nvf_cache_files); nvfdp; 26683c4dfe9Sjg nvfdp = list_next(&nvf_cache_files, nvfdp)) { 26783c4dfe9Sjg if (NVF_IS_DIRTY(nvfdp)) { 26883c4dfe9Sjg nvf_wake_daemon(); 26983c4dfe9Sjg break; 27083c4dfe9Sjg } 27183c4dfe9Sjg } 27283c4dfe9Sjg 
mutex_exit(&nvf_cache_mutex); 27383c4dfe9Sjg } 27483c4dfe9Sjg 27583c4dfe9Sjg void 27683c4dfe9Sjg i_ddi_clean_devices_files(void) 27783c4dfe9Sjg { 27883c4dfe9Sjg devid_cache_cleanup(); 27983c4dfe9Sjg mdi_clean_vhcache(); 28083c4dfe9Sjg } 28183c4dfe9Sjg 28283c4dfe9Sjg /* 28383c4dfe9Sjg * Register a cache file to be managed and updated by the nvpflush daemon. 28483c4dfe9Sjg * All operations are performed through the returned handle. 28583c4dfe9Sjg * There is no unregister mechanism for now. 28683c4dfe9Sjg */ 28783c4dfe9Sjg nvf_handle_t 28883c4dfe9Sjg nvf_register_file(nvf_ops_t *ops) 28983c4dfe9Sjg { 29083c4dfe9Sjg nvfd_t *nvfdp; 29183c4dfe9Sjg 29283c4dfe9Sjg nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP); 29383c4dfe9Sjg 29483c4dfe9Sjg nvfdp->nvf_ops = ops; 29583c4dfe9Sjg nvfdp->nvf_flags = 0; 29683c4dfe9Sjg rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL); 29783c4dfe9Sjg 29883c4dfe9Sjg mutex_enter(&nvf_cache_mutex); 29983c4dfe9Sjg list_insert_tail(&nvf_cache_files, nvfdp); 30083c4dfe9Sjg mutex_exit(&nvf_cache_mutex); 30183c4dfe9Sjg 30283c4dfe9Sjg return ((nvf_handle_t)nvfdp); 30383c4dfe9Sjg } 30483c4dfe9Sjg 30583c4dfe9Sjg /*PRINTFLIKE1*/ 30683c4dfe9Sjg void 30783c4dfe9Sjg nvf_error(const char *fmt, ...) 30883c4dfe9Sjg { 30983c4dfe9Sjg va_list ap; 31083c4dfe9Sjg 31183c4dfe9Sjg if (kfio_report_error) { 31283c4dfe9Sjg va_start(ap, fmt); 31383c4dfe9Sjg vcmn_err(CE_NOTE, fmt, ap); 31483c4dfe9Sjg va_end(ap); 31583c4dfe9Sjg } 31683c4dfe9Sjg } 31783c4dfe9Sjg 31883c4dfe9Sjg /* 31983c4dfe9Sjg * Some operations clients may use to manage the data 32083c4dfe9Sjg * to be persisted in a cache file. 
32183c4dfe9Sjg */ 32283c4dfe9Sjg char * 32383c4dfe9Sjg nvf_cache_name(nvf_handle_t handle) 32483c4dfe9Sjg { 32583c4dfe9Sjg return (((nvfd_t *)handle)->nvf_cache_path); 32683c4dfe9Sjg } 32783c4dfe9Sjg 32883c4dfe9Sjg krwlock_t * 32983c4dfe9Sjg nvf_lock(nvf_handle_t handle) 33083c4dfe9Sjg { 33183c4dfe9Sjg return (&(((nvfd_t *)handle)->nvf_lock)); 33283c4dfe9Sjg } 33383c4dfe9Sjg 33483c4dfe9Sjg list_t * 33583c4dfe9Sjg nvf_list(nvf_handle_t handle) 33683c4dfe9Sjg { 33783c4dfe9Sjg return (&(((nvfd_t *)handle)->nvf_data_list)); 33883c4dfe9Sjg } 33983c4dfe9Sjg 34083c4dfe9Sjg void 34183c4dfe9Sjg nvf_mark_dirty(nvf_handle_t handle) 34283c4dfe9Sjg { 34383c4dfe9Sjg ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock))); 34483c4dfe9Sjg NVF_MARK_DIRTY((nvfd_t *)handle); 34583c4dfe9Sjg } 34683c4dfe9Sjg 34783c4dfe9Sjg int 34883c4dfe9Sjg nvf_is_dirty(nvf_handle_t handle) 34983c4dfe9Sjg { 35083c4dfe9Sjg ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock))); 35183c4dfe9Sjg return (NVF_IS_DIRTY((nvfd_t *)handle)); 35283c4dfe9Sjg } 35383c4dfe9Sjg 35483c4dfe9Sjg static uint16_t 35583c4dfe9Sjg nvp_cksum(uchar_t *buf, int64_t buflen) 35683c4dfe9Sjg { 35783c4dfe9Sjg uint16_t cksum = 0; 35883c4dfe9Sjg uint16_t *p = (uint16_t *)buf; 35983c4dfe9Sjg int64_t n; 36083c4dfe9Sjg 36183c4dfe9Sjg if ((buflen & 0x01) != 0) { 36283c4dfe9Sjg buflen--; 36383c4dfe9Sjg cksum = buf[buflen]; 36483c4dfe9Sjg } 36583c4dfe9Sjg n = buflen / 2; 36683c4dfe9Sjg while (n-- > 0) 36783c4dfe9Sjg cksum ^= *p++; 36883c4dfe9Sjg return (cksum); 36983c4dfe9Sjg } 37083c4dfe9Sjg 37183c4dfe9Sjg int 37283c4dfe9Sjg fread_nvlist(char *filename, nvlist_t **ret_nvlist) 37383c4dfe9Sjg { 37483c4dfe9Sjg struct _buf *file; 37583c4dfe9Sjg nvpf_hdr_t hdr; 37683c4dfe9Sjg char *buf; 37783c4dfe9Sjg nvlist_t *nvl; 37883c4dfe9Sjg int rval; 37983c4dfe9Sjg uint_t offset; 38083c4dfe9Sjg int n; 38183c4dfe9Sjg char c; 38283c4dfe9Sjg uint16_t cksum, hdrsum; 38383c4dfe9Sjg 38483c4dfe9Sjg *ret_nvlist = NULL; 38583c4dfe9Sjg 38683c4dfe9Sjg file = 
kobj_open_file(filename); 38783c4dfe9Sjg if (file == (struct _buf *)-1) { 38883c4dfe9Sjg KFDEBUG((CE_CONT, "cannot open file: %s\n", filename)); 38983c4dfe9Sjg return (ENOENT); 39083c4dfe9Sjg } 39183c4dfe9Sjg 39283c4dfe9Sjg offset = 0; 39383c4dfe9Sjg n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset); 39483c4dfe9Sjg if (n != sizeof (hdr)) { 39583c4dfe9Sjg kobj_close_file(file); 39683c4dfe9Sjg if (n < 0) { 39783c4dfe9Sjg nvf_error("error reading header: %s\n", filename); 39883c4dfe9Sjg return (EIO); 39983c4dfe9Sjg } else if (n == 0) { 40083c4dfe9Sjg KFDEBUG((CE_CONT, "file empty: %s\n", filename)); 40183c4dfe9Sjg } else { 40283c4dfe9Sjg nvf_error("header size incorrect: %s\n", filename); 40383c4dfe9Sjg } 40483c4dfe9Sjg return (EINVAL); 40583c4dfe9Sjg } 40683c4dfe9Sjg offset += n; 40783c4dfe9Sjg 40883c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic)); 40983c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version)); 41083c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_size: %lld\n", 41183c4dfe9Sjg (longlong_t)hdr.nvpf_size)); 41283c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n", 41383c4dfe9Sjg hdr.nvpf_hdr_chksum)); 41483c4dfe9Sjg KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum)); 41583c4dfe9Sjg 41683c4dfe9Sjg cksum = hdr.nvpf_hdr_chksum; 41783c4dfe9Sjg hdr.nvpf_hdr_chksum = 0; 41883c4dfe9Sjg hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr)); 41983c4dfe9Sjg 42083c4dfe9Sjg if (hdr.nvpf_magic != NVPF_HDR_MAGIC || 42183c4dfe9Sjg hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) { 42283c4dfe9Sjg kobj_close_file(file); 42383c4dfe9Sjg if (hdrsum != cksum) { 42483c4dfe9Sjg nvf_error("%s: checksum error " 42583c4dfe9Sjg "(actual 0x%x, expected 0x%x)\n", 42683c4dfe9Sjg filename, hdrsum, cksum); 42783c4dfe9Sjg } 42883c4dfe9Sjg nvf_error("%s: header information incorrect", filename); 42983c4dfe9Sjg return (EINVAL); 43083c4dfe9Sjg } 43183c4dfe9Sjg 43283c4dfe9Sjg ASSERT(hdr.nvpf_size >= 0); 43383c4dfe9Sjg 43483c4dfe9Sjg buf = 
kmem_alloc(hdr.nvpf_size, KM_SLEEP); 43583c4dfe9Sjg n = kobj_read_file(file, buf, hdr.nvpf_size, offset); 43683c4dfe9Sjg if (n != hdr.nvpf_size) { 43783c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 43883c4dfe9Sjg kobj_close_file(file); 43983c4dfe9Sjg if (n < 0) { 44083c4dfe9Sjg nvf_error("%s: read error %d", filename, n); 44183c4dfe9Sjg } else { 44283c4dfe9Sjg nvf_error("%s: incomplete read %d/%lld", 44383c4dfe9Sjg filename, n, (longlong_t)hdr.nvpf_size); 44483c4dfe9Sjg } 44583c4dfe9Sjg return (EINVAL); 44683c4dfe9Sjg } 44783c4dfe9Sjg offset += n; 44883c4dfe9Sjg 44983c4dfe9Sjg rval = kobj_read_file(file, &c, 1, offset); 45083c4dfe9Sjg kobj_close_file(file); 45183c4dfe9Sjg if (rval > 0) { 45283c4dfe9Sjg nvf_error("%s is larger than %lld\n", 45383c4dfe9Sjg filename, (longlong_t)hdr.nvpf_size); 45483c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 45583c4dfe9Sjg return (EINVAL); 45683c4dfe9Sjg } 45783c4dfe9Sjg 45883c4dfe9Sjg cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size); 45983c4dfe9Sjg if (hdr.nvpf_chksum != cksum) { 46083c4dfe9Sjg nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n", 46183c4dfe9Sjg filename, hdr.nvpf_chksum, cksum); 46283c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 46383c4dfe9Sjg return (EINVAL); 46483c4dfe9Sjg } 46583c4dfe9Sjg 46683c4dfe9Sjg nvl = NULL; 46783c4dfe9Sjg rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0); 46883c4dfe9Sjg if (rval != 0) { 46983c4dfe9Sjg nvf_error("%s: error %d unpacking nvlist\n", 47083c4dfe9Sjg filename, rval); 47183c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 47283c4dfe9Sjg return (EINVAL); 47383c4dfe9Sjg } 47483c4dfe9Sjg 47583c4dfe9Sjg kmem_free(buf, hdr.nvpf_size); 47683c4dfe9Sjg *ret_nvlist = nvl; 47783c4dfe9Sjg return (0); 47883c4dfe9Sjg } 47983c4dfe9Sjg 48083c4dfe9Sjg static int 48183c4dfe9Sjg kfcreate(char *filename, kfile_t **kfilep) 48283c4dfe9Sjg { 48383c4dfe9Sjg kfile_t *fp; 48483c4dfe9Sjg int rval; 48583c4dfe9Sjg 48683c4dfe9Sjg ASSERT(modrootloaded); 48783c4dfe9Sjg 48883c4dfe9Sjg fp = kmem_alloc(sizeof (kfile_t), 
KM_SLEEP); 48983c4dfe9Sjg 49083c4dfe9Sjg fp->kf_vnflags = FCREAT | FWRITE | FTRUNC; 49183c4dfe9Sjg fp->kf_fname = filename; 49283c4dfe9Sjg fp->kf_fpos = 0; 49383c4dfe9Sjg fp->kf_state = 0; 49483c4dfe9Sjg 49583c4dfe9Sjg KFDEBUG((CE_CONT, "create: %s flags 0x%x\n", 49683c4dfe9Sjg filename, fp->kf_vnflags)); 49783c4dfe9Sjg rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags, 49883c4dfe9Sjg 0444, &fp->kf_vp, CRCREAT, 0); 49983c4dfe9Sjg if (rval != 0) { 50083c4dfe9Sjg kmem_free(fp, sizeof (kfile_t)); 50183c4dfe9Sjg KFDEBUG((CE_CONT, "%s: create error %d\n", 50283c4dfe9Sjg filename, rval)); 50383c4dfe9Sjg return (rval); 50483c4dfe9Sjg } 50583c4dfe9Sjg 50683c4dfe9Sjg *kfilep = fp; 50783c4dfe9Sjg return (0); 50883c4dfe9Sjg } 50983c4dfe9Sjg 51083c4dfe9Sjg static int 51183c4dfe9Sjg kfremove(char *filename) 51283c4dfe9Sjg { 51383c4dfe9Sjg int rval; 51483c4dfe9Sjg 51583c4dfe9Sjg KFDEBUG((CE_CONT, "remove: %s\n", filename)); 51683c4dfe9Sjg rval = vn_remove(filename, UIO_SYSSPACE, RMFILE); 51783c4dfe9Sjg if (rval != 0) { 51883c4dfe9Sjg KFDEBUG((CE_CONT, "%s: remove error %d\n", 51983c4dfe9Sjg filename, rval)); 52083c4dfe9Sjg } 52183c4dfe9Sjg return (rval); 52283c4dfe9Sjg } 52383c4dfe9Sjg 52483c4dfe9Sjg static int 52583c4dfe9Sjg kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 52683c4dfe9Sjg { 52783c4dfe9Sjg ssize_t resid; 52883c4dfe9Sjg int err; 52983c4dfe9Sjg ssize_t n; 53083c4dfe9Sjg 53183c4dfe9Sjg ASSERT(modrootloaded); 53283c4dfe9Sjg 53383c4dfe9Sjg if (fp->kf_state != 0) 53483c4dfe9Sjg return (fp->kf_state); 53583c4dfe9Sjg 53683c4dfe9Sjg err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos, 53783c4dfe9Sjg UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid); 53883c4dfe9Sjg if (err != 0) { 53983c4dfe9Sjg KFDEBUG((CE_CONT, "%s: read error %d\n", 54083c4dfe9Sjg fp->kf_fname, err)); 54183c4dfe9Sjg fp->kf_state = err; 54283c4dfe9Sjg return (err); 54383c4dfe9Sjg } 54483c4dfe9Sjg 54583c4dfe9Sjg ASSERT(resid >= 0 && resid <= bufsiz); 54683c4dfe9Sjg n = bufsiz - resid; 
54783c4dfe9Sjg 54883c4dfe9Sjg KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n", 54983c4dfe9Sjg fp->kf_fname, n, bufsiz, resid)); 55083c4dfe9Sjg 55183c4dfe9Sjg fp->kf_fpos += n; 55283c4dfe9Sjg *ret_n = n; 55383c4dfe9Sjg return (0); 55483c4dfe9Sjg } 55583c4dfe9Sjg 55683c4dfe9Sjg static int 55783c4dfe9Sjg kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 55883c4dfe9Sjg { 55983c4dfe9Sjg rlim64_t rlimit; 56083c4dfe9Sjg ssize_t resid; 56183c4dfe9Sjg int err; 56283c4dfe9Sjg ssize_t len; 56383c4dfe9Sjg ssize_t n = 0; 56483c4dfe9Sjg 56583c4dfe9Sjg ASSERT(modrootloaded); 56683c4dfe9Sjg 56783c4dfe9Sjg if (fp->kf_state != 0) 56883c4dfe9Sjg return (fp->kf_state); 56983c4dfe9Sjg 57083c4dfe9Sjg len = bufsiz; 57183c4dfe9Sjg rlimit = bufsiz + 1; 57283c4dfe9Sjg for (;;) { 57383c4dfe9Sjg err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos, 57483c4dfe9Sjg UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 57583c4dfe9Sjg if (err) { 57683c4dfe9Sjg KFDEBUG((CE_CONT, "%s: write error %d\n", 57783c4dfe9Sjg fp->kf_fname, err)); 57883c4dfe9Sjg fp->kf_state = err; 57983c4dfe9Sjg return (err); 58083c4dfe9Sjg } 58183c4dfe9Sjg 58283c4dfe9Sjg KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n", 58383c4dfe9Sjg fp->kf_fname, len-resid, resid)); 58483c4dfe9Sjg 58583c4dfe9Sjg ASSERT(resid >= 0 && resid <= len); 58683c4dfe9Sjg 58783c4dfe9Sjg n += (len - resid); 58883c4dfe9Sjg if (resid == 0) 58983c4dfe9Sjg break; 59083c4dfe9Sjg 59183c4dfe9Sjg if (resid == len) { 59283c4dfe9Sjg KFDEBUG((CE_CONT, "%s: filesystem full?\n", 59383c4dfe9Sjg fp->kf_fname)); 59483c4dfe9Sjg fp->kf_state = ENOSPC; 59583c4dfe9Sjg return (ENOSPC); 59683c4dfe9Sjg } 59783c4dfe9Sjg 59883c4dfe9Sjg len -= resid; 59983c4dfe9Sjg buf += len; 60083c4dfe9Sjg fp->kf_fpos += len; 60183c4dfe9Sjg len = resid; 60283c4dfe9Sjg } 60383c4dfe9Sjg 60483c4dfe9Sjg ASSERT(n == bufsiz); 60583c4dfe9Sjg KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n)); 60683c4dfe9Sjg 60783c4dfe9Sjg *ret_n = n; 
60883c4dfe9Sjg return (0); 60983c4dfe9Sjg } 61083c4dfe9Sjg 61183c4dfe9Sjg 61283c4dfe9Sjg static int 61383c4dfe9Sjg kfclose(kfile_t *fp) 61483c4dfe9Sjg { 61583c4dfe9Sjg int rval; 61683c4dfe9Sjg 61783c4dfe9Sjg KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname)); 61883c4dfe9Sjg 61983c4dfe9Sjg if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) { 620da6c28aaSamw rval = VOP_FSYNC(fp->kf_vp, FSYNC, kcred, NULL); 62183c4dfe9Sjg if (rval != 0) { 62283c4dfe9Sjg nvf_error("%s: sync error %d\n", 62383c4dfe9Sjg fp->kf_fname, rval); 62483c4dfe9Sjg } 62583c4dfe9Sjg KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname)); 62683c4dfe9Sjg } 62783c4dfe9Sjg 628c3b4ae18SJerry Gilliam rval = VOP_CLOSE(fp->kf_vp, fp->kf_vnflags, 1, 629c3b4ae18SJerry Gilliam (offset_t)0, kcred, NULL); 63083c4dfe9Sjg if (rval != 0) { 63183c4dfe9Sjg if (fp->kf_state == 0) { 63283c4dfe9Sjg nvf_error("%s: close error %d\n", 63383c4dfe9Sjg fp->kf_fname, rval); 63483c4dfe9Sjg } 63583c4dfe9Sjg } else { 63683c4dfe9Sjg if (fp->kf_state == 0) 63783c4dfe9Sjg KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname)); 63883c4dfe9Sjg } 63983c4dfe9Sjg 64083c4dfe9Sjg VN_RELE(fp->kf_vp); 64183c4dfe9Sjg kmem_free(fp, sizeof (kfile_t)); 64283c4dfe9Sjg return (rval); 64383c4dfe9Sjg } 64483c4dfe9Sjg 64583c4dfe9Sjg static int 64683c4dfe9Sjg kfrename(char *oldname, char *newname) 64783c4dfe9Sjg { 64883c4dfe9Sjg int rval; 64983c4dfe9Sjg 65083c4dfe9Sjg ASSERT(modrootloaded); 65183c4dfe9Sjg 65283c4dfe9Sjg KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname)); 65383c4dfe9Sjg 65483c4dfe9Sjg if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) { 65583c4dfe9Sjg KFDEBUG((CE_CONT, "rename %s to %s: %d\n", 65683c4dfe9Sjg oldname, newname, rval)); 65783c4dfe9Sjg } 65883c4dfe9Sjg 65983c4dfe9Sjg return (rval); 66083c4dfe9Sjg } 66183c4dfe9Sjg 66283c4dfe9Sjg int 66383c4dfe9Sjg fwrite_nvlist(char *filename, nvlist_t *nvl) 66483c4dfe9Sjg { 66583c4dfe9Sjg char *buf; 66683c4dfe9Sjg char *nvbuf; 66783c4dfe9Sjg kfile_t *fp; 66883c4dfe9Sjg char 
*newname; 66983c4dfe9Sjg int len, err, err1; 67083c4dfe9Sjg size_t buflen; 67183c4dfe9Sjg ssize_t n; 67283c4dfe9Sjg 67383c4dfe9Sjg ASSERT(modrootloaded); 67483c4dfe9Sjg 67583c4dfe9Sjg nvbuf = NULL; 67683c4dfe9Sjg err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0); 67783c4dfe9Sjg if (err != 0) { 67883c4dfe9Sjg nvf_error("%s: error %d packing nvlist\n", 67983c4dfe9Sjg filename, err); 68083c4dfe9Sjg return (err); 68183c4dfe9Sjg } 68283c4dfe9Sjg 68383c4dfe9Sjg buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP); 68483c4dfe9Sjg bzero(buf, sizeof (nvpf_hdr_t)); 68583c4dfe9Sjg 68683c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC; 68783c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION; 68883c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_size = buflen; 68983c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen); 69083c4dfe9Sjg ((nvpf_hdr_t *)buf)->nvpf_hdr_chksum = 69183c4dfe9Sjg nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t)); 69283c4dfe9Sjg 69383c4dfe9Sjg bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen); 69483c4dfe9Sjg kmem_free(nvbuf, buflen); 69583c4dfe9Sjg buflen += sizeof (nvpf_hdr_t); 69683c4dfe9Sjg 69783c4dfe9Sjg len = strlen(filename) + MAX_SUFFIX_LEN + 2; 69883c4dfe9Sjg newname = kmem_alloc(len, KM_SLEEP); 69983c4dfe9Sjg 70083c4dfe9Sjg 701c3b4ae18SJerry Gilliam (void) sprintf(newname, "%s.%s", filename, NEW_FILENAME_SUFFIX); 70283c4dfe9Sjg 70383c4dfe9Sjg /* 70483c4dfe9Sjg * To make it unlikely we suffer data loss, write 70583c4dfe9Sjg * data to the new temporary file. Once successful 70683c4dfe9Sjg * complete the transaction by renaming the new file 70783c4dfe9Sjg * to replace the previous. 
70883c4dfe9Sjg */ 70983c4dfe9Sjg 71083c4dfe9Sjg if ((err = kfcreate(newname, &fp)) == 0) { 71183c4dfe9Sjg err = kfwrite(fp, buf, buflen, &n); 71283c4dfe9Sjg if (err) { 71383c4dfe9Sjg nvf_error("%s: write error - %d\n", 71483c4dfe9Sjg newname, err); 71583c4dfe9Sjg } else { 71683c4dfe9Sjg if (n != buflen) { 71783c4dfe9Sjg nvf_error( 71883c4dfe9Sjg "%s: partial write %ld of %ld bytes\n", 71983c4dfe9Sjg newname, n, buflen); 72083c4dfe9Sjg nvf_error("%s: filesystem may be full?\n", 72183c4dfe9Sjg newname); 72283c4dfe9Sjg err = EIO; 72383c4dfe9Sjg } 72483c4dfe9Sjg } 72583c4dfe9Sjg if ((err1 = kfclose(fp)) != 0) { 72683c4dfe9Sjg nvf_error("%s: close error\n", newname); 72783c4dfe9Sjg if (err == 0) 72883c4dfe9Sjg err = err1; 72983c4dfe9Sjg } 73083c4dfe9Sjg if (err != 0) { 73183c4dfe9Sjg if (kfremove(newname) != 0) { 73283c4dfe9Sjg nvf_error("%s: remove failed\n", 73383c4dfe9Sjg newname); 73483c4dfe9Sjg } 73583c4dfe9Sjg } 73683c4dfe9Sjg } else { 73783c4dfe9Sjg nvf_error("%s: create failed - %d\n", filename, err); 73883c4dfe9Sjg } 73983c4dfe9Sjg 74083c4dfe9Sjg if (err == 0) { 74183c4dfe9Sjg if ((err = kfrename(newname, filename)) != 0) { 74283c4dfe9Sjg nvf_error("%s: rename from %s failed\n", 74383c4dfe9Sjg newname, filename); 74483c4dfe9Sjg } 74583c4dfe9Sjg } 74683c4dfe9Sjg 74783c4dfe9Sjg kmem_free(newname, len); 74883c4dfe9Sjg kmem_free(buf, buflen); 74983c4dfe9Sjg 75083c4dfe9Sjg return (err); 75183c4dfe9Sjg } 75283c4dfe9Sjg 75383c4dfe9Sjg static int 75483c4dfe9Sjg e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl) 75583c4dfe9Sjg { 75683c4dfe9Sjg int err; 75783c4dfe9Sjg 75883c4dfe9Sjg if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0) 75983c4dfe9Sjg return (DDI_SUCCESS); 76083c4dfe9Sjg else { 76183c4dfe9Sjg if (err == EROFS) 76283c4dfe9Sjg NVF_MARK_READONLY(nvfd); 76383c4dfe9Sjg return (DDI_FAILURE); 76483c4dfe9Sjg } 76583c4dfe9Sjg } 76683c4dfe9Sjg 76783c4dfe9Sjg static void 76883c4dfe9Sjg nvp_list_free(nvfd_t *nvf) 76983c4dfe9Sjg { 77083c4dfe9Sjg 
ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 77183c4dfe9Sjg (nvf->nvf_list_free)((nvf_handle_t)nvf); 77283c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 77383c4dfe9Sjg } 77483c4dfe9Sjg 77583c4dfe9Sjg /* 77683c4dfe9Sjg * Read a file in the nvlist format 77783c4dfe9Sjg * EIO - i/o error during read 77883c4dfe9Sjg * ENOENT - file not found 77983c4dfe9Sjg * EINVAL - file contents corrupted 78083c4dfe9Sjg */ 78183c4dfe9Sjg static int 78283c4dfe9Sjg fread_nvp_list(nvfd_t *nvfd) 78383c4dfe9Sjg { 78483c4dfe9Sjg nvlist_t *nvl; 78583c4dfe9Sjg nvpair_t *nvp; 78683c4dfe9Sjg char *name; 78783c4dfe9Sjg nvlist_t *sublist; 78883c4dfe9Sjg int rval; 78983c4dfe9Sjg int rv; 79083c4dfe9Sjg 79183c4dfe9Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 79283c4dfe9Sjg 79383c4dfe9Sjg rval = fread_nvlist(nvfd->nvf_cache_path, &nvl); 79483c4dfe9Sjg if (rval != 0) 79583c4dfe9Sjg return (rval); 79683c4dfe9Sjg ASSERT(nvl != NULL); 79783c4dfe9Sjg 79883c4dfe9Sjg nvp = NULL; 79983c4dfe9Sjg while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 80083c4dfe9Sjg name = nvpair_name(nvp); 80183c4dfe9Sjg ASSERT(strlen(name) > 0); 80283c4dfe9Sjg 80383c4dfe9Sjg switch (nvpair_type(nvp)) { 80483c4dfe9Sjg case DATA_TYPE_NVLIST: 80583c4dfe9Sjg rval = nvpair_value_nvlist(nvp, &sublist); 80683c4dfe9Sjg if (rval != 0) { 80783c4dfe9Sjg nvf_error( 80883c4dfe9Sjg "nvpair_value_nvlist error %s %d\n", 80983c4dfe9Sjg name, rval); 81083c4dfe9Sjg goto error; 81183c4dfe9Sjg } 81283c4dfe9Sjg 81383c4dfe9Sjg /* 81483c4dfe9Sjg * unpack nvlist for this device and 81583c4dfe9Sjg * add elements to data list. 
81683c4dfe9Sjg */ 81783c4dfe9Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 81883c4dfe9Sjg rv = (nvfd->nvf_unpack_nvlist) 81983c4dfe9Sjg ((nvf_handle_t)nvfd, sublist, name); 82083c4dfe9Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 82183c4dfe9Sjg if (rv != 0) { 82283c4dfe9Sjg nvf_error( 82383c4dfe9Sjg "%s: %s invalid list element\n", 82483c4dfe9Sjg nvfd->nvf_cache_path, name); 82583c4dfe9Sjg rval = EINVAL; 82683c4dfe9Sjg goto error; 82783c4dfe9Sjg } 82883c4dfe9Sjg break; 82983c4dfe9Sjg 83083c4dfe9Sjg default: 83183c4dfe9Sjg nvf_error("%s: %s unsupported data type %d\n", 83283c4dfe9Sjg nvfd->nvf_cache_path, name, nvpair_type(nvp)); 83383c4dfe9Sjg rval = EINVAL; 83483c4dfe9Sjg goto error; 83583c4dfe9Sjg } 83683c4dfe9Sjg } 83783c4dfe9Sjg 83883c4dfe9Sjg nvlist_free(nvl); 83983c4dfe9Sjg 84083c4dfe9Sjg return (0); 84183c4dfe9Sjg 84283c4dfe9Sjg error: 84383c4dfe9Sjg nvlist_free(nvl); 84483c4dfe9Sjg nvp_list_free(nvfd); 84583c4dfe9Sjg return (rval); 84683c4dfe9Sjg } 84783c4dfe9Sjg 84883c4dfe9Sjg /* nvf_read_file: load the backing store for nvf_handle by calling fread_nvp_list(). The caller must hold nvf_lock as writer (asserted). Returns 0 without reading when kfio_disable_read is set; otherwise returns the result of fread_nvp_list(). On error, ENOENT sets NVF_F_CREATE_MSG in nvf_flags and every other error (EIO, EINVAL, anything else) sets NVF_F_REBUILD_MSG, and a message naming nvf_cache_path is logged. */ 84983c4dfe9Sjg int 85083c4dfe9Sjg nvf_read_file(nvf_handle_t nvf_handle) 85183c4dfe9Sjg { 85283c4dfe9Sjg nvfd_t *nvfd = (nvfd_t *)nvf_handle; 85383c4dfe9Sjg int rval; 85483c4dfe9Sjg 85583c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 85683c4dfe9Sjg 85783c4dfe9Sjg if (kfio_disable_read) 85883c4dfe9Sjg return (0); 85983c4dfe9Sjg 86083c4dfe9Sjg KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path)); 86183c4dfe9Sjg 86283c4dfe9Sjg rval = fread_nvp_list(nvfd); 86383c4dfe9Sjg if (rval) { 86483c4dfe9Sjg switch (rval) { 86583c4dfe9Sjg case EIO: 86683c4dfe9Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 86783c4dfe9Sjg cmn_err(CE_WARN, "%s: I/O error", 86883c4dfe9Sjg nvfd->nvf_cache_path); 86983c4dfe9Sjg break; 87083c4dfe9Sjg case ENOENT: 87183c4dfe9Sjg nvfd->nvf_flags |= NVF_F_CREATE_MSG; 87283c4dfe9Sjg nvf_error("%s: not found\n", 87383c4dfe9Sjg nvfd->nvf_cache_path); 87483c4dfe9Sjg break; 87583c4dfe9Sjg case EINVAL: 87683c4dfe9Sjg default: 87783c4dfe9Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 
87883c4dfe9Sjg cmn_err(CE_WARN, "%s: data file corrupted", 87983c4dfe9Sjg nvfd->nvf_cache_path); 88083c4dfe9Sjg break; 88183c4dfe9Sjg } 88283c4dfe9Sjg } 88383c4dfe9Sjg return (rval); 88483c4dfe9Sjg } 88583c4dfe9Sjg /* nvf_write_is_complete: invoke the nvf_write_complete callback of fd, passing fd as the handle, if a callback is set; otherwise do nothing. */ 88683c4dfe9Sjg static void 88783c4dfe9Sjg nvf_write_is_complete(nvfd_t *fd) 88883c4dfe9Sjg { 88983c4dfe9Sjg if (fd->nvf_write_complete) { 89083c4dfe9Sjg (fd->nvf_write_complete)((nvf_handle_t)fd); 89183c4dfe9Sjg } 89283c4dfe9Sjg } 89383c4dfe9Sjg /* nvpflush_timeout: callback armed through timeout(); arg is unused. Under nvpflush_lock it computes the ticks remaining until nvpticks. If more than 4 ticks remain it marks the timer busy and re-arms itself for the remaining ticks; otherwise it sets do_nvpflush, signals nvpflush_cv, and clears nvpflush_id and nvpflush_timer_busy. NOTE(review): in the re-arm path nvpflush_id is stored after nvpflush_lock has been dropped. */ 89483c4dfe9Sjg /*ARGSUSED*/ 89583c4dfe9Sjg static void 89683c4dfe9Sjg nvpflush_timeout(void *arg) 89783c4dfe9Sjg { 89883c4dfe9Sjg clock_t nticks; 89983c4dfe9Sjg 90083c4dfe9Sjg mutex_enter(&nvpflush_lock); 90183c4dfe9Sjg nticks = nvpticks - ddi_get_lbolt(); 90283c4dfe9Sjg if (nticks > 4) { 90383c4dfe9Sjg nvpflush_timer_busy = 1; 90483c4dfe9Sjg mutex_exit(&nvpflush_lock); 90583c4dfe9Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks); 90683c4dfe9Sjg } else { 90783c4dfe9Sjg do_nvpflush = 1; 90883c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n")); 90983c4dfe9Sjg cv_signal(&nvpflush_cv); 91083c4dfe9Sjg nvpflush_id = 0; 91183c4dfe9Sjg nvpflush_timer_busy = 0; 91283c4dfe9Sjg mutex_exit(&nvpflush_lock); 91383c4dfe9Sjg } 91483c4dfe9Sjg } 91583c4dfe9Sjg 91683c4dfe9Sjg /* 91783c4dfe9Sjg * After marking a list as dirty, wake the nvpflush daemon 91883c4dfe9Sjg * to perform the update. 91983c4dfe9Sjg */ /* Details: returns without doing anything unless i_ddi_io_initialized() is true and sys_shutdown is clear. Creates the nvpflush_daemon thread when nvpflush_daemon_active is 0, dropping nvpflush_lock around thread_create(). Then sets nvpticks to the current lbolt plus nvpflush_delay * TICKS_PER_SECOND and, if no timer is marked busy, arms nvpflush_timeout for that delay plus 4 ticks. */ 92083c4dfe9Sjg void 92183c4dfe9Sjg nvf_wake_daemon(void) 92283c4dfe9Sjg { 92383c4dfe9Sjg clock_t nticks; 92483c4dfe9Sjg 92583c4dfe9Sjg /* 926c3b4ae18SJerry Gilliam * If the system isn't up yet or is shutting down, 92783c4dfe9Sjg * don't even think about starting a flush. 
92883c4dfe9Sjg */ 929c3b4ae18SJerry Gilliam if (!i_ddi_io_initialized() || sys_shutdown) 93083c4dfe9Sjg return; 93183c4dfe9Sjg 93283c4dfe9Sjg mutex_enter(&nvpflush_lock); 93383c4dfe9Sjg 93483c4dfe9Sjg if (nvpflush_daemon_active == 0) { 93583c4dfe9Sjg nvpflush_daemon_active = 1; 93683c4dfe9Sjg mutex_exit(&nvpflush_lock); 93783c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n")); 93883c4dfe9Sjg nvpflush_thr_id = thread_create(NULL, 0, 93983c4dfe9Sjg (void (*)())nvpflush_daemon, 94083c4dfe9Sjg NULL, 0, &p0, TS_RUN, minclsyspri); 94183c4dfe9Sjg mutex_enter(&nvpflush_lock); 94283c4dfe9Sjg } 94383c4dfe9Sjg 94483c4dfe9Sjg nticks = nvpflush_delay * TICKS_PER_SECOND; 94583c4dfe9Sjg nvpticks = ddi_get_lbolt() + nticks; 94683c4dfe9Sjg if (nvpflush_timer_busy == 0) { 94783c4dfe9Sjg nvpflush_timer_busy = 1; 94883c4dfe9Sjg mutex_exit(&nvpflush_lock); 94983c4dfe9Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4); 95083c4dfe9Sjg } else 95183c4dfe9Sjg mutex_exit(&nvpflush_lock); 95283c4dfe9Sjg } 95383c4dfe9Sjg /* nvpflush_one: write one cache file to its backing store. Returns DDI_SUCCESS, after clearing the dirty flag, when the file is not dirty or is read-only, or when kfio_disable_write or sys_shutdown is set. Returns DDI_FAILURE when rw_tryupgrade() fails, when the nvf_pack_list callback does not return DDI_SUCCESS, when e_fwrite_nvlist() returns DDI_FAILURE for a file that is not read-only, or when the file is dirty again after an otherwise successful write. The list is packed with nvf_lock held as writer; the lock is released around the write itself. */ 95483c4dfe9Sjg static int 95583c4dfe9Sjg nvpflush_one(nvfd_t *nvfd) 95683c4dfe9Sjg { 95783c4dfe9Sjg int rval = DDI_SUCCESS; 95883c4dfe9Sjg nvlist_t *nvl; 95983c4dfe9Sjg 96083c4dfe9Sjg rw_enter(&nvfd->nvf_lock, RW_READER); 96183c4dfe9Sjg 96283c4dfe9Sjg ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0); 96383c4dfe9Sjg 96483c4dfe9Sjg if (!NVF_IS_DIRTY(nvfd) || 965c3b4ae18SJerry Gilliam NVF_IS_READONLY(nvfd) || kfio_disable_write || sys_shutdown) { 96683c4dfe9Sjg NVF_CLEAR_DIRTY(nvfd); 96783c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 96883c4dfe9Sjg return (DDI_SUCCESS); 96983c4dfe9Sjg } 97083c4dfe9Sjg 97183c4dfe9Sjg if (rw_tryupgrade(&nvfd->nvf_lock) == 0) { 97283c4dfe9Sjg nvf_error("nvpflush: " 97383c4dfe9Sjg "%s rw upgrade failed\n", nvfd->nvf_cache_path); 97483c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 97583c4dfe9Sjg return (DDI_FAILURE); 97683c4dfe9Sjg } 97783c4dfe9Sjg if (((nvfd->nvf_pack_list) 97883c4dfe9Sjg ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) { 
97983c4dfe9Sjg nvf_error("nvpflush: " 98083c4dfe9Sjg "%s nvlist construction failed\n", nvfd->nvf_cache_path); 98183c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 98283c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 98383c4dfe9Sjg return (DDI_FAILURE); 98483c4dfe9Sjg } 98583c4dfe9Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 98683c4dfe9Sjg 98783c4dfe9Sjg NVF_CLEAR_DIRTY(nvfd); 98883c4dfe9Sjg nvfd->nvf_flags |= NVF_F_FLUSHING; 98983c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 99083c4dfe9Sjg /* nvf_lock is not held across the write below; NVF_F_FLUSHING stays set until the lock is retaken as writer afterwards. */ 99183c4dfe9Sjg rval = e_fwrite_nvlist(nvfd, nvl); 99283c4dfe9Sjg nvlist_free(nvl); 99383c4dfe9Sjg 99483c4dfe9Sjg rw_enter(&nvfd->nvf_lock, RW_WRITER); 99583c4dfe9Sjg nvfd->nvf_flags &= ~NVF_F_FLUSHING; 99683c4dfe9Sjg if (rval == DDI_FAILURE) { 99783c4dfe9Sjg if (NVF_IS_READONLY(nvfd)) { 99883c4dfe9Sjg rval = DDI_SUCCESS; 99983c4dfe9Sjg nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY); 100083c4dfe9Sjg } else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) { 100183c4dfe9Sjg cmn_err(CE_CONT, 1002c3b4ae18SJerry Gilliam "%s: update failed\n", nvfd->nvf_cache_path); 100383c4dfe9Sjg nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY; 100483c4dfe9Sjg } 100583c4dfe9Sjg } else { 100683c4dfe9Sjg if (nvfd->nvf_flags & NVF_F_CREATE_MSG) { 100783c4dfe9Sjg cmn_err(CE_CONT, 100883c4dfe9Sjg "!Creating %s\n", nvfd->nvf_cache_path); 100983c4dfe9Sjg nvfd->nvf_flags &= ~NVF_F_CREATE_MSG; 101083c4dfe9Sjg } 101183c4dfe9Sjg if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) { 101283c4dfe9Sjg cmn_err(CE_CONT, 101383c4dfe9Sjg "!Rebuilding %s\n", nvfd->nvf_cache_path); 101483c4dfe9Sjg nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG; 101583c4dfe9Sjg } 101683c4dfe9Sjg if (nvfd->nvf_flags & NVF_F_ERROR) { 101783c4dfe9Sjg cmn_err(CE_CONT, 101883c4dfe9Sjg "%s: update now ok\n", nvfd->nvf_cache_path); 101983c4dfe9Sjg nvfd->nvf_flags &= ~NVF_F_ERROR; 102083c4dfe9Sjg } 102183c4dfe9Sjg /* 102283c4dfe9Sjg * The file may need to be flushed again if the cached 102383c4dfe9Sjg * data was touched while writing the earlier contents. 
102483c4dfe9Sjg */ 102583c4dfe9Sjg if (NVF_IS_DIRTY(nvfd)) 102683c4dfe9Sjg rval = DDI_FAILURE; 102783c4dfe9Sjg } 102883c4dfe9Sjg 102983c4dfe9Sjg rw_exit(&nvfd->nvf_lock); 103083c4dfe9Sjg return (rval); 103183c4dfe9Sjg } 103283c4dfe9Sjg 103383c4dfe9Sjg /* nvpflush_daemon: body of the flush daemon thread. It waits on nvpflush_cv until do_nvpflush is set, using cv_reltimedwait() with a timeout of nvpdaemon_idle_time * TICKS_PER_SECOND. The thread exits when that wait returns -1 with no flush requested and no timer busy, or when sys_shutdown is set. On each wakeup it moves dirty files from nvf_cache_files to nvf_dirty_files under nvf_cache_mutex, calls nvpflush_one() on each file still dirty, moves files that are now clean back to nvf_cache_files (calling nvf_write_is_complete() for those it flushed successfully), and calls nvf_wake_daemon() if any flush failed or any file is dirty again. */ 103483c4dfe9Sjg static void 103583c4dfe9Sjg nvpflush_daemon(void) 103683c4dfe9Sjg { 103783c4dfe9Sjg callb_cpr_t cprinfo; 103883c4dfe9Sjg nvfd_t *nvfdp, *nextfdp; 103983c4dfe9Sjg clock_t clk; 104083c4dfe9Sjg int rval; 104183c4dfe9Sjg int want_wakeup; 104283c4dfe9Sjg int is_now_clean; 104383c4dfe9Sjg 104483c4dfe9Sjg ASSERT(modrootloaded); 104583c4dfe9Sjg 104683c4dfe9Sjg nvpflush_thread = curthread; 104783c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n")); 104883c4dfe9Sjg 104983c4dfe9Sjg CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp"); 105083c4dfe9Sjg mutex_enter(&nvpflush_lock); 105183c4dfe9Sjg for (;;) { 105283c4dfe9Sjg CALLB_CPR_SAFE_BEGIN(&cprinfo); 105383c4dfe9Sjg while (do_nvpflush == 0) { 1054*d3d50737SRafael Vanoni clk = cv_reltimedwait(&nvpflush_cv, &nvpflush_lock, 1055*d3d50737SRafael Vanoni (nvpdaemon_idle_time * TICKS_PER_SECOND), 1056*d3d50737SRafael Vanoni TR_CLOCK_TICK); 1057c3b4ae18SJerry Gilliam if ((clk == -1 && do_nvpflush == 0 && 1058c3b4ae18SJerry Gilliam nvpflush_timer_busy == 0) || sys_shutdown) { 105983c4dfe9Sjg /* 106083c4dfe9Sjg * Note that CALLB_CPR_EXIT calls mutex_exit() 106183c4dfe9Sjg * on the lock passed in to CALLB_CPR_INIT, 106283c4dfe9Sjg * so the lock must be held when invoking it. 
106383c4dfe9Sjg */ 106483c4dfe9Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 106583c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n")); 106683c4dfe9Sjg ASSERT(mutex_owned(&nvpflush_lock)); 106783c4dfe9Sjg nvpflush_thr_id = NULL; 106883c4dfe9Sjg nvpflush_daemon_active = 0; 106983c4dfe9Sjg CALLB_CPR_EXIT(&cprinfo); 107083c4dfe9Sjg thread_exit(); 107183c4dfe9Sjg } 107283c4dfe9Sjg } 107383c4dfe9Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 107483c4dfe9Sjg 107583c4dfe9Sjg nvpbusy = 1; 107683c4dfe9Sjg want_wakeup = 0; 107783c4dfe9Sjg do_nvpflush = 0; 107883c4dfe9Sjg mutex_exit(&nvpflush_lock); 107983c4dfe9Sjg 108083c4dfe9Sjg /* 108183c4dfe9Sjg * Try flushing what's dirty, reschedule if there's 108283c4dfe9Sjg * a failure or data gets marked as dirty again. 108383c4dfe9Sjg * First move each file marked dirty to the dirty 108483c4dfe9Sjg * list to avoid locking the list across the write. 108583c4dfe9Sjg */ 108683c4dfe9Sjg mutex_enter(&nvf_cache_mutex); 108783c4dfe9Sjg for (nvfdp = list_head(&nvf_cache_files); 108883c4dfe9Sjg nvfdp; nvfdp = nextfdp) { 108983c4dfe9Sjg nextfdp = list_next(&nvf_cache_files, nvfdp); 109083c4dfe9Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 109183c4dfe9Sjg if (NVF_IS_DIRTY(nvfdp)) { 109283c4dfe9Sjg list_remove(&nvf_cache_files, nvfdp); 109383c4dfe9Sjg list_insert_tail(&nvf_dirty_files, nvfdp); 109483c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 109583c4dfe9Sjg } else { 109683c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, 109783c4dfe9Sjg "nvpdaemon: not dirty %s\n", 109883c4dfe9Sjg nvfdp->nvf_cache_path)); 109983c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 110083c4dfe9Sjg } 110183c4dfe9Sjg } 110283c4dfe9Sjg mutex_exit(&nvf_cache_mutex); 110383c4dfe9Sjg 110483c4dfe9Sjg /* 110583c4dfe9Sjg * Now go through the dirty list 110683c4dfe9Sjg */ 110783c4dfe9Sjg for (nvfdp = list_head(&nvf_dirty_files); 110883c4dfe9Sjg nvfdp; nvfdp = nextfdp) { 110983c4dfe9Sjg nextfdp = list_next(&nvf_dirty_files, nvfdp); 111083c4dfe9Sjg 111183c4dfe9Sjg is_now_clean = 0; 111283c4dfe9Sjg 
rw_enter(&nvfdp->nvf_lock, RW_READER); 111383c4dfe9Sjg if (NVF_IS_DIRTY(nvfdp)) { 111483c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, 111583c4dfe9Sjg "nvpdaemon: flush %s\n", 111683c4dfe9Sjg nvfdp->nvf_cache_path)); 111783c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 111883c4dfe9Sjg rval = nvpflush_one(nvfdp); 111983c4dfe9Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 112083c4dfe9Sjg if (rval != DDI_SUCCESS || 112183c4dfe9Sjg NVF_IS_DIRTY(nvfdp)) { 112283c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 112383c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, 112483c4dfe9Sjg "nvpdaemon: %s dirty again\n", 112583c4dfe9Sjg nvfdp->nvf_cache_path)); 112683c4dfe9Sjg want_wakeup = 1; 112783c4dfe9Sjg } else { 112883c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 112983c4dfe9Sjg nvf_write_is_complete(nvfdp); 113083c4dfe9Sjg is_now_clean = 1; 113183c4dfe9Sjg } 113283c4dfe9Sjg } else { 113383c4dfe9Sjg NVPDAEMON_DEBUG((CE_CONT, 113483c4dfe9Sjg "nvpdaemon: not dirty %s\n", 113583c4dfe9Sjg nvfdp->nvf_cache_path)); 113683c4dfe9Sjg rw_exit(&nvfdp->nvf_lock); 113783c4dfe9Sjg is_now_clean = 1; 113883c4dfe9Sjg } 113983c4dfe9Sjg 114083c4dfe9Sjg if (is_now_clean) { 114183c4dfe9Sjg mutex_enter(&nvf_cache_mutex); 114283c4dfe9Sjg list_remove(&nvf_dirty_files, nvfdp); 114383c4dfe9Sjg list_insert_tail(&nvf_cache_files, 114483c4dfe9Sjg nvfdp); 114583c4dfe9Sjg mutex_exit(&nvf_cache_mutex); 114683c4dfe9Sjg } 114783c4dfe9Sjg } 114883c4dfe9Sjg /* At least one file failed to flush or was dirtied again: schedule another flush pass. */ 114983c4dfe9Sjg if (want_wakeup) 115083c4dfe9Sjg nvf_wake_daemon(); 115183c4dfe9Sjg 115283c4dfe9Sjg mutex_enter(&nvpflush_lock); 115383c4dfe9Sjg nvpbusy = 0; 115483c4dfe9Sjg } 115583c4dfe9Sjg } 1156