17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ddfcde86Srsb * Common Development and Distribution License (the "License"). 6ddfcde86Srsb * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*19397407SSherry Moore * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate #include <sys/debug.h> 287c478bd9Sstevel@tonic-gate #include <sys/types.h> 297c478bd9Sstevel@tonic-gate #include <sys/file.h> 307c478bd9Sstevel@tonic-gate #include <sys/errno.h> 317c478bd9Sstevel@tonic-gate #include <sys/uio.h> 327c478bd9Sstevel@tonic-gate #include <sys/open.h> 337c478bd9Sstevel@tonic-gate #include <sys/cred.h> 347c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 357c478bd9Sstevel@tonic-gate #include <sys/conf.h> 367c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 377c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 387c478bd9Sstevel@tonic-gate #include <sys/disp.h> 397c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 407c478bd9Sstevel@tonic-gate #include <sys/filio.h> 417c478bd9Sstevel@tonic-gate #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */ 427c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 437c478bd9Sstevel@tonic-gate 447c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 457c478bd9Sstevel@tonic-gate #include <sys/devops.h> 467c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 47184cd04cScth #include <sys/esunddi.h> 487c478bd9Sstevel@tonic-gate #include <sys/priv_names.h> 497c478bd9Sstevel@tonic-gate 507c478bd9Sstevel@tonic-gate #include <sys/fssnap.h> 517c478bd9Sstevel@tonic-gate #include <sys/fssnap_if.h> 527c478bd9Sstevel@tonic-gate 537c478bd9Sstevel@tonic-gate /* 547c478bd9Sstevel@tonic-gate * This module implements the file system snapshot code, which provides a 557c478bd9Sstevel@tonic-gate * point-in-time image of a file system for the purposes of online backup. 567c478bd9Sstevel@tonic-gate * There are essentially two parts to this project: the driver half and the 577c478bd9Sstevel@tonic-gate * file system half. The driver half is a pseudo device driver called 587c478bd9Sstevel@tonic-gate * "fssnap" that represents the snapshot. 
Each snapshot is assigned a
 * number that corresponds to the minor number of the device, and a control
 * device with a high minor number is used to initiate snapshot creation and
 * deletion. For all practical purposes the driver half acts like a
 * read-only disk device whose contents are exactly the same as the master
 * file system at the time the snapshot was created.
 *
 * The file system half provides interfaces necessary for performing the
 * file system dependent operations required to create and delete snapshots
 * and a special driver strategy routine that must always be used by the file
 * system for snapshots to work correctly.
 *
 * When a snapshot is to be created, the user utility will send an ioctl to
 * the control device of the driver half specifying the file system to be
 * snapshotted, the file descriptor of a backing-store file which is used to
 * hold old data before it is overwritten, and other snapshot parameters.
 * This ioctl is passed on to the file system specified in the original
 * ioctl request. The file system is expected to be able to flush
 * everything out to make the file system consistent and lock it to ensure
 * no changes occur while the snapshot is being created. It then calls
 * fssnap_create() to create state for a new snapshot, from which an opaque
 * handle is returned with the snapshot locked.
Next, the file system must
 * populate the "candidate bitmap", which tells the snapshot code which
 * "chunks" should be considered for copy-on-write (a chunk is the unit of
 * granularity used for copy-on-write, which is independent of the device
 * and file system block sizes). This is typically done by scanning the
 * file system allocation bitmaps to determine which chunks contain
 * allocated blocks in the file system at the time the snapshot was created.
 * If a chunk has no allocated blocks, it does not need to be copied before
 * being written to. Once the candidate bitmap is populated with
 * fssnap_set_candidate(), the file system calls fssnap_create_done() to
 * complete the snapshot creation and unlock the snapshot. The file system
 * may now be unlocked and modifications to it resumed.
 *
 * Once a snapshot is created, the file system must perform all writes
 * through a special strategy routine, fssnap_strategy(). This strategy
 * routine determines whether the chunks contained by the write must be
 * copied before being overwritten by consulting the candidate bitmap
 * described above, and the "hastrans bitmap" which tells it whether the chunk
 * has been copied already or not. If the chunk is a candidate but has not
 * been copied, it reads the old data in and adds it to a queue. The
 * old data can then be overwritten with the new data.
An asynchronous
 * task queue is dispatched for each old chunk read in, which writes the old
 * data to the backing file specified at snapshot creation time. The
 * backing file is a sparse file the same size as the file system that
 * contains the old data at the offset that data originally had in the
 * file system. If the queue containing in-memory chunks gets too large,
 * writes to the file system may be throttled by a semaphore until the
 * task queues have a chance to push some of the chunks to the backing file.
 *
 * With the candidate bitmap, the hastrans bitmap, the data on the master
 * file system, and the old data in memory and in the backing file, the
 * snapshot pseudo-driver can piece together the original file system
 * information to satisfy read requests. If the requested chunk is not a
 * candidate, it returns a zeroed buffer. If the chunk is a candidate but
 * has not been copied, it reads it from the master file system. If it is a
 * candidate and has been copied, it either copies the data from the
 * in-memory queue or it reads it in from the backing file. The result is
 * a replication of the original file system that can be backed up, mounted,
 * or manipulated by other file system utilities that work on a read-only
 * device.
*
 * This module is divided into three roughly logical sections:
 *
 *     - The snapshot driver, which is a character/block driver
 *       representing the snapshot itself. These routines are
 *       prefixed with "snap_".
 *
 *     - The library routines that are defined in fssnap_if.h that
 *       are used by file systems that use this snapshot implementation.
 *       These functions are prefixed with "fssnap_" and are called through
 *       a function vector from the file system.
 *
 *     - The helper routines used by the snapshot driver and the fssnap
 *       library routines for managing the translation table and other
 *       useful functions. These routines are all static and are
 *       prefixed with either "fssnap_" or "transtbl_" if they
 *       are specifically used for translation table activities.
1367c478bd9Sstevel@tonic-gate */ 1377c478bd9Sstevel@tonic-gate 1387c478bd9Sstevel@tonic-gate static dev_info_t *fssnap_dip = NULL; 1397c478bd9Sstevel@tonic-gate static struct snapshot_id *snapshot = NULL; 1407c478bd9Sstevel@tonic-gate static struct snapshot_id snap_ctl; 1417c478bd9Sstevel@tonic-gate static int num_snapshots = 0; 1427c478bd9Sstevel@tonic-gate static kmutex_t snapshot_mutex; 1437c478bd9Sstevel@tonic-gate static char snapname[] = SNAP_NAME; 1447c478bd9Sstevel@tonic-gate 1457c478bd9Sstevel@tonic-gate /* "tunable" parameters */ 1467c478bd9Sstevel@tonic-gate static int fssnap_taskq_nthreads = FSSNAP_TASKQ_THREADS; 1477c478bd9Sstevel@tonic-gate static uint_t fssnap_max_mem_chunks = FSSNAP_MAX_MEM_CHUNKS; 1487c478bd9Sstevel@tonic-gate static int fssnap_taskq_maxtasks = FSSNAP_TASKQ_MAXTASKS; 1497c478bd9Sstevel@tonic-gate 1507c478bd9Sstevel@tonic-gate /* static function prototypes */ 1517c478bd9Sstevel@tonic-gate 1527c478bd9Sstevel@tonic-gate /* snapshot driver */ 1537c478bd9Sstevel@tonic-gate static int snap_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 1547c478bd9Sstevel@tonic-gate static int snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 1557c478bd9Sstevel@tonic-gate static int snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 1567c478bd9Sstevel@tonic-gate static int snap_open(dev_t *devp, int flag, int otyp, cred_t *cred); 1577c478bd9Sstevel@tonic-gate static int snap_close(dev_t dev, int flag, int otyp, cred_t *cred); 1587c478bd9Sstevel@tonic-gate static int snap_strategy(struct buf *bp); 1597c478bd9Sstevel@tonic-gate static int snap_read(dev_t dev, struct uio *uiop, cred_t *credp); 1607c478bd9Sstevel@tonic-gate static int snap_print(dev_t dev, char *str); 1617c478bd9Sstevel@tonic-gate static int snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1627c478bd9Sstevel@tonic-gate cred_t *credp, int *rvalp); 1637c478bd9Sstevel@tonic-gate static int snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 
1647c478bd9Sstevel@tonic-gate int flags, char *name, caddr_t valuep, int *lengthp); 1657c478bd9Sstevel@tonic-gate static int snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, 1667c478bd9Sstevel@tonic-gate int offset, int len, char *buffer); 1677c478bd9Sstevel@tonic-gate 1687c478bd9Sstevel@tonic-gate 1697c478bd9Sstevel@tonic-gate /* fssnap interface implementations (see fssnap_if.h) */ 1707c478bd9Sstevel@tonic-gate static void fssnap_strategy_impl(void *, struct buf *); 1717c478bd9Sstevel@tonic-gate static void *fssnap_create_impl(chunknumber_t, uint_t, u_offset_t, 1727c478bd9Sstevel@tonic-gate struct vnode *, int, struct vnode **, char *, u_offset_t); 1737c478bd9Sstevel@tonic-gate static void fssnap_set_candidate_impl(void *, chunknumber_t); 1747c478bd9Sstevel@tonic-gate static int fssnap_is_candidate_impl(void *, u_offset_t); 1757c478bd9Sstevel@tonic-gate static int fssnap_create_done_impl(void *); 1767c478bd9Sstevel@tonic-gate static int fssnap_delete_impl(void *); 1777c478bd9Sstevel@tonic-gate 1787c478bd9Sstevel@tonic-gate /* fssnap interface support routines */ 1797c478bd9Sstevel@tonic-gate static int fssnap_translate(struct snapshot_id **, struct buf *); 1807c478bd9Sstevel@tonic-gate static void fssnap_write_taskq(void *); 1817c478bd9Sstevel@tonic-gate static void fssnap_create_kstats(snapshot_id_t *, int, const char *, 1827c478bd9Sstevel@tonic-gate const char *); 1837c478bd9Sstevel@tonic-gate static int fssnap_update_kstat_num(kstat_t *, int); 1847c478bd9Sstevel@tonic-gate static void fssnap_delete_kstats(struct cow_info *); 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate /* translation table prototypes */ 1877c478bd9Sstevel@tonic-gate static cow_map_node_t *transtbl_add(cow_map_t *, chunknumber_t, caddr_t); 1887c478bd9Sstevel@tonic-gate static cow_map_node_t *transtbl_get(cow_map_t *, chunknumber_t); 1897c478bd9Sstevel@tonic-gate static void transtbl_delete(cow_map_t *, cow_map_node_t *); 1907c478bd9Sstevel@tonic-gate static void 
transtbl_free(cow_map_t *); 1917c478bd9Sstevel@tonic-gate 1927c478bd9Sstevel@tonic-gate static kstat_t *fssnap_highwater_kstat; 1937c478bd9Sstevel@tonic-gate 1947c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 1957c478bd9Sstevel@tonic-gate 1967c478bd9Sstevel@tonic-gate /* Device and Module Structures */ 1977c478bd9Sstevel@tonic-gate 1987c478bd9Sstevel@tonic-gate static struct cb_ops snap_cb_ops = { 1997c478bd9Sstevel@tonic-gate snap_open, 2007c478bd9Sstevel@tonic-gate snap_close, 2017c478bd9Sstevel@tonic-gate snap_strategy, 2027c478bd9Sstevel@tonic-gate snap_print, 2037c478bd9Sstevel@tonic-gate nodev, /* no snap_dump */ 2047c478bd9Sstevel@tonic-gate snap_read, 2057c478bd9Sstevel@tonic-gate nodev, /* no snap_write */ 2067c478bd9Sstevel@tonic-gate snap_ioctl, 2077c478bd9Sstevel@tonic-gate nodev, /* no snap_devmap */ 2087c478bd9Sstevel@tonic-gate nodev, /* no snap_mmap */ 2097c478bd9Sstevel@tonic-gate nodev, /* no snap_segmap */ 2107c478bd9Sstevel@tonic-gate nochpoll, 2117c478bd9Sstevel@tonic-gate snap_prop_op, 2127c478bd9Sstevel@tonic-gate NULL, /* streamtab */ 2137c478bd9Sstevel@tonic-gate D_64BIT | D_NEW | D_MP, /* driver compatibility */ 2147c478bd9Sstevel@tonic-gate CB_REV, 2157c478bd9Sstevel@tonic-gate nodev, /* async I/O read entry point */ 2167c478bd9Sstevel@tonic-gate nodev /* async I/O write entry point */ 2177c478bd9Sstevel@tonic-gate }; 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate static struct dev_ops snap_ops = { 2207c478bd9Sstevel@tonic-gate DEVO_REV, 2217c478bd9Sstevel@tonic-gate 0, /* ref count */ 2227c478bd9Sstevel@tonic-gate snap_getinfo, 2237c478bd9Sstevel@tonic-gate nulldev, /* snap_identify obsolete */ 2247c478bd9Sstevel@tonic-gate nulldev, /* no snap_probe */ 2257c478bd9Sstevel@tonic-gate snap_attach, 2267c478bd9Sstevel@tonic-gate snap_detach, 2277c478bd9Sstevel@tonic-gate nodev, /* no snap_reset */ 2287c478bd9Sstevel@tonic-gate &snap_cb_ops, 2297c478bd9Sstevel@tonic-gate 
(struct bus_ops *)NULL, 230*19397407SSherry Moore nulldev, /* no snap_power() */ 231*19397407SSherry Moore ddi_quiesce_not_needed, /* quiesce */ 2327c478bd9Sstevel@tonic-gate }; 2337c478bd9Sstevel@tonic-gate 2347c478bd9Sstevel@tonic-gate extern struct mod_ops mod_driverops; 2357c478bd9Sstevel@tonic-gate 2367c478bd9Sstevel@tonic-gate static struct modldrv md = { 2377c478bd9Sstevel@tonic-gate &mod_driverops, /* Type of module. This is a driver */ 238*19397407SSherry Moore "snapshot driver", /* Name of the module */ 2397c478bd9Sstevel@tonic-gate &snap_ops, 2407c478bd9Sstevel@tonic-gate }; 2417c478bd9Sstevel@tonic-gate 2427c478bd9Sstevel@tonic-gate static struct modlinkage ml = { 2437c478bd9Sstevel@tonic-gate MODREV_1, 2447c478bd9Sstevel@tonic-gate &md, 2457c478bd9Sstevel@tonic-gate NULL 2467c478bd9Sstevel@tonic-gate }; 2477c478bd9Sstevel@tonic-gate 2487c478bd9Sstevel@tonic-gate static void *statep; 2497c478bd9Sstevel@tonic-gate 2507c478bd9Sstevel@tonic-gate int 2517c478bd9Sstevel@tonic-gate _init(void) 2527c478bd9Sstevel@tonic-gate { 2537c478bd9Sstevel@tonic-gate int error; 2547c478bd9Sstevel@tonic-gate kstat_t *ksp; 2557c478bd9Sstevel@tonic-gate kstat_named_t *ksdata; 2567c478bd9Sstevel@tonic-gate 2577c478bd9Sstevel@tonic-gate error = ddi_soft_state_init(&statep, sizeof (struct snapshot_id *), 1); 2587c478bd9Sstevel@tonic-gate if (error) { 2597c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to init ddi_soft_state."); 2607c478bd9Sstevel@tonic-gate return (error); 2617c478bd9Sstevel@tonic-gate } 2627c478bd9Sstevel@tonic-gate 2637c478bd9Sstevel@tonic-gate error = mod_install(&ml); 2647c478bd9Sstevel@tonic-gate 2657c478bd9Sstevel@tonic-gate if (error) { 2667c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to mod_install."); 2677c478bd9Sstevel@tonic-gate ddi_soft_state_fini(&statep); 2687c478bd9Sstevel@tonic-gate return (error); 2697c478bd9Sstevel@tonic-gate } 2707c478bd9Sstevel@tonic-gate 2717c478bd9Sstevel@tonic-gate /* 2727c478bd9Sstevel@tonic-gate * 
Fill in the snapshot operations vector for file systems 2737c478bd9Sstevel@tonic-gate * (defined in fssnap_if.c) 2747c478bd9Sstevel@tonic-gate */ 2757c478bd9Sstevel@tonic-gate 2767c478bd9Sstevel@tonic-gate snapops.fssnap_create = fssnap_create_impl; 2777c478bd9Sstevel@tonic-gate snapops.fssnap_set_candidate = fssnap_set_candidate_impl; 2787c478bd9Sstevel@tonic-gate snapops.fssnap_is_candidate = fssnap_is_candidate_impl; 2797c478bd9Sstevel@tonic-gate snapops.fssnap_create_done = fssnap_create_done_impl; 2807c478bd9Sstevel@tonic-gate snapops.fssnap_delete = fssnap_delete_impl; 2817c478bd9Sstevel@tonic-gate snapops.fssnap_strategy = fssnap_strategy_impl; 2827c478bd9Sstevel@tonic-gate 2837c478bd9Sstevel@tonic-gate mutex_init(&snapshot_mutex, NULL, MUTEX_DEFAULT, NULL); 2847c478bd9Sstevel@tonic-gate 2857c478bd9Sstevel@tonic-gate /* 2867c478bd9Sstevel@tonic-gate * Initialize the fssnap highwater kstat 2877c478bd9Sstevel@tonic-gate */ 2887c478bd9Sstevel@tonic-gate ksp = kstat_create(snapname, 0, FSSNAP_KSTAT_HIGHWATER, "misc", 2897c478bd9Sstevel@tonic-gate KSTAT_TYPE_NAMED, 1, 0); 2907c478bd9Sstevel@tonic-gate if (ksp != NULL) { 2917c478bd9Sstevel@tonic-gate ksdata = (kstat_named_t *)ksp->ks_data; 2927c478bd9Sstevel@tonic-gate kstat_named_init(ksdata, FSSNAP_KSTAT_HIGHWATER, 2937c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT32); 2947c478bd9Sstevel@tonic-gate ksdata->value.ui32 = 0; 2957c478bd9Sstevel@tonic-gate kstat_install(ksp); 2967c478bd9Sstevel@tonic-gate } else { 2977c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to create highwater kstat."); 2987c478bd9Sstevel@tonic-gate } 2997c478bd9Sstevel@tonic-gate fssnap_highwater_kstat = ksp; 3007c478bd9Sstevel@tonic-gate 3017c478bd9Sstevel@tonic-gate return (0); 3027c478bd9Sstevel@tonic-gate } 3037c478bd9Sstevel@tonic-gate 3047c478bd9Sstevel@tonic-gate int 3057c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop) 3067c478bd9Sstevel@tonic-gate { 3077c478bd9Sstevel@tonic-gate return (mod_info(&ml, modinfop)); 
3087c478bd9Sstevel@tonic-gate } 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate int 3117c478bd9Sstevel@tonic-gate _fini(void) 3127c478bd9Sstevel@tonic-gate { 3137c478bd9Sstevel@tonic-gate int error; 3147c478bd9Sstevel@tonic-gate 3157c478bd9Sstevel@tonic-gate error = mod_remove(&ml); 3167c478bd9Sstevel@tonic-gate if (error) 3177c478bd9Sstevel@tonic-gate return (error); 3187c478bd9Sstevel@tonic-gate ddi_soft_state_fini(&statep); 3197c478bd9Sstevel@tonic-gate 3207c478bd9Sstevel@tonic-gate /* 3217c478bd9Sstevel@tonic-gate * delete the fssnap highwater kstat 3227c478bd9Sstevel@tonic-gate */ 3237c478bd9Sstevel@tonic-gate kstat_delete(fssnap_highwater_kstat); 3247c478bd9Sstevel@tonic-gate 3257c478bd9Sstevel@tonic-gate mutex_destroy(&snapshot_mutex); 3267c478bd9Sstevel@tonic-gate 3277c478bd9Sstevel@tonic-gate /* Clear out the file system operations vector */ 3287c478bd9Sstevel@tonic-gate snapops.fssnap_create = NULL; 3297c478bd9Sstevel@tonic-gate snapops.fssnap_set_candidate = NULL; 3307c478bd9Sstevel@tonic-gate snapops.fssnap_create_done = NULL; 3317c478bd9Sstevel@tonic-gate snapops.fssnap_delete = NULL; 3327c478bd9Sstevel@tonic-gate snapops.fssnap_strategy = NULL; 3337c478bd9Sstevel@tonic-gate 3347c478bd9Sstevel@tonic-gate return (0); 3357c478bd9Sstevel@tonic-gate } 3367c478bd9Sstevel@tonic-gate 3377c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 3387c478bd9Sstevel@tonic-gate 3397c478bd9Sstevel@tonic-gate /* 3407c478bd9Sstevel@tonic-gate * Snapshot Driver Routines 3417c478bd9Sstevel@tonic-gate * 3427c478bd9Sstevel@tonic-gate * This section implements the snapshot character and block drivers. The 3437c478bd9Sstevel@tonic-gate * device will appear to be a consistent read-only file system to 3447c478bd9Sstevel@tonic-gate * applications that wish to back it up or mount it. 
The snapshot driver
 * communicates with the file system through the translation table, which
 * tells the snapshot driver where to find the data necessary to piece
 * together the frozen file system. The data may either be on the master
 * device (no translation exists), in memory (a translation exists but has
 * not been flushed to the backing store), or in the backing store file.
 * The read request may require the snapshot driver to retrieve data from
 * several different places and piece it together to look like a single
 * contiguous read.
 *
 * The device minor number corresponds to the snapshot number in the list of
 * snapshot identifiers. The soft state for each minor number is simply a
 * pointer to the snapshot id, which holds all of the snapshot state. One
 * minor number is designated as the control device. All snapshot create
 * and delete requests go through the control device to ensure this module
 * is properly loaded and attached before the file system starts calling
 * routines defined here.
3617c478bd9Sstevel@tonic-gate */ 3627c478bd9Sstevel@tonic-gate 3637c478bd9Sstevel@tonic-gate 3647c478bd9Sstevel@tonic-gate /* 3657c478bd9Sstevel@tonic-gate * snap_getinfo() - snapshot driver getinfo(9E) routine 3667c478bd9Sstevel@tonic-gate * 3677c478bd9Sstevel@tonic-gate */ 3687c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3697c478bd9Sstevel@tonic-gate static int 3707c478bd9Sstevel@tonic-gate snap_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 3717c478bd9Sstevel@tonic-gate { 3727c478bd9Sstevel@tonic-gate switch (infocmd) { 3737c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 3747c478bd9Sstevel@tonic-gate *result = fssnap_dip; 3757c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 3767c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 3777c478bd9Sstevel@tonic-gate *result = 0; /* we only have one instance */ 3787c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 3797c478bd9Sstevel@tonic-gate } 3807c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 3817c478bd9Sstevel@tonic-gate } 3827c478bd9Sstevel@tonic-gate 3837c478bd9Sstevel@tonic-gate /* 3847c478bd9Sstevel@tonic-gate * snap_attach() - snapshot driver attach(9E) routine 3857c478bd9Sstevel@tonic-gate * 3867c478bd9Sstevel@tonic-gate * sets up snapshot control device and control state. 
The control state 3877c478bd9Sstevel@tonic-gate * is a pointer to an "anonymous" snapshot_id for tracking opens and closes 3887c478bd9Sstevel@tonic-gate */ 3897c478bd9Sstevel@tonic-gate static int 3907c478bd9Sstevel@tonic-gate snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3917c478bd9Sstevel@tonic-gate { 3927c478bd9Sstevel@tonic-gate int error; 3937c478bd9Sstevel@tonic-gate 3947c478bd9Sstevel@tonic-gate switch (cmd) { 3957c478bd9Sstevel@tonic-gate case DDI_ATTACH: 3967c478bd9Sstevel@tonic-gate /* create the control device */ 3977c478bd9Sstevel@tonic-gate error = ddi_create_priv_minor_node(dip, SNAP_CTL_NODE, S_IFCHR, 3987c478bd9Sstevel@tonic-gate SNAP_CTL_MINOR, DDI_PSEUDO, PRIVONLY_DEV, 3997c478bd9Sstevel@tonic-gate PRIV_SYS_CONFIG, PRIV_SYS_CONFIG, 0666); 4007c478bd9Sstevel@tonic-gate if (error == DDI_FAILURE) { 4017c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 4027c478bd9Sstevel@tonic-gate } 4037c478bd9Sstevel@tonic-gate 4047c478bd9Sstevel@tonic-gate rw_init(&snap_ctl.sid_rwlock, NULL, RW_DEFAULT, NULL); 4057c478bd9Sstevel@tonic-gate rw_enter(&snap_ctl.sid_rwlock, RW_WRITER); 4067c478bd9Sstevel@tonic-gate fssnap_dip = dip; 4077c478bd9Sstevel@tonic-gate snap_ctl.sid_snapnumber = SNAP_CTL_MINOR; 4087c478bd9Sstevel@tonic-gate /* the control sid is not linked into the snapshot list */ 4097c478bd9Sstevel@tonic-gate snap_ctl.sid_next = NULL; 4107c478bd9Sstevel@tonic-gate snap_ctl.sid_cowinfo = NULL; 4117c478bd9Sstevel@tonic-gate snap_ctl.sid_flags = 0; 4127c478bd9Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 4137c478bd9Sstevel@tonic-gate ddi_report_dev(dip); 4147c478bd9Sstevel@tonic-gate 4157c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 4167c478bd9Sstevel@tonic-gate case DDI_PM_RESUME: 4177c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 4187c478bd9Sstevel@tonic-gate 4197c478bd9Sstevel@tonic-gate case DDI_RESUME: 4207c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 4217c478bd9Sstevel@tonic-gate 4227c478bd9Sstevel@tonic-gate default: 
4237c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 4247c478bd9Sstevel@tonic-gate } 4257c478bd9Sstevel@tonic-gate } 4267c478bd9Sstevel@tonic-gate 4277c478bd9Sstevel@tonic-gate /* 4287c478bd9Sstevel@tonic-gate * snap_detach() - snapshot driver detach(9E) routine 4297c478bd9Sstevel@tonic-gate * 4307c478bd9Sstevel@tonic-gate * destroys snapshot control device and control state. If any snapshots 4317c478bd9Sstevel@tonic-gate * are active (ie. num_snapshots != 0), the device will refuse to detach. 4327c478bd9Sstevel@tonic-gate */ 4337c478bd9Sstevel@tonic-gate static int 4347c478bd9Sstevel@tonic-gate snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4357c478bd9Sstevel@tonic-gate { 4367c478bd9Sstevel@tonic-gate struct snapshot_id *sidp, *sidnextp; 4377c478bd9Sstevel@tonic-gate 4387c478bd9Sstevel@tonic-gate switch (cmd) { 4397c478bd9Sstevel@tonic-gate case DDI_DETACH: 4407c478bd9Sstevel@tonic-gate /* do not detach if the device is active */ 4417c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 4427c478bd9Sstevel@tonic-gate if ((num_snapshots != 0) || 4437c478bd9Sstevel@tonic-gate ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0)) { 4447c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 4457c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 4467c478bd9Sstevel@tonic-gate } 4477c478bd9Sstevel@tonic-gate 4487c478bd9Sstevel@tonic-gate /* free up the snapshot list */ 4497c478bd9Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidnextp) { 4507c478bd9Sstevel@tonic-gate ASSERT(SID_AVAILABLE(sidp) && 4517c478bd9Sstevel@tonic-gate !RW_LOCK_HELD(&sidp->sid_rwlock)); 4527c478bd9Sstevel@tonic-gate sidnextp = sidp->sid_next; 4537c478bd9Sstevel@tonic-gate rw_destroy(&sidp->sid_rwlock); 4547c478bd9Sstevel@tonic-gate kmem_free(sidp, sizeof (struct snapshot_id)); 4557c478bd9Sstevel@tonic-gate } 4567c478bd9Sstevel@tonic-gate snapshot = NULL; 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate /* delete the control device */ 4597c478bd9Sstevel@tonic-gate 
ddi_remove_minor_node(dip, SNAP_CTL_NODE);
		fssnap_dip = NULL;

		ASSERT((snap_ctl.sid_flags & SID_CHAR_BUSY) == 0);
		rw_destroy(&snap_ctl.sid_rwlock);
		mutex_exit(&snapshot_mutex);

		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * snap_open() - snapshot driver open(9E) routine
 *
 * marks the snapshot id as busy so it will not be recycled when deleted
 * until the snapshot is closed.
 *
 * Returns:
 *	0	on success
 *	EROFS	if the open requests write access
 *	EINVAL	if the control device is opened without FEXCL or not as a
 *		character device, or if otyp is neither OTYP_CHR nor OTYP_BLK
 *	EBUSY	if the control device is already open
 *	EAGAIN	if an exclusive open is requested on a busy snapshot
 *	ENXIO	if there is no soft state for the minor, or the snapshot
 *		is inactive
 */
/* ARGSUSED */
static int
snap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	minor_t	minor;
	struct snapshot_id **sidpp, *sidp;

	/* snapshots are read-only */
	if (flag & FWRITE)
		return (EROFS);

	minor = getminor(*devp);

	if (minor == SNAP_CTL_MINOR) {
		/* control device must be opened exclusively */
		if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR))
			return (EINVAL);

		/* test-and-set of the busy bit is done under the writer lock */
		rw_enter(&snap_ctl.sid_rwlock, RW_WRITER);
		if ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0) {
			rw_exit(&snap_ctl.sid_rwlock);
			return (EBUSY);
		}

		snap_ctl.sid_flags |= SID_CHAR_BUSY;
		rw_exit(&snap_ctl.sid_rwlock);

		return (0);
	}

	/* the soft state slot holds a pointer to the snapshot id */
	sidpp = ddi_get_soft_state(statep, minor);
	if (sidpp == NULL || *sidpp == NULL)
		return (ENXIO);
	sidp = *sidpp;
	rw_enter(&sidp->sid_rwlock, RW_WRITER);

	/* an exclusive open fails if the snapshot is already in use */
	if ((flag & FEXCL) && SID_BUSY(sidp)) {
		rw_exit(&sidp->sid_rwlock);
		return (EAGAIN);
	}

	ASSERT(sidpp != NULL && sidp != NULL);
	/* check to see if this snapshot has been killed on us */
	if (SID_INACTIVE(sidp)) {
		cmn_err(CE_WARN, "snap_open: snapshot %d does not exist.",
		    minor);
		rw_exit(&sidp->sid_rwlock);
		return (ENXIO);
	}

	/* character and block opens are tracked with separate busy bits */
	switch (otyp) {
	case OTYP_CHR:
		sidp->sid_flags |= SID_CHAR_BUSY;
		break;
	case OTYP_BLK:
		sidp->sid_flags |= SID_BLOCK_BUSY;
		break;
	default:
		rw_exit(&sidp->sid_rwlock);
		return (EINVAL);
	}

	rw_exit(&sidp->sid_rwlock);

	/*
	 * at this point if a valid snapshot was found then it has
	 * been marked busy and we can use it.
	 */
	return (0);
}

/*
 * snap_close() - snapshot driver close(9E) routine
 *
 * unsets the busy bits in the snapshot id.  If the snapshot has been
 * deleted while the snapshot device was open, the close call will clean
 * up the remaining state information.
 *
 * Returns 0 on success, ENXIO if there is no soft state for the minor,
 * and EINVAL if otyp is neither OTYP_CHR nor OTYP_BLK.
 */
/* ARGSUSED */
static int
snap_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	struct snapshot_id	**sidpp, *sidp;
	minor_t			minor;
	char			name[20];

	minor = getminor(dev);

	/* if this is the control device, close it and return */
	if (minor == SNAP_CTL_MINOR) {
		rw_enter(&snap_ctl.sid_rwlock, RW_WRITER);
		snap_ctl.sid_flags &= ~(SID_CHAR_BUSY);
		rw_exit(&snap_ctl.sid_rwlock);
		return (0);
	}

	sidpp = ddi_get_soft_state(statep, minor);
	if (sidpp == NULL || *sidpp == NULL) {
		cmn_err(CE_WARN, "snap_close: could not find state for "
		    "snapshot %d.", minor);
		return (ENXIO);
	}
	sidp = *sidpp;
	/*
	 * snapshot_mutex is taken before the per-snapshot rwlock because
	 * the cleanup below modifies num_snapshots and the soft state.
	 */
	mutex_enter(&snapshot_mutex);
	rw_enter(&sidp->sid_rwlock, RW_WRITER);

	/* Mark the snapshot as not being busy anymore */
	switch (otyp) {
	case OTYP_CHR:
		sidp->sid_flags &= ~(SID_CHAR_BUSY);
		break;
	case OTYP_BLK:
		sidp->sid_flags &= ~(SID_BLOCK_BUSY);
		break;
	default:
		mutex_exit(&snapshot_mutex);
		rw_exit(&sidp->sid_rwlock);
		return (EINVAL);
	}

	if (SID_AVAILABLE(sidp)) {
		/*
		 * if this is the last close on a snapshot that has been
		 * deleted, then free up the soft state.  The snapdelete
		 * ioctl does not free this when the device is in use so
		 * we do it here after the last reference goes away.
		 */

		/* remove the device nodes */
		ASSERT(fssnap_dip != NULL);
		/* block node is named "<n>", character node "<n>,raw" */
		(void) snprintf(name, sizeof (name), "%d",
		    sidp->sid_snapnumber);
		ddi_remove_minor_node(fssnap_dip, name);
		(void) snprintf(name, sizeof (name), "%d,raw",
		    sidp->sid_snapnumber);
		ddi_remove_minor_node(fssnap_dip, name);

		/* delete the state structure */
		ddi_soft_state_free(statep, sidp->sid_snapnumber);
		num_snapshots--;
	}

	mutex_exit(&snapshot_mutex);
	rw_exit(&sidp->sid_rwlock);

	return (0);
}

/*
 * snap_read() - snapshot driver read(9E) routine
 *
 * reads data from the snapshot by calling snap_strategy() through physio()
 *
 * Returns ENXIO if there is no soft state for the minor, otherwise the
 * value returned by physio().
 */
/* ARGSUSED */
static int
snap_read(dev_t dev, struct uio *uiop, cred_t *credp)
{
	minor_t		minor;
	struct snapshot_id **sidpp;

	minor = getminor(dev);
	sidpp = ddi_get_soft_state(statep, minor);
	if (sidpp == NULL || *sidpp == NULL) {
		cmn_err(CE_WARN,
		    "snap_read: could not find state for snapshot %d.", minor);
		return (ENXIO);
	}
	return (physio(snap_strategy, NULL, dev, B_READ, minphys, uiop));
}

/*
 * snap_strategy() - snapshot driver strategy(9E) routine
 *
 * cycles through each chunk in the requested buffer and calls
 * snap_getchunk() on each chunk to retrieve it from the appropriate
 * place.  Once all of the parts are put together the requested buffer
 * is returned.  The snapshot driver is read-only, so a write is invalid.
 *
 * Always returns 0; failures are reported on the buf with bioerror()
 * before biodone() is called.
 */
static int
snap_strategy(struct buf *bp)
{
	struct snapshot_id **sidpp, *sidp;
	minor_t		minor;
	chunknumber_t	chunk;
	int		off, len;
	u_longlong_t	reqptr;
	int		error = 0;
	size_t		chunksz;
	caddr_t		buf;

	/* snapshot device is read-only */
	if (bp->b_flags & B_WRITE) {
		bioerror(bp, EROFS);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	minor = getminor(bp->b_edev);
	sidpp = ddi_get_soft_state(statep, minor);
	if (sidpp == NULL || *sidpp == NULL) {
		cmn_err(CE_WARN,
		    "snap_strategy: could not find state for snapshot %d.",
		    minor);
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}
	sidp = *sidpp;
	ASSERT(sidp);
	/* held as reader for the whole transfer; snap_getchunk() asserts it */
	rw_enter(&sidp->sid_rwlock, RW_READER);

	if (SID_INACTIVE(sidp)) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		rw_exit(&sidp->sid_rwlock);
		return (0);
	}

	/* make the data addressable through b_un.b_addr */
	if (bp->b_flags & (B_PAGEIO|B_PHYS))
		bp_mapin(bp);

	bp->b_resid = bp->b_bcount;
	ASSERT(bp->b_un.b_addr);
	buf = bp->b_un.b_addr;

	chunksz = sidp->sid_cowinfo->cow_map.cmap_chunksz;

	/* reqptr is the current DEV_BSIZE offset into the device */
	/* chunk is the chunk containing reqptr */
	/* len is the length of the request (in the current chunk) in bytes */
	/* off is the byte offset into the current chunk */
	reqptr = bp->b_lblkno;
	while (bp->b_resid > 0) {
		chunk = dbtocowchunk(&sidp->sid_cowinfo->cow_map, reqptr);
		off = (reqptr % (chunksz >> DEV_BSHIFT)) << DEV_BSHIFT;
		/* never read across a chunk boundary in one call */
		len = min(chunksz - off, bp->b_resid);
		ASSERT((off + len) <= chunksz);

		if ((error = snap_getchunk(sidp, chunk, off, len, buf)) != 0) {
			/*
			 * EINVAL means the user tried to go out of range.
			 * Anything else means it's likely that we're
			 * confused.
			 */
			if (error != EINVAL) {
				cmn_err(CE_WARN, "snap_strategy: error "
				    "calling snap_getchunk, chunk = %llu, "
				    "offset = %d, len = %d, resid = %lu, "
				    "error = %d.",
				    chunk, off, len, bp->b_resid, error);
			}
			bioerror(bp, error);
			biodone(bp);
			rw_exit(&sidp->sid_rwlock);
			return (0);
		}
		bp->b_resid -= len;
		reqptr += (len >> DEV_BSHIFT);
		buf += len;
	}

	ASSERT(bp->b_resid == 0);
	biodone(bp);

	rw_exit(&sidp->sid_rwlock);
	return (0);
}

/*
 * snap_getchunk() - helper function for snap_strategy()
 *
 * gets the requested data from the appropriate place and fills in the
7547c478bd9Sstevel@tonic-gate * buffer. chunk is the chunk number of the request, offset is the 7557c478bd9Sstevel@tonic-gate * offset into that chunk and must be less than the chunk size. len is 7567c478bd9Sstevel@tonic-gate * the length of the request starting at offset, and must not exceed a 7577c478bd9Sstevel@tonic-gate * chunk boundary. buffer is the address to copy the data to. len 7587c478bd9Sstevel@tonic-gate * bytes are copied into the buffer starting at the location specified. 7597c478bd9Sstevel@tonic-gate * 7607c478bd9Sstevel@tonic-gate * A chunk is located according to the following algorithm: 7617c478bd9Sstevel@tonic-gate * - If the chunk does not have a translation or is not a candidate 7627c478bd9Sstevel@tonic-gate * for translation, it is read straight from the master device. 7637c478bd9Sstevel@tonic-gate * - If the chunk does have a translation, then it is either on 7647c478bd9Sstevel@tonic-gate * disk or in memory: 7657c478bd9Sstevel@tonic-gate * o If it is in memory the requested data is simply copied out 7667c478bd9Sstevel@tonic-gate * of the in-memory buffer. 7677c478bd9Sstevel@tonic-gate * o If it is in the backing store, it is read from there. 7687c478bd9Sstevel@tonic-gate * 7697c478bd9Sstevel@tonic-gate * This function does the real work of the snapshot driver. 
7707c478bd9Sstevel@tonic-gate */ 7717c478bd9Sstevel@tonic-gate static int 7727c478bd9Sstevel@tonic-gate snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, int offset, 7737c478bd9Sstevel@tonic-gate int len, char *buffer) 7747c478bd9Sstevel@tonic-gate { 7757c478bd9Sstevel@tonic-gate cow_map_t *cmap = &sidp->sid_cowinfo->cow_map; 7767c478bd9Sstevel@tonic-gate cow_map_node_t *cmn; 7777c478bd9Sstevel@tonic-gate struct buf *snapbuf; 7787c478bd9Sstevel@tonic-gate int error = 0; 7797c478bd9Sstevel@tonic-gate char *newbuffer; 7807c478bd9Sstevel@tonic-gate int newlen = 0; 7817c478bd9Sstevel@tonic-gate int partial = 0; 7827c478bd9Sstevel@tonic-gate 7837c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&sidp->sid_rwlock)); 7847c478bd9Sstevel@tonic-gate ASSERT(offset + len <= cmap->cmap_chunksz); 7857c478bd9Sstevel@tonic-gate 7867c478bd9Sstevel@tonic-gate /* 7877c478bd9Sstevel@tonic-gate * Check if the chunk number is out of range and if so bail out 7887c478bd9Sstevel@tonic-gate */ 7897c478bd9Sstevel@tonic-gate if (chunk >= (cmap->cmap_bmsize * NBBY)) { 7907c478bd9Sstevel@tonic-gate return (EINVAL); 7917c478bd9Sstevel@tonic-gate } 7927c478bd9Sstevel@tonic-gate 7937c478bd9Sstevel@tonic-gate /* 7947c478bd9Sstevel@tonic-gate * If the chunk is not a candidate for translation, then the chunk 7957c478bd9Sstevel@tonic-gate * was not allocated when the snapshot was taken. Since it does 7967c478bd9Sstevel@tonic-gate * not contain data associated with this snapshot, just return a 7977c478bd9Sstevel@tonic-gate * zero buffer instead. 
7987c478bd9Sstevel@tonic-gate */ 7997c478bd9Sstevel@tonic-gate if (isclr(cmap->cmap_candidate, chunk)) { 8007c478bd9Sstevel@tonic-gate bzero(buffer, len); 8017c478bd9Sstevel@tonic-gate return (0); 8027c478bd9Sstevel@tonic-gate } 8037c478bd9Sstevel@tonic-gate 8047c478bd9Sstevel@tonic-gate /* 8057c478bd9Sstevel@tonic-gate * if the chunk is a candidate for translation but a 8067c478bd9Sstevel@tonic-gate * translation does not exist, then read through to the 8077c478bd9Sstevel@tonic-gate * original file system. The rwlock is held until the read 8087c478bd9Sstevel@tonic-gate * completes if it hasn't been translated to make sure the 8097c478bd9Sstevel@tonic-gate * file system does not translate the block before we 8107c478bd9Sstevel@tonic-gate * access it. If it has already been translated we don't 8117c478bd9Sstevel@tonic-gate * need the lock, because the translation will never go away. 8127c478bd9Sstevel@tonic-gate */ 8137c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_READER); 8147c478bd9Sstevel@tonic-gate if (isclr(cmap->cmap_hastrans, chunk)) { 8157c478bd9Sstevel@tonic-gate snapbuf = getrbuf(KM_SLEEP); 8167c478bd9Sstevel@tonic-gate /* 8177c478bd9Sstevel@tonic-gate * Reading into the buffer saves having to do a copy, 8187c478bd9Sstevel@tonic-gate * but gets tricky if the request size is not a 8197c478bd9Sstevel@tonic-gate * multiple of DEV_BSIZE. However, we are filling the 8207c478bd9Sstevel@tonic-gate * buffer left to right, so future reads will write 8217c478bd9Sstevel@tonic-gate * over any extra data we might have read. 
8227c478bd9Sstevel@tonic-gate */ 8237c478bd9Sstevel@tonic-gate 8247c478bd9Sstevel@tonic-gate partial = len % DEV_BSIZE; 8257c478bd9Sstevel@tonic-gate 8267c478bd9Sstevel@tonic-gate snapbuf->b_bcount = len; 8277c478bd9Sstevel@tonic-gate snapbuf->b_lblkno = lbtodb(chunk * cmap->cmap_chunksz + offset); 8287c478bd9Sstevel@tonic-gate snapbuf->b_un.b_addr = buffer; 8297c478bd9Sstevel@tonic-gate 8307c478bd9Sstevel@tonic-gate snapbuf->b_iodone = NULL; 8317c478bd9Sstevel@tonic-gate snapbuf->b_proc = NULL; /* i.e. the kernel */ 8327c478bd9Sstevel@tonic-gate snapbuf->b_flags = B_READ | B_BUSY; 8337c478bd9Sstevel@tonic-gate snapbuf->b_edev = sidp->sid_fvp->v_vfsp->vfs_dev; 8347c478bd9Sstevel@tonic-gate 8357c478bd9Sstevel@tonic-gate if (partial) { 8367c478bd9Sstevel@tonic-gate /* 8377c478bd9Sstevel@tonic-gate * Partial block read in progress. 8387c478bd9Sstevel@tonic-gate * This is bad as modules further down the line 8397c478bd9Sstevel@tonic-gate * assume buf's are exact multiples of DEV_BSIZE 8407c478bd9Sstevel@tonic-gate * and we end up with fewer, or zero, bytes read. 8417c478bd9Sstevel@tonic-gate * To get round this we need to round up to the 8427c478bd9Sstevel@tonic-gate * nearest full block read and then return only 8437c478bd9Sstevel@tonic-gate * len bytes. 
8447c478bd9Sstevel@tonic-gate */ 8457c478bd9Sstevel@tonic-gate newlen = (len - partial) + DEV_BSIZE; 8467c478bd9Sstevel@tonic-gate newbuffer = kmem_alloc(newlen, KM_SLEEP); 8477c478bd9Sstevel@tonic-gate 8487c478bd9Sstevel@tonic-gate snapbuf->b_bcount = newlen; 8497c478bd9Sstevel@tonic-gate snapbuf->b_un.b_addr = newbuffer; 8507c478bd9Sstevel@tonic-gate } 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate (void) bdev_strategy(snapbuf); 8537c478bd9Sstevel@tonic-gate (void) biowait(snapbuf); 8547c478bd9Sstevel@tonic-gate 8557c478bd9Sstevel@tonic-gate error = geterror(snapbuf); 8567c478bd9Sstevel@tonic-gate 8577c478bd9Sstevel@tonic-gate if (partial) { 8587c478bd9Sstevel@tonic-gate /* 8597c478bd9Sstevel@tonic-gate * Partial block read. Now we need to bcopy the 8607c478bd9Sstevel@tonic-gate * correct number of bytes back into the 8617c478bd9Sstevel@tonic-gate * supplied buffer, and tidy up our temp 8627c478bd9Sstevel@tonic-gate * buffer. 8637c478bd9Sstevel@tonic-gate */ 8647c478bd9Sstevel@tonic-gate bcopy(newbuffer, buffer, len); 8657c478bd9Sstevel@tonic-gate kmem_free(newbuffer, newlen); 8667c478bd9Sstevel@tonic-gate } 8677c478bd9Sstevel@tonic-gate 8687c478bd9Sstevel@tonic-gate freerbuf(snapbuf); 8697c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 8707c478bd9Sstevel@tonic-gate 8717c478bd9Sstevel@tonic-gate return (error); 8727c478bd9Sstevel@tonic-gate } 8737c478bd9Sstevel@tonic-gate 8747c478bd9Sstevel@tonic-gate /* 8757c478bd9Sstevel@tonic-gate * finally, if the chunk is a candidate for translation and it 8767c478bd9Sstevel@tonic-gate * has been translated, then we clone the chunk of the buffer 8777c478bd9Sstevel@tonic-gate * that was copied aside by the file system. 8787c478bd9Sstevel@tonic-gate * The cmap_rwlock does not need to be held after we know the 8797c478bd9Sstevel@tonic-gate * data has already been copied. Once a chunk has been copied 8807c478bd9Sstevel@tonic-gate * to the backing file, it is stable read only data. 
8817c478bd9Sstevel@tonic-gate */ 8827c478bd9Sstevel@tonic-gate cmn = transtbl_get(cmap, chunk); 8837c478bd9Sstevel@tonic-gate 8847c478bd9Sstevel@tonic-gate /* check whether the data is in memory or in the backing file */ 8857c478bd9Sstevel@tonic-gate if (cmn != NULL) { 8867c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_buf); 8877c478bd9Sstevel@tonic-gate /* already in memory */ 8887c478bd9Sstevel@tonic-gate bcopy(cmn->cmn_buf + offset, buffer, len); 8897c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 8907c478bd9Sstevel@tonic-gate } else { 8917c478bd9Sstevel@tonic-gate ssize_t resid = len; 8927c478bd9Sstevel@tonic-gate int bf_index; 8937c478bd9Sstevel@tonic-gate /* 8947c478bd9Sstevel@tonic-gate * can cause deadlock with writer if we don't drop the 8957c478bd9Sstevel@tonic-gate * cmap_rwlock before trying to get the backing store file 8967c478bd9Sstevel@tonic-gate * vnode rwlock. 8977c478bd9Sstevel@tonic-gate */ 8987c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 8997c478bd9Sstevel@tonic-gate 9007c478bd9Sstevel@tonic-gate bf_index = chunk / cmap->cmap_chunksperbf; 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate /* read buffer from backing file */ 9037c478bd9Sstevel@tonic-gate error = vn_rdwr(UIO_READ, 9047c478bd9Sstevel@tonic-gate (sidp->sid_cowinfo->cow_backfile_array)[bf_index], 9057c478bd9Sstevel@tonic-gate buffer, len, ((chunk % cmap->cmap_chunksperbf) * 9067c478bd9Sstevel@tonic-gate cmap->cmap_chunksz) + offset, UIO_SYSSPACE, 0, 9077c478bd9Sstevel@tonic-gate RLIM64_INFINITY, kcred, &resid); 9087c478bd9Sstevel@tonic-gate } 9097c478bd9Sstevel@tonic-gate 9107c478bd9Sstevel@tonic-gate return (error); 9117c478bd9Sstevel@tonic-gate } 9127c478bd9Sstevel@tonic-gate 9137c478bd9Sstevel@tonic-gate /* 9147c478bd9Sstevel@tonic-gate * snap_print() - snapshot driver print(9E) routine 9157c478bd9Sstevel@tonic-gate * 9167c478bd9Sstevel@tonic-gate * prints the device identification string. 
9177c478bd9Sstevel@tonic-gate */ 9187c478bd9Sstevel@tonic-gate static int 9197c478bd9Sstevel@tonic-gate snap_print(dev_t dev, char *str) 9207c478bd9Sstevel@tonic-gate { 9217c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 9227c478bd9Sstevel@tonic-gate minor_t minor; 9237c478bd9Sstevel@tonic-gate 9247c478bd9Sstevel@tonic-gate minor = getminor(dev); 9257c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 9267c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 9277c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 9287c478bd9Sstevel@tonic-gate "snap_print: could not find state for snapshot %d.", minor); 9297c478bd9Sstevel@tonic-gate return (ENXIO); 9307c478bd9Sstevel@tonic-gate } 9317c478bd9Sstevel@tonic-gate 9327c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "snap_print: snapshot %d: %s", minor, str); 9337c478bd9Sstevel@tonic-gate 9347c478bd9Sstevel@tonic-gate return (0); 9357c478bd9Sstevel@tonic-gate } 9367c478bd9Sstevel@tonic-gate 9377c478bd9Sstevel@tonic-gate /* 9387c478bd9Sstevel@tonic-gate * snap_prop_op() - snapshot driver prop_op(9E) routine 9397c478bd9Sstevel@tonic-gate * 9407c478bd9Sstevel@tonic-gate * get 32-bit and 64-bit values for size (character driver) and nblocks 9417c478bd9Sstevel@tonic-gate * (block driver). 
9427c478bd9Sstevel@tonic-gate */ 9437c478bd9Sstevel@tonic-gate static int 9447c478bd9Sstevel@tonic-gate snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 9457c478bd9Sstevel@tonic-gate int flags, char *name, caddr_t valuep, int *lengthp) 9467c478bd9Sstevel@tonic-gate { 9477c478bd9Sstevel@tonic-gate int minor; 948184cd04cScth struct snapshot_id **sidpp; 9497c478bd9Sstevel@tonic-gate dev_t mdev; 950184cd04cScth dev_info_t *mdip; 951184cd04cScth int error; 9527c478bd9Sstevel@tonic-gate 9537c478bd9Sstevel@tonic-gate minor = getminor(dev); 9547c478bd9Sstevel@tonic-gate 9557c478bd9Sstevel@tonic-gate /* if this is the control device just check for .conf properties */ 9567c478bd9Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) 9577c478bd9Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 9587c478bd9Sstevel@tonic-gate valuep, lengthp)); 959184cd04cScth 9607c478bd9Sstevel@tonic-gate /* check to see if there is a master device plumbed */ 9617c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 9627c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 9637c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 9647c478bd9Sstevel@tonic-gate "snap_prop_op: could not find state for " 9657c478bd9Sstevel@tonic-gate "snapshot %d.", minor); 9667c478bd9Sstevel@tonic-gate return (DDI_PROP_NOT_FOUND); 9677c478bd9Sstevel@tonic-gate } 9687c478bd9Sstevel@tonic-gate 9697c478bd9Sstevel@tonic-gate if (((*sidpp)->sid_fvp == NULL) || ((*sidpp)->sid_fvp->v_vfsp == NULL)) 9707c478bd9Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 9717c478bd9Sstevel@tonic-gate valuep, lengthp)); 972184cd04cScth 973184cd04cScth /* hold master device and pass operation down */ 9747c478bd9Sstevel@tonic-gate mdev = (*sidpp)->sid_fvp->v_vfsp->vfs_dev; 975184cd04cScth if (mdip = e_ddi_hold_devi_by_dev(mdev, 0)) { 9767c478bd9Sstevel@tonic-gate 9777c478bd9Sstevel@tonic-gate /* get size information from the master device. 
*/ 978184cd04cScth error = cdev_prop_op(mdev, mdip, 979184cd04cScth prop_op, flags, name, valuep, lengthp); 980184cd04cScth ddi_release_devi(mdip); 981184cd04cScth if (error == DDI_PROP_SUCCESS) 982184cd04cScth return (error); 9837c478bd9Sstevel@tonic-gate } 9847c478bd9Sstevel@tonic-gate 985184cd04cScth /* master device did not service the request, try framework */ 986184cd04cScth return (ddi_prop_op(dev, dip, prop_op, flags, name, valuep, lengthp)); 9877c478bd9Sstevel@tonic-gate 9887c478bd9Sstevel@tonic-gate } 9897c478bd9Sstevel@tonic-gate 9907c478bd9Sstevel@tonic-gate /* 9917c478bd9Sstevel@tonic-gate * snap_ioctl() - snapshot driver ioctl(9E) routine 9927c478bd9Sstevel@tonic-gate * 9937c478bd9Sstevel@tonic-gate * only applies to the control device. The control device accepts two 9947c478bd9Sstevel@tonic-gate * ioctl requests: create a snapshot or delete a snapshot. In either 9957c478bd9Sstevel@tonic-gate * case, the vnode for the requested file system is extracted, and the 9967c478bd9Sstevel@tonic-gate * request is passed on to the file system via the same ioctl. The file 9977c478bd9Sstevel@tonic-gate * system is responsible for doing the things necessary for creating or 9987c478bd9Sstevel@tonic-gate * destroying a snapshot, including any file system specific operations 9997c478bd9Sstevel@tonic-gate * that must be performed as well as setting up and deleting the snapshot 10007c478bd9Sstevel@tonic-gate * state through the fssnap interfaces. 
*/
static int
snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	minor_t	minor;
	int error = 0;

	minor = getminor(dev);

	/* ioctls are only accepted on the control device */
	if (minor != SNAP_CTL_MINOR) {
		return (EINVAL);
	}

	switch (cmd) {
	case _FIOSNAPSHOTCREATE:
	{
		struct fiosnapcreate	fc;
		struct file		*fp;
		struct vnode		*vp;

		/* copied in here only to obtain rootfiledesc */
		if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode))
			return (EFAULT);

		/* get vnode for file system mount point */
		if ((fp = getf(fc.rootfiledesc)) == NULL)
			return (EBADF);

		ASSERT(fp->f_vnode);
		vp = fp->f_vnode;
		/* hold the vnode so it outlives the releasef() below */
		VN_HOLD(vp);
		releasef(fc.rootfiledesc);

		/* pass ioctl request to file system */
		error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp, NULL);
		VN_RELE(vp);
		break;
	}
	case _FIOSNAPSHOTCREATE_MULTI:
	{
		struct fiosnapcreate_multi	fc;
		struct file		*fp;
		struct vnode		*vp;

		/* copied in here only to obtain rootfiledesc */
		if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode))
			return (EFAULT);

		/* get vnode for file system mount point */
		if ((fp = getf(fc.rootfiledesc)) == NULL)
			return (EBADF);

		ASSERT(fp->f_vnode);
		vp = fp->f_vnode;
		/* hold the vnode so it outlives the releasef() below */
		VN_HOLD(vp);
		releasef(fc.rootfiledesc);

		/* pass ioctl request to file system */
		error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp, NULL);
		VN_RELE(vp);
		break;
	}
	case _FIOSNAPSHOTDELETE:
	{
		major_t			major;
		struct fiosnapdelete	fc;
		snapshot_id_t		*sidp = NULL;
		snapshot_id_t		*sidnextp = NULL;
		struct file		*fp = NULL;
		struct vnode		*vp = NULL;
		struct vfs 		*vfsp = NULL;
		vfsops_t		*vfsops = EIO_vfsops;

		if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode))
			return (EFAULT);

		/* get vnode for file system mount point */
		if ((fp = getf(fc.rootfiledesc)) == NULL)
			return (EBADF);

		ASSERT(fp->f_vnode);
		vp = fp->f_vnode;
		VN_HOLD(vp);
		releasef(fc.rootfiledesc);
		/*
		 * Test for two formats of delete and set correct minor/vp:
		 * pseudo device:
		 * fssnap -d [/dev/fssnap/x]
		 * or
		 * mount point:
		 * fssnap -d [/mntpt]
		 * Note that minor is verified to be equal to SNAP_CTL_MINOR
		 * at this point which is an invalid minor number.
		 */
		ASSERT(fssnap_dip != NULL);
		major = ddi_driver_major(fssnap_dip);
		mutex_enter(&snapshot_mutex);
		for (sidp = snapshot; sidp != NULL; sidp = sidnextp) {
			rw_enter(&sidp->sid_rwlock, RW_READER);
			/* fetch the link while this entry's lock is held */
			sidnextp = sidp->sid_next;
			/* pseudo device: */
			if (major == getmajor(vp->v_rdev)) {
				minor = getminor(vp->v_rdev);
				if (sidp->sid_snapnumber == (uint_t)minor &&
				    sidp->sid_fvp) {
					/*
					 * swap the hold from the device
					 * vnode to the snapshot's sid_fvp
					 */
					VN_RELE(vp);
					vp = sidp->sid_fvp;
					VN_HOLD(vp);
					rw_exit(&sidp->sid_rwlock);
					break;
				}
			/* Mount point: */
			} else {
				if (sidp->sid_fvp == vp) {
					minor = sidp->sid_snapnumber;
					rw_exit(&sidp->sid_rwlock);
					break;
				}
			}
			rw_exit(&sidp->sid_rwlock);
		}
		mutex_exit(&snapshot_mutex);
		/* Verify minor got set correctly above */
		if (minor == SNAP_CTL_MINOR) {
			VN_RELE(vp);
			return (EINVAL);
		}
		dev = makedevice(major, minor);
		/*
		 * Create dummy vfs entry
		 * to use as a locking semaphore across the IOCTL
		 * for mount in progress cases...
		 */
		vfsp = vfs_alloc(KM_SLEEP);
		VFS_INIT(vfsp, vfsops, NULL);
		VFS_HOLD(vfsp);
		vfs_addmip(dev, vfsp);
		if ((vfs_devmounting(dev, vfsp)) ||
		    (vfs_devismounted(dev))) {
			/* snapshot device is mounted or being mounted */
			vfs_delmip(vfsp);
			VFS_RELE(vfsp);
			VN_RELE(vp);
			return (EBUSY);
		}
		/*
		 * Nobody mounted but do not release mount in progress lock
		 * until IOCTL complete to prohibit a mount sneaking
		 * in
		 */
		error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp, NULL);
		vfs_delmip(vfsp);
		VFS_RELE(vfsp);
		VN_RELE(vp);
		break;
	}
	default:
		cmn_err(CE_WARN, "snap_ioctl: Invalid ioctl cmd %d, minor %d.",
		    cmd, minor);
		return (EINVAL);
	}

	return (error);
}


/* ************************************************************************ */

/*
 * Translation Table Routines
 *
 *    These support routines implement a simple doubly linked list
 *    to keep track of chunks that are currently in memory.  The maximum
 *    size of the list is determined by the fssnap_max_mem_chunks variable.
 *    The cmap_rwlock is used to protect the linkage of the list.
 */

/*
 * transtbl_add() - add a node to the translation table
 *
 * allocates a new node and points it at the buffer passed in.  The node
 * is added to the beginning of the doubly linked list and the head of
 * the list is moved.  The cmap_rwlock must be held as a writer through
 * this operation.
11837c478bd9Sstevel@tonic-gate */ 11847c478bd9Sstevel@tonic-gate static cow_map_node_t * 11857c478bd9Sstevel@tonic-gate transtbl_add(cow_map_t *cmap, chunknumber_t chunk, caddr_t buf) 11867c478bd9Sstevel@tonic-gate { 11877c478bd9Sstevel@tonic-gate cow_map_node_t *cmnode; 11887c478bd9Sstevel@tonic-gate 11897c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 11907c478bd9Sstevel@tonic-gate 11917c478bd9Sstevel@tonic-gate cmnode = kmem_alloc(sizeof (cow_map_node_t), KM_SLEEP); 11927c478bd9Sstevel@tonic-gate 11937c478bd9Sstevel@tonic-gate /* 11947c478bd9Sstevel@tonic-gate * insert new translations at the beginning so cmn_table is always 11957c478bd9Sstevel@tonic-gate * the first node. 11967c478bd9Sstevel@tonic-gate */ 11977c478bd9Sstevel@tonic-gate cmnode->cmn_chunk = chunk; 11987c478bd9Sstevel@tonic-gate cmnode->cmn_buf = buf; 11997c478bd9Sstevel@tonic-gate cmnode->cmn_prev = NULL; 12007c478bd9Sstevel@tonic-gate cmnode->cmn_next = cmap->cmap_table; 12017c478bd9Sstevel@tonic-gate if (cmnode->cmn_next) 12027c478bd9Sstevel@tonic-gate cmnode->cmn_next->cmn_prev = cmnode; 12037c478bd9Sstevel@tonic-gate cmap->cmap_table = cmnode; 12047c478bd9Sstevel@tonic-gate 12057c478bd9Sstevel@tonic-gate return (cmnode); 12067c478bd9Sstevel@tonic-gate } 12077c478bd9Sstevel@tonic-gate 12087c478bd9Sstevel@tonic-gate /* 12097c478bd9Sstevel@tonic-gate * transtbl_get() - look up a node in the translation table 12107c478bd9Sstevel@tonic-gate * 12117c478bd9Sstevel@tonic-gate * called by the snapshot driver to find data that has been translated. 12127c478bd9Sstevel@tonic-gate * The lookup is done by the chunk number, and the node is returned. 12137c478bd9Sstevel@tonic-gate * If the node was not found, NULL is returned. 
12147c478bd9Sstevel@tonic-gate */ 12157c478bd9Sstevel@tonic-gate static cow_map_node_t * 12167c478bd9Sstevel@tonic-gate transtbl_get(cow_map_t *cmap, chunknumber_t chunk) 12177c478bd9Sstevel@tonic-gate { 12187c478bd9Sstevel@tonic-gate cow_map_node_t *cmn; 12197c478bd9Sstevel@tonic-gate 12207c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&cmap->cmap_rwlock)); 12217c478bd9Sstevel@tonic-gate ASSERT(cmap); 12227c478bd9Sstevel@tonic-gate 12237c478bd9Sstevel@tonic-gate /* search the translation table */ 12247c478bd9Sstevel@tonic-gate for (cmn = cmap->cmap_table; cmn != NULL; cmn = cmn->cmn_next) { 12257c478bd9Sstevel@tonic-gate if (cmn->cmn_chunk == chunk) 12267c478bd9Sstevel@tonic-gate return (cmn); 12277c478bd9Sstevel@tonic-gate } 12287c478bd9Sstevel@tonic-gate 12297c478bd9Sstevel@tonic-gate /* not found */ 12307c478bd9Sstevel@tonic-gate return (NULL); 12317c478bd9Sstevel@tonic-gate } 12327c478bd9Sstevel@tonic-gate 12337c478bd9Sstevel@tonic-gate /* 12347c478bd9Sstevel@tonic-gate * transtbl_delete() - delete a node from the translation table 12357c478bd9Sstevel@tonic-gate * 12367c478bd9Sstevel@tonic-gate * called when a node's data has been written out to disk. The 12377c478bd9Sstevel@tonic-gate * cmap_rwlock must be held as a writer for this operation. If the node 12387c478bd9Sstevel@tonic-gate * being deleted is the head of the list, then the head is moved to the 12397c478bd9Sstevel@tonic-gate * next node. Both the node's data and the node itself are freed. 
12407c478bd9Sstevel@tonic-gate */ 12417c478bd9Sstevel@tonic-gate static void 12427c478bd9Sstevel@tonic-gate transtbl_delete(cow_map_t *cmap, cow_map_node_t *cmn) 12437c478bd9Sstevel@tonic-gate { 12447c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 12457c478bd9Sstevel@tonic-gate ASSERT(cmn); 12467c478bd9Sstevel@tonic-gate ASSERT(cmap->cmap_table); 12477c478bd9Sstevel@tonic-gate 12487c478bd9Sstevel@tonic-gate /* if the head of the list is being deleted, then move the head up */ 12497c478bd9Sstevel@tonic-gate if (cmap->cmap_table == cmn) { 12507c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_prev == NULL); 12517c478bd9Sstevel@tonic-gate cmap->cmap_table = cmn->cmn_next; 12527c478bd9Sstevel@tonic-gate } 12537c478bd9Sstevel@tonic-gate 12547c478bd9Sstevel@tonic-gate 12557c478bd9Sstevel@tonic-gate /* make previous node's next pointer skip over current node */ 12567c478bd9Sstevel@tonic-gate if (cmn->cmn_prev != NULL) { 12577c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_prev->cmn_next == cmn); 12587c478bd9Sstevel@tonic-gate cmn->cmn_prev->cmn_next = cmn->cmn_next; 12597c478bd9Sstevel@tonic-gate } 12607c478bd9Sstevel@tonic-gate 12617c478bd9Sstevel@tonic-gate /* make next node's previous pointer skip over current node */ 12627c478bd9Sstevel@tonic-gate if (cmn->cmn_next != NULL) { 12637c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_next->cmn_prev == cmn); 12647c478bd9Sstevel@tonic-gate cmn->cmn_next->cmn_prev = cmn->cmn_prev; 12657c478bd9Sstevel@tonic-gate } 12667c478bd9Sstevel@tonic-gate 12677c478bd9Sstevel@tonic-gate /* free the data and the node */ 12687c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_buf); 12697c478bd9Sstevel@tonic-gate kmem_free(cmn->cmn_buf, cmap->cmap_chunksz); 12707c478bd9Sstevel@tonic-gate kmem_free(cmn, sizeof (cow_map_node_t)); 12717c478bd9Sstevel@tonic-gate } 12727c478bd9Sstevel@tonic-gate 12737c478bd9Sstevel@tonic-gate /* 12747c478bd9Sstevel@tonic-gate * transtbl_free() - free the entire translation table 12757c478bd9Sstevel@tonic-gate * 
12767c478bd9Sstevel@tonic-gate * called when the snapshot is deleted. This frees all of the nodes in 12777c478bd9Sstevel@tonic-gate * the translation table (but not the bitmaps). 12787c478bd9Sstevel@tonic-gate */ 12797c478bd9Sstevel@tonic-gate static void 12807c478bd9Sstevel@tonic-gate transtbl_free(cow_map_t *cmap) 12817c478bd9Sstevel@tonic-gate { 12827c478bd9Sstevel@tonic-gate cow_map_node_t *curnode; 12837c478bd9Sstevel@tonic-gate cow_map_node_t *tempnode; 12847c478bd9Sstevel@tonic-gate 12857c478bd9Sstevel@tonic-gate for (curnode = cmap->cmap_table; curnode != NULL; curnode = tempnode) { 12867c478bd9Sstevel@tonic-gate tempnode = curnode->cmn_next; 12877c478bd9Sstevel@tonic-gate 12887c478bd9Sstevel@tonic-gate kmem_free(curnode->cmn_buf, cmap->cmap_chunksz); 12897c478bd9Sstevel@tonic-gate kmem_free(curnode, sizeof (cow_map_node_t)); 12907c478bd9Sstevel@tonic-gate } 12917c478bd9Sstevel@tonic-gate } 12927c478bd9Sstevel@tonic-gate 12937c478bd9Sstevel@tonic-gate 12947c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 12957c478bd9Sstevel@tonic-gate 12967c478bd9Sstevel@tonic-gate /* 12977c478bd9Sstevel@tonic-gate * Interface Implementation Routines 12987c478bd9Sstevel@tonic-gate * 12997c478bd9Sstevel@tonic-gate * The following functions implement snapshot interface routines that are 13007c478bd9Sstevel@tonic-gate * called by the file system to create, delete, and use a snapshot. The 13017c478bd9Sstevel@tonic-gate * interfaces are defined in fssnap_if.c and are filled in by this driver 13027c478bd9Sstevel@tonic-gate * when it is loaded. This technique allows the file system to depend on 13037c478bd9Sstevel@tonic-gate * the interface module without having to load the full implementation and 13047c478bd9Sstevel@tonic-gate * snapshot device drivers. 
 */

/*
 * fssnap_strategy_impl() - strategy routine called by the file system
 *
 * called by the file system to handle copy-on-write when necessary. All
 * reads and writes that the file system performs should go through this
 * function. If the file system calls the underlying device's strategy
 * routine without going through fssnap_strategy() (e.g. by calling
 * bdev_strategy()), the snapshot may not be consistent.
 *
 * This function starts by doing significant sanity checking to ensure
 * the snapshot was not deleted out from under it or deleted and then
 * recreated. To do this, it checks the actual pointer passed into it
 * (i.e. the handle held by the file system). NOTE that the parameter is
 * a POINTER TO A POINTER to the snapshot id. Once the snapshot id is
 * locked, it knows things are ok and that this snapshot is really for
 * this file system.
 *
 * If the request is a write, fssnap_translate() is called to determine
 * whether a copy-on-write is required. If it is a read, the read is
 * simply passed on to the underlying device.
13277c478bd9Sstevel@tonic-gate */ 13287c478bd9Sstevel@tonic-gate static void 13297c478bd9Sstevel@tonic-gate fssnap_strategy_impl(void *snapshot_id, buf_t *bp) 13307c478bd9Sstevel@tonic-gate { 13317c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 13327c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 13337c478bd9Sstevel@tonic-gate int error; 13347c478bd9Sstevel@tonic-gate 13357c478bd9Sstevel@tonic-gate /* read requests are always passed through */ 13367c478bd9Sstevel@tonic-gate if (bp->b_flags & B_READ) { 13377c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 13387c478bd9Sstevel@tonic-gate return; 13397c478bd9Sstevel@tonic-gate } 13407c478bd9Sstevel@tonic-gate 13417c478bd9Sstevel@tonic-gate /* 13427c478bd9Sstevel@tonic-gate * Because we were not able to take the snapshot read lock BEFORE 13437c478bd9Sstevel@tonic-gate * checking for a snapshot back in the file system, things may have 13447c478bd9Sstevel@tonic-gate * drastically changed out from under us. For instance, the snapshot 13457c478bd9Sstevel@tonic-gate * may have been deleted, deleted and recreated, or worse yet, deleted 13467c478bd9Sstevel@tonic-gate * for this file system but now the snapshot number is in use by another 13477c478bd9Sstevel@tonic-gate * file system. 13487c478bd9Sstevel@tonic-gate * 13497c478bd9Sstevel@tonic-gate * Having a pointer to the file system's snapshot id pointer allows us 13507c478bd9Sstevel@tonic-gate * to sanity check most of this, though it assumes the file system is 13517c478bd9Sstevel@tonic-gate * keeping track of a pointer to the snapshot_id somewhere. 13527c478bd9Sstevel@tonic-gate */ 13537c478bd9Sstevel@tonic-gate sidpp = (struct snapshot_id **)snapshot_id; 13547c478bd9Sstevel@tonic-gate sidp = *sidpp; 13557c478bd9Sstevel@tonic-gate 13567c478bd9Sstevel@tonic-gate /* 13577c478bd9Sstevel@tonic-gate * if this file system's snapshot was disabled, just pass the 13587c478bd9Sstevel@tonic-gate * request through. 
13597c478bd9Sstevel@tonic-gate */ 13607c478bd9Sstevel@tonic-gate if (sidp == NULL) { 13617c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 13627c478bd9Sstevel@tonic-gate return; 13637c478bd9Sstevel@tonic-gate } 13647c478bd9Sstevel@tonic-gate 13657c478bd9Sstevel@tonic-gate /* 13667c478bd9Sstevel@tonic-gate * Once we have the reader lock the snapshot will not magically go 13677c478bd9Sstevel@tonic-gate * away. But things may have changed on us before this so double check. 13687c478bd9Sstevel@tonic-gate */ 13697c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 13707c478bd9Sstevel@tonic-gate 13717c478bd9Sstevel@tonic-gate /* 13727c478bd9Sstevel@tonic-gate * if an error was founds somewhere the DELETE flag will be 13737c478bd9Sstevel@tonic-gate * set to indicate the snapshot should be deleted and no new 13747c478bd9Sstevel@tonic-gate * translations should occur. 13757c478bd9Sstevel@tonic-gate */ 13767c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_DELETE) { 13777c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 13787c478bd9Sstevel@tonic-gate (void) fssnap_delete_impl(sidpp); 13797c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 13807c478bd9Sstevel@tonic-gate return; 13817c478bd9Sstevel@tonic-gate } 13827c478bd9Sstevel@tonic-gate 13837c478bd9Sstevel@tonic-gate /* 13847c478bd9Sstevel@tonic-gate * If the file system is no longer pointing to the snapshot we were 13857c478bd9Sstevel@tonic-gate * called with, then it should not attempt to translate this buffer as 13867c478bd9Sstevel@tonic-gate * it may be going to a snapshot for a different file system. 13877c478bd9Sstevel@tonic-gate * Even if the file system snapshot pointer is still the same, the 13887c478bd9Sstevel@tonic-gate * snapshot may have been disabled before we got the reader lock. 
13897c478bd9Sstevel@tonic-gate */ 13907c478bd9Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 13917c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 13927c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 13937c478bd9Sstevel@tonic-gate return; 13947c478bd9Sstevel@tonic-gate } 13957c478bd9Sstevel@tonic-gate 13967c478bd9Sstevel@tonic-gate /* 13977c478bd9Sstevel@tonic-gate * At this point we're sure the snapshot will not go away while the 13987c478bd9Sstevel@tonic-gate * reader lock is held, and we are reasonably certain that we are 13997c478bd9Sstevel@tonic-gate * writing to the correct snapshot. 14007c478bd9Sstevel@tonic-gate */ 14017c478bd9Sstevel@tonic-gate if ((error = fssnap_translate(sidpp, bp)) != 0) { 14027c478bd9Sstevel@tonic-gate /* 14037c478bd9Sstevel@tonic-gate * fssnap_translate can release the reader lock if it 14047c478bd9Sstevel@tonic-gate * has to wait for a semaphore. In this case it is possible 14057c478bd9Sstevel@tonic-gate * for the snapshot to be deleted in this time frame. If this 14067c478bd9Sstevel@tonic-gate * happens just sent the buf thru to the filesystems device. 14077c478bd9Sstevel@tonic-gate */ 14087c478bd9Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 14097c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 14107c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 14117c478bd9Sstevel@tonic-gate return; 14127c478bd9Sstevel@tonic-gate } 14137c478bd9Sstevel@tonic-gate bioerror(bp, error); 14147c478bd9Sstevel@tonic-gate biodone(bp); 14157c478bd9Sstevel@tonic-gate } 14167c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 14177c478bd9Sstevel@tonic-gate } 14187c478bd9Sstevel@tonic-gate 14197c478bd9Sstevel@tonic-gate /* 14207c478bd9Sstevel@tonic-gate * fssnap_translate() - helper function for fssnap_strategy() 14217c478bd9Sstevel@tonic-gate * 14227c478bd9Sstevel@tonic-gate * performs the actual copy-on-write for write requests, if required. 
14237c478bd9Sstevel@tonic-gate * This function does the real work of the file system side of things. 14247c478bd9Sstevel@tonic-gate * 14257c478bd9Sstevel@tonic-gate * It first checks the candidate bitmap to quickly determine whether any 14267c478bd9Sstevel@tonic-gate * action is necessary. If the candidate bitmap indicates the chunk was 14277c478bd9Sstevel@tonic-gate * allocated when the snapshot was created, then it checks to see whether 14287c478bd9Sstevel@tonic-gate * a translation already exists. If a translation already exists then no 14297c478bd9Sstevel@tonic-gate * action is required. If the chunk is a candidate for copy-on-write, 14307c478bd9Sstevel@tonic-gate * and a translation does not already exist, then the chunk is read in 14317c478bd9Sstevel@tonic-gate * and a node is added to the translation table. 14327c478bd9Sstevel@tonic-gate * 14337c478bd9Sstevel@tonic-gate * Once all of the chunks in the request range have been copied (if they 14347c478bd9Sstevel@tonic-gate * needed to be), then the original request can be satisfied and the old 14357c478bd9Sstevel@tonic-gate * data can be overwritten. 
14367c478bd9Sstevel@tonic-gate */ 14377c478bd9Sstevel@tonic-gate static int 14387c478bd9Sstevel@tonic-gate fssnap_translate(struct snapshot_id **sidpp, struct buf *wbp) 14397c478bd9Sstevel@tonic-gate { 14407c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = *sidpp; 14417c478bd9Sstevel@tonic-gate struct buf *oldbp; /* buffer to store old data in */ 14427c478bd9Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 14437c478bd9Sstevel@tonic-gate cow_map_t *cmap = &cowp->cow_map; 14447c478bd9Sstevel@tonic-gate cow_map_node_t *cmn; 14457c478bd9Sstevel@tonic-gate chunknumber_t cowchunk, startchunk, endchunk; 14467c478bd9Sstevel@tonic-gate int error; 14477c478bd9Sstevel@tonic-gate int throttle_write = 0; 14487c478bd9Sstevel@tonic-gate 14497c478bd9Sstevel@tonic-gate /* make sure the snapshot is active */ 14507c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&sidp->sid_rwlock)); 14517c478bd9Sstevel@tonic-gate 14527c478bd9Sstevel@tonic-gate startchunk = dbtocowchunk(cmap, wbp->b_lblkno); 14537c478bd9Sstevel@tonic-gate endchunk = dbtocowchunk(cmap, wbp->b_lblkno + 14547c478bd9Sstevel@tonic-gate ((wbp->b_bcount-1) >> DEV_BSHIFT)); 14557c478bd9Sstevel@tonic-gate 14567c478bd9Sstevel@tonic-gate /* 14577c478bd9Sstevel@tonic-gate * Do not throttle the writes of the fssnap taskq thread and 14587c478bd9Sstevel@tonic-gate * the log roll (trans_roll) thread. Furthermore the writes to 14597c478bd9Sstevel@tonic-gate * the on-disk log are also not subject to throttling. 14607c478bd9Sstevel@tonic-gate * The fssnap_write_taskq thread's write can block on the throttling 14617c478bd9Sstevel@tonic-gate * semaphore which leads to self-deadlock as this same thread 14627c478bd9Sstevel@tonic-gate * releases the throttling semaphore after completing the IO. 
14637c478bd9Sstevel@tonic-gate * If the trans_roll thread's write is throttled then we can deadlock 14647c478bd9Sstevel@tonic-gate * because the fssnap_taskq_thread which releases the throttling 14657c478bd9Sstevel@tonic-gate * semaphore can block waiting for log space which can only be 14667c478bd9Sstevel@tonic-gate * released by the trans_roll thread. 14677c478bd9Sstevel@tonic-gate */ 14687c478bd9Sstevel@tonic-gate 14697c478bd9Sstevel@tonic-gate throttle_write = !(taskq_member(cowp->cow_taskq, curthread) || 14707c478bd9Sstevel@tonic-gate tsd_get(bypass_snapshot_throttle_key)); 14717c478bd9Sstevel@tonic-gate 14727c478bd9Sstevel@tonic-gate /* 14737c478bd9Sstevel@tonic-gate * Iterate through all chunks covered by this write and perform the 14747c478bd9Sstevel@tonic-gate * copy-aside if necessary. Once all chunks have been safely 14757c478bd9Sstevel@tonic-gate * stowed away, the new data may be written in a single sweep. 14767c478bd9Sstevel@tonic-gate * 14777c478bd9Sstevel@tonic-gate * For each chunk in the range, the following sequence is performed: 14787c478bd9Sstevel@tonic-gate * - Is the chunk a candidate for translation? 14797c478bd9Sstevel@tonic-gate * o If not, then no translation is necessary, continue 14807c478bd9Sstevel@tonic-gate * - If it is a candidate, then does it already have a translation? 14817c478bd9Sstevel@tonic-gate * o If so, then no translation is necessary, continue 14827c478bd9Sstevel@tonic-gate * - If it is a candidate, but does not yet have a translation, 14837c478bd9Sstevel@tonic-gate * then read the old data and schedule an asynchronous taskq 14847c478bd9Sstevel@tonic-gate * to write the old data to the backing file. 14857c478bd9Sstevel@tonic-gate * 14867c478bd9Sstevel@tonic-gate * Once this has been performed over the entire range of chunks, then 14877c478bd9Sstevel@tonic-gate * it is safe to overwrite the data that is there. 
14887c478bd9Sstevel@tonic-gate * 14897c478bd9Sstevel@tonic-gate * Note that no lock is required to check the candidate bitmap because 14907c478bd9Sstevel@tonic-gate * it never changes once the snapshot is created. The reader lock is 14917c478bd9Sstevel@tonic-gate * taken to check the hastrans bitmap since it may change. If it 14927c478bd9Sstevel@tonic-gate * turns out a copy is required, then the lock is upgraded to a 14937c478bd9Sstevel@tonic-gate * writer, and the bitmap is re-checked as it may have changed while 14947c478bd9Sstevel@tonic-gate * the lock was released. Finally, the write lock is held while 14957c478bd9Sstevel@tonic-gate * reading the old data to make sure it is not translated out from 14967c478bd9Sstevel@tonic-gate * under us. 14977c478bd9Sstevel@tonic-gate * 14987c478bd9Sstevel@tonic-gate * This locking mechanism should be sufficient to handle multiple 14997c478bd9Sstevel@tonic-gate * threads writing to overlapping chunks simultaneously. 15007c478bd9Sstevel@tonic-gate */ 15017c478bd9Sstevel@tonic-gate for (cowchunk = startchunk; cowchunk <= endchunk; cowchunk++) { 15027c478bd9Sstevel@tonic-gate /* 15037c478bd9Sstevel@tonic-gate * If the cowchunk is outside of the range of our 15047c478bd9Sstevel@tonic-gate * candidate maps, then simply break out of the 15057c478bd9Sstevel@tonic-gate * loop and pass the I/O through to bdev_strategy. 15067c478bd9Sstevel@tonic-gate * This would occur if the file system has grown 15077c478bd9Sstevel@tonic-gate * larger since the snapshot was taken. 15087c478bd9Sstevel@tonic-gate */ 15097c478bd9Sstevel@tonic-gate if (cowchunk >= (cmap->cmap_bmsize * NBBY)) 15107c478bd9Sstevel@tonic-gate break; 15117c478bd9Sstevel@tonic-gate 15127c478bd9Sstevel@tonic-gate /* 15137c478bd9Sstevel@tonic-gate * If no disk blocks were allocated in this chunk when the 15147c478bd9Sstevel@tonic-gate * snapshot was created then no copy-on-write will be 15157c478bd9Sstevel@tonic-gate * required. 
Since this bitmap is read-only no locks are 15167c478bd9Sstevel@tonic-gate * necessary. 15177c478bd9Sstevel@tonic-gate */ 15187c478bd9Sstevel@tonic-gate if (isclr(cmap->cmap_candidate, cowchunk)) { 15197c478bd9Sstevel@tonic-gate continue; 15207c478bd9Sstevel@tonic-gate } 15217c478bd9Sstevel@tonic-gate 15227c478bd9Sstevel@tonic-gate /* 15237c478bd9Sstevel@tonic-gate * If a translation already exists, the data can be written 15247c478bd9Sstevel@tonic-gate * through since the old data has already been saved off. 15257c478bd9Sstevel@tonic-gate */ 15267c478bd9Sstevel@tonic-gate if (isset(cmap->cmap_hastrans, cowchunk)) { 15277c478bd9Sstevel@tonic-gate continue; 15287c478bd9Sstevel@tonic-gate } 15297c478bd9Sstevel@tonic-gate 15307c478bd9Sstevel@tonic-gate 15317c478bd9Sstevel@tonic-gate /* 15327c478bd9Sstevel@tonic-gate * Throttle translations if there are too many outstanding 15337c478bd9Sstevel@tonic-gate * chunks in memory. The semaphore is sema_v'd by the taskq. 15347c478bd9Sstevel@tonic-gate * 15357c478bd9Sstevel@tonic-gate * You can't keep the sid_rwlock if you would go to sleep. 15367c478bd9Sstevel@tonic-gate * This will result in deadlock when someone tries to delete 15377c478bd9Sstevel@tonic-gate * the snapshot (wants the sid_rwlock as a writer, but can't 15387c478bd9Sstevel@tonic-gate * get it). 
15397c478bd9Sstevel@tonic-gate */ 15407c478bd9Sstevel@tonic-gate if (throttle_write) { 15417c478bd9Sstevel@tonic-gate if (sema_tryp(&cmap->cmap_throttle_sem) == 0) { 15427c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 15437c478bd9Sstevel@tonic-gate atomic_add_32(&cmap->cmap_waiters, 1); 15447c478bd9Sstevel@tonic-gate sema_p(&cmap->cmap_throttle_sem); 15457c478bd9Sstevel@tonic-gate atomic_add_32(&cmap->cmap_waiters, -1); 15467c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 15477c478bd9Sstevel@tonic-gate 15487c478bd9Sstevel@tonic-gate /* 15497c478bd9Sstevel@tonic-gate * Now since we released the sid_rwlock the state may 15507c478bd9Sstevel@tonic-gate * have transitioned underneath us. so check that again. 15517c478bd9Sstevel@tonic-gate */ 15527c478bd9Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 15537c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 15547c478bd9Sstevel@tonic-gate return (ENXIO); 15557c478bd9Sstevel@tonic-gate } 15567c478bd9Sstevel@tonic-gate } 15577c478bd9Sstevel@tonic-gate } 15587c478bd9Sstevel@tonic-gate 15597c478bd9Sstevel@tonic-gate /* 15607c478bd9Sstevel@tonic-gate * Acquire the lock as a writer and check to see if a 15617c478bd9Sstevel@tonic-gate * translation has been added in the meantime. 15627c478bd9Sstevel@tonic-gate */ 15637c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 15647c478bd9Sstevel@tonic-gate if (isset(cmap->cmap_hastrans, cowchunk)) { 15657c478bd9Sstevel@tonic-gate if (throttle_write) 15667c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 15677c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 15687c478bd9Sstevel@tonic-gate continue; /* go to the next chunk */ 15697c478bd9Sstevel@tonic-gate } 15707c478bd9Sstevel@tonic-gate 15717c478bd9Sstevel@tonic-gate /* 15727c478bd9Sstevel@tonic-gate * read a full chunk of data from the requested offset rounded 15737c478bd9Sstevel@tonic-gate * down to the nearest chunk size. 
15747c478bd9Sstevel@tonic-gate */ 15757c478bd9Sstevel@tonic-gate oldbp = getrbuf(KM_SLEEP); 15767c478bd9Sstevel@tonic-gate oldbp->b_lblkno = cowchunktodb(cmap, cowchunk); 15777c478bd9Sstevel@tonic-gate oldbp->b_edev = wbp->b_edev; 15787c478bd9Sstevel@tonic-gate oldbp->b_bcount = cmap->cmap_chunksz; 15797c478bd9Sstevel@tonic-gate oldbp->b_bufsize = cmap->cmap_chunksz; 15807c478bd9Sstevel@tonic-gate oldbp->b_iodone = NULL; 15817c478bd9Sstevel@tonic-gate oldbp->b_proc = NULL; 15827c478bd9Sstevel@tonic-gate oldbp->b_flags = B_READ; 15837c478bd9Sstevel@tonic-gate oldbp->b_un.b_addr = kmem_alloc(cmap->cmap_chunksz, KM_SLEEP); 15847c478bd9Sstevel@tonic-gate 15857c478bd9Sstevel@tonic-gate (void) bdev_strategy(oldbp); 15867c478bd9Sstevel@tonic-gate (void) biowait(oldbp); 15877c478bd9Sstevel@tonic-gate 15887c478bd9Sstevel@tonic-gate /* 15897c478bd9Sstevel@tonic-gate * It's ok to bail in the middle of translating the range 15907c478bd9Sstevel@tonic-gate * because the extra copy-asides will not hurt anything 15917c478bd9Sstevel@tonic-gate * (except by using extra space in the backing store). 
15927c478bd9Sstevel@tonic-gate */ 15937c478bd9Sstevel@tonic-gate if ((error = geterror(oldbp)) != 0) { 15947c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_translate: error reading " 15957c478bd9Sstevel@tonic-gate "old data for snapshot %d, chunk %llu, disk block " 15967c478bd9Sstevel@tonic-gate "%lld, size %lu, error %d.", sidp->sid_snapnumber, 15977c478bd9Sstevel@tonic-gate cowchunk, oldbp->b_lblkno, oldbp->b_bcount, error); 15987c478bd9Sstevel@tonic-gate kmem_free(oldbp->b_un.b_addr, cmap->cmap_chunksz); 15997c478bd9Sstevel@tonic-gate freerbuf(oldbp); 16007c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 16017c478bd9Sstevel@tonic-gate if (throttle_write) 16027c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 16037c478bd9Sstevel@tonic-gate return (error); 16047c478bd9Sstevel@tonic-gate } 16057c478bd9Sstevel@tonic-gate 16067c478bd9Sstevel@tonic-gate /* 16077c478bd9Sstevel@tonic-gate * add the node to the translation table and save a reference 16087c478bd9Sstevel@tonic-gate * to pass to the taskq for writing out to the backing file 16097c478bd9Sstevel@tonic-gate */ 16107c478bd9Sstevel@tonic-gate cmn = transtbl_add(cmap, cowchunk, oldbp->b_un.b_addr); 16117c478bd9Sstevel@tonic-gate freerbuf(oldbp); 16127c478bd9Sstevel@tonic-gate 16137c478bd9Sstevel@tonic-gate /* 16147c478bd9Sstevel@tonic-gate * Add a reference to the snapshot id so the lower level 16157c478bd9Sstevel@tonic-gate * processing (ie. the taskq) can get back to the state 16167c478bd9Sstevel@tonic-gate * information. 
16177c478bd9Sstevel@tonic-gate */ 16187c478bd9Sstevel@tonic-gate cmn->cmn_sid = sidp; 16197c478bd9Sstevel@tonic-gate cmn->release_sem = throttle_write; 16207c478bd9Sstevel@tonic-gate setbit(cmap->cmap_hastrans, cowchunk); 16217c478bd9Sstevel@tonic-gate 16227c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 16237c478bd9Sstevel@tonic-gate 16247c478bd9Sstevel@tonic-gate /* 16257c478bd9Sstevel@tonic-gate * schedule the asynchronous write to the backing file 16267c478bd9Sstevel@tonic-gate */ 16277c478bd9Sstevel@tonic-gate if (cowp->cow_backfile_array != NULL) 16287c478bd9Sstevel@tonic-gate (void) taskq_dispatch(cowp->cow_taskq, 16297c478bd9Sstevel@tonic-gate fssnap_write_taskq, cmn, TQ_SLEEP); 16307c478bd9Sstevel@tonic-gate } 16317c478bd9Sstevel@tonic-gate 16327c478bd9Sstevel@tonic-gate /* 16337c478bd9Sstevel@tonic-gate * Write new data in place of the old data. At this point all of the 16347c478bd9Sstevel@tonic-gate * chunks touched by this write have been copied aside and so the new 16357c478bd9Sstevel@tonic-gate * data can be written out all at once. 16367c478bd9Sstevel@tonic-gate */ 16377c478bd9Sstevel@tonic-gate (void) bdev_strategy(wbp); 16387c478bd9Sstevel@tonic-gate 16397c478bd9Sstevel@tonic-gate return (0); 16407c478bd9Sstevel@tonic-gate } 16417c478bd9Sstevel@tonic-gate 16427c478bd9Sstevel@tonic-gate /* 16437c478bd9Sstevel@tonic-gate * fssnap_write_taskq() - write in-memory translations to the backing file 16447c478bd9Sstevel@tonic-gate * 16457c478bd9Sstevel@tonic-gate * writes in-memory translations to the backing file asynchronously. A 16467c478bd9Sstevel@tonic-gate * task is dispatched each time a new translation is created. The task 16477c478bd9Sstevel@tonic-gate * writes the data to the backing file and removes it from the memory 16487c478bd9Sstevel@tonic-gate * list. The throttling semaphore is released only if the particular 16497c478bd9Sstevel@tonic-gate * translation was throttled in fssnap_translate. 
16507c478bd9Sstevel@tonic-gate */ 16517c478bd9Sstevel@tonic-gate static void 16527c478bd9Sstevel@tonic-gate fssnap_write_taskq(void *arg) 16537c478bd9Sstevel@tonic-gate { 16547c478bd9Sstevel@tonic-gate cow_map_node_t *cmn = (cow_map_node_t *)arg; 16557c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = cmn->cmn_sid; 16567c478bd9Sstevel@tonic-gate cow_info_t *cowp = sidp->sid_cowinfo; 16577c478bd9Sstevel@tonic-gate cow_map_t *cmap = &cowp->cow_map; 16587c478bd9Sstevel@tonic-gate int error; 16597c478bd9Sstevel@tonic-gate int bf_index; 16607c478bd9Sstevel@tonic-gate int release_sem = cmn->release_sem; 16617c478bd9Sstevel@tonic-gate 16627c478bd9Sstevel@tonic-gate /* 16637c478bd9Sstevel@tonic-gate * The sid_rwlock does not need to be held here because the taskqs 16647c478bd9Sstevel@tonic-gate * are destroyed explicitly by fssnap_delete (with the sid_rwlock 16657c478bd9Sstevel@tonic-gate * held as a writer). taskq_destroy() will flush all of the tasks 16667c478bd9Sstevel@tonic-gate * out before fssnap_delete frees up all of the structures. 16677c478bd9Sstevel@tonic-gate */ 16687c478bd9Sstevel@tonic-gate 16697c478bd9Sstevel@tonic-gate /* if the snapshot was disabled from under us, drop the request. 
*/ 16707c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 16717c478bd9Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 16727c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 16737c478bd9Sstevel@tonic-gate if (release_sem) 16747c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 16757c478bd9Sstevel@tonic-gate return; 16767c478bd9Sstevel@tonic-gate } 16777c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 16787c478bd9Sstevel@tonic-gate 16797c478bd9Sstevel@tonic-gate atomic_add_64((uint64_t *)&cmap->cmap_nchunks, 1); 16807c478bd9Sstevel@tonic-gate 16817c478bd9Sstevel@tonic-gate if ((cmap->cmap_maxsize != 0) && 16827c478bd9Sstevel@tonic-gate ((cmap->cmap_nchunks * cmap->cmap_chunksz) > cmap->cmap_maxsize)) { 16837c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_write_taskq: snapshot %d (%s) has " 16847c478bd9Sstevel@tonic-gate "reached the maximum backing file size specified (%llu " 16857c478bd9Sstevel@tonic-gate "bytes) and will be deleted.", sidp->sid_snapnumber, 16867c478bd9Sstevel@tonic-gate (char *)cowp->cow_kstat_mntpt->ks_data, 16877c478bd9Sstevel@tonic-gate cmap->cmap_maxsize); 16887c478bd9Sstevel@tonic-gate if (release_sem) 16897c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 16907c478bd9Sstevel@tonic-gate atomic_or_uint(&sidp->sid_flags, SID_DELETE); 16917c478bd9Sstevel@tonic-gate return; 16927c478bd9Sstevel@tonic-gate } 16937c478bd9Sstevel@tonic-gate 16947c478bd9Sstevel@tonic-gate /* perform the write */ 16957c478bd9Sstevel@tonic-gate bf_index = cmn->cmn_chunk / cmap->cmap_chunksperbf; 16967c478bd9Sstevel@tonic-gate 16977c478bd9Sstevel@tonic-gate if (error = vn_rdwr(UIO_WRITE, (cowp->cow_backfile_array)[bf_index], 16987c478bd9Sstevel@tonic-gate cmn->cmn_buf, cmap->cmap_chunksz, 16997c478bd9Sstevel@tonic-gate (cmn->cmn_chunk % cmap->cmap_chunksperbf) * cmap->cmap_chunksz, 17007c478bd9Sstevel@tonic-gate UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, (ssize_t *)NULL)) { 17017c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 
"fssnap_write_taskq: error writing to " 17027c478bd9Sstevel@tonic-gate "backing file. DELETING SNAPSHOT %d, backing file path " 17037c478bd9Sstevel@tonic-gate "%s, offset %llu bytes, error %d.", sidp->sid_snapnumber, 17047c478bd9Sstevel@tonic-gate (char *)cowp->cow_kstat_bfname->ks_data, 17057c478bd9Sstevel@tonic-gate cmn->cmn_chunk * cmap->cmap_chunksz, error); 17067c478bd9Sstevel@tonic-gate if (release_sem) 17077c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 17087c478bd9Sstevel@tonic-gate atomic_or_uint(&sidp->sid_flags, SID_DELETE); 17097c478bd9Sstevel@tonic-gate return; 17107c478bd9Sstevel@tonic-gate } 17117c478bd9Sstevel@tonic-gate 17127c478bd9Sstevel@tonic-gate /* 17137c478bd9Sstevel@tonic-gate * now remove the node and buffer from memory 17147c478bd9Sstevel@tonic-gate */ 17157c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 17167c478bd9Sstevel@tonic-gate transtbl_delete(cmap, cmn); 17177c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 17187c478bd9Sstevel@tonic-gate 17197c478bd9Sstevel@tonic-gate /* Allow more translations */ 17207c478bd9Sstevel@tonic-gate if (release_sem) 17217c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 17227c478bd9Sstevel@tonic-gate 17237c478bd9Sstevel@tonic-gate } 17247c478bd9Sstevel@tonic-gate 17257c478bd9Sstevel@tonic-gate /* 17267c478bd9Sstevel@tonic-gate * fssnap_create_impl() - called from the file system to create a new snapshot 17277c478bd9Sstevel@tonic-gate * 17287c478bd9Sstevel@tonic-gate * allocates and initializes the structures needed for a new snapshot. 17297c478bd9Sstevel@tonic-gate * This is called by the file system when it receives an ioctl request to 17307c478bd9Sstevel@tonic-gate * create a new snapshot. An unused snapshot identifier is either found 17317c478bd9Sstevel@tonic-gate * or created, and eventually returned as the opaque handle the file 17327c478bd9Sstevel@tonic-gate * system will use to identify this snapshot. 
The snapshot number 17337c478bd9Sstevel@tonic-gate * associated with the snapshot identifier is the same as the minor 17347c478bd9Sstevel@tonic-gate * number for the snapshot device that is used to access that snapshot. 17357c478bd9Sstevel@tonic-gate * 17367c478bd9Sstevel@tonic-gate * The snapshot can not be used until the candidate bitmap is populated 17377c478bd9Sstevel@tonic-gate * by the file system (see fssnap_set_candidate_impl()), and the file 17387c478bd9Sstevel@tonic-gate * system finishes the setup process by calling fssnap_create_done(). 17397c478bd9Sstevel@tonic-gate * Nearly all of the snapshot locks are held for the duration of the 17407c478bd9Sstevel@tonic-gate * create, and are not released until fssnap_create_done is called(). 17417c478bd9Sstevel@tonic-gate */ 17427c478bd9Sstevel@tonic-gate static void * 17437c478bd9Sstevel@tonic-gate fssnap_create_impl(chunknumber_t nchunks, uint_t chunksz, u_offset_t maxsize, 17447c478bd9Sstevel@tonic-gate struct vnode *fsvp, int backfilecount, struct vnode **bfvpp, char *backpath, 17457c478bd9Sstevel@tonic-gate u_offset_t max_backfile_size) 17467c478bd9Sstevel@tonic-gate { 17477c478bd9Sstevel@tonic-gate refstr_t *mountpoint; 17487c478bd9Sstevel@tonic-gate char taskqname[50]; 17497c478bd9Sstevel@tonic-gate struct cow_info *cowp; 17507c478bd9Sstevel@tonic-gate struct cow_map *cmap; 17517c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 17527c478bd9Sstevel@tonic-gate int lastsnap; 17537c478bd9Sstevel@tonic-gate 17547c478bd9Sstevel@tonic-gate /* 17557c478bd9Sstevel@tonic-gate * Sanity check the parameters we care about 17567c478bd9Sstevel@tonic-gate * (we don't care about the informational parameters) 17577c478bd9Sstevel@tonic-gate */ 17587c478bd9Sstevel@tonic-gate if ((nchunks == 0) || 17597c478bd9Sstevel@tonic-gate ((chunksz % DEV_BSIZE) != 0) || 17607c478bd9Sstevel@tonic-gate (bfvpp == NULL)) { 17617c478bd9Sstevel@tonic-gate return (NULL); 17627c478bd9Sstevel@tonic-gate } 17637c478bd9Sstevel@tonic-gate 
17647c478bd9Sstevel@tonic-gate /* 17657c478bd9Sstevel@tonic-gate * Look for unused snapshot identifiers. Snapshot ids are never 17667c478bd9Sstevel@tonic-gate * freed, but deleted snapshot ids will be recycled as needed. 17677c478bd9Sstevel@tonic-gate */ 17687c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 17697c478bd9Sstevel@tonic-gate 17707c478bd9Sstevel@tonic-gate findagain: 17717c478bd9Sstevel@tonic-gate lastsnap = 0; 17727c478bd9Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidp->sid_next) { 17737c478bd9Sstevel@tonic-gate if (sidp->sid_snapnumber > lastsnap) 17747c478bd9Sstevel@tonic-gate lastsnap = sidp->sid_snapnumber; 17757c478bd9Sstevel@tonic-gate 17767c478bd9Sstevel@tonic-gate /* 17777c478bd9Sstevel@tonic-gate * The sid_rwlock is taken as a reader initially so that 17787c478bd9Sstevel@tonic-gate * activity on each snapshot is not stalled while searching 17797c478bd9Sstevel@tonic-gate * for a free snapshot id. 17807c478bd9Sstevel@tonic-gate */ 17817c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 17827c478bd9Sstevel@tonic-gate 17837c478bd9Sstevel@tonic-gate /* 17847c478bd9Sstevel@tonic-gate * If the snapshot has been deleted and nobody is using the 17857c478bd9Sstevel@tonic-gate * snapshot device than we can reuse this snapshot_id. If 17867c478bd9Sstevel@tonic-gate * the snapshot is marked to be deleted (SID_DELETE), then 17877c478bd9Sstevel@tonic-gate * it hasn't been deleted yet so don't reuse it. 17887c478bd9Sstevel@tonic-gate */ 17897c478bd9Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) 17907c478bd9Sstevel@tonic-gate break; /* This spot is unused, so take it */ 17917c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 17927c478bd9Sstevel@tonic-gate } 17937c478bd9Sstevel@tonic-gate 17947c478bd9Sstevel@tonic-gate /* 17957c478bd9Sstevel@tonic-gate * add a new snapshot identifier if there are no deleted 17967c478bd9Sstevel@tonic-gate * entries. 
Since it doesn't matter what order the entries 17977c478bd9Sstevel@tonic-gate * are in we can just add it to the beginning of the list. 17987c478bd9Sstevel@tonic-gate */ 17997c478bd9Sstevel@tonic-gate if (sidp) { 18007c478bd9Sstevel@tonic-gate if (rw_tryupgrade(&sidp->sid_rwlock) == 0) { 18017c478bd9Sstevel@tonic-gate /* someone else grabbed it as a writer, try again */ 18027c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 18037c478bd9Sstevel@tonic-gate goto findagain; 18047c478bd9Sstevel@tonic-gate } 18057c478bd9Sstevel@tonic-gate } else { 18067c478bd9Sstevel@tonic-gate /* Create a new node if we didn't find an unused one */ 18077c478bd9Sstevel@tonic-gate sidp = kmem_alloc(sizeof (struct snapshot_id), KM_SLEEP); 18087c478bd9Sstevel@tonic-gate rw_init(&sidp->sid_rwlock, NULL, RW_DEFAULT, NULL); 18097c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 18107c478bd9Sstevel@tonic-gate sidp->sid_snapnumber = (snapshot == NULL) ? 0 : lastsnap + 1; 18117c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = NULL; 18127c478bd9Sstevel@tonic-gate sidp->sid_flags = 0; 18137c478bd9Sstevel@tonic-gate sidp->sid_next = snapshot; 18147c478bd9Sstevel@tonic-gate snapshot = sidp; 18157c478bd9Sstevel@tonic-gate } 18167c478bd9Sstevel@tonic-gate 18177c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 18187c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_cowinfo == NULL); 18197c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_snapnumber <= (lastsnap + 1)); 18207c478bd9Sstevel@tonic-gate 18217c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_CREATING; 18227c478bd9Sstevel@tonic-gate /* The root vnode is held until snap_delete_impl() is called */ 18237c478bd9Sstevel@tonic-gate VN_HOLD(fsvp); 18247c478bd9Sstevel@tonic-gate sidp->sid_fvp = fsvp; 18257c478bd9Sstevel@tonic-gate num_snapshots++; 18267c478bd9Sstevel@tonic-gate 18277c478bd9Sstevel@tonic-gate /* allocate and initialize structures */ 18287c478bd9Sstevel@tonic-gate 18297c478bd9Sstevel@tonic-gate cowp = kmem_zalloc(sizeof 
(struct cow_info), KM_SLEEP); 18307c478bd9Sstevel@tonic-gate 18317c478bd9Sstevel@tonic-gate cowp->cow_backfile_array = bfvpp; 18327c478bd9Sstevel@tonic-gate cowp->cow_backcount = backfilecount; 18337c478bd9Sstevel@tonic-gate cowp->cow_backfile_sz = max_backfile_size; 18347c478bd9Sstevel@tonic-gate 18357c478bd9Sstevel@tonic-gate /* 18367c478bd9Sstevel@tonic-gate * Initialize task queues for this snapshot. Only a small number 18377c478bd9Sstevel@tonic-gate * of threads are required because they will be serialized on the 18387c478bd9Sstevel@tonic-gate * backing file's reader/writer lock anyway. 18397c478bd9Sstevel@tonic-gate */ 18407c478bd9Sstevel@tonic-gate (void) snprintf(taskqname, sizeof (taskqname), "%s_taskq_%d", snapname, 18417c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 18427c478bd9Sstevel@tonic-gate cowp->cow_taskq = taskq_create(taskqname, fssnap_taskq_nthreads, 18437c478bd9Sstevel@tonic-gate minclsyspri, 1, fssnap_taskq_maxtasks, 0); 18447c478bd9Sstevel@tonic-gate 18457c478bd9Sstevel@tonic-gate /* don't allow tasks to start until after everything is ready */ 18467c478bd9Sstevel@tonic-gate taskq_suspend(cowp->cow_taskq); 18477c478bd9Sstevel@tonic-gate 18487c478bd9Sstevel@tonic-gate /* initialize translation table */ 18497c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 18507c478bd9Sstevel@tonic-gate rw_init(&cmap->cmap_rwlock, NULL, RW_DEFAULT, NULL); 18517c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 18527c478bd9Sstevel@tonic-gate 18537c478bd9Sstevel@tonic-gate sema_init(&cmap->cmap_throttle_sem, fssnap_max_mem_chunks, NULL, 18547c478bd9Sstevel@tonic-gate SEMA_DEFAULT, NULL); 18557c478bd9Sstevel@tonic-gate 18567c478bd9Sstevel@tonic-gate cmap->cmap_chunksz = chunksz; 18577c478bd9Sstevel@tonic-gate cmap->cmap_maxsize = maxsize; 18587c478bd9Sstevel@tonic-gate cmap->cmap_chunksperbf = max_backfile_size / chunksz; 18597c478bd9Sstevel@tonic-gate 18607c478bd9Sstevel@tonic-gate /* 18617c478bd9Sstevel@tonic-gate * allocate one bit per chunk 
for the bitmaps, round up 18627c478bd9Sstevel@tonic-gate */ 18637c478bd9Sstevel@tonic-gate cmap->cmap_bmsize = (nchunks + (NBBY - 1)) / NBBY; 18647c478bd9Sstevel@tonic-gate cmap->cmap_hastrans = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP); 18657c478bd9Sstevel@tonic-gate cmap->cmap_candidate = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP); 18667c478bd9Sstevel@tonic-gate 18677c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = cowp; 18687c478bd9Sstevel@tonic-gate 18697c478bd9Sstevel@tonic-gate /* initialize kstats for this snapshot */ 18707c478bd9Sstevel@tonic-gate mountpoint = vfs_getmntpoint(fsvp->v_vfsp); 18717c478bd9Sstevel@tonic-gate fssnap_create_kstats(sidp, sidp->sid_snapnumber, 18727c478bd9Sstevel@tonic-gate refstr_value(mountpoint), backpath); 18737c478bd9Sstevel@tonic-gate refstr_rele(mountpoint); 18747c478bd9Sstevel@tonic-gate 18757c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 18767c478bd9Sstevel@tonic-gate 18777c478bd9Sstevel@tonic-gate /* 18787c478bd9Sstevel@tonic-gate * return with snapshot id rwlock held as a writer until 18797c478bd9Sstevel@tonic-gate * fssnap_create_done is called 18807c478bd9Sstevel@tonic-gate */ 18817c478bd9Sstevel@tonic-gate return (sidp); 18827c478bd9Sstevel@tonic-gate } 18837c478bd9Sstevel@tonic-gate 18847c478bd9Sstevel@tonic-gate /* 18857c478bd9Sstevel@tonic-gate * fssnap_set_candidate_impl() - mark a chunk as a candidate for copy-on-write 18867c478bd9Sstevel@tonic-gate * 18877c478bd9Sstevel@tonic-gate * sets a bit in the candidate bitmap that indicates that a chunk is a 18887c478bd9Sstevel@tonic-gate * candidate for copy-on-write. Typically, chunks that are allocated on 18897c478bd9Sstevel@tonic-gate * the file system at the time the snapshot is taken are candidates, 18907c478bd9Sstevel@tonic-gate * while chunks that have no allocated data do not need to be copied. 18917c478bd9Sstevel@tonic-gate * Chunks containing metadata must be marked as candidates as well. 
18927c478bd9Sstevel@tonic-gate */ 18937c478bd9Sstevel@tonic-gate static void 18947c478bd9Sstevel@tonic-gate fssnap_set_candidate_impl(void *snapshot_id, chunknumber_t chunknumber) 18957c478bd9Sstevel@tonic-gate { 18967c478bd9Sstevel@tonic-gate struct snapshot_id *sid = snapshot_id; 18977c478bd9Sstevel@tonic-gate struct cow_info *cowp = sid->sid_cowinfo; 18987c478bd9Sstevel@tonic-gate struct cow_map *cmap = &cowp->cow_map; 18997c478bd9Sstevel@tonic-gate 19007c478bd9Sstevel@tonic-gate /* simple bitmap operation for now */ 19017c478bd9Sstevel@tonic-gate ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY)); 19027c478bd9Sstevel@tonic-gate setbit(cmap->cmap_candidate, chunknumber); 19037c478bd9Sstevel@tonic-gate } 19047c478bd9Sstevel@tonic-gate 19057c478bd9Sstevel@tonic-gate /* 19067c478bd9Sstevel@tonic-gate * fssnap_is_candidate_impl() - check whether a chunk is a candidate 19077c478bd9Sstevel@tonic-gate * 19087c478bd9Sstevel@tonic-gate * returns 0 if the chunk is not a candidate and 1 if the chunk is a 19097c478bd9Sstevel@tonic-gate * candidate. This can be used by the file system to change behavior for 19107c478bd9Sstevel@tonic-gate * chunks that might induce a copy-on-write. The offset is specified in 19117c478bd9Sstevel@tonic-gate * bytes since the chunk size may not be known by the file system. 
19127c478bd9Sstevel@tonic-gate */ 19137c478bd9Sstevel@tonic-gate static int 19147c478bd9Sstevel@tonic-gate fssnap_is_candidate_impl(void *snapshot_id, u_offset_t off) 19157c478bd9Sstevel@tonic-gate { 19167c478bd9Sstevel@tonic-gate struct snapshot_id *sid = snapshot_id; 19177c478bd9Sstevel@tonic-gate struct cow_info *cowp = sid->sid_cowinfo; 19187c478bd9Sstevel@tonic-gate struct cow_map *cmap = &cowp->cow_map; 19197c478bd9Sstevel@tonic-gate ulong_t chunknumber = off / cmap->cmap_chunksz; 19207c478bd9Sstevel@tonic-gate 19217c478bd9Sstevel@tonic-gate /* simple bitmap operation for now */ 19227c478bd9Sstevel@tonic-gate ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY)); 19237c478bd9Sstevel@tonic-gate return (isset(cmap->cmap_candidate, chunknumber)); 19247c478bd9Sstevel@tonic-gate } 19257c478bd9Sstevel@tonic-gate 19267c478bd9Sstevel@tonic-gate /* 19277c478bd9Sstevel@tonic-gate * fssnap_create_done_impl() - complete the snapshot setup process 19287c478bd9Sstevel@tonic-gate * 19297c478bd9Sstevel@tonic-gate * called when the file system is done populating the candidate bitmap 19307c478bd9Sstevel@tonic-gate * and it is ready to start using the snapshot. This routine releases 19317c478bd9Sstevel@tonic-gate * the snapshot locks, allows taskq tasks to start processing, and 19327c478bd9Sstevel@tonic-gate * creates the device minor nodes associated with the snapshot. 
19337c478bd9Sstevel@tonic-gate */ 19347c478bd9Sstevel@tonic-gate static int 19357c478bd9Sstevel@tonic-gate fssnap_create_done_impl(void *snapshot_id) 19367c478bd9Sstevel@tonic-gate { 19377c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp = snapshot_id; 19387c478bd9Sstevel@tonic-gate struct cow_info *cowp; 19397c478bd9Sstevel@tonic-gate struct cow_map *cmap; 19407c478bd9Sstevel@tonic-gate int snapnumber = -1; 19417c478bd9Sstevel@tonic-gate char name[20]; 19427c478bd9Sstevel@tonic-gate 19437c478bd9Sstevel@tonic-gate /* sid rwlock and cmap rwlock should be taken from fssnap_create */ 19447c478bd9Sstevel@tonic-gate ASSERT(sidp); 19457c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 19467c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_cowinfo); 19477c478bd9Sstevel@tonic-gate 19487c478bd9Sstevel@tonic-gate cowp = sidp->sid_cowinfo; 19497c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 19507c478bd9Sstevel@tonic-gate 19517c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 19527c478bd9Sstevel@tonic-gate 19537c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_CREATING | SID_DISABLED); 19547c478bd9Sstevel@tonic-gate snapnumber = sidp->sid_snapnumber; 19557c478bd9Sstevel@tonic-gate 19567c478bd9Sstevel@tonic-gate /* allocate state structure and find new snapshot id */ 19577c478bd9Sstevel@tonic-gate if (ddi_soft_state_zalloc(statep, snapnumber) != DDI_SUCCESS) { 19587c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 19597c478bd9Sstevel@tonic-gate "snap_ioctl: create: could not allocate " 19607c478bd9Sstevel@tonic-gate "state for snapshot %d.", snapnumber); 19617c478bd9Sstevel@tonic-gate snapnumber = -1; 19627c478bd9Sstevel@tonic-gate goto out; 19637c478bd9Sstevel@tonic-gate } 19647c478bd9Sstevel@tonic-gate 19657c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, snapnumber); 19667c478bd9Sstevel@tonic-gate *sidpp = sidp; 19677c478bd9Sstevel@tonic-gate 19687c478bd9Sstevel@tonic-gate /* create minor node based on snapshot number */ 
19697c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 19707c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", snapnumber); 19717c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(fssnap_dip, name, S_IFBLK, 19727c478bd9Sstevel@tonic-gate snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) { 19737c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: could not create " 19747c478bd9Sstevel@tonic-gate "block minor node for snapshot %d.", snapnumber); 19757c478bd9Sstevel@tonic-gate snapnumber = -1; 19767c478bd9Sstevel@tonic-gate goto out; 19777c478bd9Sstevel@tonic-gate } 19787c478bd9Sstevel@tonic-gate 19797c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", snapnumber); 19807c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(fssnap_dip, name, S_IFCHR, 19817c478bd9Sstevel@tonic-gate snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) { 19827c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: could not create " 19837c478bd9Sstevel@tonic-gate "character minor node for snapshot %d.", snapnumber); 19847c478bd9Sstevel@tonic-gate snapnumber = -1; 19857c478bd9Sstevel@tonic-gate } 19867c478bd9Sstevel@tonic-gate 19877c478bd9Sstevel@tonic-gate out: 19887c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 19897c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 19907c478bd9Sstevel@tonic-gate 19917c478bd9Sstevel@tonic-gate /* let the taskq threads start processing */ 19927c478bd9Sstevel@tonic-gate taskq_resume(cowp->cow_taskq); 19937c478bd9Sstevel@tonic-gate 19947c478bd9Sstevel@tonic-gate return (snapnumber); 19957c478bd9Sstevel@tonic-gate } 19967c478bd9Sstevel@tonic-gate 19977c478bd9Sstevel@tonic-gate /* 19987c478bd9Sstevel@tonic-gate * fssnap_delete_impl() - delete a snapshot 19997c478bd9Sstevel@tonic-gate * 20007c478bd9Sstevel@tonic-gate * used when a snapshot is no longer needed. This is called by the file 20017c478bd9Sstevel@tonic-gate * system when it receives an ioctl request to delete a snapshot. 
It is 20027c478bd9Sstevel@tonic-gate * also called internally when error conditions such as disk full, errors 20037c478bd9Sstevel@tonic-gate * writing to the backing file, or backing file maxsize exceeded occur. 20047c478bd9Sstevel@tonic-gate * If the snapshot device is busy when the delete request is received, 20057c478bd9Sstevel@tonic-gate * all state will be deleted except for the soft state and device files 20067c478bd9Sstevel@tonic-gate * associated with the snapshot; they will be deleted when the snapshot 20077c478bd9Sstevel@tonic-gate * device is closed. 20087c478bd9Sstevel@tonic-gate * 20097c478bd9Sstevel@tonic-gate * NOTE this function takes a POINTER TO A POINTER to the snapshot id, 20107c478bd9Sstevel@tonic-gate * and expects to be able to set the handle held by the file system to 20117c478bd9Sstevel@tonic-gate * NULL. This depends on the file system checking that variable for NULL 20127c478bd9Sstevel@tonic-gate * before calling fssnap_strategy(). 20137c478bd9Sstevel@tonic-gate */ 20147c478bd9Sstevel@tonic-gate static int 20157c478bd9Sstevel@tonic-gate fssnap_delete_impl(void *snapshot_id) 20167c478bd9Sstevel@tonic-gate { 20177c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp = (struct snapshot_id **)snapshot_id; 20187c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 20197c478bd9Sstevel@tonic-gate struct snapshot_id **statesidpp; 20207c478bd9Sstevel@tonic-gate struct cow_info *cowp; 20217c478bd9Sstevel@tonic-gate struct cow_map *cmap; 20227c478bd9Sstevel@tonic-gate char name[20]; 20237c478bd9Sstevel@tonic-gate int snapnumber = -1; 20247c478bd9Sstevel@tonic-gate vnode_t **vpp; 20257c478bd9Sstevel@tonic-gate 20267c478bd9Sstevel@tonic-gate /* 20277c478bd9Sstevel@tonic-gate * sidp is guaranteed to be valid if sidpp is valid because 20287c478bd9Sstevel@tonic-gate * the snapshot list is append-only. 
20297c478bd9Sstevel@tonic-gate */ 20307c478bd9Sstevel@tonic-gate if (sidpp == NULL) { 20317c478bd9Sstevel@tonic-gate return (-1); 20327c478bd9Sstevel@tonic-gate } 20337c478bd9Sstevel@tonic-gate 20347c478bd9Sstevel@tonic-gate sidp = *sidpp; 20357c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 20367c478bd9Sstevel@tonic-gate 20377c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 20387c478bd9Sstevel@tonic-gate 20397c478bd9Sstevel@tonic-gate /* 20407c478bd9Sstevel@tonic-gate * double check that the snapshot is still valid for THIS file system 20417c478bd9Sstevel@tonic-gate */ 20427c478bd9Sstevel@tonic-gate if (*sidpp == NULL) { 20437c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 20447c478bd9Sstevel@tonic-gate return (-1); 20457c478bd9Sstevel@tonic-gate } 20467c478bd9Sstevel@tonic-gate 20477c478bd9Sstevel@tonic-gate /* 20487c478bd9Sstevel@tonic-gate * Now we know the snapshot is still valid and will not go away 20497c478bd9Sstevel@tonic-gate * because we have the write lock. Once the state is transitioned 20507c478bd9Sstevel@tonic-gate * to "disabling", the sid_rwlock can be released. Any pending I/O 20517c478bd9Sstevel@tonic-gate * waiting for the lock as a reader will check for this state and 20527c478bd9Sstevel@tonic-gate * abort without touching data that may be getting freed. 20537c478bd9Sstevel@tonic-gate */ 20547c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_DISABLING; 20557c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_DELETE) { 20567c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "Snapshot %d automatically deleted.", 20577c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 20587c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_DELETE); 20597c478bd9Sstevel@tonic-gate } 20607c478bd9Sstevel@tonic-gate 20617c478bd9Sstevel@tonic-gate 20627c478bd9Sstevel@tonic-gate /* 20637c478bd9Sstevel@tonic-gate * This is pointing into file system specific data! 
The assumption is 20647c478bd9Sstevel@tonic-gate * that fssnap_strategy() gets called from the file system based on 20657c478bd9Sstevel@tonic-gate * whether this reference to the snapshot_id is NULL or not. So 20667c478bd9Sstevel@tonic-gate * setting this to NULL should disable snapshots for the file system. 20677c478bd9Sstevel@tonic-gate */ 20687c478bd9Sstevel@tonic-gate *sidpp = NULL; 20697c478bd9Sstevel@tonic-gate 20707c478bd9Sstevel@tonic-gate /* remove cowinfo */ 20717c478bd9Sstevel@tonic-gate cowp = sidp->sid_cowinfo; 20727c478bd9Sstevel@tonic-gate if (cowp == NULL) { 20737c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 20747c478bd9Sstevel@tonic-gate return (-1); 20757c478bd9Sstevel@tonic-gate } 20767c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 20777c478bd9Sstevel@tonic-gate 20787c478bd9Sstevel@tonic-gate /* destroy task queues first so they don't reference freed data. */ 20797c478bd9Sstevel@tonic-gate if (cowp->cow_taskq) { 20807c478bd9Sstevel@tonic-gate taskq_destroy(cowp->cow_taskq); 20817c478bd9Sstevel@tonic-gate cowp->cow_taskq = NULL; 20827c478bd9Sstevel@tonic-gate } 20837c478bd9Sstevel@tonic-gate 20847c478bd9Sstevel@tonic-gate if (cowp->cow_backfile_array != NULL) { 20857c478bd9Sstevel@tonic-gate for (vpp = cowp->cow_backfile_array; *vpp; vpp++) 20867c478bd9Sstevel@tonic-gate VN_RELE(*vpp); 20877c478bd9Sstevel@tonic-gate kmem_free(cowp->cow_backfile_array, 20887c478bd9Sstevel@tonic-gate (cowp->cow_backcount + 1) * sizeof (vnode_t *)); 20897c478bd9Sstevel@tonic-gate cowp->cow_backfile_array = NULL; 20907c478bd9Sstevel@tonic-gate } 20917c478bd9Sstevel@tonic-gate 20927c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = NULL; 20937c478bd9Sstevel@tonic-gate 20947c478bd9Sstevel@tonic-gate /* remove cmap */ 20957c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 20967c478bd9Sstevel@tonic-gate ASSERT(cmap); 20977c478bd9Sstevel@tonic-gate 20987c478bd9Sstevel@tonic-gate if (cmap->cmap_candidate) 20997c478bd9Sstevel@tonic-gate 
kmem_free(cmap->cmap_candidate, cmap->cmap_bmsize); 21007c478bd9Sstevel@tonic-gate 21017c478bd9Sstevel@tonic-gate if (cmap->cmap_hastrans) 21027c478bd9Sstevel@tonic-gate kmem_free(cmap->cmap_hastrans, cmap->cmap_bmsize); 21037c478bd9Sstevel@tonic-gate 21047c478bd9Sstevel@tonic-gate if (cmap->cmap_table) 21057c478bd9Sstevel@tonic-gate transtbl_free(&cowp->cow_map); 21067c478bd9Sstevel@tonic-gate 21077c478bd9Sstevel@tonic-gate rw_destroy(&cmap->cmap_rwlock); 21087c478bd9Sstevel@tonic-gate 21097c478bd9Sstevel@tonic-gate while (cmap->cmap_waiters) { 21107c478bd9Sstevel@tonic-gate sema_p(&cmap->cmap_throttle_sem); 21117c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 21127c478bd9Sstevel@tonic-gate } 21137c478bd9Sstevel@tonic-gate sema_destroy(&cmap->cmap_throttle_sem); 21147c478bd9Sstevel@tonic-gate 21157c478bd9Sstevel@tonic-gate /* remove kstats */ 21167c478bd9Sstevel@tonic-gate fssnap_delete_kstats(cowp); 21177c478bd9Sstevel@tonic-gate 21187c478bd9Sstevel@tonic-gate kmem_free(cowp, sizeof (struct cow_info)); 21197c478bd9Sstevel@tonic-gate 21207c478bd9Sstevel@tonic-gate statesidpp = ddi_get_soft_state(statep, sidp->sid_snapnumber); 21217c478bd9Sstevel@tonic-gate if (statesidpp == NULL || *statesidpp == NULL) { 21227c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 21237c478bd9Sstevel@tonic-gate "fssnap_delete_impl: could not find state for snapshot %d.", 21247c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 21257c478bd9Sstevel@tonic-gate } 21267c478bd9Sstevel@tonic-gate ASSERT(*statesidpp == sidp); 21277c478bd9Sstevel@tonic-gate 21287c478bd9Sstevel@tonic-gate /* 21297c478bd9Sstevel@tonic-gate * Leave the node in the list marked DISABLED so it can be reused 21307c478bd9Sstevel@tonic-gate * and avoid many race conditions. Return the snapshot number 21317c478bd9Sstevel@tonic-gate * that was deleted. 
21327c478bd9Sstevel@tonic-gate */ 21337c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 21347c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 21357c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_DISABLING); 21367c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_DISABLED; 21377c478bd9Sstevel@tonic-gate VN_RELE(sidp->sid_fvp); 21387c478bd9Sstevel@tonic-gate sidp->sid_fvp = NULL; 21397c478bd9Sstevel@tonic-gate snapnumber = sidp->sid_snapnumber; 21407c478bd9Sstevel@tonic-gate 21417c478bd9Sstevel@tonic-gate /* 21427c478bd9Sstevel@tonic-gate * If the snapshot is not busy, free the device info now. Otherwise 21437c478bd9Sstevel@tonic-gate * the device nodes are freed in snap_close() when the device is 21447c478bd9Sstevel@tonic-gate * closed. The sid will not be reused until the device is not busy. 21457c478bd9Sstevel@tonic-gate */ 21467c478bd9Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) { 21477c478bd9Sstevel@tonic-gate /* remove the device nodes */ 21487c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 21497c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", 21507c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 21517c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 21527c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", 21537c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 21547c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 21557c478bd9Sstevel@tonic-gate 21567c478bd9Sstevel@tonic-gate /* delete the state structure */ 21577c478bd9Sstevel@tonic-gate ddi_soft_state_free(statep, sidp->sid_snapnumber); 21587c478bd9Sstevel@tonic-gate num_snapshots--; 21597c478bd9Sstevel@tonic-gate } 21607c478bd9Sstevel@tonic-gate 21617c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 21627c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 21637c478bd9Sstevel@tonic-gate 21647c478bd9Sstevel@tonic-gate return (snapnumber); 21657c478bd9Sstevel@tonic-gate } 21667c478bd9Sstevel@tonic-gate 
21677c478bd9Sstevel@tonic-gate /* 21687c478bd9Sstevel@tonic-gate * fssnap_create_kstats() - allocate and initialize snapshot kstats 21697c478bd9Sstevel@tonic-gate * 21707c478bd9Sstevel@tonic-gate */ 21717c478bd9Sstevel@tonic-gate static void 21727c478bd9Sstevel@tonic-gate fssnap_create_kstats(snapshot_id_t *sidp, int snapnum, 21737c478bd9Sstevel@tonic-gate const char *mountpoint, const char *backfilename) 21747c478bd9Sstevel@tonic-gate { 21757c478bd9Sstevel@tonic-gate kstat_t *num, *mntpoint, *bfname; 21767c478bd9Sstevel@tonic-gate kstat_named_t *hw; 21777c478bd9Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 21787c478bd9Sstevel@tonic-gate struct cow_kstat_num *stats; 21797c478bd9Sstevel@tonic-gate 21807c478bd9Sstevel@tonic-gate /* update the high water mark */ 21817c478bd9Sstevel@tonic-gate if (fssnap_highwater_kstat == NULL) { 21827c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to lookup " 21837c478bd9Sstevel@tonic-gate "high water mark kstat."); 21847c478bd9Sstevel@tonic-gate return; 21857c478bd9Sstevel@tonic-gate } 21867c478bd9Sstevel@tonic-gate 21877c478bd9Sstevel@tonic-gate hw = (kstat_named_t *)fssnap_highwater_kstat->ks_data; 21887c478bd9Sstevel@tonic-gate if (hw->value.ui32 < snapnum) 21897c478bd9Sstevel@tonic-gate hw->value.ui32 = snapnum; 21907c478bd9Sstevel@tonic-gate 21917c478bd9Sstevel@tonic-gate /* initialize the mount point kstat */ 21927c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_MNTPT); 21937c478bd9Sstevel@tonic-gate 21947c478bd9Sstevel@tonic-gate if (mountpoint != NULL) { 21957c478bd9Sstevel@tonic-gate mntpoint = kstat_create(snapname, snapnum, FSSNAP_KSTAT_MNTPT, 21967c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_RAW, strlen(mountpoint) + 1, 0); 21977c478bd9Sstevel@tonic-gate if (mntpoint == NULL) { 21987c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 21997c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to " 22007c478bd9Sstevel@tonic-gate 
"create mount point kstat"); 22017c478bd9Sstevel@tonic-gate } else { 22027c478bd9Sstevel@tonic-gate (void) strncpy(mntpoint->ks_data, mountpoint, 22037c478bd9Sstevel@tonic-gate strlen(mountpoint)); 22047c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = mntpoint; 22057c478bd9Sstevel@tonic-gate kstat_install(mntpoint); 22067c478bd9Sstevel@tonic-gate } 22077c478bd9Sstevel@tonic-gate } else { 22087c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 22097c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: mount point not " 22107c478bd9Sstevel@tonic-gate "specified."); 22117c478bd9Sstevel@tonic-gate } 22127c478bd9Sstevel@tonic-gate 22137c478bd9Sstevel@tonic-gate /* initialize the backing file kstat */ 22147c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_BFNAME); 22157c478bd9Sstevel@tonic-gate 22167c478bd9Sstevel@tonic-gate if (backfilename == NULL) { 22177c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 22187c478bd9Sstevel@tonic-gate } else { 22197c478bd9Sstevel@tonic-gate bfname = kstat_create(snapname, snapnum, FSSNAP_KSTAT_BFNAME, 22207c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_RAW, strlen(backfilename) + 1, 0); 22217c478bd9Sstevel@tonic-gate if (bfname != NULL) { 22227c478bd9Sstevel@tonic-gate (void) strncpy(bfname->ks_data, backfilename, 22237c478bd9Sstevel@tonic-gate strlen(backfilename)); 22247c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = bfname; 22257c478bd9Sstevel@tonic-gate kstat_install(bfname); 22267c478bd9Sstevel@tonic-gate } else { 22277c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 22287c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to " 22297c478bd9Sstevel@tonic-gate "create backing file name kstat"); 22307c478bd9Sstevel@tonic-gate } 22317c478bd9Sstevel@tonic-gate } 22327c478bd9Sstevel@tonic-gate 22337c478bd9Sstevel@tonic-gate /* initialize numeric kstats */ 22347c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_NUM); 
22357c478bd9Sstevel@tonic-gate 22367c478bd9Sstevel@tonic-gate num = kstat_create(snapname, snapnum, FSSNAP_KSTAT_NUM, 22377c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_NAMED, 22387c478bd9Sstevel@tonic-gate sizeof (struct cow_kstat_num) / sizeof (kstat_named_t), 22397c478bd9Sstevel@tonic-gate 0); 22407c478bd9Sstevel@tonic-gate if (num == NULL) { 22417c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to create " 22427c478bd9Sstevel@tonic-gate "numeric kstats"); 22437c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = NULL; 22447c478bd9Sstevel@tonic-gate return; 22457c478bd9Sstevel@tonic-gate } 22467c478bd9Sstevel@tonic-gate 22477c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = num; 22487c478bd9Sstevel@tonic-gate stats = num->ks_data; 22497c478bd9Sstevel@tonic-gate num->ks_update = fssnap_update_kstat_num; 22507c478bd9Sstevel@tonic-gate num->ks_private = sidp; 22517c478bd9Sstevel@tonic-gate 22527c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_state, FSSNAP_KSTAT_NUM_STATE, 22537c478bd9Sstevel@tonic-gate KSTAT_DATA_INT32); 22547c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_bfsize, FSSNAP_KSTAT_NUM_BFSIZE, 22557c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT64); 22567c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_maxsize, FSSNAP_KSTAT_NUM_MAXSIZE, 22577c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT64); 22587c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_createtime, FSSNAP_KSTAT_NUM_CREATETIME, 22597c478bd9Sstevel@tonic-gate KSTAT_DATA_LONG); 22607c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_chunksize, FSSNAP_KSTAT_NUM_CHUNKSIZE, 22617c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT32); 22627c478bd9Sstevel@tonic-gate 22637c478bd9Sstevel@tonic-gate /* initialize the static kstats */ 22647c478bd9Sstevel@tonic-gate stats->ckn_chunksize.value.ui32 = cowp->cow_map.cmap_chunksz; 22657c478bd9Sstevel@tonic-gate stats->ckn_maxsize.value.ui64 = cowp->cow_map.cmap_maxsize; 22667c478bd9Sstevel@tonic-gate stats->ckn_createtime.value.l = 
gethrestime_sec(); 22677c478bd9Sstevel@tonic-gate 22687c478bd9Sstevel@tonic-gate kstat_install(num); 22697c478bd9Sstevel@tonic-gate } 22707c478bd9Sstevel@tonic-gate 22717c478bd9Sstevel@tonic-gate /* 22727c478bd9Sstevel@tonic-gate * fssnap_update_kstat_num() - update a numerical snapshot kstat value 22737c478bd9Sstevel@tonic-gate * 22747c478bd9Sstevel@tonic-gate */ 22757c478bd9Sstevel@tonic-gate int 22767c478bd9Sstevel@tonic-gate fssnap_update_kstat_num(kstat_t *ksp, int rw) 22777c478bd9Sstevel@tonic-gate { 22787c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = (snapshot_id_t *)ksp->ks_private; 22797c478bd9Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 22807c478bd9Sstevel@tonic-gate struct cow_kstat_num *stats = ksp->ks_data; 22817c478bd9Sstevel@tonic-gate 22827c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) 22837c478bd9Sstevel@tonic-gate return (EACCES); 22847c478bd9Sstevel@tonic-gate 22857c478bd9Sstevel@tonic-gate /* state */ 22867c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_CREATING) 22877c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_CREATING; 22887c478bd9Sstevel@tonic-gate else if (SID_INACTIVE(sidp)) 22897c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_DISABLED; 22907c478bd9Sstevel@tonic-gate else if (SID_BUSY(sidp)) 22917c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_ACTIVE; 22927c478bd9Sstevel@tonic-gate else 22937c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_IDLE; 22947c478bd9Sstevel@tonic-gate 22957c478bd9Sstevel@tonic-gate /* bfsize */ 22967c478bd9Sstevel@tonic-gate stats->ckn_bfsize.value.ui64 = cowp->cow_map.cmap_nchunks * 22977c478bd9Sstevel@tonic-gate cowp->cow_map.cmap_chunksz; 22987c478bd9Sstevel@tonic-gate 22997c478bd9Sstevel@tonic-gate return (0); 23007c478bd9Sstevel@tonic-gate } 23017c478bd9Sstevel@tonic-gate 23027c478bd9Sstevel@tonic-gate /* 23037c478bd9Sstevel@tonic-gate * fssnap_delete_kstats() - deallocate snapshot kstats 23047c478bd9Sstevel@tonic-gate * 
23057c478bd9Sstevel@tonic-gate */ 23067c478bd9Sstevel@tonic-gate void 23077c478bd9Sstevel@tonic-gate fssnap_delete_kstats(struct cow_info *cowp) 23087c478bd9Sstevel@tonic-gate { 23097c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_num != NULL) { 23107c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_num); 23117c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = NULL; 23127c478bd9Sstevel@tonic-gate } 23137c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_mntpt != NULL) { 23147c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_mntpt); 23157c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 23167c478bd9Sstevel@tonic-gate } 23177c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_bfname != NULL) { 23187c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_bfname); 23197c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 23207c478bd9Sstevel@tonic-gate } 23217c478bd9Sstevel@tonic-gate } 2322