17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*ddfcde86Srsb * Common Development and Distribution License (the "License"). 6*ddfcde86Srsb * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*ddfcde86Srsb * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 277c478bd9Sstevel@tonic-gate 287c478bd9Sstevel@tonic-gate #include <sys/debug.h> 297c478bd9Sstevel@tonic-gate #include <sys/types.h> 307c478bd9Sstevel@tonic-gate #include <sys/file.h> 317c478bd9Sstevel@tonic-gate #include <sys/errno.h> 327c478bd9Sstevel@tonic-gate #include <sys/uio.h> 337c478bd9Sstevel@tonic-gate #include <sys/open.h> 347c478bd9Sstevel@tonic-gate #include <sys/cred.h> 357c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 367c478bd9Sstevel@tonic-gate #include <sys/conf.h> 377c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 387c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 397c478bd9Sstevel@tonic-gate #include <sys/disp.h> 407c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 417c478bd9Sstevel@tonic-gate #include <sys/filio.h> 427c478bd9Sstevel@tonic-gate #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */ 437c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 447c478bd9Sstevel@tonic-gate 457c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 467c478bd9Sstevel@tonic-gate #include <sys/devops.h> 477c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 487c478bd9Sstevel@tonic-gate #include <sys/priv_names.h> 497c478bd9Sstevel@tonic-gate 507c478bd9Sstevel@tonic-gate #include <sys/fssnap.h> 517c478bd9Sstevel@tonic-gate #include <sys/fssnap_if.h> 527c478bd9Sstevel@tonic-gate 537c478bd9Sstevel@tonic-gate /* 547c478bd9Sstevel@tonic-gate * This module implements the file system snapshot code, which provides a 557c478bd9Sstevel@tonic-gate * point-in-time image of a file system for the purposes of online backup. 567c478bd9Sstevel@tonic-gate * There are essentially two parts to this project: the driver half and the 577c478bd9Sstevel@tonic-gate * file system half. The driver half is a pseudo device driver called 587c478bd9Sstevel@tonic-gate * "fssnap" that represents the snapshot. 
Each snapshot is assigned a 597c478bd9Sstevel@tonic-gate * number that corresponds to the minor number of the device, and a control 607c478bd9Sstevel@tonic-gate * device with a high minor number is used to initiate snapshot creation and 617c478bd9Sstevel@tonic-gate * deletion. For all practical purposes the driver half acts like a 627c478bd9Sstevel@tonic-gate * read-only disk device whose contents are exactly the same as the master 637c478bd9Sstevel@tonic-gate * file system at the time the snapshot was created. 647c478bd9Sstevel@tonic-gate * 657c478bd9Sstevel@tonic-gate * The file system half provides interfaces necessary for performing the 667c478bd9Sstevel@tonic-gate * file system dependent operations required to create and delete snapshots 677c478bd9Sstevel@tonic-gate * and a special driver strategy routine that must always be used by the file 687c478bd9Sstevel@tonic-gate * system for snapshots to work correctly. 697c478bd9Sstevel@tonic-gate * 707c478bd9Sstevel@tonic-gate * When a snapshot is to be created, the user utility will send an ioctl to 717c478bd9Sstevel@tonic-gate * the control device of the driver half specifying the file system to be 727c478bd9Sstevel@tonic-gate * snapshotted, the file descriptor of a backing-store file which is used to 737c478bd9Sstevel@tonic-gate * hold old data before it is overwritten, and other snapshot parameters. 747c478bd9Sstevel@tonic-gate * This ioctl is passed on to the file system specified in the original 757c478bd9Sstevel@tonic-gate * ioctl request. The file system is expected to be able to flush 767c478bd9Sstevel@tonic-gate * everything out to make the file system consistent and lock it to ensure 777c478bd9Sstevel@tonic-gate * no changes occur while the snapshot is being created. It then calls 787c478bd9Sstevel@tonic-gate * fssnap_create() to create state for a new snapshot, from which an opaque 797c478bd9Sstevel@tonic-gate * handle is returned with the snapshot locked. 
Next, the file system must 807c478bd9Sstevel@tonic-gate * populate the "candidate bitmap", which tells the snapshot code which 817c478bd9Sstevel@tonic-gate * "chunks" should be considered for copy-on-write (a chunk is the unit of 827c478bd9Sstevel@tonic-gate * granularity used for copy-on-write, which is independent of the device 837c478bd9Sstevel@tonic-gate * and file system block sizes). This is typically done by scanning the 847c478bd9Sstevel@tonic-gate * file system allocation bitmaps to determine which chunks contain 857c478bd9Sstevel@tonic-gate * allocated blocks in the file system at the time the snapshot was created. 867c478bd9Sstevel@tonic-gate * If a chunk has no allocated blocks, it does not need to be copied before 877c478bd9Sstevel@tonic-gate * being written to. Once the candidate bitmap is populated with 887c478bd9Sstevel@tonic-gate * fssnap_set_candidate(), the file system calls fssnap_create_done() to 897c478bd9Sstevel@tonic-gate * complete the snapshot creation and unlock the snapshot. The file system 907c478bd9Sstevel@tonic-gate * may now be unlocked and modifications to it resumed. 917c478bd9Sstevel@tonic-gate * 927c478bd9Sstevel@tonic-gate * Once a snapshot is created, the file system must perform all writes 937c478bd9Sstevel@tonic-gate * through a special strategy routine, fssnap_strategy(). This strategy 947c478bd9Sstevel@tonic-gate * routine determines whether the chunks contained by the write must be 957c478bd9Sstevel@tonic-gate * copied before being overwritten by consulting the candidate bitmap 967c478bd9Sstevel@tonic-gate * described above, and the "hastrans bitmap" which tells it whether the chunk 977c478bd9Sstevel@tonic-gate * has been copied already or not. If the chunk is a candidate but has not 987c478bd9Sstevel@tonic-gate * been copied, it reads the old data in and adds it to a queue. The 997c478bd9Sstevel@tonic-gate * old data can then be overwritten with the new data. 
An asynchronous 1007c478bd9Sstevel@tonic-gate * task queue is dispatched for each old chunk read in which writes the old 1017c478bd9Sstevel@tonic-gate * data to the backing file specified at snapshot creation time. The 1027c478bd9Sstevel@tonic-gate * backing file is a sparse file the same size as the file system that 1037c478bd9Sstevel@tonic-gate * contains the old data at the offset that data originally had in the 1047c478bd9Sstevel@tonic-gate * file system. If the queue containing in-memory chunks gets too large, 1057c478bd9Sstevel@tonic-gate * writes to the file system may be throttled by a semaphore until the 1067c478bd9Sstevel@tonic-gate * task queues have a chance to push some of the chunks to the backing file. 1077c478bd9Sstevel@tonic-gate * 1087c478bd9Sstevel@tonic-gate * With the candidate bitmap, the hastrans bitmap, the data on the master 1097c478bd9Sstevel@tonic-gate * file system, and the old data in memory and in the backing file, the 1107c478bd9Sstevel@tonic-gate * snapshot pseudo-driver can piece together the original file system 1117c478bd9Sstevel@tonic-gate * information to satisfy read requests. If the requested chunk is not a 1127c478bd9Sstevel@tonic-gate * candidate, it returns a zeroed buffer. If the chunk is a candidate but 1137c478bd9Sstevel@tonic-gate * has not been copied it reads it from the master file system. If it is a 1147c478bd9Sstevel@tonic-gate * candidate and has been copied, it either copies the data from the 1157c478bd9Sstevel@tonic-gate * in-memory queue or it reads it in from the backing file. The result is 1167c478bd9Sstevel@tonic-gate * a replication of the original file system that can be backed up, mounted, 1177c478bd9Sstevel@tonic-gate * or manipulated by other file system utilities that work on a read-only 1187c478bd9Sstevel@tonic-gate * device. 
1197c478bd9Sstevel@tonic-gate * 1207c478bd9Sstevel@tonic-gate * This module is divided into three roughly logical sections: 1217c478bd9Sstevel@tonic-gate * 1227c478bd9Sstevel@tonic-gate * - The snapshot driver, which is a character/block driver 1237c478bd9Sstevel@tonic-gate * representing the snapshot itself. These routines are 1247c478bd9Sstevel@tonic-gate * prefixed with "snap_". 1257c478bd9Sstevel@tonic-gate * 1267c478bd9Sstevel@tonic-gate * - The library routines that are defined in fssnap_if.h that 1277c478bd9Sstevel@tonic-gate * are used by file systems that use this snapshot implementation. 1287c478bd9Sstevel@tonic-gate * These functions are prefixed with "fssnap_" and are called through 1297c478bd9Sstevel@tonic-gate * a function vector from the file system. 1307c478bd9Sstevel@tonic-gate * 1317c478bd9Sstevel@tonic-gate * - The helper routines used by the snapshot driver and the fssnap 1327c478bd9Sstevel@tonic-gate * library routines for managing the translation table and other 1337c478bd9Sstevel@tonic-gate * useful functions. These routines are all static and are 1347c478bd9Sstevel@tonic-gate * prefixed with either "fssnap_" or "transtbl_" if they 1357c478bd9Sstevel@tonic-gate * are specifically used for translation table activities. 
1367c478bd9Sstevel@tonic-gate */ 1377c478bd9Sstevel@tonic-gate 1387c478bd9Sstevel@tonic-gate static dev_info_t *fssnap_dip = NULL; 1397c478bd9Sstevel@tonic-gate static struct snapshot_id *snapshot = NULL; 1407c478bd9Sstevel@tonic-gate static struct snapshot_id snap_ctl; 1417c478bd9Sstevel@tonic-gate static int num_snapshots = 0; 1427c478bd9Sstevel@tonic-gate static kmutex_t snapshot_mutex; 1437c478bd9Sstevel@tonic-gate static char snapname[] = SNAP_NAME; 1447c478bd9Sstevel@tonic-gate 1457c478bd9Sstevel@tonic-gate /* "tunable" parameters */ 1467c478bd9Sstevel@tonic-gate static int fssnap_taskq_nthreads = FSSNAP_TASKQ_THREADS; 1477c478bd9Sstevel@tonic-gate static uint_t fssnap_max_mem_chunks = FSSNAP_MAX_MEM_CHUNKS; 1487c478bd9Sstevel@tonic-gate static int fssnap_taskq_maxtasks = FSSNAP_TASKQ_MAXTASKS; 1497c478bd9Sstevel@tonic-gate 1507c478bd9Sstevel@tonic-gate /* static function prototypes */ 1517c478bd9Sstevel@tonic-gate 1527c478bd9Sstevel@tonic-gate /* snapshot driver */ 1537c478bd9Sstevel@tonic-gate static int snap_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 1547c478bd9Sstevel@tonic-gate static int snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 1557c478bd9Sstevel@tonic-gate static int snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 1567c478bd9Sstevel@tonic-gate static int snap_open(dev_t *devp, int flag, int otyp, cred_t *cred); 1577c478bd9Sstevel@tonic-gate static int snap_close(dev_t dev, int flag, int otyp, cred_t *cred); 1587c478bd9Sstevel@tonic-gate static int snap_strategy(struct buf *bp); 1597c478bd9Sstevel@tonic-gate static int snap_read(dev_t dev, struct uio *uiop, cred_t *credp); 1607c478bd9Sstevel@tonic-gate static int snap_print(dev_t dev, char *str); 1617c478bd9Sstevel@tonic-gate static int snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1627c478bd9Sstevel@tonic-gate cred_t *credp, int *rvalp); 1637c478bd9Sstevel@tonic-gate static int snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 
1647c478bd9Sstevel@tonic-gate int flags, char *name, caddr_t valuep, int *lengthp); 1657c478bd9Sstevel@tonic-gate static int snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, 1667c478bd9Sstevel@tonic-gate int offset, int len, char *buffer); 1677c478bd9Sstevel@tonic-gate 1687c478bd9Sstevel@tonic-gate 1697c478bd9Sstevel@tonic-gate /* fssnap interface implementations (see fssnap_if.h) */ 1707c478bd9Sstevel@tonic-gate static void fssnap_strategy_impl(void *, struct buf *); 1717c478bd9Sstevel@tonic-gate static void *fssnap_create_impl(chunknumber_t, uint_t, u_offset_t, 1727c478bd9Sstevel@tonic-gate struct vnode *, int, struct vnode **, char *, u_offset_t); 1737c478bd9Sstevel@tonic-gate static void fssnap_set_candidate_impl(void *, chunknumber_t); 1747c478bd9Sstevel@tonic-gate static int fssnap_is_candidate_impl(void *, u_offset_t); 1757c478bd9Sstevel@tonic-gate static int fssnap_create_done_impl(void *); 1767c478bd9Sstevel@tonic-gate static int fssnap_delete_impl(void *); 1777c478bd9Sstevel@tonic-gate 1787c478bd9Sstevel@tonic-gate /* fssnap interface support routines */ 1797c478bd9Sstevel@tonic-gate static int fssnap_translate(struct snapshot_id **, struct buf *); 1807c478bd9Sstevel@tonic-gate static void fssnap_write_taskq(void *); 1817c478bd9Sstevel@tonic-gate static void fssnap_create_kstats(snapshot_id_t *, int, const char *, 1827c478bd9Sstevel@tonic-gate const char *); 1837c478bd9Sstevel@tonic-gate static int fssnap_update_kstat_num(kstat_t *, int); 1847c478bd9Sstevel@tonic-gate static void fssnap_delete_kstats(struct cow_info *); 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate /* translation table prototypes */ 1877c478bd9Sstevel@tonic-gate static cow_map_node_t *transtbl_add(cow_map_t *, chunknumber_t, caddr_t); 1887c478bd9Sstevel@tonic-gate static cow_map_node_t *transtbl_get(cow_map_t *, chunknumber_t); 1897c478bd9Sstevel@tonic-gate static void transtbl_delete(cow_map_t *, cow_map_node_t *); 1907c478bd9Sstevel@tonic-gate static void 
transtbl_free(cow_map_t *); 1917c478bd9Sstevel@tonic-gate 1927c478bd9Sstevel@tonic-gate static kstat_t *fssnap_highwater_kstat; 1937c478bd9Sstevel@tonic-gate 1947c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 1957c478bd9Sstevel@tonic-gate 1967c478bd9Sstevel@tonic-gate /* Device and Module Structures */ 1977c478bd9Sstevel@tonic-gate 1987c478bd9Sstevel@tonic-gate static struct cb_ops snap_cb_ops = { 1997c478bd9Sstevel@tonic-gate snap_open, 2007c478bd9Sstevel@tonic-gate snap_close, 2017c478bd9Sstevel@tonic-gate snap_strategy, 2027c478bd9Sstevel@tonic-gate snap_print, 2037c478bd9Sstevel@tonic-gate nodev, /* no snap_dump */ 2047c478bd9Sstevel@tonic-gate snap_read, 2057c478bd9Sstevel@tonic-gate nodev, /* no snap_write */ 2067c478bd9Sstevel@tonic-gate snap_ioctl, 2077c478bd9Sstevel@tonic-gate nodev, /* no snap_devmap */ 2087c478bd9Sstevel@tonic-gate nodev, /* no snap_mmap */ 2097c478bd9Sstevel@tonic-gate nodev, /* no snap_segmap */ 2107c478bd9Sstevel@tonic-gate nochpoll, 2117c478bd9Sstevel@tonic-gate snap_prop_op, 2127c478bd9Sstevel@tonic-gate NULL, /* streamtab */ 2137c478bd9Sstevel@tonic-gate D_64BIT | D_NEW | D_MP, /* driver compatibility */ 2147c478bd9Sstevel@tonic-gate CB_REV, 2157c478bd9Sstevel@tonic-gate nodev, /* async I/O read entry point */ 2167c478bd9Sstevel@tonic-gate nodev /* async I/O write entry point */ 2177c478bd9Sstevel@tonic-gate }; 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate static struct dev_ops snap_ops = { 2207c478bd9Sstevel@tonic-gate DEVO_REV, 2217c478bd9Sstevel@tonic-gate 0, /* ref count */ 2227c478bd9Sstevel@tonic-gate snap_getinfo, 2237c478bd9Sstevel@tonic-gate nulldev, /* snap_identify obsolete */ 2247c478bd9Sstevel@tonic-gate nulldev, /* no snap_probe */ 2257c478bd9Sstevel@tonic-gate snap_attach, 2267c478bd9Sstevel@tonic-gate snap_detach, 2277c478bd9Sstevel@tonic-gate nodev, /* no snap_reset */ 2287c478bd9Sstevel@tonic-gate &snap_cb_ops, 2297c478bd9Sstevel@tonic-gate 
(struct bus_ops *)NULL, 2307c478bd9Sstevel@tonic-gate nulldev /* no snap_power() */ 2317c478bd9Sstevel@tonic-gate }; 2327c478bd9Sstevel@tonic-gate 2337c478bd9Sstevel@tonic-gate extern struct mod_ops mod_driverops; 2347c478bd9Sstevel@tonic-gate 2357c478bd9Sstevel@tonic-gate static struct modldrv md = { 2367c478bd9Sstevel@tonic-gate &mod_driverops, /* Type of module. This is a driver */ 2377c478bd9Sstevel@tonic-gate "snapshot driver %I%", /* Name of the module */ 2387c478bd9Sstevel@tonic-gate &snap_ops, 2397c478bd9Sstevel@tonic-gate }; 2407c478bd9Sstevel@tonic-gate 2417c478bd9Sstevel@tonic-gate static struct modlinkage ml = { 2427c478bd9Sstevel@tonic-gate MODREV_1, 2437c478bd9Sstevel@tonic-gate &md, 2447c478bd9Sstevel@tonic-gate NULL 2457c478bd9Sstevel@tonic-gate }; 2467c478bd9Sstevel@tonic-gate 2477c478bd9Sstevel@tonic-gate static void *statep; 2487c478bd9Sstevel@tonic-gate 2497c478bd9Sstevel@tonic-gate int 2507c478bd9Sstevel@tonic-gate _init(void) 2517c478bd9Sstevel@tonic-gate { 2527c478bd9Sstevel@tonic-gate int error; 2537c478bd9Sstevel@tonic-gate kstat_t *ksp; 2547c478bd9Sstevel@tonic-gate kstat_named_t *ksdata; 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate error = ddi_soft_state_init(&statep, sizeof (struct snapshot_id *), 1); 2577c478bd9Sstevel@tonic-gate if (error) { 2587c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to init ddi_soft_state."); 2597c478bd9Sstevel@tonic-gate return (error); 2607c478bd9Sstevel@tonic-gate } 2617c478bd9Sstevel@tonic-gate 2627c478bd9Sstevel@tonic-gate error = mod_install(&ml); 2637c478bd9Sstevel@tonic-gate 2647c478bd9Sstevel@tonic-gate if (error) { 2657c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to mod_install."); 2667c478bd9Sstevel@tonic-gate ddi_soft_state_fini(&statep); 2677c478bd9Sstevel@tonic-gate return (error); 2687c478bd9Sstevel@tonic-gate } 2697c478bd9Sstevel@tonic-gate 2707c478bd9Sstevel@tonic-gate /* 2717c478bd9Sstevel@tonic-gate * Fill in the snapshot operations vector for file 
systems 2727c478bd9Sstevel@tonic-gate * (defined in fssnap_if.c) 2737c478bd9Sstevel@tonic-gate */ 2747c478bd9Sstevel@tonic-gate 2757c478bd9Sstevel@tonic-gate snapops.fssnap_create = fssnap_create_impl; 2767c478bd9Sstevel@tonic-gate snapops.fssnap_set_candidate = fssnap_set_candidate_impl; 2777c478bd9Sstevel@tonic-gate snapops.fssnap_is_candidate = fssnap_is_candidate_impl; 2787c478bd9Sstevel@tonic-gate snapops.fssnap_create_done = fssnap_create_done_impl; 2797c478bd9Sstevel@tonic-gate snapops.fssnap_delete = fssnap_delete_impl; 2807c478bd9Sstevel@tonic-gate snapops.fssnap_strategy = fssnap_strategy_impl; 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate mutex_init(&snapshot_mutex, NULL, MUTEX_DEFAULT, NULL); 2837c478bd9Sstevel@tonic-gate 2847c478bd9Sstevel@tonic-gate /* 2857c478bd9Sstevel@tonic-gate * Initialize the fssnap highwater kstat 2867c478bd9Sstevel@tonic-gate */ 2877c478bd9Sstevel@tonic-gate ksp = kstat_create(snapname, 0, FSSNAP_KSTAT_HIGHWATER, "misc", 2887c478bd9Sstevel@tonic-gate KSTAT_TYPE_NAMED, 1, 0); 2897c478bd9Sstevel@tonic-gate if (ksp != NULL) { 2907c478bd9Sstevel@tonic-gate ksdata = (kstat_named_t *)ksp->ks_data; 2917c478bd9Sstevel@tonic-gate kstat_named_init(ksdata, FSSNAP_KSTAT_HIGHWATER, 2927c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT32); 2937c478bd9Sstevel@tonic-gate ksdata->value.ui32 = 0; 2947c478bd9Sstevel@tonic-gate kstat_install(ksp); 2957c478bd9Sstevel@tonic-gate } else { 2967c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to create highwater kstat."); 2977c478bd9Sstevel@tonic-gate } 2987c478bd9Sstevel@tonic-gate fssnap_highwater_kstat = ksp; 2997c478bd9Sstevel@tonic-gate 3007c478bd9Sstevel@tonic-gate return (0); 3017c478bd9Sstevel@tonic-gate } 3027c478bd9Sstevel@tonic-gate 3037c478bd9Sstevel@tonic-gate int 3047c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop) 3057c478bd9Sstevel@tonic-gate { 3067c478bd9Sstevel@tonic-gate return (mod_info(&ml, modinfop)); 3077c478bd9Sstevel@tonic-gate } 
3087c478bd9Sstevel@tonic-gate 3097c478bd9Sstevel@tonic-gate int 3107c478bd9Sstevel@tonic-gate _fini(void) 3117c478bd9Sstevel@tonic-gate { 3127c478bd9Sstevel@tonic-gate int error; 3137c478bd9Sstevel@tonic-gate 3147c478bd9Sstevel@tonic-gate error = mod_remove(&ml); 3157c478bd9Sstevel@tonic-gate if (error) 3167c478bd9Sstevel@tonic-gate return (error); 3177c478bd9Sstevel@tonic-gate ddi_soft_state_fini(&statep); 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate /* 3207c478bd9Sstevel@tonic-gate * delete the fssnap highwater kstat 3217c478bd9Sstevel@tonic-gate */ 3227c478bd9Sstevel@tonic-gate kstat_delete(fssnap_highwater_kstat); 3237c478bd9Sstevel@tonic-gate 3247c478bd9Sstevel@tonic-gate mutex_destroy(&snapshot_mutex); 3257c478bd9Sstevel@tonic-gate 3267c478bd9Sstevel@tonic-gate /* Clear out the file system operations vector */ 3277c478bd9Sstevel@tonic-gate snapops.fssnap_create = NULL; 3287c478bd9Sstevel@tonic-gate snapops.fssnap_set_candidate = NULL; 3297c478bd9Sstevel@tonic-gate snapops.fssnap_create_done = NULL; 3307c478bd9Sstevel@tonic-gate snapops.fssnap_delete = NULL; 3317c478bd9Sstevel@tonic-gate snapops.fssnap_strategy = NULL; 3327c478bd9Sstevel@tonic-gate 3337c478bd9Sstevel@tonic-gate return (0); 3347c478bd9Sstevel@tonic-gate } 3357c478bd9Sstevel@tonic-gate 3367c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 3377c478bd9Sstevel@tonic-gate 3387c478bd9Sstevel@tonic-gate /* 3397c478bd9Sstevel@tonic-gate * Snapshot Driver Routines 3407c478bd9Sstevel@tonic-gate * 3417c478bd9Sstevel@tonic-gate * This section implements the snapshot character and block drivers. The 3427c478bd9Sstevel@tonic-gate * device will appear to be a consistent read-only file system to 3437c478bd9Sstevel@tonic-gate * applications that wish to back it up or mount it. 
 * The snapshot driver
 * communicates with the file system through the translation table, which
 * tells the snapshot driver where to find the data necessary to piece
 * together the frozen file system.  The data may either be on the master
 * device (no translation exists), in memory (a translation exists but has
 * not been flushed to the backing store), or in the backing store file.
 * The read request may require the snapshot driver to retrieve data from
 * several different places and piece it together to look like a single
 * contiguous read.
 *
 * The device minor number corresponds to the snapshot number in the list of
 * snapshot identifiers.  The soft state for each minor number is simply a
 * pointer to the snapshot id, which holds all of the snapshot state.  One
 * minor number is designated as the control device.  All snapshot create
 * and delete requests go through the control device to ensure this module
 * is properly loaded and attached before the file system starts calling
 * routines defined here.
3607c478bd9Sstevel@tonic-gate */ 3617c478bd9Sstevel@tonic-gate 3627c478bd9Sstevel@tonic-gate 3637c478bd9Sstevel@tonic-gate /* 3647c478bd9Sstevel@tonic-gate * snap_getinfo() - snapshot driver getinfo(9E) routine 3657c478bd9Sstevel@tonic-gate * 3667c478bd9Sstevel@tonic-gate */ 3677c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3687c478bd9Sstevel@tonic-gate static int 3697c478bd9Sstevel@tonic-gate snap_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 3707c478bd9Sstevel@tonic-gate { 3717c478bd9Sstevel@tonic-gate switch (infocmd) { 3727c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 3737c478bd9Sstevel@tonic-gate *result = fssnap_dip; 3747c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 3757c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 3767c478bd9Sstevel@tonic-gate *result = 0; /* we only have one instance */ 3777c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 3787c478bd9Sstevel@tonic-gate } 3797c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 3807c478bd9Sstevel@tonic-gate } 3817c478bd9Sstevel@tonic-gate 3827c478bd9Sstevel@tonic-gate /* 3837c478bd9Sstevel@tonic-gate * snap_attach() - snapshot driver attach(9E) routine 3847c478bd9Sstevel@tonic-gate * 3857c478bd9Sstevel@tonic-gate * sets up snapshot control device and control state. 
 * The control state
 * is a pointer to an "anonymous" snapshot_id for tracking opens and closes
 *
 * Returns DDI_SUCCESS for DDI_ATTACH once the control minor node has been
 * created, and unconditionally for DDI_PM_RESUME and DDI_RESUME.  Returns
 * DDI_FAILURE if the minor node cannot be created or for any other command.
 */
static int
snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int			error;

	switch (cmd) {
	case DDI_ATTACH:
		/* create the control device */
		error = ddi_create_priv_minor_node(dip, SNAP_CTL_NODE, S_IFCHR,
		    SNAP_CTL_MINOR, DDI_PSEUDO, PRIVONLY_DEV,
		    PRIV_SYS_CONFIG, PRIV_SYS_CONFIG, 0666);
		if (error == DDI_FAILURE) {
			return (DDI_FAILURE);
		}

		/* reset the control snapshot id under its own write lock */
		rw_init(&snap_ctl.sid_rwlock, NULL, RW_DEFAULT, NULL);
		rw_enter(&snap_ctl.sid_rwlock, RW_WRITER);
		fssnap_dip = dip;
		snap_ctl.sid_snapnumber = SNAP_CTL_MINOR;
		/* the control sid is not linked into the snapshot list */
		snap_ctl.sid_next = NULL;
		snap_ctl.sid_cowinfo = NULL;
		snap_ctl.sid_flags = 0;
		rw_exit(&snap_ctl.sid_rwlock);
		ddi_report_dev(dip);

		return (DDI_SUCCESS);
	case DDI_PM_RESUME:
		return (DDI_SUCCESS);

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * snap_detach() - snapshot driver detach(9E) routine
 *
 * destroys snapshot control device and control state.  If any snapshots
 * are active (ie. num_snapshots != 0), the device will refuse to detach.
 *
 * The control device being open (SID_CHAR_BUSY set in snap_ctl) also makes
 * the detach fail.  On success every snapshot id left on the snapshot list
 * is freed, the control minor node is removed and fssnap_dip is cleared,
 * all while holding snapshot_mutex.
 *
 * Returns DDI_SUCCESS when the DDI_DETACH completes, DDI_FAILURE when the
 * driver is busy or for any other command.
 */
static int
snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct snapshot_id *sidp, *sidnextp;

	switch (cmd) {
	case DDI_DETACH:
		/* do not detach if the device is active */
		mutex_enter(&snapshot_mutex);
		if ((num_snapshots != 0) ||
		    ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0)) {
			mutex_exit(&snapshot_mutex);
			return (DDI_FAILURE);
		}

		/* free up the snapshot list */
		for (sidp = snapshot; sidp != NULL; sidp = sidnextp) {
			ASSERT(SID_AVAILABLE(sidp) &&
			    !RW_LOCK_HELD(&sidp->sid_rwlock));
			/* fetch the successor before this node is freed */
			sidnextp = sidp->sid_next;
			rw_destroy(&sidp->sid_rwlock);
			kmem_free(sidp, sizeof (struct snapshot_id));
		}
		snapshot = NULL;

		/* delete the control device */
		ddi_remove_minor_node(dip, SNAP_CTL_NODE);
		fssnap_dip = NULL;

		ASSERT((snap_ctl.sid_flags & SID_CHAR_BUSY) == 0);
		rw_destroy(&snap_ctl.sid_rwlock);
		mutex_exit(&snapshot_mutex);

		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * snap_open() - snapshot driver open(9E) routine
 *
 * marks the snapshot id as busy so it will not be recycled when deleted
 * until the snapshot is closed.
 *
 * Returns 0 on success, or:
 *	EROFS	the open requested FWRITE
 *	EINVAL	the control device was opened without FEXCL or not as
 *		OTYP_CHR, or a snapshot was opened with an otyp other than
 *		OTYP_CHR or OTYP_BLK
 *	EBUSY	the control device is already open
 *	ENXIO	there is no soft state for the minor, or the snapshot is
 *		inactive
 *	EAGAIN	FEXCL was requested on a snapshot that is already busy
 */
/* ARGSUSED */
static int
snap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	minor_t	minor;
	struct snapshot_id **sidpp, *sidp;

	/* snapshots are read-only */
	if (flag & FWRITE)
		return (EROFS);

	minor = getminor(*devp);

	if (minor == SNAP_CTL_MINOR) {
		/* control device must be opened exclusively */
		if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR))
			return (EINVAL);

		rw_enter(&snap_ctl.sid_rwlock, RW_WRITER);
		if ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0) {
			rw_exit(&snap_ctl.sid_rwlock);
			return (EBUSY);
		}

		snap_ctl.sid_flags |= SID_CHAR_BUSY;
		rw_exit(&snap_ctl.sid_rwlock);

		return (0);
	}

	/* the soft state entry for a snapshot minor is a snapshot id pointer */
	sidpp = ddi_get_soft_state(statep, minor);
	if (sidpp == NULL || *sidpp == NULL)
		return (ENXIO);
	sidp = *sidpp;
	rw_enter(&sidp->sid_rwlock, RW_WRITER);

	if ((flag & FEXCL) && SID_BUSY(sidp)) {
		rw_exit(&sidp->sid_rwlock);
		return (EAGAIN);
	}

	/*
	 * NOTE(review): both pointers were already checked and dereferenced
	 * above, so this ASSERT cannot fire here.
	 */
	ASSERT(sidpp != NULL && sidp != NULL);
	/* check to see if this snapshot has been killed on us */
	if (SID_INACTIVE(sidp)) {
		cmn_err(CE_WARN, "snap_open: snapshot %d does not exist.",
		    minor);
		rw_exit(&sidp->sid_rwlock);
		return (ENXIO);
	}

	/* record which flavor of the device (character or block) is open */
	switch (otyp) {
	case OTYP_CHR:
		sidp->sid_flags |= SID_CHAR_BUSY;
		break;
	case OTYP_BLK:
		sidp->sid_flags |= SID_BLOCK_BUSY;
		break;
	default:
		rw_exit(&sidp->sid_rwlock);
		return (EINVAL);
	}

	rw_exit(&sidp->sid_rwlock);

	/*
	 * at this point if a valid snapshot was found then it has
	 * been marked busy and we can use it.
	 */
	return (0);
}

/*
 * snap_close() - snapshot driver close(9E) routine
 *
 * unsets the busy bits in the snapshot id.  If the snapshot has been
 * deleted while the snapshot device was open, the close call will clean
 * up the remaining state information.
 */
/* ARGSUSED */
static int
snap_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	struct snapshot_id	**sidpp, *sidp;
	minor_t			minor;
	char			name[20];

	minor = getminor(dev);

	/* if this is the control device, close it and return */
	if (minor == SNAP_CTL_MINOR) {
		rw_enter(&snap_ctl.sid_rwlock, RW_WRITER);
		snap_ctl.sid_flags &= ~(SID_CHAR_BUSY);
		rw_exit(&snap_ctl.sid_rwlock);
		return (0);
	}

	sidpp = ddi_get_soft_state(statep, minor);
5757c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 5767c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_close: could not find state for " 5777c478bd9Sstevel@tonic-gate "snapshot %d.", minor); 5787c478bd9Sstevel@tonic-gate return (ENXIO); 5797c478bd9Sstevel@tonic-gate } 5807c478bd9Sstevel@tonic-gate sidp = *sidpp; 5817c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 5827c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 5837c478bd9Sstevel@tonic-gate 5847c478bd9Sstevel@tonic-gate /* Mark the snapshot as not being busy anymore */ 5857c478bd9Sstevel@tonic-gate switch (otyp) { 5867c478bd9Sstevel@tonic-gate case OTYP_CHR: 5877c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_CHAR_BUSY); 5887c478bd9Sstevel@tonic-gate break; 5897c478bd9Sstevel@tonic-gate case OTYP_BLK: 5907c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_BLOCK_BUSY); 5917c478bd9Sstevel@tonic-gate break; 5927c478bd9Sstevel@tonic-gate default: 5937c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 5947c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 5957c478bd9Sstevel@tonic-gate return (EINVAL); 5967c478bd9Sstevel@tonic-gate } 5977c478bd9Sstevel@tonic-gate 5987c478bd9Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) { 5997c478bd9Sstevel@tonic-gate /* 6007c478bd9Sstevel@tonic-gate * if this is the last close on a snapshot that has been 6017c478bd9Sstevel@tonic-gate * deleted, then free up the soft state. The snapdelete 6027c478bd9Sstevel@tonic-gate * ioctl does not free this when the device is in use so 6037c478bd9Sstevel@tonic-gate * we do it here after the last reference goes away. 
6047c478bd9Sstevel@tonic-gate */ 6057c478bd9Sstevel@tonic-gate 6067c478bd9Sstevel@tonic-gate /* remove the device nodes */ 6077c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 6087c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", 6097c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 6107c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 6117c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", 6127c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 6137c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 6147c478bd9Sstevel@tonic-gate 6157c478bd9Sstevel@tonic-gate /* delete the state structure */ 6167c478bd9Sstevel@tonic-gate ddi_soft_state_free(statep, sidp->sid_snapnumber); 6177c478bd9Sstevel@tonic-gate num_snapshots--; 6187c478bd9Sstevel@tonic-gate } 6197c478bd9Sstevel@tonic-gate 6207c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 6217c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 6227c478bd9Sstevel@tonic-gate 6237c478bd9Sstevel@tonic-gate return (0); 6247c478bd9Sstevel@tonic-gate } 6257c478bd9Sstevel@tonic-gate 6267c478bd9Sstevel@tonic-gate /* 6277c478bd9Sstevel@tonic-gate * snap_read() - snapshot driver read(9E) routine 6287c478bd9Sstevel@tonic-gate * 6297c478bd9Sstevel@tonic-gate * reads data from the snapshot by calling snap_strategy() through physio() 6307c478bd9Sstevel@tonic-gate */ 6317c478bd9Sstevel@tonic-gate /* ARGSUSED */ 6327c478bd9Sstevel@tonic-gate static int 6337c478bd9Sstevel@tonic-gate snap_read(dev_t dev, struct uio *uiop, cred_t *credp) 6347c478bd9Sstevel@tonic-gate { 6357c478bd9Sstevel@tonic-gate minor_t minor; 6367c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 6377c478bd9Sstevel@tonic-gate 6387c478bd9Sstevel@tonic-gate minor = getminor(dev); 6397c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 6407c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 6417c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 
6427c478bd9Sstevel@tonic-gate "snap_read: could not find state for snapshot %d.", minor); 6437c478bd9Sstevel@tonic-gate return (ENXIO); 6447c478bd9Sstevel@tonic-gate } 6457c478bd9Sstevel@tonic-gate return (physio(snap_strategy, NULL, dev, B_READ, minphys, uiop)); 6467c478bd9Sstevel@tonic-gate } 6477c478bd9Sstevel@tonic-gate 6487c478bd9Sstevel@tonic-gate /* 6497c478bd9Sstevel@tonic-gate * snap_strategy() - snapshot driver strategy(9E) routine 6507c478bd9Sstevel@tonic-gate * 6517c478bd9Sstevel@tonic-gate * cycles through each chunk in the requested buffer and calls 6527c478bd9Sstevel@tonic-gate * snap_getchunk() on each chunk to retrieve it from the appropriate 6537c478bd9Sstevel@tonic-gate * place. Once all of the parts are put together the requested buffer 6547c478bd9Sstevel@tonic-gate * is returned. The snapshot driver is read-only, so a write is invalid. 6557c478bd9Sstevel@tonic-gate */ 6567c478bd9Sstevel@tonic-gate static int 6577c478bd9Sstevel@tonic-gate snap_strategy(struct buf *bp) 6587c478bd9Sstevel@tonic-gate { 6597c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp; 6607c478bd9Sstevel@tonic-gate minor_t minor; 6617c478bd9Sstevel@tonic-gate chunknumber_t chunk; 6627c478bd9Sstevel@tonic-gate int off, len; 6637c478bd9Sstevel@tonic-gate u_longlong_t reqptr; 6647c478bd9Sstevel@tonic-gate int error = 0; 6657c478bd9Sstevel@tonic-gate size_t chunksz; 6667c478bd9Sstevel@tonic-gate caddr_t buf; 6677c478bd9Sstevel@tonic-gate 6687c478bd9Sstevel@tonic-gate /* snapshot device is read-only */ 6697c478bd9Sstevel@tonic-gate if (bp->b_flags & B_WRITE) { 6707c478bd9Sstevel@tonic-gate bioerror(bp, EROFS); 6717c478bd9Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 6727c478bd9Sstevel@tonic-gate biodone(bp); 6737c478bd9Sstevel@tonic-gate return (0); 6747c478bd9Sstevel@tonic-gate } 6757c478bd9Sstevel@tonic-gate 6767c478bd9Sstevel@tonic-gate minor = getminor(bp->b_edev); 6777c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 6787c478bd9Sstevel@tonic-gate 
if (sidpp == NULL || *sidpp == NULL) { 6797c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 6807c478bd9Sstevel@tonic-gate "snap_strategy: could not find state for snapshot %d.", 6817c478bd9Sstevel@tonic-gate minor); 6827c478bd9Sstevel@tonic-gate bioerror(bp, ENXIO); 6837c478bd9Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 6847c478bd9Sstevel@tonic-gate biodone(bp); 6857c478bd9Sstevel@tonic-gate return (0); 6867c478bd9Sstevel@tonic-gate } 6877c478bd9Sstevel@tonic-gate sidp = *sidpp; 6887c478bd9Sstevel@tonic-gate ASSERT(sidp); 6897c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 6907c478bd9Sstevel@tonic-gate 6917c478bd9Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 6927c478bd9Sstevel@tonic-gate bioerror(bp, ENXIO); 6937c478bd9Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 6947c478bd9Sstevel@tonic-gate biodone(bp); 6957c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 6967c478bd9Sstevel@tonic-gate return (0); 6977c478bd9Sstevel@tonic-gate } 6987c478bd9Sstevel@tonic-gate 6997c478bd9Sstevel@tonic-gate if (bp->b_flags & (B_PAGEIO|B_PHYS)) 7007c478bd9Sstevel@tonic-gate bp_mapin(bp); 7017c478bd9Sstevel@tonic-gate 7027c478bd9Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 7037c478bd9Sstevel@tonic-gate ASSERT(bp->b_un.b_addr); 7047c478bd9Sstevel@tonic-gate buf = bp->b_un.b_addr; 7057c478bd9Sstevel@tonic-gate 7067c478bd9Sstevel@tonic-gate chunksz = sidp->sid_cowinfo->cow_map.cmap_chunksz; 7077c478bd9Sstevel@tonic-gate 7087c478bd9Sstevel@tonic-gate /* reqptr is the current DEV_BSIZE offset into the device */ 7097c478bd9Sstevel@tonic-gate /* chunk is the chunk containing reqptr */ 7107c478bd9Sstevel@tonic-gate /* len is the length of the request (in the current chunk) in bytes */ 7117c478bd9Sstevel@tonic-gate /* off is the byte offset into the current chunk */ 7127c478bd9Sstevel@tonic-gate reqptr = bp->b_lblkno; 7137c478bd9Sstevel@tonic-gate while (bp->b_resid > 0) { 7147c478bd9Sstevel@tonic-gate chunk = dbtocowchunk(&sidp->sid_cowinfo->cow_map, reqptr); 
7157c478bd9Sstevel@tonic-gate off = (reqptr % (chunksz >> DEV_BSHIFT)) << DEV_BSHIFT; 7167c478bd9Sstevel@tonic-gate len = min(chunksz - off, bp->b_resid); 7177c478bd9Sstevel@tonic-gate ASSERT((off + len) <= chunksz); 7187c478bd9Sstevel@tonic-gate 7197c478bd9Sstevel@tonic-gate if ((error = snap_getchunk(sidp, chunk, off, len, buf)) != 0) { 7207c478bd9Sstevel@tonic-gate /* 7217c478bd9Sstevel@tonic-gate * EINVAL means the user tried to go out of range. 7227c478bd9Sstevel@tonic-gate * Anything else means it's likely that we're 7237c478bd9Sstevel@tonic-gate * confused. 7247c478bd9Sstevel@tonic-gate */ 7257c478bd9Sstevel@tonic-gate if (error != EINVAL) { 7267c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_strategy: error " 7277c478bd9Sstevel@tonic-gate "calling snap_getchunk, chunk = %llu, " 7287c478bd9Sstevel@tonic-gate "offset = %d, len = %d, resid = %lu, " 7297c478bd9Sstevel@tonic-gate "error = %d.", 7307c478bd9Sstevel@tonic-gate chunk, off, len, bp->b_resid, error); 7317c478bd9Sstevel@tonic-gate } 7327c478bd9Sstevel@tonic-gate bioerror(bp, error); 7337c478bd9Sstevel@tonic-gate biodone(bp); 7347c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 7357c478bd9Sstevel@tonic-gate return (0); 7367c478bd9Sstevel@tonic-gate } 7377c478bd9Sstevel@tonic-gate bp->b_resid -= len; 7387c478bd9Sstevel@tonic-gate reqptr += (len >> DEV_BSHIFT); 7397c478bd9Sstevel@tonic-gate buf += len; 7407c478bd9Sstevel@tonic-gate } 7417c478bd9Sstevel@tonic-gate 7427c478bd9Sstevel@tonic-gate ASSERT(bp->b_resid == 0); 7437c478bd9Sstevel@tonic-gate biodone(bp); 7447c478bd9Sstevel@tonic-gate 7457c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 7467c478bd9Sstevel@tonic-gate return (0); 7477c478bd9Sstevel@tonic-gate } 7487c478bd9Sstevel@tonic-gate 7497c478bd9Sstevel@tonic-gate /* 7507c478bd9Sstevel@tonic-gate * snap_getchunk() - helper function for snap_strategy() 7517c478bd9Sstevel@tonic-gate * 7527c478bd9Sstevel@tonic-gate * gets the requested data from the appropriate place and fills in the 
7537c478bd9Sstevel@tonic-gate * buffer. chunk is the chunk number of the request, offset is the 7547c478bd9Sstevel@tonic-gate * offset into that chunk and must be less than the chunk size. len is 7557c478bd9Sstevel@tonic-gate * the length of the request starting at offset, and must not exceed a 7567c478bd9Sstevel@tonic-gate * chunk boundary. buffer is the address to copy the data to. len 7577c478bd9Sstevel@tonic-gate * bytes are copied into the buffer starting at the location specified. 7587c478bd9Sstevel@tonic-gate * 7597c478bd9Sstevel@tonic-gate * A chunk is located according to the following algorithm: 7607c478bd9Sstevel@tonic-gate * - If the chunk does not have a translation or is not a candidate 7617c478bd9Sstevel@tonic-gate * for translation, it is read straight from the master device. 7627c478bd9Sstevel@tonic-gate * - If the chunk does have a translation, then it is either on 7637c478bd9Sstevel@tonic-gate * disk or in memory: 7647c478bd9Sstevel@tonic-gate * o If it is in memory the requested data is simply copied out 7657c478bd9Sstevel@tonic-gate * of the in-memory buffer. 7667c478bd9Sstevel@tonic-gate * o If it is in the backing store, it is read from there. 7677c478bd9Sstevel@tonic-gate * 7687c478bd9Sstevel@tonic-gate * This function does the real work of the snapshot driver. 
7697c478bd9Sstevel@tonic-gate */ 7707c478bd9Sstevel@tonic-gate static int 7717c478bd9Sstevel@tonic-gate snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, int offset, 7727c478bd9Sstevel@tonic-gate int len, char *buffer) 7737c478bd9Sstevel@tonic-gate { 7747c478bd9Sstevel@tonic-gate cow_map_t *cmap = &sidp->sid_cowinfo->cow_map; 7757c478bd9Sstevel@tonic-gate cow_map_node_t *cmn; 7767c478bd9Sstevel@tonic-gate struct buf *snapbuf; 7777c478bd9Sstevel@tonic-gate int error = 0; 7787c478bd9Sstevel@tonic-gate char *newbuffer; 7797c478bd9Sstevel@tonic-gate int newlen = 0; 7807c478bd9Sstevel@tonic-gate int partial = 0; 7817c478bd9Sstevel@tonic-gate 7827c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&sidp->sid_rwlock)); 7837c478bd9Sstevel@tonic-gate ASSERT(offset + len <= cmap->cmap_chunksz); 7847c478bd9Sstevel@tonic-gate 7857c478bd9Sstevel@tonic-gate /* 7867c478bd9Sstevel@tonic-gate * Check if the chunk number is out of range and if so bail out 7877c478bd9Sstevel@tonic-gate */ 7887c478bd9Sstevel@tonic-gate if (chunk >= (cmap->cmap_bmsize * NBBY)) { 7897c478bd9Sstevel@tonic-gate return (EINVAL); 7907c478bd9Sstevel@tonic-gate } 7917c478bd9Sstevel@tonic-gate 7927c478bd9Sstevel@tonic-gate /* 7937c478bd9Sstevel@tonic-gate * If the chunk is not a candidate for translation, then the chunk 7947c478bd9Sstevel@tonic-gate * was not allocated when the snapshot was taken. Since it does 7957c478bd9Sstevel@tonic-gate * not contain data associated with this snapshot, just return a 7967c478bd9Sstevel@tonic-gate * zero buffer instead. 
7977c478bd9Sstevel@tonic-gate */ 7987c478bd9Sstevel@tonic-gate if (isclr(cmap->cmap_candidate, chunk)) { 7997c478bd9Sstevel@tonic-gate bzero(buffer, len); 8007c478bd9Sstevel@tonic-gate return (0); 8017c478bd9Sstevel@tonic-gate } 8027c478bd9Sstevel@tonic-gate 8037c478bd9Sstevel@tonic-gate /* 8047c478bd9Sstevel@tonic-gate * if the chunk is a candidate for translation but a 8057c478bd9Sstevel@tonic-gate * translation does not exist, then read through to the 8067c478bd9Sstevel@tonic-gate * original file system. The rwlock is held until the read 8077c478bd9Sstevel@tonic-gate * completes if it hasn't been translated to make sure the 8087c478bd9Sstevel@tonic-gate * file system does not translate the block before we 8097c478bd9Sstevel@tonic-gate * access it. If it has already been translated we don't 8107c478bd9Sstevel@tonic-gate * need the lock, because the translation will never go away. 8117c478bd9Sstevel@tonic-gate */ 8127c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_READER); 8137c478bd9Sstevel@tonic-gate if (isclr(cmap->cmap_hastrans, chunk)) { 8147c478bd9Sstevel@tonic-gate snapbuf = getrbuf(KM_SLEEP); 8157c478bd9Sstevel@tonic-gate /* 8167c478bd9Sstevel@tonic-gate * Reading into the buffer saves having to do a copy, 8177c478bd9Sstevel@tonic-gate * but gets tricky if the request size is not a 8187c478bd9Sstevel@tonic-gate * multiple of DEV_BSIZE. However, we are filling the 8197c478bd9Sstevel@tonic-gate * buffer left to right, so future reads will write 8207c478bd9Sstevel@tonic-gate * over any extra data we might have read. 
8217c478bd9Sstevel@tonic-gate */ 8227c478bd9Sstevel@tonic-gate 8237c478bd9Sstevel@tonic-gate partial = len % DEV_BSIZE; 8247c478bd9Sstevel@tonic-gate 8257c478bd9Sstevel@tonic-gate snapbuf->b_bcount = len; 8267c478bd9Sstevel@tonic-gate snapbuf->b_lblkno = lbtodb(chunk * cmap->cmap_chunksz + offset); 8277c478bd9Sstevel@tonic-gate snapbuf->b_un.b_addr = buffer; 8287c478bd9Sstevel@tonic-gate 8297c478bd9Sstevel@tonic-gate snapbuf->b_iodone = NULL; 8307c478bd9Sstevel@tonic-gate snapbuf->b_proc = NULL; /* i.e. the kernel */ 8317c478bd9Sstevel@tonic-gate snapbuf->b_flags = B_READ | B_BUSY; 8327c478bd9Sstevel@tonic-gate snapbuf->b_edev = sidp->sid_fvp->v_vfsp->vfs_dev; 8337c478bd9Sstevel@tonic-gate 8347c478bd9Sstevel@tonic-gate if (partial) { 8357c478bd9Sstevel@tonic-gate /* 8367c478bd9Sstevel@tonic-gate * Partial block read in progress. 8377c478bd9Sstevel@tonic-gate * This is bad as modules further down the line 8387c478bd9Sstevel@tonic-gate * assume buf's are exact multiples of DEV_BSIZE 8397c478bd9Sstevel@tonic-gate * and we end up with fewer, or zero, bytes read. 8407c478bd9Sstevel@tonic-gate * To get round this we need to round up to the 8417c478bd9Sstevel@tonic-gate * nearest full block read and then return only 8427c478bd9Sstevel@tonic-gate * len bytes. 
8437c478bd9Sstevel@tonic-gate */ 8447c478bd9Sstevel@tonic-gate newlen = (len - partial) + DEV_BSIZE; 8457c478bd9Sstevel@tonic-gate newbuffer = kmem_alloc(newlen, KM_SLEEP); 8467c478bd9Sstevel@tonic-gate 8477c478bd9Sstevel@tonic-gate snapbuf->b_bcount = newlen; 8487c478bd9Sstevel@tonic-gate snapbuf->b_un.b_addr = newbuffer; 8497c478bd9Sstevel@tonic-gate } 8507c478bd9Sstevel@tonic-gate 8517c478bd9Sstevel@tonic-gate (void) bdev_strategy(snapbuf); 8527c478bd9Sstevel@tonic-gate (void) biowait(snapbuf); 8537c478bd9Sstevel@tonic-gate 8547c478bd9Sstevel@tonic-gate error = geterror(snapbuf); 8557c478bd9Sstevel@tonic-gate 8567c478bd9Sstevel@tonic-gate if (partial) { 8577c478bd9Sstevel@tonic-gate /* 8587c478bd9Sstevel@tonic-gate * Partial block read. Now we need to bcopy the 8597c478bd9Sstevel@tonic-gate * correct number of bytes back into the 8607c478bd9Sstevel@tonic-gate * supplied buffer, and tidy up our temp 8617c478bd9Sstevel@tonic-gate * buffer. 8627c478bd9Sstevel@tonic-gate */ 8637c478bd9Sstevel@tonic-gate bcopy(newbuffer, buffer, len); 8647c478bd9Sstevel@tonic-gate kmem_free(newbuffer, newlen); 8657c478bd9Sstevel@tonic-gate } 8667c478bd9Sstevel@tonic-gate 8677c478bd9Sstevel@tonic-gate freerbuf(snapbuf); 8687c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 8697c478bd9Sstevel@tonic-gate 8707c478bd9Sstevel@tonic-gate return (error); 8717c478bd9Sstevel@tonic-gate } 8727c478bd9Sstevel@tonic-gate 8737c478bd9Sstevel@tonic-gate /* 8747c478bd9Sstevel@tonic-gate * finally, if the chunk is a candidate for translation and it 8757c478bd9Sstevel@tonic-gate * has been translated, then we clone the chunk of the buffer 8767c478bd9Sstevel@tonic-gate * that was copied aside by the file system. 8777c478bd9Sstevel@tonic-gate * The cmap_rwlock does not need to be held after we know the 8787c478bd9Sstevel@tonic-gate * data has already been copied. Once a chunk has been copied 8797c478bd9Sstevel@tonic-gate * to the backing file, it is stable read only data. 
8807c478bd9Sstevel@tonic-gate */ 8817c478bd9Sstevel@tonic-gate cmn = transtbl_get(cmap, chunk); 8827c478bd9Sstevel@tonic-gate 8837c478bd9Sstevel@tonic-gate /* check whether the data is in memory or in the backing file */ 8847c478bd9Sstevel@tonic-gate if (cmn != NULL) { 8857c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_buf); 8867c478bd9Sstevel@tonic-gate /* already in memory */ 8877c478bd9Sstevel@tonic-gate bcopy(cmn->cmn_buf + offset, buffer, len); 8887c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 8897c478bd9Sstevel@tonic-gate } else { 8907c478bd9Sstevel@tonic-gate ssize_t resid = len; 8917c478bd9Sstevel@tonic-gate int bf_index; 8927c478bd9Sstevel@tonic-gate /* 8937c478bd9Sstevel@tonic-gate * can cause deadlock with writer if we don't drop the 8947c478bd9Sstevel@tonic-gate * cmap_rwlock before trying to get the backing store file 8957c478bd9Sstevel@tonic-gate * vnode rwlock. 8967c478bd9Sstevel@tonic-gate */ 8977c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 8987c478bd9Sstevel@tonic-gate 8997c478bd9Sstevel@tonic-gate bf_index = chunk / cmap->cmap_chunksperbf; 9007c478bd9Sstevel@tonic-gate 9017c478bd9Sstevel@tonic-gate /* read buffer from backing file */ 9027c478bd9Sstevel@tonic-gate error = vn_rdwr(UIO_READ, 9037c478bd9Sstevel@tonic-gate (sidp->sid_cowinfo->cow_backfile_array)[bf_index], 9047c478bd9Sstevel@tonic-gate buffer, len, ((chunk % cmap->cmap_chunksperbf) * 9057c478bd9Sstevel@tonic-gate cmap->cmap_chunksz) + offset, UIO_SYSSPACE, 0, 9067c478bd9Sstevel@tonic-gate RLIM64_INFINITY, kcred, &resid); 9077c478bd9Sstevel@tonic-gate } 9087c478bd9Sstevel@tonic-gate 9097c478bd9Sstevel@tonic-gate return (error); 9107c478bd9Sstevel@tonic-gate } 9117c478bd9Sstevel@tonic-gate 9127c478bd9Sstevel@tonic-gate /* 9137c478bd9Sstevel@tonic-gate * snap_print() - snapshot driver print(9E) routine 9147c478bd9Sstevel@tonic-gate * 9157c478bd9Sstevel@tonic-gate * prints the device identification string. 
9167c478bd9Sstevel@tonic-gate */ 9177c478bd9Sstevel@tonic-gate static int 9187c478bd9Sstevel@tonic-gate snap_print(dev_t dev, char *str) 9197c478bd9Sstevel@tonic-gate { 9207c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 9217c478bd9Sstevel@tonic-gate minor_t minor; 9227c478bd9Sstevel@tonic-gate 9237c478bd9Sstevel@tonic-gate minor = getminor(dev); 9247c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 9257c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 9267c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 9277c478bd9Sstevel@tonic-gate "snap_print: could not find state for snapshot %d.", minor); 9287c478bd9Sstevel@tonic-gate return (ENXIO); 9297c478bd9Sstevel@tonic-gate } 9307c478bd9Sstevel@tonic-gate 9317c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "snap_print: snapshot %d: %s", minor, str); 9327c478bd9Sstevel@tonic-gate 9337c478bd9Sstevel@tonic-gate return (0); 9347c478bd9Sstevel@tonic-gate } 9357c478bd9Sstevel@tonic-gate 9367c478bd9Sstevel@tonic-gate /* 9377c478bd9Sstevel@tonic-gate * snap_prop_op() - snapshot driver prop_op(9E) routine 9387c478bd9Sstevel@tonic-gate * 9397c478bd9Sstevel@tonic-gate * get 32-bit and 64-bit values for size (character driver) and nblocks 9407c478bd9Sstevel@tonic-gate * (block driver). 
9417c478bd9Sstevel@tonic-gate */ 9427c478bd9Sstevel@tonic-gate static int 9437c478bd9Sstevel@tonic-gate snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 9447c478bd9Sstevel@tonic-gate int flags, char *name, caddr_t valuep, int *lengthp) 9457c478bd9Sstevel@tonic-gate { 9467c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 9477c478bd9Sstevel@tonic-gate int length, km_flags; 9487c478bd9Sstevel@tonic-gate int nblocks, size; 9497c478bd9Sstevel@tonic-gate uint64_t Size, Nblocks; 9507c478bd9Sstevel@tonic-gate caddr_t buffer; 9517c478bd9Sstevel@tonic-gate int minor; 9527c478bd9Sstevel@tonic-gate dev_t mdev; 9537c478bd9Sstevel@tonic-gate 9547c478bd9Sstevel@tonic-gate minor = getminor(dev); 9557c478bd9Sstevel@tonic-gate length = *lengthp; /* Get callers length */ 9567c478bd9Sstevel@tonic-gate 9577c478bd9Sstevel@tonic-gate /* if this is the control device just check for .conf properties */ 9587c478bd9Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) 9597c478bd9Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 9607c478bd9Sstevel@tonic-gate valuep, lengthp)); 9617c478bd9Sstevel@tonic-gate /* check to see if there is a master device plumbed */ 9627c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 9637c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 9647c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 9657c478bd9Sstevel@tonic-gate "snap_prop_op: could not find state for " 9667c478bd9Sstevel@tonic-gate "snapshot %d.", minor); 9677c478bd9Sstevel@tonic-gate return (DDI_PROP_NOT_FOUND); 9687c478bd9Sstevel@tonic-gate } 9697c478bd9Sstevel@tonic-gate 9707c478bd9Sstevel@tonic-gate if (((*sidpp)->sid_fvp == NULL) || ((*sidpp)->sid_fvp->v_vfsp == NULL)) 9717c478bd9Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 9727c478bd9Sstevel@tonic-gate valuep, lengthp)); 9737c478bd9Sstevel@tonic-gate mdev = (*sidpp)->sid_fvp->v_vfsp->vfs_dev; 9747c478bd9Sstevel@tonic-gate 9757c478bd9Sstevel@tonic-gate /* get size 
information from the master device. */ 9767c478bd9Sstevel@tonic-gate 9777c478bd9Sstevel@tonic-gate if (strcmp(name, "nblocks") == 0) { 9787c478bd9Sstevel@tonic-gate nblocks = bdev_size(mdev); 9797c478bd9Sstevel@tonic-gate *lengthp = sizeof (nblocks); /* Set callers length */ 9807c478bd9Sstevel@tonic-gate } else if (strcmp(name, "Nblocks") == 0) { 9817c478bd9Sstevel@tonic-gate Nblocks = bdev_Size(mdev); 9827c478bd9Sstevel@tonic-gate *lengthp = sizeof (Nblocks); /* Set callers length */ 9837c478bd9Sstevel@tonic-gate } else if (strcmp(name, "size") == 0) { 9847c478bd9Sstevel@tonic-gate size = cdev_size(mdev); 9857c478bd9Sstevel@tonic-gate *lengthp = sizeof (size); /* Set callers length */ 9867c478bd9Sstevel@tonic-gate } else if (strcmp(name, "Size") == 0) { 9877c478bd9Sstevel@tonic-gate Size = cdev_Size(mdev); 9887c478bd9Sstevel@tonic-gate *lengthp = sizeof (Size); /* Set callers length */ 9897c478bd9Sstevel@tonic-gate } else { /* not for us */ 9907c478bd9Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 9917c478bd9Sstevel@tonic-gate valuep, lengthp)); 9927c478bd9Sstevel@tonic-gate } 9937c478bd9Sstevel@tonic-gate 9947c478bd9Sstevel@tonic-gate /* 9957c478bd9Sstevel@tonic-gate * If length only request, just return the length. 9967c478bd9Sstevel@tonic-gate */ 9977c478bd9Sstevel@tonic-gate if (prop_op == PROP_LEN) { 9987c478bd9Sstevel@tonic-gate return (DDI_PROP_SUCCESS); 9997c478bd9Sstevel@tonic-gate } 10007c478bd9Sstevel@tonic-gate 10017c478bd9Sstevel@tonic-gate /* 10027c478bd9Sstevel@tonic-gate * Allocate buffer, if required. Either way, set `buffer' variable. 
10037c478bd9Sstevel@tonic-gate */ 10047c478bd9Sstevel@tonic-gate switch (prop_op) { 10057c478bd9Sstevel@tonic-gate case PROP_LEN_AND_VAL_ALLOC: 10067c478bd9Sstevel@tonic-gate 10077c478bd9Sstevel@tonic-gate km_flags = KM_NOSLEEP; 10087c478bd9Sstevel@tonic-gate 10097c478bd9Sstevel@tonic-gate if (flags & DDI_PROP_CANSLEEP) 10107c478bd9Sstevel@tonic-gate km_flags = KM_SLEEP; 10117c478bd9Sstevel@tonic-gate 10127c478bd9Sstevel@tonic-gate buffer = kmem_alloc(*lengthp, km_flags); 10137c478bd9Sstevel@tonic-gate if (buffer == NULL) { 10147c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_get_prop: no mem for " 10157c478bd9Sstevel@tonic-gate "property %s.", name); 10167c478bd9Sstevel@tonic-gate return (DDI_PROP_NO_MEMORY); 10177c478bd9Sstevel@tonic-gate } 10187c478bd9Sstevel@tonic-gate *(caddr_t *)valuep = buffer; /* Set callers buf ptr */ 10197c478bd9Sstevel@tonic-gate break; 10207c478bd9Sstevel@tonic-gate 10217c478bd9Sstevel@tonic-gate case PROP_LEN_AND_VAL_BUF: 10227c478bd9Sstevel@tonic-gate 10237c478bd9Sstevel@tonic-gate if (*lengthp > length) 10247c478bd9Sstevel@tonic-gate return (DDI_PROP_BUF_TOO_SMALL); 10257c478bd9Sstevel@tonic-gate 10267c478bd9Sstevel@tonic-gate buffer = valuep; /* get callers buf ptr */ 10277c478bd9Sstevel@tonic-gate break; 10287c478bd9Sstevel@tonic-gate } 10297c478bd9Sstevel@tonic-gate 10307c478bd9Sstevel@tonic-gate if (strcmp(name, "nblocks") == 0) { 10317c478bd9Sstevel@tonic-gate *((uint_t *)buffer) = nblocks; 10327c478bd9Sstevel@tonic-gate } else if (strcmp(name, "Nblocks") == 0) { 10337c478bd9Sstevel@tonic-gate *((uint64_t *)buffer) = Nblocks; 10347c478bd9Sstevel@tonic-gate } else if (strcmp(name, "size") == 0) { 10357c478bd9Sstevel@tonic-gate *((uint_t *)buffer) = size; 10367c478bd9Sstevel@tonic-gate } else if (strcmp(name, "Size") == 0) { 10377c478bd9Sstevel@tonic-gate *((uint64_t *)buffer) = Size; 10387c478bd9Sstevel@tonic-gate } 10397c478bd9Sstevel@tonic-gate 10407c478bd9Sstevel@tonic-gate return (DDI_PROP_SUCCESS); 
10417c478bd9Sstevel@tonic-gate } 10427c478bd9Sstevel@tonic-gate 10437c478bd9Sstevel@tonic-gate /* 10447c478bd9Sstevel@tonic-gate * snap_ioctl() - snapshot driver ioctl(9E) routine 10457c478bd9Sstevel@tonic-gate * 10467c478bd9Sstevel@tonic-gate * only applies to the control device. The control device accepts two 10477c478bd9Sstevel@tonic-gate * ioctl requests: create a snapshot or delete a snapshot. In either 10487c478bd9Sstevel@tonic-gate * case, the vnode for the requested file system is extracted, and the 10497c478bd9Sstevel@tonic-gate * request is passed on to the file system via the same ioctl. The file 10507c478bd9Sstevel@tonic-gate * system is responsible for doing the things necessary for creating or 10517c478bd9Sstevel@tonic-gate * destroying a snapshot, including any file system specific operations 10527c478bd9Sstevel@tonic-gate * that must be performed as well as setting up and deleting the snapshot 10537c478bd9Sstevel@tonic-gate * state through the fssnap interfaces. 10547c478bd9Sstevel@tonic-gate */ 10557c478bd9Sstevel@tonic-gate static int 10567c478bd9Sstevel@tonic-gate snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 10577c478bd9Sstevel@tonic-gate int *rvalp) 10587c478bd9Sstevel@tonic-gate { 10597c478bd9Sstevel@tonic-gate minor_t minor; 10607c478bd9Sstevel@tonic-gate int error = 0; 10617c478bd9Sstevel@tonic-gate 10627c478bd9Sstevel@tonic-gate minor = getminor(dev); 10637c478bd9Sstevel@tonic-gate 10647c478bd9Sstevel@tonic-gate if (minor != SNAP_CTL_MINOR) { 10657c478bd9Sstevel@tonic-gate return (EINVAL); 10667c478bd9Sstevel@tonic-gate } 10677c478bd9Sstevel@tonic-gate 10687c478bd9Sstevel@tonic-gate switch (cmd) { 10697c478bd9Sstevel@tonic-gate case _FIOSNAPSHOTCREATE: 10707c478bd9Sstevel@tonic-gate { 10717c478bd9Sstevel@tonic-gate struct fiosnapcreate fc; 10727c478bd9Sstevel@tonic-gate struct file *fp; 10737c478bd9Sstevel@tonic-gate struct vnode *vp; 10747c478bd9Sstevel@tonic-gate 10757c478bd9Sstevel@tonic-gate if (ddi_copyin((void 
*)arg, &fc, sizeof (fc), mode)) 10767c478bd9Sstevel@tonic-gate return (EFAULT); 10777c478bd9Sstevel@tonic-gate 10787c478bd9Sstevel@tonic-gate /* get vnode for file system mount point */ 10797c478bd9Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == NULL) 10807c478bd9Sstevel@tonic-gate return (EBADF); 10817c478bd9Sstevel@tonic-gate 10827c478bd9Sstevel@tonic-gate ASSERT(fp->f_vnode); 10837c478bd9Sstevel@tonic-gate vp = fp->f_vnode; 10847c478bd9Sstevel@tonic-gate VN_HOLD(vp); 10857c478bd9Sstevel@tonic-gate releasef(fc.rootfiledesc); 10867c478bd9Sstevel@tonic-gate 10877c478bd9Sstevel@tonic-gate /* pass ioctl request to file system */ 10887c478bd9Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 10897c478bd9Sstevel@tonic-gate VN_RELE(vp); 10907c478bd9Sstevel@tonic-gate break; 10917c478bd9Sstevel@tonic-gate } 10927c478bd9Sstevel@tonic-gate case _FIOSNAPSHOTCREATE_MULTI: 10937c478bd9Sstevel@tonic-gate { 10947c478bd9Sstevel@tonic-gate struct fiosnapcreate_multi fc; 10957c478bd9Sstevel@tonic-gate struct file *fp; 10967c478bd9Sstevel@tonic-gate struct vnode *vp; 10977c478bd9Sstevel@tonic-gate 10987c478bd9Sstevel@tonic-gate if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode)) 10997c478bd9Sstevel@tonic-gate return (EFAULT); 11007c478bd9Sstevel@tonic-gate 11017c478bd9Sstevel@tonic-gate /* get vnode for file system mount point */ 11027c478bd9Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == NULL) 11037c478bd9Sstevel@tonic-gate return (EBADF); 11047c478bd9Sstevel@tonic-gate 11057c478bd9Sstevel@tonic-gate ASSERT(fp->f_vnode); 11067c478bd9Sstevel@tonic-gate vp = fp->f_vnode; 11077c478bd9Sstevel@tonic-gate VN_HOLD(vp); 11087c478bd9Sstevel@tonic-gate releasef(fc.rootfiledesc); 11097c478bd9Sstevel@tonic-gate 11107c478bd9Sstevel@tonic-gate /* pass ioctl request to file system */ 11117c478bd9Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 11127c478bd9Sstevel@tonic-gate VN_RELE(vp); 11137c478bd9Sstevel@tonic-gate break; 
11147c478bd9Sstevel@tonic-gate } 11157c478bd9Sstevel@tonic-gate case _FIOSNAPSHOTDELETE: 11167c478bd9Sstevel@tonic-gate { 11177c478bd9Sstevel@tonic-gate major_t major; 11187c478bd9Sstevel@tonic-gate struct fiosnapdelete fc; 11197c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = NULL; 11207c478bd9Sstevel@tonic-gate snapshot_id_t *sidnextp = NULL; 11217c478bd9Sstevel@tonic-gate struct file *fp = NULL; 11227c478bd9Sstevel@tonic-gate struct vnode *vp = NULL; 11237c478bd9Sstevel@tonic-gate struct vfs *vfsp = NULL; 11247c478bd9Sstevel@tonic-gate vfsops_t *vfsops = EIO_vfsops; 11257c478bd9Sstevel@tonic-gate 11267c478bd9Sstevel@tonic-gate if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode)) 11277c478bd9Sstevel@tonic-gate return (EFAULT); 11287c478bd9Sstevel@tonic-gate 11297c478bd9Sstevel@tonic-gate /* get vnode for file system mount point */ 11307c478bd9Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == NULL) 11317c478bd9Sstevel@tonic-gate return (EBADF); 11327c478bd9Sstevel@tonic-gate 11337c478bd9Sstevel@tonic-gate ASSERT(fp->f_vnode); 11347c478bd9Sstevel@tonic-gate vp = fp->f_vnode; 11357c478bd9Sstevel@tonic-gate VN_HOLD(vp); 11367c478bd9Sstevel@tonic-gate releasef(fc.rootfiledesc); 11377c478bd9Sstevel@tonic-gate /* 11387c478bd9Sstevel@tonic-gate * Test for two formats of delete and set correct minor/vp: 11397c478bd9Sstevel@tonic-gate * pseudo device: 11407c478bd9Sstevel@tonic-gate * fssnap -d [/dev/fssnap/x] 11417c478bd9Sstevel@tonic-gate * or 11427c478bd9Sstevel@tonic-gate * mount point: 11437c478bd9Sstevel@tonic-gate * fssnap -d [/mntpt] 11447c478bd9Sstevel@tonic-gate * Note that minor is verified to be equal to SNAP_CTL_MINOR 11457c478bd9Sstevel@tonic-gate * at this point which is an invalid minor number. 
11467c478bd9Sstevel@tonic-gate */ 11477c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 11487c478bd9Sstevel@tonic-gate major = ddi_driver_major(fssnap_dip); 11497c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 11507c478bd9Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidnextp) { 11517c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 11527c478bd9Sstevel@tonic-gate sidnextp = sidp->sid_next; 11537c478bd9Sstevel@tonic-gate /* pseudo device: */ 11547c478bd9Sstevel@tonic-gate if (major == getmajor(vp->v_rdev)) { 11557c478bd9Sstevel@tonic-gate minor = getminor(vp->v_rdev); 11567c478bd9Sstevel@tonic-gate if (sidp->sid_snapnumber == (uint_t)minor && 11577c478bd9Sstevel@tonic-gate sidp->sid_fvp) { 11587c478bd9Sstevel@tonic-gate VN_RELE(vp); 11597c478bd9Sstevel@tonic-gate vp = sidp->sid_fvp; 11607c478bd9Sstevel@tonic-gate VN_HOLD(vp); 11617c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 11627c478bd9Sstevel@tonic-gate break; 11637c478bd9Sstevel@tonic-gate } 11647c478bd9Sstevel@tonic-gate /* Mount point: */ 11657c478bd9Sstevel@tonic-gate } else { 11667c478bd9Sstevel@tonic-gate if (sidp->sid_fvp == vp) { 11677c478bd9Sstevel@tonic-gate minor = sidp->sid_snapnumber; 11687c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 11697c478bd9Sstevel@tonic-gate break; 11707c478bd9Sstevel@tonic-gate } 11717c478bd9Sstevel@tonic-gate } 11727c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 11737c478bd9Sstevel@tonic-gate } 11747c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 11757c478bd9Sstevel@tonic-gate /* Verify minor got set correctly above */ 11767c478bd9Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) { 11777c478bd9Sstevel@tonic-gate VN_RELE(vp); 11787c478bd9Sstevel@tonic-gate return (EINVAL); 11797c478bd9Sstevel@tonic-gate } 11807c478bd9Sstevel@tonic-gate dev = makedevice(major, minor); 11817c478bd9Sstevel@tonic-gate /* 11827c478bd9Sstevel@tonic-gate * Create dummy vfs entry 11837c478bd9Sstevel@tonic-gate * to use as a 
locking semaphore across the IOCTL 11847c478bd9Sstevel@tonic-gate * for mount in progress cases... 11857c478bd9Sstevel@tonic-gate */ 11867c478bd9Sstevel@tonic-gate vfsp = kmem_alloc(sizeof (vfs_t), KM_SLEEP); 11877c478bd9Sstevel@tonic-gate VFS_INIT(vfsp, vfsops, NULL); 1188*ddfcde86Srsb VFS_HOLD(vfsp); 11897c478bd9Sstevel@tonic-gate vfs_addmip(dev, vfsp); 11907c478bd9Sstevel@tonic-gate if ((vfs_devmounting(dev, vfsp)) || 11917c478bd9Sstevel@tonic-gate (vfs_devismounted(dev))) { 11927c478bd9Sstevel@tonic-gate vfs_delmip(vfsp); 1193*ddfcde86Srsb VFS_RELE(vfsp); 11947c478bd9Sstevel@tonic-gate VN_RELE(vp); 11957c478bd9Sstevel@tonic-gate return (EBUSY); 11967c478bd9Sstevel@tonic-gate } 11977c478bd9Sstevel@tonic-gate /* 11987c478bd9Sstevel@tonic-gate * Nobody mounted but do not release mount in progress lock 11997c478bd9Sstevel@tonic-gate * until IOCTL complete to prohibit a mount sneaking 12007c478bd9Sstevel@tonic-gate * in 12017c478bd9Sstevel@tonic-gate */ 12027c478bd9Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 12037c478bd9Sstevel@tonic-gate vfs_delmip(vfsp); 1204*ddfcde86Srsb VFS_RELE(vfsp); 12057c478bd9Sstevel@tonic-gate VN_RELE(vp); 12067c478bd9Sstevel@tonic-gate break; 12077c478bd9Sstevel@tonic-gate } 12087c478bd9Sstevel@tonic-gate default: 12097c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: Invalid ioctl cmd %d, minor %d.", 12107c478bd9Sstevel@tonic-gate cmd, minor); 12117c478bd9Sstevel@tonic-gate return (EINVAL); 12127c478bd9Sstevel@tonic-gate } 12137c478bd9Sstevel@tonic-gate 12147c478bd9Sstevel@tonic-gate return (error); 12157c478bd9Sstevel@tonic-gate } 12167c478bd9Sstevel@tonic-gate 12177c478bd9Sstevel@tonic-gate 12187c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 12197c478bd9Sstevel@tonic-gate 12207c478bd9Sstevel@tonic-gate /* 12217c478bd9Sstevel@tonic-gate * Translation Table Routines 12227c478bd9Sstevel@tonic-gate * 12237c478bd9Sstevel@tonic-gate * These support 
routines implement a simple doubly linked list 12247c478bd9Sstevel@tonic-gate * to keep track of chunks that are currently in memory. The maximum 12257c478bd9Sstevel@tonic-gate * size of the list is determined by the fssnap_max_mem_chunks variable. 12267c478bd9Sstevel@tonic-gate * The cmap_rwlock is used to protect the linkage of the list. 12277c478bd9Sstevel@tonic-gate */ 12287c478bd9Sstevel@tonic-gate 12297c478bd9Sstevel@tonic-gate /* 12307c478bd9Sstevel@tonic-gate * transtbl_add() - add a node to the translation table 12317c478bd9Sstevel@tonic-gate * 12327c478bd9Sstevel@tonic-gate * allocates a new node and points it at the buffer passed in. The node 12337c478bd9Sstevel@tonic-gate * is added to the beginning of the doubly linked list and the head of 12347c478bd9Sstevel@tonic-gate * the list is moved. The cmap_rwlock must be held as a writer through 12357c478bd9Sstevel@tonic-gate * this operation. 12367c478bd9Sstevel@tonic-gate */ 12377c478bd9Sstevel@tonic-gate static cow_map_node_t * 12387c478bd9Sstevel@tonic-gate transtbl_add(cow_map_t *cmap, chunknumber_t chunk, caddr_t buf) 12397c478bd9Sstevel@tonic-gate { 12407c478bd9Sstevel@tonic-gate cow_map_node_t *cmnode; 12417c478bd9Sstevel@tonic-gate 12427c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 12437c478bd9Sstevel@tonic-gate 12447c478bd9Sstevel@tonic-gate cmnode = kmem_alloc(sizeof (cow_map_node_t), KM_SLEEP); 12457c478bd9Sstevel@tonic-gate 12467c478bd9Sstevel@tonic-gate /* 12477c478bd9Sstevel@tonic-gate * insert new translations at the beginning so cmn_table is always 12487c478bd9Sstevel@tonic-gate * the first node. 
12497c478bd9Sstevel@tonic-gate */ 12507c478bd9Sstevel@tonic-gate cmnode->cmn_chunk = chunk; 12517c478bd9Sstevel@tonic-gate cmnode->cmn_buf = buf; 12527c478bd9Sstevel@tonic-gate cmnode->cmn_prev = NULL; 12537c478bd9Sstevel@tonic-gate cmnode->cmn_next = cmap->cmap_table; 12547c478bd9Sstevel@tonic-gate if (cmnode->cmn_next) 12557c478bd9Sstevel@tonic-gate cmnode->cmn_next->cmn_prev = cmnode; 12567c478bd9Sstevel@tonic-gate cmap->cmap_table = cmnode; 12577c478bd9Sstevel@tonic-gate 12587c478bd9Sstevel@tonic-gate return (cmnode); 12597c478bd9Sstevel@tonic-gate } 12607c478bd9Sstevel@tonic-gate 12617c478bd9Sstevel@tonic-gate /* 12627c478bd9Sstevel@tonic-gate * transtbl_get() - look up a node in the translation table 12637c478bd9Sstevel@tonic-gate * 12647c478bd9Sstevel@tonic-gate * called by the snapshot driver to find data that has been translated. 12657c478bd9Sstevel@tonic-gate * The lookup is done by the chunk number, and the node is returned. 12667c478bd9Sstevel@tonic-gate * If the node was not found, NULL is returned. 
12677c478bd9Sstevel@tonic-gate */ 12687c478bd9Sstevel@tonic-gate static cow_map_node_t * 12697c478bd9Sstevel@tonic-gate transtbl_get(cow_map_t *cmap, chunknumber_t chunk) 12707c478bd9Sstevel@tonic-gate { 12717c478bd9Sstevel@tonic-gate cow_map_node_t *cmn; 12727c478bd9Sstevel@tonic-gate 12737c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&cmap->cmap_rwlock)); 12747c478bd9Sstevel@tonic-gate ASSERT(cmap); 12757c478bd9Sstevel@tonic-gate 12767c478bd9Sstevel@tonic-gate /* search the translation table */ 12777c478bd9Sstevel@tonic-gate for (cmn = cmap->cmap_table; cmn != NULL; cmn = cmn->cmn_next) { 12787c478bd9Sstevel@tonic-gate if (cmn->cmn_chunk == chunk) 12797c478bd9Sstevel@tonic-gate return (cmn); 12807c478bd9Sstevel@tonic-gate } 12817c478bd9Sstevel@tonic-gate 12827c478bd9Sstevel@tonic-gate /* not found */ 12837c478bd9Sstevel@tonic-gate return (NULL); 12847c478bd9Sstevel@tonic-gate } 12857c478bd9Sstevel@tonic-gate 12867c478bd9Sstevel@tonic-gate /* 12877c478bd9Sstevel@tonic-gate * transtbl_delete() - delete a node from the translation table 12887c478bd9Sstevel@tonic-gate * 12897c478bd9Sstevel@tonic-gate * called when a node's data has been written out to disk. The 12907c478bd9Sstevel@tonic-gate * cmap_rwlock must be held as a writer for this operation. If the node 12917c478bd9Sstevel@tonic-gate * being deleted is the head of the list, then the head is moved to the 12927c478bd9Sstevel@tonic-gate * next node. Both the node's data and the node itself are freed. 
12937c478bd9Sstevel@tonic-gate */ 12947c478bd9Sstevel@tonic-gate static void 12957c478bd9Sstevel@tonic-gate transtbl_delete(cow_map_t *cmap, cow_map_node_t *cmn) 12967c478bd9Sstevel@tonic-gate { 12977c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 12987c478bd9Sstevel@tonic-gate ASSERT(cmn); 12997c478bd9Sstevel@tonic-gate ASSERT(cmap->cmap_table); 13007c478bd9Sstevel@tonic-gate 13017c478bd9Sstevel@tonic-gate /* if the head of the list is being deleted, then move the head up */ 13027c478bd9Sstevel@tonic-gate if (cmap->cmap_table == cmn) { 13037c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_prev == NULL); 13047c478bd9Sstevel@tonic-gate cmap->cmap_table = cmn->cmn_next; 13057c478bd9Sstevel@tonic-gate } 13067c478bd9Sstevel@tonic-gate 13077c478bd9Sstevel@tonic-gate 13087c478bd9Sstevel@tonic-gate /* make previous node's next pointer skip over current node */ 13097c478bd9Sstevel@tonic-gate if (cmn->cmn_prev != NULL) { 13107c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_prev->cmn_next == cmn); 13117c478bd9Sstevel@tonic-gate cmn->cmn_prev->cmn_next = cmn->cmn_next; 13127c478bd9Sstevel@tonic-gate } 13137c478bd9Sstevel@tonic-gate 13147c478bd9Sstevel@tonic-gate /* make next node's previous pointer skip over current node */ 13157c478bd9Sstevel@tonic-gate if (cmn->cmn_next != NULL) { 13167c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_next->cmn_prev == cmn); 13177c478bd9Sstevel@tonic-gate cmn->cmn_next->cmn_prev = cmn->cmn_prev; 13187c478bd9Sstevel@tonic-gate } 13197c478bd9Sstevel@tonic-gate 13207c478bd9Sstevel@tonic-gate /* free the data and the node */ 13217c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_buf); 13227c478bd9Sstevel@tonic-gate kmem_free(cmn->cmn_buf, cmap->cmap_chunksz); 13237c478bd9Sstevel@tonic-gate kmem_free(cmn, sizeof (cow_map_node_t)); 13247c478bd9Sstevel@tonic-gate } 13257c478bd9Sstevel@tonic-gate 13267c478bd9Sstevel@tonic-gate /* 13277c478bd9Sstevel@tonic-gate * transtbl_free() - free the entire translation table 13287c478bd9Sstevel@tonic-gate * 
13297c478bd9Sstevel@tonic-gate * called when the snapshot is deleted. This frees all of the nodes in 13307c478bd9Sstevel@tonic-gate * the translation table (but not the bitmaps). 13317c478bd9Sstevel@tonic-gate */ 13327c478bd9Sstevel@tonic-gate static void 13337c478bd9Sstevel@tonic-gate transtbl_free(cow_map_t *cmap) 13347c478bd9Sstevel@tonic-gate { 13357c478bd9Sstevel@tonic-gate cow_map_node_t *curnode; 13367c478bd9Sstevel@tonic-gate cow_map_node_t *tempnode; 13377c478bd9Sstevel@tonic-gate 13387c478bd9Sstevel@tonic-gate for (curnode = cmap->cmap_table; curnode != NULL; curnode = tempnode) { 13397c478bd9Sstevel@tonic-gate tempnode = curnode->cmn_next; 13407c478bd9Sstevel@tonic-gate 13417c478bd9Sstevel@tonic-gate kmem_free(curnode->cmn_buf, cmap->cmap_chunksz); 13427c478bd9Sstevel@tonic-gate kmem_free(curnode, sizeof (cow_map_node_t)); 13437c478bd9Sstevel@tonic-gate } 13447c478bd9Sstevel@tonic-gate } 13457c478bd9Sstevel@tonic-gate 13467c478bd9Sstevel@tonic-gate 13477c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 13487c478bd9Sstevel@tonic-gate 13497c478bd9Sstevel@tonic-gate /* 13507c478bd9Sstevel@tonic-gate * Interface Implementation Routines 13517c478bd9Sstevel@tonic-gate * 13527c478bd9Sstevel@tonic-gate * The following functions implement snapshot interface routines that are 13537c478bd9Sstevel@tonic-gate * called by the file system to create, delete, and use a snapshot. The 13547c478bd9Sstevel@tonic-gate * interfaces are defined in fssnap_if.c and are filled in by this driver 13557c478bd9Sstevel@tonic-gate * when it is loaded. This technique allows the file system to depend on 13567c478bd9Sstevel@tonic-gate * the interface module without having to load the full implementation and 13577c478bd9Sstevel@tonic-gate * snapshot device drivers. 
*/

/*
 * fssnap_strategy_impl() - strategy routine called by the file system
 *
 * called by the file system to handle copy-on-write when necessary. All
 * reads and writes that the file system performs should go through this
 * function. If the file system calls the underlying device's strategy
 * routine without going through fssnap_strategy() (e.g. by calling
 * bdev_strategy()), the snapshot may not be consistent.
 *
 * This function starts by doing significant sanity checking to ensure
 * the snapshot was not deleted out from under it or deleted and then
 * recreated. To do this, it checks the actual pointer passed into it
 * (i.e. the handle held by the file system). NOTE that the parameter is
 * a POINTER TO A POINTER to the snapshot id. Once the snapshot id is
 * locked, it knows things are ok and that this snapshot is really for
 * this file system.
 *
 * If the request is a write, fssnap_translate() is called to determine
 * whether a copy-on-write is required. If it is a read, the read is
 * simply passed on to the underlying device.
13807c478bd9Sstevel@tonic-gate */ 13817c478bd9Sstevel@tonic-gate static void 13827c478bd9Sstevel@tonic-gate fssnap_strategy_impl(void *snapshot_id, buf_t *bp) 13837c478bd9Sstevel@tonic-gate { 13847c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 13857c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 13867c478bd9Sstevel@tonic-gate int error; 13877c478bd9Sstevel@tonic-gate 13887c478bd9Sstevel@tonic-gate /* read requests are always passed through */ 13897c478bd9Sstevel@tonic-gate if (bp->b_flags & B_READ) { 13907c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 13917c478bd9Sstevel@tonic-gate return; 13927c478bd9Sstevel@tonic-gate } 13937c478bd9Sstevel@tonic-gate 13947c478bd9Sstevel@tonic-gate /* 13957c478bd9Sstevel@tonic-gate * Because we were not able to take the snapshot read lock BEFORE 13967c478bd9Sstevel@tonic-gate * checking for a snapshot back in the file system, things may have 13977c478bd9Sstevel@tonic-gate * drastically changed out from under us. For instance, the snapshot 13987c478bd9Sstevel@tonic-gate * may have been deleted, deleted and recreated, or worse yet, deleted 13997c478bd9Sstevel@tonic-gate * for this file system but now the snapshot number is in use by another 14007c478bd9Sstevel@tonic-gate * file system. 14017c478bd9Sstevel@tonic-gate * 14027c478bd9Sstevel@tonic-gate * Having a pointer to the file system's snapshot id pointer allows us 14037c478bd9Sstevel@tonic-gate * to sanity check most of this, though it assumes the file system is 14047c478bd9Sstevel@tonic-gate * keeping track of a pointer to the snapshot_id somewhere. 14057c478bd9Sstevel@tonic-gate */ 14067c478bd9Sstevel@tonic-gate sidpp = (struct snapshot_id **)snapshot_id; 14077c478bd9Sstevel@tonic-gate sidp = *sidpp; 14087c478bd9Sstevel@tonic-gate 14097c478bd9Sstevel@tonic-gate /* 14107c478bd9Sstevel@tonic-gate * if this file system's snapshot was disabled, just pass the 14117c478bd9Sstevel@tonic-gate * request through. 
14127c478bd9Sstevel@tonic-gate */ 14137c478bd9Sstevel@tonic-gate if (sidp == NULL) { 14147c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 14157c478bd9Sstevel@tonic-gate return; 14167c478bd9Sstevel@tonic-gate } 14177c478bd9Sstevel@tonic-gate 14187c478bd9Sstevel@tonic-gate /* 14197c478bd9Sstevel@tonic-gate * Once we have the reader lock the snapshot will not magically go 14207c478bd9Sstevel@tonic-gate * away. But things may have changed on us before this so double check. 14217c478bd9Sstevel@tonic-gate */ 14227c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 14237c478bd9Sstevel@tonic-gate 14247c478bd9Sstevel@tonic-gate /* 14257c478bd9Sstevel@tonic-gate * if an error was founds somewhere the DELETE flag will be 14267c478bd9Sstevel@tonic-gate * set to indicate the snapshot should be deleted and no new 14277c478bd9Sstevel@tonic-gate * translations should occur. 14287c478bd9Sstevel@tonic-gate */ 14297c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_DELETE) { 14307c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 14317c478bd9Sstevel@tonic-gate (void) fssnap_delete_impl(sidpp); 14327c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 14337c478bd9Sstevel@tonic-gate return; 14347c478bd9Sstevel@tonic-gate } 14357c478bd9Sstevel@tonic-gate 14367c478bd9Sstevel@tonic-gate /* 14377c478bd9Sstevel@tonic-gate * If the file system is no longer pointing to the snapshot we were 14387c478bd9Sstevel@tonic-gate * called with, then it should not attempt to translate this buffer as 14397c478bd9Sstevel@tonic-gate * it may be going to a snapshot for a different file system. 14407c478bd9Sstevel@tonic-gate * Even if the file system snapshot pointer is still the same, the 14417c478bd9Sstevel@tonic-gate * snapshot may have been disabled before we got the reader lock. 
14427c478bd9Sstevel@tonic-gate */ 14437c478bd9Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 14447c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 14457c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 14467c478bd9Sstevel@tonic-gate return; 14477c478bd9Sstevel@tonic-gate } 14487c478bd9Sstevel@tonic-gate 14497c478bd9Sstevel@tonic-gate /* 14507c478bd9Sstevel@tonic-gate * At this point we're sure the snapshot will not go away while the 14517c478bd9Sstevel@tonic-gate * reader lock is held, and we are reasonably certain that we are 14527c478bd9Sstevel@tonic-gate * writing to the correct snapshot. 14537c478bd9Sstevel@tonic-gate */ 14547c478bd9Sstevel@tonic-gate if ((error = fssnap_translate(sidpp, bp)) != 0) { 14557c478bd9Sstevel@tonic-gate /* 14567c478bd9Sstevel@tonic-gate * fssnap_translate can release the reader lock if it 14577c478bd9Sstevel@tonic-gate * has to wait for a semaphore. In this case it is possible 14587c478bd9Sstevel@tonic-gate * for the snapshot to be deleted in this time frame. If this 14597c478bd9Sstevel@tonic-gate * happens just sent the buf thru to the filesystems device. 14607c478bd9Sstevel@tonic-gate */ 14617c478bd9Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 14627c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 14637c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 14647c478bd9Sstevel@tonic-gate return; 14657c478bd9Sstevel@tonic-gate } 14667c478bd9Sstevel@tonic-gate bioerror(bp, error); 14677c478bd9Sstevel@tonic-gate biodone(bp); 14687c478bd9Sstevel@tonic-gate } 14697c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 14707c478bd9Sstevel@tonic-gate } 14717c478bd9Sstevel@tonic-gate 14727c478bd9Sstevel@tonic-gate /* 14737c478bd9Sstevel@tonic-gate * fssnap_translate() - helper function for fssnap_strategy() 14747c478bd9Sstevel@tonic-gate * 14757c478bd9Sstevel@tonic-gate * performs the actual copy-on-write for write requests, if required. 
14767c478bd9Sstevel@tonic-gate * This function does the real work of the file system side of things. 14777c478bd9Sstevel@tonic-gate * 14787c478bd9Sstevel@tonic-gate * It first checks the candidate bitmap to quickly determine whether any 14797c478bd9Sstevel@tonic-gate * action is necessary. If the candidate bitmap indicates the chunk was 14807c478bd9Sstevel@tonic-gate * allocated when the snapshot was created, then it checks to see whether 14817c478bd9Sstevel@tonic-gate * a translation already exists. If a translation already exists then no 14827c478bd9Sstevel@tonic-gate * action is required. If the chunk is a candidate for copy-on-write, 14837c478bd9Sstevel@tonic-gate * and a translation does not already exist, then the chunk is read in 14847c478bd9Sstevel@tonic-gate * and a node is added to the translation table. 14857c478bd9Sstevel@tonic-gate * 14867c478bd9Sstevel@tonic-gate * Once all of the chunks in the request range have been copied (if they 14877c478bd9Sstevel@tonic-gate * needed to be), then the original request can be satisfied and the old 14887c478bd9Sstevel@tonic-gate * data can be overwritten. 
*/
static int
fssnap_translate(struct snapshot_id **sidpp, struct buf *wbp)
{
	snapshot_id_t	*sidp = *sidpp;
	struct buf	*oldbp;	/* buffer to store old data in */
	struct cow_info	*cowp = sidp->sid_cowinfo;
	cow_map_t	*cmap = &cowp->cow_map;
	cow_map_node_t	*cmn;
	chunknumber_t	cowchunk, startchunk, endchunk;
	int		error;
	int		throttle_write = 0;

	/*
	 * Contract, as implemented below:
	 *   sidpp - the file system's pointer to its snapshot id pointer;
	 *	     re-read after any sleep to detect a changed snapshot.
	 *   wbp   - the write request being issued by the file system.
	 *
	 * Returns 0 after wbp has been handed to bdev_strategy().
	 * Returns ENXIO if the snapshot changed or became inactive while
	 * this thread waited on the throttle semaphore, or the geterror()
	 * value if reading the old data failed; in both error cases wbp has
	 * NOT been issued and the caller decides what to do with it.
	 *
	 * sid_rwlock is held as a reader on entry and on every return path,
	 * although it may be dropped and re-taken in between.
	 */

	/* the caller must already hold the snapshot's lock as a reader */
	ASSERT(RW_READ_HELD(&sidp->sid_rwlock));

	/*
	 * Compute the first and last chunk touched by this write.
	 * NOTE(review): b_bcount-1 assumes a non-zero b_bcount -- confirm
	 * that callers never pass a zero-length buffer.
	 */
	startchunk = dbtocowchunk(cmap, wbp->b_lblkno);
	endchunk = dbtocowchunk(cmap, wbp->b_lblkno +
	    ((wbp->b_bcount-1) >> DEV_BSHIFT));

	/*
	 * Do not throttle the writes of the fssnap taskq thread and
	 * the log roll (trans_roll) thread. Furthermore the writes to
	 * the on-disk log are also not subject to throttling.
	 * The fssnap_write_taskq thread's write can block on the throttling
	 * semaphore which leads to self-deadlock as this same thread
	 * releases the throttling semaphore after completing the IO.
	 * If the trans_roll thread's write is throttled then we can deadlock
	 * because the fssnap_taskq_thread which releases the throttling
	 * semaphore can block waiting for log space which can only be
	 * released by the trans_roll thread.
	 */

	throttle_write = !(taskq_member(cowp->cow_taskq, curthread) ||
	    tsd_get(bypass_snapshot_throttle_key));

	/*
	 * Iterate through all chunks covered by this write and perform the
	 * copy-aside if necessary. Once all chunks have been safely
	 * stowed away, the new data may be written in a single sweep.
	 *
	 * For each chunk in the range, the following sequence is performed:
	 *	- Is the chunk a candidate for translation?
	 *		o If not, then no translation is necessary, continue
	 *	- If it is a candidate, then does it already have a translation?
	 *		o If so, then no translation is necessary, continue
	 *	- If it is a candidate, but does not yet have a translation,
	 *	  then read the old data and schedule an asynchronous taskq
	 *	  to write the old data to the backing file.
	 *
	 * Once this has been performed over the entire range of chunks, then
	 * it is safe to overwrite the data that is there.
	 *
	 * Note that no lock is required to check the candidate bitmap because
	 * it never changes once the snapshot is created. The first check of
	 * the hastrans bitmap below is likewise made without taking
	 * cmap_rwlock. If it looks like a copy is required, cmap_rwlock is
	 * taken as a writer and the bitmap is re-checked, as it may have
	 * changed in the meantime. Finally, the write lock is held while
	 * reading the old data to make sure it is not translated out from
	 * under us.
	 *
	 * This locking mechanism should be sufficient to handle multiple
	 * threads writing to overlapping chunks simultaneously.
	 */
	for (cowchunk = startchunk; cowchunk <= endchunk; cowchunk++) {
		/*
		 * If the cowchunk is outside of the range of our
		 * candidate maps, then simply break out of the
		 * loop and pass the I/O through to bdev_strategy.
		 * This would occur if the file system has grown
		 * larger since the snapshot was taken.
		 */
		if (cowchunk >= (cmap->cmap_bmsize * NBBY))
			break;

		/*
		 * If no disk blocks were allocated in this chunk when the
		 * snapshot was created then no copy-on-write will be
		 * required. Since this bitmap is read-only no locks are
		 * necessary.
		 */
		if (isclr(cmap->cmap_candidate, cowchunk)) {
			continue;
		}

		/*
		 * If a translation already exists, the data can be written
		 * through since the old data has already been saved off.
		 * (Unlocked fast-path check; repeated under the writer lock
		 * further down.)
		 */
		if (isset(cmap->cmap_hastrans, cowchunk)) {
			continue;
		}


		/*
		 * Throttle translations if there are too many outstanding
		 * chunks in memory. The semaphore is sema_v'd by the taskq.
		 *
		 * You can't keep the sid_rwlock if you would go to sleep.
		 * This will result in deadlock when someone tries to delete
		 * the snapshot (wants the sid_rwlock as a writer, but can't
		 * get it).
		 */
		if (throttle_write) {
			/* try without sleeping first, keeping sid_rwlock */
			if (sema_tryp(&cmap->cmap_throttle_sem) == 0) {
				/*
				 * Would block: drop sid_rwlock, count this
				 * thread in cmap_waiters for the duration of
				 * the sleep, then re-take the reader lock.
				 */
				rw_exit(&sidp->sid_rwlock);
				atomic_add_32(&cmap->cmap_waiters, 1);
				sema_p(&cmap->cmap_throttle_sem);
				atomic_add_32(&cmap->cmap_waiters, -1);
				rw_enter(&sidp->sid_rwlock, RW_READER);

				/*
				 * Now since we released the sid_rwlock the
				 * state may have transitioned underneath us,
				 * so check that again. The semaphore just
				 * obtained is given back before bailing out.
				 */
				if (sidp != *sidpp || SID_INACTIVE(sidp)) {
					sema_v(&cmap->cmap_throttle_sem);
					return (ENXIO);
				}
			}
		}

		/*
		 * Acquire the lock as a writer and check to see if a
		 * translation has been added in the meantime. If so, the
		 * throttle slot taken above is not needed and is returned.
		 */
		rw_enter(&cmap->cmap_rwlock, RW_WRITER);
		if (isset(cmap->cmap_hastrans, cowchunk)) {
			if (throttle_write)
				sema_v(&cmap->cmap_throttle_sem);
			rw_exit(&cmap->cmap_rwlock);
			continue;	/* go to the next chunk */
		}

		/*
		 * read a full chunk of data from the requested offset rounded
		 * down to the nearest chunk size. The read targets the same
		 * device as the write (b_edev) and is waited for
		 * synchronously with cmap_rwlock still held as a writer.
		 */
		oldbp = getrbuf(KM_SLEEP);
		oldbp->b_lblkno = cowchunktodb(cmap, cowchunk);
		oldbp->b_edev = wbp->b_edev;
		oldbp->b_bcount = cmap->cmap_chunksz;
		oldbp->b_bufsize = cmap->cmap_chunksz;
		oldbp->b_iodone = NULL;
		oldbp->b_proc = NULL;
		oldbp->b_flags = B_READ;
		oldbp->b_un.b_addr = kmem_alloc(cmap->cmap_chunksz, KM_SLEEP);

		(void) bdev_strategy(oldbp);
		(void) biowait(oldbp);

		/*
		 * It's ok to bail in the middle of translating the range
		 * because the extra copy-asides will not hurt anything
		 * (except by using extra space in the backing store).
		 * On a read error the chunk buffer and the buf header are
		 * freed, the map lock is dropped and the throttle slot (if
		 * one was taken) is returned before the error is reported.
		 */
		if ((error = geterror(oldbp)) != 0) {
			cmn_err(CE_WARN, "fssnap_translate: error reading "
			    "old data for snapshot %d, chunk %llu, disk block "
			    "%lld, size %lu, error %d.", sidp->sid_snapnumber,
			    cowchunk, oldbp->b_lblkno, oldbp->b_bcount, error);
			kmem_free(oldbp->b_un.b_addr, cmap->cmap_chunksz);
			freerbuf(oldbp);
			rw_exit(&cmap->cmap_rwlock);
			if (throttle_write)
				sema_v(&cmap->cmap_throttle_sem);
			return (error);
		}

		/*
		 * add the node to the translation table and save a reference
		 * to pass to the taskq for writing out to the backing file.
		 * The data buffer now belongs to the node; only the buf
		 * header is released here.
		 */
		cmn = transtbl_add(cmap, cowchunk, oldbp->b_un.b_addr);
		freerbuf(oldbp);

		/*
		 * Add a reference to the snapshot id so the lower level
		 * processing (ie. the taskq) can get back to the state
		 * information. release_sem records whether this translation
		 * took a throttle slot, and the chunk is marked as translated
		 * while the writer lock is still held.
		 */
		cmn->cmn_sid = sidp;
		cmn->release_sem = throttle_write;
		setbit(cmap->cmap_hastrans, cowchunk);

		rw_exit(&cmap->cmap_rwlock);

		/*
		 * schedule the asynchronous write to the backing file
		 *
		 * NOTE(review): when cow_backfile_array is NULL no task is
		 * dispatched, so nothing in this function gives back the
		 * throttle slot taken above for this chunk -- confirm that
		 * this case is handled elsewhere.
		 */
		if (cowp->cow_backfile_array != NULL)
			(void) taskq_dispatch(cowp->cow_taskq,
			    fssnap_write_taskq, cmn, TQ_SLEEP);
	}

	/*
	 * Write new data in place of the old data. At this point all of the
	 * chunks touched by this write have been copied aside and so the new
	 * data can be written out all at once.
	 */
	(void) bdev_strategy(wbp);

	return (0);
}

/*
 * fssnap_write_taskq() - write in-memory translations to the backing file
 *
 * writes in-memory translations to the backing file asynchronously. A
 * task is dispatched each time a new translation is created. The task
 * writes the data to the backing file and removes it from the memory
 * list. The throttling semaphore is released only if the particular
 * translation was throttled in fssnap_translate.
17037c478bd9Sstevel@tonic-gate */ 17047c478bd9Sstevel@tonic-gate static void 17057c478bd9Sstevel@tonic-gate fssnap_write_taskq(void *arg) 17067c478bd9Sstevel@tonic-gate { 17077c478bd9Sstevel@tonic-gate cow_map_node_t *cmn = (cow_map_node_t *)arg; 17087c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = cmn->cmn_sid; 17097c478bd9Sstevel@tonic-gate cow_info_t *cowp = sidp->sid_cowinfo; 17107c478bd9Sstevel@tonic-gate cow_map_t *cmap = &cowp->cow_map; 17117c478bd9Sstevel@tonic-gate int error; 17127c478bd9Sstevel@tonic-gate int bf_index; 17137c478bd9Sstevel@tonic-gate int release_sem = cmn->release_sem; 17147c478bd9Sstevel@tonic-gate 17157c478bd9Sstevel@tonic-gate /* 17167c478bd9Sstevel@tonic-gate * The sid_rwlock does not need to be held here because the taskqs 17177c478bd9Sstevel@tonic-gate * are destroyed explicitly by fssnap_delete (with the sid_rwlock 17187c478bd9Sstevel@tonic-gate * held as a writer). taskq_destroy() will flush all of the tasks 17197c478bd9Sstevel@tonic-gate * out before fssnap_delete frees up all of the structures. 17207c478bd9Sstevel@tonic-gate */ 17217c478bd9Sstevel@tonic-gate 17227c478bd9Sstevel@tonic-gate /* if the snapshot was disabled from under us, drop the request. 
*/ 17237c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 17247c478bd9Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 17257c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 17267c478bd9Sstevel@tonic-gate if (release_sem) 17277c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 17287c478bd9Sstevel@tonic-gate return; 17297c478bd9Sstevel@tonic-gate } 17307c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 17317c478bd9Sstevel@tonic-gate 17327c478bd9Sstevel@tonic-gate atomic_add_64((uint64_t *)&cmap->cmap_nchunks, 1); 17337c478bd9Sstevel@tonic-gate 17347c478bd9Sstevel@tonic-gate if ((cmap->cmap_maxsize != 0) && 17357c478bd9Sstevel@tonic-gate ((cmap->cmap_nchunks * cmap->cmap_chunksz) > cmap->cmap_maxsize)) { 17367c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_write_taskq: snapshot %d (%s) has " 17377c478bd9Sstevel@tonic-gate "reached the maximum backing file size specified (%llu " 17387c478bd9Sstevel@tonic-gate "bytes) and will be deleted.", sidp->sid_snapnumber, 17397c478bd9Sstevel@tonic-gate (char *)cowp->cow_kstat_mntpt->ks_data, 17407c478bd9Sstevel@tonic-gate cmap->cmap_maxsize); 17417c478bd9Sstevel@tonic-gate if (release_sem) 17427c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 17437c478bd9Sstevel@tonic-gate atomic_or_uint(&sidp->sid_flags, SID_DELETE); 17447c478bd9Sstevel@tonic-gate return; 17457c478bd9Sstevel@tonic-gate } 17467c478bd9Sstevel@tonic-gate 17477c478bd9Sstevel@tonic-gate /* perform the write */ 17487c478bd9Sstevel@tonic-gate bf_index = cmn->cmn_chunk / cmap->cmap_chunksperbf; 17497c478bd9Sstevel@tonic-gate 17507c478bd9Sstevel@tonic-gate if (error = vn_rdwr(UIO_WRITE, (cowp->cow_backfile_array)[bf_index], 17517c478bd9Sstevel@tonic-gate cmn->cmn_buf, cmap->cmap_chunksz, 17527c478bd9Sstevel@tonic-gate (cmn->cmn_chunk % cmap->cmap_chunksperbf) * cmap->cmap_chunksz, 17537c478bd9Sstevel@tonic-gate UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, (ssize_t *)NULL)) { 17547c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 
"fssnap_write_taskq: error writing to " 17557c478bd9Sstevel@tonic-gate "backing file. DELETING SNAPSHOT %d, backing file path " 17567c478bd9Sstevel@tonic-gate "%s, offset %llu bytes, error %d.", sidp->sid_snapnumber, 17577c478bd9Sstevel@tonic-gate (char *)cowp->cow_kstat_bfname->ks_data, 17587c478bd9Sstevel@tonic-gate cmn->cmn_chunk * cmap->cmap_chunksz, error); 17597c478bd9Sstevel@tonic-gate if (release_sem) 17607c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 17617c478bd9Sstevel@tonic-gate atomic_or_uint(&sidp->sid_flags, SID_DELETE); 17627c478bd9Sstevel@tonic-gate return; 17637c478bd9Sstevel@tonic-gate } 17647c478bd9Sstevel@tonic-gate 17657c478bd9Sstevel@tonic-gate /* 17667c478bd9Sstevel@tonic-gate * now remove the node and buffer from memory 17677c478bd9Sstevel@tonic-gate */ 17687c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 17697c478bd9Sstevel@tonic-gate transtbl_delete(cmap, cmn); 17707c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 17717c478bd9Sstevel@tonic-gate 17727c478bd9Sstevel@tonic-gate /* Allow more translations */ 17737c478bd9Sstevel@tonic-gate if (release_sem) 17747c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 17757c478bd9Sstevel@tonic-gate 17767c478bd9Sstevel@tonic-gate } 17777c478bd9Sstevel@tonic-gate 17787c478bd9Sstevel@tonic-gate /* 17797c478bd9Sstevel@tonic-gate * fssnap_create_impl() - called from the file system to create a new snapshot 17807c478bd9Sstevel@tonic-gate * 17817c478bd9Sstevel@tonic-gate * allocates and initializes the structures needed for a new snapshot. 17827c478bd9Sstevel@tonic-gate * This is called by the file system when it receives an ioctl request to 17837c478bd9Sstevel@tonic-gate * create a new snapshot. An unused snapshot identifier is either found 17847c478bd9Sstevel@tonic-gate * or created, and eventually returned as the opaque handle the file 17857c478bd9Sstevel@tonic-gate * system will use to identify this snapshot. 
The snapshot number 17867c478bd9Sstevel@tonic-gate * associated with the snapshot identifier is the same as the minor 17877c478bd9Sstevel@tonic-gate * number for the snapshot device that is used to access that snapshot. 17887c478bd9Sstevel@tonic-gate * 17897c478bd9Sstevel@tonic-gate * The snapshot can not be used until the candidate bitmap is populated 17907c478bd9Sstevel@tonic-gate * by the file system (see fssnap_set_candidate_impl()), and the file 17917c478bd9Sstevel@tonic-gate * system finishes the setup process by calling fssnap_create_done(). 17927c478bd9Sstevel@tonic-gate * Nearly all of the snapshot locks are held for the duration of the 17937c478bd9Sstevel@tonic-gate * create, and are not released until fssnap_create_done is called(). 17947c478bd9Sstevel@tonic-gate */ 17957c478bd9Sstevel@tonic-gate static void * 17967c478bd9Sstevel@tonic-gate fssnap_create_impl(chunknumber_t nchunks, uint_t chunksz, u_offset_t maxsize, 17977c478bd9Sstevel@tonic-gate struct vnode *fsvp, int backfilecount, struct vnode **bfvpp, char *backpath, 17987c478bd9Sstevel@tonic-gate u_offset_t max_backfile_size) 17997c478bd9Sstevel@tonic-gate { 18007c478bd9Sstevel@tonic-gate refstr_t *mountpoint; 18017c478bd9Sstevel@tonic-gate char taskqname[50]; 18027c478bd9Sstevel@tonic-gate struct cow_info *cowp; 18037c478bd9Sstevel@tonic-gate struct cow_map *cmap; 18047c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 18057c478bd9Sstevel@tonic-gate int lastsnap; 18067c478bd9Sstevel@tonic-gate 18077c478bd9Sstevel@tonic-gate /* 18087c478bd9Sstevel@tonic-gate * Sanity check the parameters we care about 18097c478bd9Sstevel@tonic-gate * (we don't care about the informational parameters) 18107c478bd9Sstevel@tonic-gate */ 18117c478bd9Sstevel@tonic-gate if ((nchunks == 0) || 18127c478bd9Sstevel@tonic-gate ((chunksz % DEV_BSIZE) != 0) || 18137c478bd9Sstevel@tonic-gate (bfvpp == NULL)) { 18147c478bd9Sstevel@tonic-gate return (NULL); 18157c478bd9Sstevel@tonic-gate } 18167c478bd9Sstevel@tonic-gate 
18177c478bd9Sstevel@tonic-gate /* 18187c478bd9Sstevel@tonic-gate * Look for unused snapshot identifiers. Snapshot ids are never 18197c478bd9Sstevel@tonic-gate * freed, but deleted snapshot ids will be recycled as needed. 18207c478bd9Sstevel@tonic-gate */ 18217c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 18227c478bd9Sstevel@tonic-gate 18237c478bd9Sstevel@tonic-gate findagain: 18247c478bd9Sstevel@tonic-gate lastsnap = 0; 18257c478bd9Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidp->sid_next) { 18267c478bd9Sstevel@tonic-gate if (sidp->sid_snapnumber > lastsnap) 18277c478bd9Sstevel@tonic-gate lastsnap = sidp->sid_snapnumber; 18287c478bd9Sstevel@tonic-gate 18297c478bd9Sstevel@tonic-gate /* 18307c478bd9Sstevel@tonic-gate * The sid_rwlock is taken as a reader initially so that 18317c478bd9Sstevel@tonic-gate * activity on each snapshot is not stalled while searching 18327c478bd9Sstevel@tonic-gate * for a free snapshot id. 18337c478bd9Sstevel@tonic-gate */ 18347c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 18357c478bd9Sstevel@tonic-gate 18367c478bd9Sstevel@tonic-gate /* 18377c478bd9Sstevel@tonic-gate * If the snapshot has been deleted and nobody is using the 18387c478bd9Sstevel@tonic-gate * snapshot device than we can reuse this snapshot_id. If 18397c478bd9Sstevel@tonic-gate * the snapshot is marked to be deleted (SID_DELETE), then 18407c478bd9Sstevel@tonic-gate * it hasn't been deleted yet so don't reuse it. 18417c478bd9Sstevel@tonic-gate */ 18427c478bd9Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) 18437c478bd9Sstevel@tonic-gate break; /* This spot is unused, so take it */ 18447c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 18457c478bd9Sstevel@tonic-gate } 18467c478bd9Sstevel@tonic-gate 18477c478bd9Sstevel@tonic-gate /* 18487c478bd9Sstevel@tonic-gate * add a new snapshot identifier if there are no deleted 18497c478bd9Sstevel@tonic-gate * entries. 
Since it doesn't matter what order the entries 18507c478bd9Sstevel@tonic-gate * are in we can just add it to the beginning of the list. 18517c478bd9Sstevel@tonic-gate */ 18527c478bd9Sstevel@tonic-gate if (sidp) { 18537c478bd9Sstevel@tonic-gate if (rw_tryupgrade(&sidp->sid_rwlock) == 0) { 18547c478bd9Sstevel@tonic-gate /* someone else grabbed it as a writer, try again */ 18557c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 18567c478bd9Sstevel@tonic-gate goto findagain; 18577c478bd9Sstevel@tonic-gate } 18587c478bd9Sstevel@tonic-gate } else { 18597c478bd9Sstevel@tonic-gate /* Create a new node if we didn't find an unused one */ 18607c478bd9Sstevel@tonic-gate sidp = kmem_alloc(sizeof (struct snapshot_id), KM_SLEEP); 18617c478bd9Sstevel@tonic-gate rw_init(&sidp->sid_rwlock, NULL, RW_DEFAULT, NULL); 18627c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 18637c478bd9Sstevel@tonic-gate sidp->sid_snapnumber = (snapshot == NULL) ? 0 : lastsnap + 1; 18647c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = NULL; 18657c478bd9Sstevel@tonic-gate sidp->sid_flags = 0; 18667c478bd9Sstevel@tonic-gate sidp->sid_next = snapshot; 18677c478bd9Sstevel@tonic-gate snapshot = sidp; 18687c478bd9Sstevel@tonic-gate } 18697c478bd9Sstevel@tonic-gate 18707c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 18717c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_cowinfo == NULL); 18727c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_snapnumber <= (lastsnap + 1)); 18737c478bd9Sstevel@tonic-gate 18747c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_CREATING; 18757c478bd9Sstevel@tonic-gate /* The root vnode is held until snap_delete_impl() is called */ 18767c478bd9Sstevel@tonic-gate VN_HOLD(fsvp); 18777c478bd9Sstevel@tonic-gate sidp->sid_fvp = fsvp; 18787c478bd9Sstevel@tonic-gate num_snapshots++; 18797c478bd9Sstevel@tonic-gate 18807c478bd9Sstevel@tonic-gate /* allocate and initialize structures */ 18817c478bd9Sstevel@tonic-gate 18827c478bd9Sstevel@tonic-gate cowp = kmem_zalloc(sizeof 
(struct cow_info), KM_SLEEP); 18837c478bd9Sstevel@tonic-gate 18847c478bd9Sstevel@tonic-gate cowp->cow_backfile_array = bfvpp; 18857c478bd9Sstevel@tonic-gate cowp->cow_backcount = backfilecount; 18867c478bd9Sstevel@tonic-gate cowp->cow_backfile_sz = max_backfile_size; 18877c478bd9Sstevel@tonic-gate 18887c478bd9Sstevel@tonic-gate /* 18897c478bd9Sstevel@tonic-gate * Initialize task queues for this snapshot. Only a small number 18907c478bd9Sstevel@tonic-gate * of threads are required because they will be serialized on the 18917c478bd9Sstevel@tonic-gate * backing file's reader/writer lock anyway. 18927c478bd9Sstevel@tonic-gate */ 18937c478bd9Sstevel@tonic-gate (void) snprintf(taskqname, sizeof (taskqname), "%s_taskq_%d", snapname, 18947c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 18957c478bd9Sstevel@tonic-gate cowp->cow_taskq = taskq_create(taskqname, fssnap_taskq_nthreads, 18967c478bd9Sstevel@tonic-gate minclsyspri, 1, fssnap_taskq_maxtasks, 0); 18977c478bd9Sstevel@tonic-gate 18987c478bd9Sstevel@tonic-gate /* don't allow tasks to start until after everything is ready */ 18997c478bd9Sstevel@tonic-gate taskq_suspend(cowp->cow_taskq); 19007c478bd9Sstevel@tonic-gate 19017c478bd9Sstevel@tonic-gate /* initialize translation table */ 19027c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 19037c478bd9Sstevel@tonic-gate rw_init(&cmap->cmap_rwlock, NULL, RW_DEFAULT, NULL); 19047c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 19057c478bd9Sstevel@tonic-gate 19067c478bd9Sstevel@tonic-gate sema_init(&cmap->cmap_throttle_sem, fssnap_max_mem_chunks, NULL, 19077c478bd9Sstevel@tonic-gate SEMA_DEFAULT, NULL); 19087c478bd9Sstevel@tonic-gate 19097c478bd9Sstevel@tonic-gate cmap->cmap_chunksz = chunksz; 19107c478bd9Sstevel@tonic-gate cmap->cmap_maxsize = maxsize; 19117c478bd9Sstevel@tonic-gate cmap->cmap_chunksperbf = max_backfile_size / chunksz; 19127c478bd9Sstevel@tonic-gate 19137c478bd9Sstevel@tonic-gate /* 19147c478bd9Sstevel@tonic-gate * allocate one bit per chunk 
for the bitmaps, round up 19157c478bd9Sstevel@tonic-gate */ 19167c478bd9Sstevel@tonic-gate cmap->cmap_bmsize = (nchunks + (NBBY - 1)) / NBBY; 19177c478bd9Sstevel@tonic-gate cmap->cmap_hastrans = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP); 19187c478bd9Sstevel@tonic-gate cmap->cmap_candidate = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP); 19197c478bd9Sstevel@tonic-gate 19207c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = cowp; 19217c478bd9Sstevel@tonic-gate 19227c478bd9Sstevel@tonic-gate /* initialize kstats for this snapshot */ 19237c478bd9Sstevel@tonic-gate mountpoint = vfs_getmntpoint(fsvp->v_vfsp); 19247c478bd9Sstevel@tonic-gate fssnap_create_kstats(sidp, sidp->sid_snapnumber, 19257c478bd9Sstevel@tonic-gate refstr_value(mountpoint), backpath); 19267c478bd9Sstevel@tonic-gate refstr_rele(mountpoint); 19277c478bd9Sstevel@tonic-gate 19287c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 19297c478bd9Sstevel@tonic-gate 19307c478bd9Sstevel@tonic-gate /* 19317c478bd9Sstevel@tonic-gate * return with snapshot id rwlock held as a writer until 19327c478bd9Sstevel@tonic-gate * fssnap_create_done is called 19337c478bd9Sstevel@tonic-gate */ 19347c478bd9Sstevel@tonic-gate return (sidp); 19357c478bd9Sstevel@tonic-gate } 19367c478bd9Sstevel@tonic-gate 19377c478bd9Sstevel@tonic-gate /* 19387c478bd9Sstevel@tonic-gate * fssnap_set_candidate_impl() - mark a chunk as a candidate for copy-on-write 19397c478bd9Sstevel@tonic-gate * 19407c478bd9Sstevel@tonic-gate * sets a bit in the candidate bitmap that indicates that a chunk is a 19417c478bd9Sstevel@tonic-gate * candidate for copy-on-write. Typically, chunks that are allocated on 19427c478bd9Sstevel@tonic-gate * the file system at the time the snapshot is taken are candidates, 19437c478bd9Sstevel@tonic-gate * while chunks that have no allocated data do not need to be copied. 19447c478bd9Sstevel@tonic-gate * Chunks containing metadata must be marked as candidates as well. 
19457c478bd9Sstevel@tonic-gate */ 19467c478bd9Sstevel@tonic-gate static void 19477c478bd9Sstevel@tonic-gate fssnap_set_candidate_impl(void *snapshot_id, chunknumber_t chunknumber) 19487c478bd9Sstevel@tonic-gate { 19497c478bd9Sstevel@tonic-gate struct snapshot_id *sid = snapshot_id; 19507c478bd9Sstevel@tonic-gate struct cow_info *cowp = sid->sid_cowinfo; 19517c478bd9Sstevel@tonic-gate struct cow_map *cmap = &cowp->cow_map; 19527c478bd9Sstevel@tonic-gate 19537c478bd9Sstevel@tonic-gate /* simple bitmap operation for now */ 19547c478bd9Sstevel@tonic-gate ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY)); 19557c478bd9Sstevel@tonic-gate setbit(cmap->cmap_candidate, chunknumber); 19567c478bd9Sstevel@tonic-gate } 19577c478bd9Sstevel@tonic-gate 19587c478bd9Sstevel@tonic-gate /* 19597c478bd9Sstevel@tonic-gate * fssnap_is_candidate_impl() - check whether a chunk is a candidate 19607c478bd9Sstevel@tonic-gate * 19617c478bd9Sstevel@tonic-gate * returns 0 if the chunk is not a candidate and 1 if the chunk is a 19627c478bd9Sstevel@tonic-gate * candidate. This can be used by the file system to change behavior for 19637c478bd9Sstevel@tonic-gate * chunks that might induce a copy-on-write. The offset is specified in 19647c478bd9Sstevel@tonic-gate * bytes since the chunk size may not be known by the file system. 
19657c478bd9Sstevel@tonic-gate */ 19667c478bd9Sstevel@tonic-gate static int 19677c478bd9Sstevel@tonic-gate fssnap_is_candidate_impl(void *snapshot_id, u_offset_t off) 19687c478bd9Sstevel@tonic-gate { 19697c478bd9Sstevel@tonic-gate struct snapshot_id *sid = snapshot_id; 19707c478bd9Sstevel@tonic-gate struct cow_info *cowp = sid->sid_cowinfo; 19717c478bd9Sstevel@tonic-gate struct cow_map *cmap = &cowp->cow_map; 19727c478bd9Sstevel@tonic-gate ulong_t chunknumber = off / cmap->cmap_chunksz; 19737c478bd9Sstevel@tonic-gate 19747c478bd9Sstevel@tonic-gate /* simple bitmap operation for now */ 19757c478bd9Sstevel@tonic-gate ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY)); 19767c478bd9Sstevel@tonic-gate return (isset(cmap->cmap_candidate, chunknumber)); 19777c478bd9Sstevel@tonic-gate } 19787c478bd9Sstevel@tonic-gate 19797c478bd9Sstevel@tonic-gate /* 19807c478bd9Sstevel@tonic-gate * fssnap_create_done_impl() - complete the snapshot setup process 19817c478bd9Sstevel@tonic-gate * 19827c478bd9Sstevel@tonic-gate * called when the file system is done populating the candidate bitmap 19837c478bd9Sstevel@tonic-gate * and it is ready to start using the snapshot. This routine releases 19847c478bd9Sstevel@tonic-gate * the snapshot locks, allows taskq tasks to start processing, and 19857c478bd9Sstevel@tonic-gate * creates the device minor nodes associated with the snapshot. 
19867c478bd9Sstevel@tonic-gate */ 19877c478bd9Sstevel@tonic-gate static int 19887c478bd9Sstevel@tonic-gate fssnap_create_done_impl(void *snapshot_id) 19897c478bd9Sstevel@tonic-gate { 19907c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp = snapshot_id; 19917c478bd9Sstevel@tonic-gate struct cow_info *cowp; 19927c478bd9Sstevel@tonic-gate struct cow_map *cmap; 19937c478bd9Sstevel@tonic-gate int snapnumber = -1; 19947c478bd9Sstevel@tonic-gate char name[20]; 19957c478bd9Sstevel@tonic-gate 19967c478bd9Sstevel@tonic-gate /* sid rwlock and cmap rwlock should be taken from fssnap_create */ 19977c478bd9Sstevel@tonic-gate ASSERT(sidp); 19987c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 19997c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_cowinfo); 20007c478bd9Sstevel@tonic-gate 20017c478bd9Sstevel@tonic-gate cowp = sidp->sid_cowinfo; 20027c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 20037c478bd9Sstevel@tonic-gate 20047c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 20057c478bd9Sstevel@tonic-gate 20067c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_CREATING | SID_DISABLED); 20077c478bd9Sstevel@tonic-gate snapnumber = sidp->sid_snapnumber; 20087c478bd9Sstevel@tonic-gate 20097c478bd9Sstevel@tonic-gate /* allocate state structure and find new snapshot id */ 20107c478bd9Sstevel@tonic-gate if (ddi_soft_state_zalloc(statep, snapnumber) != DDI_SUCCESS) { 20117c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 20127c478bd9Sstevel@tonic-gate "snap_ioctl: create: could not allocate " 20137c478bd9Sstevel@tonic-gate "state for snapshot %d.", snapnumber); 20147c478bd9Sstevel@tonic-gate snapnumber = -1; 20157c478bd9Sstevel@tonic-gate goto out; 20167c478bd9Sstevel@tonic-gate } 20177c478bd9Sstevel@tonic-gate 20187c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, snapnumber); 20197c478bd9Sstevel@tonic-gate *sidpp = sidp; 20207c478bd9Sstevel@tonic-gate 20217c478bd9Sstevel@tonic-gate /* create minor node based on snapshot number */ 
20227c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 20237c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", snapnumber); 20247c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(fssnap_dip, name, S_IFBLK, 20257c478bd9Sstevel@tonic-gate snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) { 20267c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: could not create " 20277c478bd9Sstevel@tonic-gate "block minor node for snapshot %d.", snapnumber); 20287c478bd9Sstevel@tonic-gate snapnumber = -1; 20297c478bd9Sstevel@tonic-gate goto out; 20307c478bd9Sstevel@tonic-gate } 20317c478bd9Sstevel@tonic-gate 20327c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", snapnumber); 20337c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(fssnap_dip, name, S_IFCHR, 20347c478bd9Sstevel@tonic-gate snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) { 20357c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: could not create " 20367c478bd9Sstevel@tonic-gate "character minor node for snapshot %d.", snapnumber); 20377c478bd9Sstevel@tonic-gate snapnumber = -1; 20387c478bd9Sstevel@tonic-gate } 20397c478bd9Sstevel@tonic-gate 20407c478bd9Sstevel@tonic-gate out: 20417c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 20427c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 20437c478bd9Sstevel@tonic-gate 20447c478bd9Sstevel@tonic-gate /* let the taskq threads start processing */ 20457c478bd9Sstevel@tonic-gate taskq_resume(cowp->cow_taskq); 20467c478bd9Sstevel@tonic-gate 20477c478bd9Sstevel@tonic-gate return (snapnumber); 20487c478bd9Sstevel@tonic-gate } 20497c478bd9Sstevel@tonic-gate 20507c478bd9Sstevel@tonic-gate /* 20517c478bd9Sstevel@tonic-gate * fssnap_delete_impl() - delete a snapshot 20527c478bd9Sstevel@tonic-gate * 20537c478bd9Sstevel@tonic-gate * used when a snapshot is no longer needed. This is called by the file 20547c478bd9Sstevel@tonic-gate * system when it receives an ioctl request to delete a snapshot. 
It is 20557c478bd9Sstevel@tonic-gate * also called internally when error conditions such as disk full, errors 20567c478bd9Sstevel@tonic-gate * writing to the backing file, or backing file maxsize exceeded occur. 20577c478bd9Sstevel@tonic-gate * If the snapshot device is busy when the delete request is received, 20587c478bd9Sstevel@tonic-gate * all state will be deleted except for the soft state and device files 20597c478bd9Sstevel@tonic-gate * associated with the snapshot; they will be deleted when the snapshot 20607c478bd9Sstevel@tonic-gate * device is closed. 20617c478bd9Sstevel@tonic-gate * 20627c478bd9Sstevel@tonic-gate * NOTE this function takes a POINTER TO A POINTER to the snapshot id, 20637c478bd9Sstevel@tonic-gate * and expects to be able to set the handle held by the file system to 20647c478bd9Sstevel@tonic-gate * NULL. This depends on the file system checking that variable for NULL 20657c478bd9Sstevel@tonic-gate * before calling fssnap_strategy(). 20667c478bd9Sstevel@tonic-gate */ 20677c478bd9Sstevel@tonic-gate static int 20687c478bd9Sstevel@tonic-gate fssnap_delete_impl(void *snapshot_id) 20697c478bd9Sstevel@tonic-gate { 20707c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp = (struct snapshot_id **)snapshot_id; 20717c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 20727c478bd9Sstevel@tonic-gate struct snapshot_id **statesidpp; 20737c478bd9Sstevel@tonic-gate struct cow_info *cowp; 20747c478bd9Sstevel@tonic-gate struct cow_map *cmap; 20757c478bd9Sstevel@tonic-gate char name[20]; 20767c478bd9Sstevel@tonic-gate int snapnumber = -1; 20777c478bd9Sstevel@tonic-gate vnode_t **vpp; 20787c478bd9Sstevel@tonic-gate 20797c478bd9Sstevel@tonic-gate /* 20807c478bd9Sstevel@tonic-gate * sidp is guaranteed to be valid if sidpp is valid because 20817c478bd9Sstevel@tonic-gate * the snapshot list is append-only. 
20827c478bd9Sstevel@tonic-gate */ 20837c478bd9Sstevel@tonic-gate if (sidpp == NULL) { 20847c478bd9Sstevel@tonic-gate return (-1); 20857c478bd9Sstevel@tonic-gate } 20867c478bd9Sstevel@tonic-gate 20877c478bd9Sstevel@tonic-gate sidp = *sidpp; 20887c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 20897c478bd9Sstevel@tonic-gate 20907c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 20917c478bd9Sstevel@tonic-gate 20927c478bd9Sstevel@tonic-gate /* 20937c478bd9Sstevel@tonic-gate * double check that the snapshot is still valid for THIS file system 20947c478bd9Sstevel@tonic-gate */ 20957c478bd9Sstevel@tonic-gate if (*sidpp == NULL) { 20967c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 20977c478bd9Sstevel@tonic-gate return (-1); 20987c478bd9Sstevel@tonic-gate } 20997c478bd9Sstevel@tonic-gate 21007c478bd9Sstevel@tonic-gate /* 21017c478bd9Sstevel@tonic-gate * Now we know the snapshot is still valid and will not go away 21027c478bd9Sstevel@tonic-gate * because we have the write lock. Once the state is transitioned 21037c478bd9Sstevel@tonic-gate * to "disabling", the sid_rwlock can be released. Any pending I/O 21047c478bd9Sstevel@tonic-gate * waiting for the lock as a reader will check for this state and 21057c478bd9Sstevel@tonic-gate * abort without touching data that may be getting freed. 21067c478bd9Sstevel@tonic-gate */ 21077c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_DISABLING; 21087c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_DELETE) { 21097c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "Snapshot %d automatically deleted.", 21107c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 21117c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_DELETE); 21127c478bd9Sstevel@tonic-gate } 21137c478bd9Sstevel@tonic-gate 21147c478bd9Sstevel@tonic-gate 21157c478bd9Sstevel@tonic-gate /* 21167c478bd9Sstevel@tonic-gate * This is pointing into file system specific data! 
The assumption is 21177c478bd9Sstevel@tonic-gate * that fssnap_strategy() gets called from the file system based on 21187c478bd9Sstevel@tonic-gate * whether this reference to the snapshot_id is NULL or not. So 21197c478bd9Sstevel@tonic-gate * setting this to NULL should disable snapshots for the file system. 21207c478bd9Sstevel@tonic-gate */ 21217c478bd9Sstevel@tonic-gate *sidpp = NULL; 21227c478bd9Sstevel@tonic-gate 21237c478bd9Sstevel@tonic-gate /* remove cowinfo */ 21247c478bd9Sstevel@tonic-gate cowp = sidp->sid_cowinfo; 21257c478bd9Sstevel@tonic-gate if (cowp == NULL) { 21267c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 21277c478bd9Sstevel@tonic-gate return (-1); 21287c478bd9Sstevel@tonic-gate } 21297c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 21307c478bd9Sstevel@tonic-gate 21317c478bd9Sstevel@tonic-gate /* destroy task queues first so they don't reference freed data. */ 21327c478bd9Sstevel@tonic-gate if (cowp->cow_taskq) { 21337c478bd9Sstevel@tonic-gate taskq_destroy(cowp->cow_taskq); 21347c478bd9Sstevel@tonic-gate cowp->cow_taskq = NULL; 21357c478bd9Sstevel@tonic-gate } 21367c478bd9Sstevel@tonic-gate 21377c478bd9Sstevel@tonic-gate if (cowp->cow_backfile_array != NULL) { 21387c478bd9Sstevel@tonic-gate for (vpp = cowp->cow_backfile_array; *vpp; vpp++) 21397c478bd9Sstevel@tonic-gate VN_RELE(*vpp); 21407c478bd9Sstevel@tonic-gate kmem_free(cowp->cow_backfile_array, 21417c478bd9Sstevel@tonic-gate (cowp->cow_backcount + 1) * sizeof (vnode_t *)); 21427c478bd9Sstevel@tonic-gate cowp->cow_backfile_array = NULL; 21437c478bd9Sstevel@tonic-gate } 21447c478bd9Sstevel@tonic-gate 21457c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = NULL; 21467c478bd9Sstevel@tonic-gate 21477c478bd9Sstevel@tonic-gate /* remove cmap */ 21487c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 21497c478bd9Sstevel@tonic-gate ASSERT(cmap); 21507c478bd9Sstevel@tonic-gate 21517c478bd9Sstevel@tonic-gate if (cmap->cmap_candidate) 21527c478bd9Sstevel@tonic-gate 
kmem_free(cmap->cmap_candidate, cmap->cmap_bmsize); 21537c478bd9Sstevel@tonic-gate 21547c478bd9Sstevel@tonic-gate if (cmap->cmap_hastrans) 21557c478bd9Sstevel@tonic-gate kmem_free(cmap->cmap_hastrans, cmap->cmap_bmsize); 21567c478bd9Sstevel@tonic-gate 21577c478bd9Sstevel@tonic-gate if (cmap->cmap_table) 21587c478bd9Sstevel@tonic-gate transtbl_free(&cowp->cow_map); 21597c478bd9Sstevel@tonic-gate 21607c478bd9Sstevel@tonic-gate rw_destroy(&cmap->cmap_rwlock); 21617c478bd9Sstevel@tonic-gate 21627c478bd9Sstevel@tonic-gate while (cmap->cmap_waiters) { 21637c478bd9Sstevel@tonic-gate sema_p(&cmap->cmap_throttle_sem); 21647c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 21657c478bd9Sstevel@tonic-gate } 21667c478bd9Sstevel@tonic-gate sema_destroy(&cmap->cmap_throttle_sem); 21677c478bd9Sstevel@tonic-gate 21687c478bd9Sstevel@tonic-gate /* remove kstats */ 21697c478bd9Sstevel@tonic-gate fssnap_delete_kstats(cowp); 21707c478bd9Sstevel@tonic-gate 21717c478bd9Sstevel@tonic-gate kmem_free(cowp, sizeof (struct cow_info)); 21727c478bd9Sstevel@tonic-gate 21737c478bd9Sstevel@tonic-gate statesidpp = ddi_get_soft_state(statep, sidp->sid_snapnumber); 21747c478bd9Sstevel@tonic-gate if (statesidpp == NULL || *statesidpp == NULL) { 21757c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 21767c478bd9Sstevel@tonic-gate "fssnap_delete_impl: could not find state for snapshot %d.", 21777c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 21787c478bd9Sstevel@tonic-gate } 21797c478bd9Sstevel@tonic-gate ASSERT(*statesidpp == sidp); 21807c478bd9Sstevel@tonic-gate 21817c478bd9Sstevel@tonic-gate /* 21827c478bd9Sstevel@tonic-gate * Leave the node in the list marked DISABLED so it can be reused 21837c478bd9Sstevel@tonic-gate * and avoid many race conditions. Return the snapshot number 21847c478bd9Sstevel@tonic-gate * that was deleted. 
21857c478bd9Sstevel@tonic-gate */ 21867c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 21877c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 21887c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_DISABLING); 21897c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_DISABLED; 21907c478bd9Sstevel@tonic-gate VN_RELE(sidp->sid_fvp); 21917c478bd9Sstevel@tonic-gate sidp->sid_fvp = NULL; 21927c478bd9Sstevel@tonic-gate snapnumber = sidp->sid_snapnumber; 21937c478bd9Sstevel@tonic-gate 21947c478bd9Sstevel@tonic-gate /* 21957c478bd9Sstevel@tonic-gate * If the snapshot is not busy, free the device info now. Otherwise 21967c478bd9Sstevel@tonic-gate * the device nodes are freed in snap_close() when the device is 21977c478bd9Sstevel@tonic-gate * closed. The sid will not be reused until the device is not busy. 21987c478bd9Sstevel@tonic-gate */ 21997c478bd9Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) { 22007c478bd9Sstevel@tonic-gate /* remove the device nodes */ 22017c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 22027c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", 22037c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 22047c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 22057c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", 22067c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 22077c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 22087c478bd9Sstevel@tonic-gate 22097c478bd9Sstevel@tonic-gate /* delete the state structure */ 22107c478bd9Sstevel@tonic-gate ddi_soft_state_free(statep, sidp->sid_snapnumber); 22117c478bd9Sstevel@tonic-gate num_snapshots--; 22127c478bd9Sstevel@tonic-gate } 22137c478bd9Sstevel@tonic-gate 22147c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 22157c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 22167c478bd9Sstevel@tonic-gate 22177c478bd9Sstevel@tonic-gate return (snapnumber); 22187c478bd9Sstevel@tonic-gate } 22197c478bd9Sstevel@tonic-gate 
22207c478bd9Sstevel@tonic-gate /* 22217c478bd9Sstevel@tonic-gate * fssnap_create_kstats() - allocate and initialize snapshot kstats 22227c478bd9Sstevel@tonic-gate * 22237c478bd9Sstevel@tonic-gate */ 22247c478bd9Sstevel@tonic-gate static void 22257c478bd9Sstevel@tonic-gate fssnap_create_kstats(snapshot_id_t *sidp, int snapnum, 22267c478bd9Sstevel@tonic-gate const char *mountpoint, const char *backfilename) 22277c478bd9Sstevel@tonic-gate { 22287c478bd9Sstevel@tonic-gate kstat_t *num, *mntpoint, *bfname; 22297c478bd9Sstevel@tonic-gate kstat_named_t *hw; 22307c478bd9Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 22317c478bd9Sstevel@tonic-gate struct cow_kstat_num *stats; 22327c478bd9Sstevel@tonic-gate 22337c478bd9Sstevel@tonic-gate /* update the high water mark */ 22347c478bd9Sstevel@tonic-gate if (fssnap_highwater_kstat == NULL) { 22357c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to lookup " 22367c478bd9Sstevel@tonic-gate "high water mark kstat."); 22377c478bd9Sstevel@tonic-gate return; 22387c478bd9Sstevel@tonic-gate } 22397c478bd9Sstevel@tonic-gate 22407c478bd9Sstevel@tonic-gate hw = (kstat_named_t *)fssnap_highwater_kstat->ks_data; 22417c478bd9Sstevel@tonic-gate if (hw->value.ui32 < snapnum) 22427c478bd9Sstevel@tonic-gate hw->value.ui32 = snapnum; 22437c478bd9Sstevel@tonic-gate 22447c478bd9Sstevel@tonic-gate /* initialize the mount point kstat */ 22457c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_MNTPT); 22467c478bd9Sstevel@tonic-gate 22477c478bd9Sstevel@tonic-gate if (mountpoint != NULL) { 22487c478bd9Sstevel@tonic-gate mntpoint = kstat_create(snapname, snapnum, FSSNAP_KSTAT_MNTPT, 22497c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_RAW, strlen(mountpoint) + 1, 0); 22507c478bd9Sstevel@tonic-gate if (mntpoint == NULL) { 22517c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 22527c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to " 22537c478bd9Sstevel@tonic-gate 
"create mount point kstat"); 22547c478bd9Sstevel@tonic-gate } else { 22557c478bd9Sstevel@tonic-gate (void) strncpy(mntpoint->ks_data, mountpoint, 22567c478bd9Sstevel@tonic-gate strlen(mountpoint)); 22577c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = mntpoint; 22587c478bd9Sstevel@tonic-gate kstat_install(mntpoint); 22597c478bd9Sstevel@tonic-gate } 22607c478bd9Sstevel@tonic-gate } else { 22617c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 22627c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: mount point not " 22637c478bd9Sstevel@tonic-gate "specified."); 22647c478bd9Sstevel@tonic-gate } 22657c478bd9Sstevel@tonic-gate 22667c478bd9Sstevel@tonic-gate /* initialize the backing file kstat */ 22677c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_BFNAME); 22687c478bd9Sstevel@tonic-gate 22697c478bd9Sstevel@tonic-gate if (backfilename == NULL) { 22707c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 22717c478bd9Sstevel@tonic-gate } else { 22727c478bd9Sstevel@tonic-gate bfname = kstat_create(snapname, snapnum, FSSNAP_KSTAT_BFNAME, 22737c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_RAW, strlen(backfilename) + 1, 0); 22747c478bd9Sstevel@tonic-gate if (bfname != NULL) { 22757c478bd9Sstevel@tonic-gate (void) strncpy(bfname->ks_data, backfilename, 22767c478bd9Sstevel@tonic-gate strlen(backfilename)); 22777c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = bfname; 22787c478bd9Sstevel@tonic-gate kstat_install(bfname); 22797c478bd9Sstevel@tonic-gate } else { 22807c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 22817c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to " 22827c478bd9Sstevel@tonic-gate "create backing file name kstat"); 22837c478bd9Sstevel@tonic-gate } 22847c478bd9Sstevel@tonic-gate } 22857c478bd9Sstevel@tonic-gate 22867c478bd9Sstevel@tonic-gate /* initialize numeric kstats */ 22877c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_NUM); 
22887c478bd9Sstevel@tonic-gate 22897c478bd9Sstevel@tonic-gate num = kstat_create(snapname, snapnum, FSSNAP_KSTAT_NUM, 22907c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_NAMED, 22917c478bd9Sstevel@tonic-gate sizeof (struct cow_kstat_num) / sizeof (kstat_named_t), 22927c478bd9Sstevel@tonic-gate 0); 22937c478bd9Sstevel@tonic-gate if (num == NULL) { 22947c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to create " 22957c478bd9Sstevel@tonic-gate "numeric kstats"); 22967c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = NULL; 22977c478bd9Sstevel@tonic-gate return; 22987c478bd9Sstevel@tonic-gate } 22997c478bd9Sstevel@tonic-gate 23007c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = num; 23017c478bd9Sstevel@tonic-gate stats = num->ks_data; 23027c478bd9Sstevel@tonic-gate num->ks_update = fssnap_update_kstat_num; 23037c478bd9Sstevel@tonic-gate num->ks_private = sidp; 23047c478bd9Sstevel@tonic-gate 23057c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_state, FSSNAP_KSTAT_NUM_STATE, 23067c478bd9Sstevel@tonic-gate KSTAT_DATA_INT32); 23077c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_bfsize, FSSNAP_KSTAT_NUM_BFSIZE, 23087c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT64); 23097c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_maxsize, FSSNAP_KSTAT_NUM_MAXSIZE, 23107c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT64); 23117c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_createtime, FSSNAP_KSTAT_NUM_CREATETIME, 23127c478bd9Sstevel@tonic-gate KSTAT_DATA_LONG); 23137c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_chunksize, FSSNAP_KSTAT_NUM_CHUNKSIZE, 23147c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT32); 23157c478bd9Sstevel@tonic-gate 23167c478bd9Sstevel@tonic-gate /* initialize the static kstats */ 23177c478bd9Sstevel@tonic-gate stats->ckn_chunksize.value.ui32 = cowp->cow_map.cmap_chunksz; 23187c478bd9Sstevel@tonic-gate stats->ckn_maxsize.value.ui64 = cowp->cow_map.cmap_maxsize; 23197c478bd9Sstevel@tonic-gate stats->ckn_createtime.value.l = 
gethrestime_sec(); 23207c478bd9Sstevel@tonic-gate 23217c478bd9Sstevel@tonic-gate kstat_install(num); 23227c478bd9Sstevel@tonic-gate } 23237c478bd9Sstevel@tonic-gate 23247c478bd9Sstevel@tonic-gate /* 23257c478bd9Sstevel@tonic-gate * fssnap_update_kstat_num() - update a numerical snapshot kstat value 23267c478bd9Sstevel@tonic-gate * 23277c478bd9Sstevel@tonic-gate */ 23287c478bd9Sstevel@tonic-gate int 23297c478bd9Sstevel@tonic-gate fssnap_update_kstat_num(kstat_t *ksp, int rw) 23307c478bd9Sstevel@tonic-gate { 23317c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = (snapshot_id_t *)ksp->ks_private; 23327c478bd9Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 23337c478bd9Sstevel@tonic-gate struct cow_kstat_num *stats = ksp->ks_data; 23347c478bd9Sstevel@tonic-gate 23357c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) 23367c478bd9Sstevel@tonic-gate return (EACCES); 23377c478bd9Sstevel@tonic-gate 23387c478bd9Sstevel@tonic-gate /* state */ 23397c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_CREATING) 23407c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_CREATING; 23417c478bd9Sstevel@tonic-gate else if (SID_INACTIVE(sidp)) 23427c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_DISABLED; 23437c478bd9Sstevel@tonic-gate else if (SID_BUSY(sidp)) 23447c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_ACTIVE; 23457c478bd9Sstevel@tonic-gate else 23467c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_IDLE; 23477c478bd9Sstevel@tonic-gate 23487c478bd9Sstevel@tonic-gate /* bfsize */ 23497c478bd9Sstevel@tonic-gate stats->ckn_bfsize.value.ui64 = cowp->cow_map.cmap_nchunks * 23507c478bd9Sstevel@tonic-gate cowp->cow_map.cmap_chunksz; 23517c478bd9Sstevel@tonic-gate 23527c478bd9Sstevel@tonic-gate return (0); 23537c478bd9Sstevel@tonic-gate } 23547c478bd9Sstevel@tonic-gate 23557c478bd9Sstevel@tonic-gate /* 23567c478bd9Sstevel@tonic-gate * fssnap_delete_kstats() - deallocate snapshot kstats 23577c478bd9Sstevel@tonic-gate * 
23587c478bd9Sstevel@tonic-gate */ 23597c478bd9Sstevel@tonic-gate void 23607c478bd9Sstevel@tonic-gate fssnap_delete_kstats(struct cow_info *cowp) 23617c478bd9Sstevel@tonic-gate { 23627c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_num != NULL) { 23637c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_num); 23647c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = NULL; 23657c478bd9Sstevel@tonic-gate } 23667c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_mntpt != NULL) { 23677c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_mntpt); 23687c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 23697c478bd9Sstevel@tonic-gate } 23707c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_bfname != NULL) { 23717c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_bfname); 23727c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 23737c478bd9Sstevel@tonic-gate } 23747c478bd9Sstevel@tonic-gate } 2375