1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*7c478bd9Sstevel@tonic-gate 29*7c478bd9Sstevel@tonic-gate #include <sys/debug.h> 30*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 31*7c478bd9Sstevel@tonic-gate #include <sys/file.h> 32*7c478bd9Sstevel@tonic-gate #include <sys/errno.h> 33*7c478bd9Sstevel@tonic-gate #include <sys/uio.h> 34*7c478bd9Sstevel@tonic-gate #include <sys/open.h> 35*7c478bd9Sstevel@tonic-gate #include <sys/cred.h> 36*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 37*7c478bd9Sstevel@tonic-gate #include <sys/conf.h> 38*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 39*7c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 40*7c478bd9Sstevel@tonic-gate #include <sys/disp.h> 41*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 42*7c478bd9Sstevel@tonic-gate #include <sys/filio.h> 43*7c478bd9Sstevel@tonic-gate #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */ 44*7c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 45*7c478bd9Sstevel@tonic-gate 46*7c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 47*7c478bd9Sstevel@tonic-gate #include <sys/devops.h> 48*7c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 49*7c478bd9Sstevel@tonic-gate #include <sys/priv_names.h> 50*7c478bd9Sstevel@tonic-gate 51*7c478bd9Sstevel@tonic-gate #include <sys/fssnap.h> 52*7c478bd9Sstevel@tonic-gate #include <sys/fssnap_if.h> 53*7c478bd9Sstevel@tonic-gate 54*7c478bd9Sstevel@tonic-gate /* 55*7c478bd9Sstevel@tonic-gate * This module implements the file system snapshot code, which provides a 56*7c478bd9Sstevel@tonic-gate * point-in-time image of a file system for the purposes of online backup. 57*7c478bd9Sstevel@tonic-gate * There are essentially two parts to this project: the driver half and the 58*7c478bd9Sstevel@tonic-gate * file system half. The driver half is a pseudo device driver called 59*7c478bd9Sstevel@tonic-gate * "fssnap" that represents the snapshot. 
Each snapshot is assigned a 60*7c478bd9Sstevel@tonic-gate * number that corresponds to the minor number of the device, and a control 61*7c478bd9Sstevel@tonic-gate * device with a high minor number is used to initiate snapshot creation and 62*7c478bd9Sstevel@tonic-gate * deletion. For all practical purposes the driver half acts like a 63*7c478bd9Sstevel@tonic-gate * read-only disk device whose contents are exactly the same as the master 64*7c478bd9Sstevel@tonic-gate * file system at the time the snapshot was created. 65*7c478bd9Sstevel@tonic-gate * 66*7c478bd9Sstevel@tonic-gate * The file system half provides interfaces necessary for performing the 67*7c478bd9Sstevel@tonic-gate * file system dependent operations required to create and delete snapshots 68*7c478bd9Sstevel@tonic-gate * and a special driver strategy routine that must always be used by the file 69*7c478bd9Sstevel@tonic-gate * system for snapshots to work correctly. 70*7c478bd9Sstevel@tonic-gate * 71*7c478bd9Sstevel@tonic-gate * When a snapshot is to be created, the user utility will send an ioctl to 72*7c478bd9Sstevel@tonic-gate * the control device of the driver half specifying the file system to be 73*7c478bd9Sstevel@tonic-gate * snapshotted, the file descriptor of a backing-store file which is used to 74*7c478bd9Sstevel@tonic-gate * hold old data before it is overwritten, and other snapshot parameters. 75*7c478bd9Sstevel@tonic-gate * This ioctl is passed on to the file system specified in the original 76*7c478bd9Sstevel@tonic-gate * ioctl request. The file system is expected to be able to flush 77*7c478bd9Sstevel@tonic-gate * everything out to make the file system consistent and lock it to ensure 78*7c478bd9Sstevel@tonic-gate * no changes occur while the snapshot is being created. It then calls 79*7c478bd9Sstevel@tonic-gate * fssnap_create() to create state for a new snapshot, from which an opaque 80*7c478bd9Sstevel@tonic-gate * handle is returned with the snapshot locked. 
Next, the file system must 81*7c478bd9Sstevel@tonic-gate * populate the "candidate bitmap", which tells the snapshot code which 82*7c478bd9Sstevel@tonic-gate * "chunks" should be considered for copy-on-write (a chunk is the unit of 83*7c478bd9Sstevel@tonic-gate * granularity used for copy-on-write, which is independent of the device 84*7c478bd9Sstevel@tonic-gate * and file system block sizes). This is typically done by scanning the 85*7c478bd9Sstevel@tonic-gate * file system allocation bitmaps to determine which chunks contain 86*7c478bd9Sstevel@tonic-gate * allocated blocks in the file system at the time the snapshot was created. 87*7c478bd9Sstevel@tonic-gate * If a chunk has no allocated blocks, it does not need to be copied before 88*7c478bd9Sstevel@tonic-gate * being written to. Once the candidate bitmap is populated with 89*7c478bd9Sstevel@tonic-gate * fssnap_set_candidate(), the file system calls fssnap_create_done() to 90*7c478bd9Sstevel@tonic-gate * complete the snapshot creation and unlock the snapshot. The file system 91*7c478bd9Sstevel@tonic-gate * may now be unlocked and modifications to it resumed. 92*7c478bd9Sstevel@tonic-gate * 93*7c478bd9Sstevel@tonic-gate * Once a snapshot is created, the file system must perform all writes 94*7c478bd9Sstevel@tonic-gate * through a special strategy routine, fssnap_strategy(). This strategy 95*7c478bd9Sstevel@tonic-gate * routine determines whether the chunks contained by the write must be 96*7c478bd9Sstevel@tonic-gate * copied before being overwritten by consulting the candidate bitmap 97*7c478bd9Sstevel@tonic-gate * described above, and the "hastrans bitmap" which tells it whether the chunk 98*7c478bd9Sstevel@tonic-gate * has been copied already or not. If the chunk is a candidate but has not 99*7c478bd9Sstevel@tonic-gate * been copied, it reads the old data in and adds it to a queue. The 100*7c478bd9Sstevel@tonic-gate * old data can then be overwritten with the new data. 
 * For each old chunk that is read in, an asynchronous task is dispatched
 * to a task queue; that task writes the old data to the backing file
 * specified at snapshot creation time.  The backing file is a sparse file
 * the same size as the file system that contains the old data at the
 * offset that data originally had in the file system.  If the queue
 * containing in-memory chunks gets too large, writes to the file system
 * may be throttled by a semaphore until the task queues have a chance to
 * push some of the chunks to the backing file.
 *
 * With the candidate bitmap, the hastrans bitmap, the data on the master
 * file system, and the old data in memory and in the backing file, the
 * snapshot pseudo-driver can piece together the original file system
 * information to satisfy read requests.  If the requested chunk is not a
 * candidate, it returns a zeroed buffer.  If the chunk is a candidate but
 * has not been copied it reads it from the master file system.  If it is a
 * candidate and has been copied, it either copies the data from the
 * in-memory queue or it reads it in from the backing file.  The result is
 * a replication of the original file system that can be backed up, mounted,
 * or manipulated by other file system utilities that work on a read-only
 * device.
120*7c478bd9Sstevel@tonic-gate * 121*7c478bd9Sstevel@tonic-gate * This module is divided into three roughly logical sections: 122*7c478bd9Sstevel@tonic-gate * 123*7c478bd9Sstevel@tonic-gate * - The snapshot driver, which is a character/block driver 124*7c478bd9Sstevel@tonic-gate * representing the snapshot itself. These routines are 125*7c478bd9Sstevel@tonic-gate * prefixed with "snap_". 126*7c478bd9Sstevel@tonic-gate * 127*7c478bd9Sstevel@tonic-gate * - The library routines that are defined in fssnap_if.h that 128*7c478bd9Sstevel@tonic-gate * are used by file systems that use this snapshot implementation. 129*7c478bd9Sstevel@tonic-gate * These functions are prefixed with "fssnap_" and are called through 130*7c478bd9Sstevel@tonic-gate * a function vector from the file system. 131*7c478bd9Sstevel@tonic-gate * 132*7c478bd9Sstevel@tonic-gate * - The helper routines used by the snapshot driver and the fssnap 133*7c478bd9Sstevel@tonic-gate * library routines for managing the translation table and other 134*7c478bd9Sstevel@tonic-gate * useful functions. These routines are all static and are 135*7c478bd9Sstevel@tonic-gate * prefixed with either "fssnap_" or "transtbl_" if they 136*7c478bd9Sstevel@tonic-gate * are specifically used for translation table activities. 
137*7c478bd9Sstevel@tonic-gate */ 138*7c478bd9Sstevel@tonic-gate 139*7c478bd9Sstevel@tonic-gate static dev_info_t *fssnap_dip = NULL; 140*7c478bd9Sstevel@tonic-gate static struct snapshot_id *snapshot = NULL; 141*7c478bd9Sstevel@tonic-gate static struct snapshot_id snap_ctl; 142*7c478bd9Sstevel@tonic-gate static int num_snapshots = 0; 143*7c478bd9Sstevel@tonic-gate static kmutex_t snapshot_mutex; 144*7c478bd9Sstevel@tonic-gate static char snapname[] = SNAP_NAME; 145*7c478bd9Sstevel@tonic-gate 146*7c478bd9Sstevel@tonic-gate /* "tunable" parameters */ 147*7c478bd9Sstevel@tonic-gate static int fssnap_taskq_nthreads = FSSNAP_TASKQ_THREADS; 148*7c478bd9Sstevel@tonic-gate static uint_t fssnap_max_mem_chunks = FSSNAP_MAX_MEM_CHUNKS; 149*7c478bd9Sstevel@tonic-gate static int fssnap_taskq_maxtasks = FSSNAP_TASKQ_MAXTASKS; 150*7c478bd9Sstevel@tonic-gate 151*7c478bd9Sstevel@tonic-gate /* static function prototypes */ 152*7c478bd9Sstevel@tonic-gate 153*7c478bd9Sstevel@tonic-gate /* snapshot driver */ 154*7c478bd9Sstevel@tonic-gate static int snap_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 155*7c478bd9Sstevel@tonic-gate static int snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 156*7c478bd9Sstevel@tonic-gate static int snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 157*7c478bd9Sstevel@tonic-gate static int snap_open(dev_t *devp, int flag, int otyp, cred_t *cred); 158*7c478bd9Sstevel@tonic-gate static int snap_close(dev_t dev, int flag, int otyp, cred_t *cred); 159*7c478bd9Sstevel@tonic-gate static int snap_strategy(struct buf *bp); 160*7c478bd9Sstevel@tonic-gate static int snap_read(dev_t dev, struct uio *uiop, cred_t *credp); 161*7c478bd9Sstevel@tonic-gate static int snap_print(dev_t dev, char *str); 162*7c478bd9Sstevel@tonic-gate static int snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 163*7c478bd9Sstevel@tonic-gate cred_t *credp, int *rvalp); 164*7c478bd9Sstevel@tonic-gate static int snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t 
prop_op, 165*7c478bd9Sstevel@tonic-gate int flags, char *name, caddr_t valuep, int *lengthp); 166*7c478bd9Sstevel@tonic-gate static int snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, 167*7c478bd9Sstevel@tonic-gate int offset, int len, char *buffer); 168*7c478bd9Sstevel@tonic-gate 169*7c478bd9Sstevel@tonic-gate 170*7c478bd9Sstevel@tonic-gate /* fssnap interface implementations (see fssnap_if.h) */ 171*7c478bd9Sstevel@tonic-gate static void fssnap_strategy_impl(void *, struct buf *); 172*7c478bd9Sstevel@tonic-gate static void *fssnap_create_impl(chunknumber_t, uint_t, u_offset_t, 173*7c478bd9Sstevel@tonic-gate struct vnode *, int, struct vnode **, char *, u_offset_t); 174*7c478bd9Sstevel@tonic-gate static void fssnap_set_candidate_impl(void *, chunknumber_t); 175*7c478bd9Sstevel@tonic-gate static int fssnap_is_candidate_impl(void *, u_offset_t); 176*7c478bd9Sstevel@tonic-gate static int fssnap_create_done_impl(void *); 177*7c478bd9Sstevel@tonic-gate static int fssnap_delete_impl(void *); 178*7c478bd9Sstevel@tonic-gate 179*7c478bd9Sstevel@tonic-gate /* fssnap interface support routines */ 180*7c478bd9Sstevel@tonic-gate static int fssnap_translate(struct snapshot_id **, struct buf *); 181*7c478bd9Sstevel@tonic-gate static void fssnap_write_taskq(void *); 182*7c478bd9Sstevel@tonic-gate static void fssnap_create_kstats(snapshot_id_t *, int, const char *, 183*7c478bd9Sstevel@tonic-gate const char *); 184*7c478bd9Sstevel@tonic-gate static int fssnap_update_kstat_num(kstat_t *, int); 185*7c478bd9Sstevel@tonic-gate static void fssnap_delete_kstats(struct cow_info *); 186*7c478bd9Sstevel@tonic-gate 187*7c478bd9Sstevel@tonic-gate /* translation table prototypes */ 188*7c478bd9Sstevel@tonic-gate static cow_map_node_t *transtbl_add(cow_map_t *, chunknumber_t, caddr_t); 189*7c478bd9Sstevel@tonic-gate static cow_map_node_t *transtbl_get(cow_map_t *, chunknumber_t); 190*7c478bd9Sstevel@tonic-gate static void transtbl_delete(cow_map_t *, cow_map_node_t *); 
191*7c478bd9Sstevel@tonic-gate static void transtbl_free(cow_map_t *); 192*7c478bd9Sstevel@tonic-gate 193*7c478bd9Sstevel@tonic-gate static kstat_t *fssnap_highwater_kstat; 194*7c478bd9Sstevel@tonic-gate 195*7c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 196*7c478bd9Sstevel@tonic-gate 197*7c478bd9Sstevel@tonic-gate /* Device and Module Structures */ 198*7c478bd9Sstevel@tonic-gate 199*7c478bd9Sstevel@tonic-gate static struct cb_ops snap_cb_ops = { 200*7c478bd9Sstevel@tonic-gate snap_open, 201*7c478bd9Sstevel@tonic-gate snap_close, 202*7c478bd9Sstevel@tonic-gate snap_strategy, 203*7c478bd9Sstevel@tonic-gate snap_print, 204*7c478bd9Sstevel@tonic-gate nodev, /* no snap_dump */ 205*7c478bd9Sstevel@tonic-gate snap_read, 206*7c478bd9Sstevel@tonic-gate nodev, /* no snap_write */ 207*7c478bd9Sstevel@tonic-gate snap_ioctl, 208*7c478bd9Sstevel@tonic-gate nodev, /* no snap_devmap */ 209*7c478bd9Sstevel@tonic-gate nodev, /* no snap_mmap */ 210*7c478bd9Sstevel@tonic-gate nodev, /* no snap_segmap */ 211*7c478bd9Sstevel@tonic-gate nochpoll, 212*7c478bd9Sstevel@tonic-gate snap_prop_op, 213*7c478bd9Sstevel@tonic-gate NULL, /* streamtab */ 214*7c478bd9Sstevel@tonic-gate D_64BIT | D_NEW | D_MP, /* driver compatibility */ 215*7c478bd9Sstevel@tonic-gate CB_REV, 216*7c478bd9Sstevel@tonic-gate nodev, /* async I/O read entry point */ 217*7c478bd9Sstevel@tonic-gate nodev /* async I/O write entry point */ 218*7c478bd9Sstevel@tonic-gate }; 219*7c478bd9Sstevel@tonic-gate 220*7c478bd9Sstevel@tonic-gate static struct dev_ops snap_ops = { 221*7c478bd9Sstevel@tonic-gate DEVO_REV, 222*7c478bd9Sstevel@tonic-gate 0, /* ref count */ 223*7c478bd9Sstevel@tonic-gate snap_getinfo, 224*7c478bd9Sstevel@tonic-gate nulldev, /* snap_identify obsolete */ 225*7c478bd9Sstevel@tonic-gate nulldev, /* no snap_probe */ 226*7c478bd9Sstevel@tonic-gate snap_attach, 227*7c478bd9Sstevel@tonic-gate snap_detach, 228*7c478bd9Sstevel@tonic-gate nodev, /* no snap_reset 
*/ 229*7c478bd9Sstevel@tonic-gate &snap_cb_ops, 230*7c478bd9Sstevel@tonic-gate (struct bus_ops *)NULL, 231*7c478bd9Sstevel@tonic-gate nulldev /* no snap_power() */ 232*7c478bd9Sstevel@tonic-gate }; 233*7c478bd9Sstevel@tonic-gate 234*7c478bd9Sstevel@tonic-gate extern struct mod_ops mod_driverops; 235*7c478bd9Sstevel@tonic-gate 236*7c478bd9Sstevel@tonic-gate static struct modldrv md = { 237*7c478bd9Sstevel@tonic-gate &mod_driverops, /* Type of module. This is a driver */ 238*7c478bd9Sstevel@tonic-gate "snapshot driver %I%", /* Name of the module */ 239*7c478bd9Sstevel@tonic-gate &snap_ops, 240*7c478bd9Sstevel@tonic-gate }; 241*7c478bd9Sstevel@tonic-gate 242*7c478bd9Sstevel@tonic-gate static struct modlinkage ml = { 243*7c478bd9Sstevel@tonic-gate MODREV_1, 244*7c478bd9Sstevel@tonic-gate &md, 245*7c478bd9Sstevel@tonic-gate NULL 246*7c478bd9Sstevel@tonic-gate }; 247*7c478bd9Sstevel@tonic-gate 248*7c478bd9Sstevel@tonic-gate static void *statep; 249*7c478bd9Sstevel@tonic-gate 250*7c478bd9Sstevel@tonic-gate int 251*7c478bd9Sstevel@tonic-gate _init(void) 252*7c478bd9Sstevel@tonic-gate { 253*7c478bd9Sstevel@tonic-gate int error; 254*7c478bd9Sstevel@tonic-gate kstat_t *ksp; 255*7c478bd9Sstevel@tonic-gate kstat_named_t *ksdata; 256*7c478bd9Sstevel@tonic-gate 257*7c478bd9Sstevel@tonic-gate error = ddi_soft_state_init(&statep, sizeof (struct snapshot_id *), 1); 258*7c478bd9Sstevel@tonic-gate if (error) { 259*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to init ddi_soft_state."); 260*7c478bd9Sstevel@tonic-gate return (error); 261*7c478bd9Sstevel@tonic-gate } 262*7c478bd9Sstevel@tonic-gate 263*7c478bd9Sstevel@tonic-gate error = mod_install(&ml); 264*7c478bd9Sstevel@tonic-gate 265*7c478bd9Sstevel@tonic-gate if (error) { 266*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to mod_install."); 267*7c478bd9Sstevel@tonic-gate ddi_soft_state_fini(&statep); 268*7c478bd9Sstevel@tonic-gate return (error); 269*7c478bd9Sstevel@tonic-gate } 
270*7c478bd9Sstevel@tonic-gate 271*7c478bd9Sstevel@tonic-gate /* 272*7c478bd9Sstevel@tonic-gate * Fill in the snapshot operations vector for file systems 273*7c478bd9Sstevel@tonic-gate * (defined in fssnap_if.c) 274*7c478bd9Sstevel@tonic-gate */ 275*7c478bd9Sstevel@tonic-gate 276*7c478bd9Sstevel@tonic-gate snapops.fssnap_create = fssnap_create_impl; 277*7c478bd9Sstevel@tonic-gate snapops.fssnap_set_candidate = fssnap_set_candidate_impl; 278*7c478bd9Sstevel@tonic-gate snapops.fssnap_is_candidate = fssnap_is_candidate_impl; 279*7c478bd9Sstevel@tonic-gate snapops.fssnap_create_done = fssnap_create_done_impl; 280*7c478bd9Sstevel@tonic-gate snapops.fssnap_delete = fssnap_delete_impl; 281*7c478bd9Sstevel@tonic-gate snapops.fssnap_strategy = fssnap_strategy_impl; 282*7c478bd9Sstevel@tonic-gate 283*7c478bd9Sstevel@tonic-gate mutex_init(&snapshot_mutex, NULL, MUTEX_DEFAULT, NULL); 284*7c478bd9Sstevel@tonic-gate 285*7c478bd9Sstevel@tonic-gate /* 286*7c478bd9Sstevel@tonic-gate * Initialize the fssnap highwater kstat 287*7c478bd9Sstevel@tonic-gate */ 288*7c478bd9Sstevel@tonic-gate ksp = kstat_create(snapname, 0, FSSNAP_KSTAT_HIGHWATER, "misc", 289*7c478bd9Sstevel@tonic-gate KSTAT_TYPE_NAMED, 1, 0); 290*7c478bd9Sstevel@tonic-gate if (ksp != NULL) { 291*7c478bd9Sstevel@tonic-gate ksdata = (kstat_named_t *)ksp->ks_data; 292*7c478bd9Sstevel@tonic-gate kstat_named_init(ksdata, FSSNAP_KSTAT_HIGHWATER, 293*7c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT32); 294*7c478bd9Sstevel@tonic-gate ksdata->value.ui32 = 0; 295*7c478bd9Sstevel@tonic-gate kstat_install(ksp); 296*7c478bd9Sstevel@tonic-gate } else { 297*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "_init: failed to create highwater kstat."); 298*7c478bd9Sstevel@tonic-gate } 299*7c478bd9Sstevel@tonic-gate fssnap_highwater_kstat = ksp; 300*7c478bd9Sstevel@tonic-gate 301*7c478bd9Sstevel@tonic-gate return (0); 302*7c478bd9Sstevel@tonic-gate } 303*7c478bd9Sstevel@tonic-gate 304*7c478bd9Sstevel@tonic-gate int 305*7c478bd9Sstevel@tonic-gate 
_info(struct modinfo *modinfop) 306*7c478bd9Sstevel@tonic-gate { 307*7c478bd9Sstevel@tonic-gate return (mod_info(&ml, modinfop)); 308*7c478bd9Sstevel@tonic-gate } 309*7c478bd9Sstevel@tonic-gate 310*7c478bd9Sstevel@tonic-gate int 311*7c478bd9Sstevel@tonic-gate _fini(void) 312*7c478bd9Sstevel@tonic-gate { 313*7c478bd9Sstevel@tonic-gate int error; 314*7c478bd9Sstevel@tonic-gate 315*7c478bd9Sstevel@tonic-gate error = mod_remove(&ml); 316*7c478bd9Sstevel@tonic-gate if (error) 317*7c478bd9Sstevel@tonic-gate return (error); 318*7c478bd9Sstevel@tonic-gate ddi_soft_state_fini(&statep); 319*7c478bd9Sstevel@tonic-gate 320*7c478bd9Sstevel@tonic-gate /* 321*7c478bd9Sstevel@tonic-gate * delete the fssnap highwater kstat 322*7c478bd9Sstevel@tonic-gate */ 323*7c478bd9Sstevel@tonic-gate kstat_delete(fssnap_highwater_kstat); 324*7c478bd9Sstevel@tonic-gate 325*7c478bd9Sstevel@tonic-gate mutex_destroy(&snapshot_mutex); 326*7c478bd9Sstevel@tonic-gate 327*7c478bd9Sstevel@tonic-gate /* Clear out the file system operations vector */ 328*7c478bd9Sstevel@tonic-gate snapops.fssnap_create = NULL; 329*7c478bd9Sstevel@tonic-gate snapops.fssnap_set_candidate = NULL; 330*7c478bd9Sstevel@tonic-gate snapops.fssnap_create_done = NULL; 331*7c478bd9Sstevel@tonic-gate snapops.fssnap_delete = NULL; 332*7c478bd9Sstevel@tonic-gate snapops.fssnap_strategy = NULL; 333*7c478bd9Sstevel@tonic-gate 334*7c478bd9Sstevel@tonic-gate return (0); 335*7c478bd9Sstevel@tonic-gate } 336*7c478bd9Sstevel@tonic-gate 337*7c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 338*7c478bd9Sstevel@tonic-gate 339*7c478bd9Sstevel@tonic-gate /* 340*7c478bd9Sstevel@tonic-gate * Snapshot Driver Routines 341*7c478bd9Sstevel@tonic-gate * 342*7c478bd9Sstevel@tonic-gate * This section implements the snapshot character and block drivers. 
 * The device will appear to be a consistent read-only file system to
 * applications that wish to back it up or mount it.  The snapshot driver
 * communicates with the file system through the translation table, which
 * tells the snapshot driver where to find the data necessary to piece
 * together the frozen file system.  The data may either be on the master
 * device (no translation exists), in memory (a translation exists but has
 * not been flushed to the backing store), or in the backing store file.
 * The read request may require the snapshot driver to retrieve data from
 * several different places and piece it together to look like a single
 * contiguous read.
 *
 * The device minor number corresponds to the snapshot number in the list of
 * snapshot identifiers.  The soft state for each minor number is simply a
 * pointer to the snapshot id, which holds all of the snapshot state.  One
 * minor number is designated as the control device.  All snapshot create
 * and delete requests go through the control device to ensure this module
 * is properly loaded and attached before the file system starts calling
 * routines defined here.
361*7c478bd9Sstevel@tonic-gate */ 362*7c478bd9Sstevel@tonic-gate 363*7c478bd9Sstevel@tonic-gate 364*7c478bd9Sstevel@tonic-gate /* 365*7c478bd9Sstevel@tonic-gate * snap_getinfo() - snapshot driver getinfo(9E) routine 366*7c478bd9Sstevel@tonic-gate * 367*7c478bd9Sstevel@tonic-gate */ 368*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 369*7c478bd9Sstevel@tonic-gate static int 370*7c478bd9Sstevel@tonic-gate snap_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 371*7c478bd9Sstevel@tonic-gate { 372*7c478bd9Sstevel@tonic-gate switch (infocmd) { 373*7c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 374*7c478bd9Sstevel@tonic-gate *result = fssnap_dip; 375*7c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 376*7c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 377*7c478bd9Sstevel@tonic-gate *result = 0; /* we only have one instance */ 378*7c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 379*7c478bd9Sstevel@tonic-gate } 380*7c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 381*7c478bd9Sstevel@tonic-gate } 382*7c478bd9Sstevel@tonic-gate 383*7c478bd9Sstevel@tonic-gate /* 384*7c478bd9Sstevel@tonic-gate * snap_attach() - snapshot driver attach(9E) routine 385*7c478bd9Sstevel@tonic-gate * 386*7c478bd9Sstevel@tonic-gate * sets up snapshot control device and control state. 
The control state 387*7c478bd9Sstevel@tonic-gate * is a pointer to an "anonymous" snapshot_id for tracking opens and closes 388*7c478bd9Sstevel@tonic-gate */ 389*7c478bd9Sstevel@tonic-gate static int 390*7c478bd9Sstevel@tonic-gate snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 391*7c478bd9Sstevel@tonic-gate { 392*7c478bd9Sstevel@tonic-gate int error; 393*7c478bd9Sstevel@tonic-gate 394*7c478bd9Sstevel@tonic-gate switch (cmd) { 395*7c478bd9Sstevel@tonic-gate case DDI_ATTACH: 396*7c478bd9Sstevel@tonic-gate /* create the control device */ 397*7c478bd9Sstevel@tonic-gate error = ddi_create_priv_minor_node(dip, SNAP_CTL_NODE, S_IFCHR, 398*7c478bd9Sstevel@tonic-gate SNAP_CTL_MINOR, DDI_PSEUDO, PRIVONLY_DEV, 399*7c478bd9Sstevel@tonic-gate PRIV_SYS_CONFIG, PRIV_SYS_CONFIG, 0666); 400*7c478bd9Sstevel@tonic-gate if (error == DDI_FAILURE) { 401*7c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 402*7c478bd9Sstevel@tonic-gate } 403*7c478bd9Sstevel@tonic-gate 404*7c478bd9Sstevel@tonic-gate rw_init(&snap_ctl.sid_rwlock, NULL, RW_DEFAULT, NULL); 405*7c478bd9Sstevel@tonic-gate rw_enter(&snap_ctl.sid_rwlock, RW_WRITER); 406*7c478bd9Sstevel@tonic-gate fssnap_dip = dip; 407*7c478bd9Sstevel@tonic-gate snap_ctl.sid_snapnumber = SNAP_CTL_MINOR; 408*7c478bd9Sstevel@tonic-gate /* the control sid is not linked into the snapshot list */ 409*7c478bd9Sstevel@tonic-gate snap_ctl.sid_next = NULL; 410*7c478bd9Sstevel@tonic-gate snap_ctl.sid_cowinfo = NULL; 411*7c478bd9Sstevel@tonic-gate snap_ctl.sid_flags = 0; 412*7c478bd9Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 413*7c478bd9Sstevel@tonic-gate ddi_report_dev(dip); 414*7c478bd9Sstevel@tonic-gate 415*7c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 416*7c478bd9Sstevel@tonic-gate case DDI_PM_RESUME: 417*7c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 418*7c478bd9Sstevel@tonic-gate 419*7c478bd9Sstevel@tonic-gate case DDI_RESUME: 420*7c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 421*7c478bd9Sstevel@tonic-gate 
422*7c478bd9Sstevel@tonic-gate default: 423*7c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 424*7c478bd9Sstevel@tonic-gate } 425*7c478bd9Sstevel@tonic-gate } 426*7c478bd9Sstevel@tonic-gate 427*7c478bd9Sstevel@tonic-gate /* 428*7c478bd9Sstevel@tonic-gate * snap_detach() - snapshot driver detach(9E) routine 429*7c478bd9Sstevel@tonic-gate * 430*7c478bd9Sstevel@tonic-gate * destroys snapshot control device and control state. If any snapshots 431*7c478bd9Sstevel@tonic-gate * are active (ie. num_snapshots != 0), the device will refuse to detach. 432*7c478bd9Sstevel@tonic-gate */ 433*7c478bd9Sstevel@tonic-gate static int 434*7c478bd9Sstevel@tonic-gate snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 435*7c478bd9Sstevel@tonic-gate { 436*7c478bd9Sstevel@tonic-gate struct snapshot_id *sidp, *sidnextp; 437*7c478bd9Sstevel@tonic-gate 438*7c478bd9Sstevel@tonic-gate switch (cmd) { 439*7c478bd9Sstevel@tonic-gate case DDI_DETACH: 440*7c478bd9Sstevel@tonic-gate /* do not detach if the device is active */ 441*7c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 442*7c478bd9Sstevel@tonic-gate if ((num_snapshots != 0) || 443*7c478bd9Sstevel@tonic-gate ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0)) { 444*7c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 445*7c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 446*7c478bd9Sstevel@tonic-gate } 447*7c478bd9Sstevel@tonic-gate 448*7c478bd9Sstevel@tonic-gate /* free up the snapshot list */ 449*7c478bd9Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidnextp) { 450*7c478bd9Sstevel@tonic-gate ASSERT(SID_AVAILABLE(sidp) && 451*7c478bd9Sstevel@tonic-gate !RW_LOCK_HELD(&sidp->sid_rwlock)); 452*7c478bd9Sstevel@tonic-gate sidnextp = sidp->sid_next; 453*7c478bd9Sstevel@tonic-gate rw_destroy(&sidp->sid_rwlock); 454*7c478bd9Sstevel@tonic-gate kmem_free(sidp, sizeof (struct snapshot_id)); 455*7c478bd9Sstevel@tonic-gate } 456*7c478bd9Sstevel@tonic-gate snapshot = NULL; 457*7c478bd9Sstevel@tonic-gate 458*7c478bd9Sstevel@tonic-gate /* 
delete the control device */ 459*7c478bd9Sstevel@tonic-gate ddi_remove_minor_node(dip, SNAP_CTL_NODE); 460*7c478bd9Sstevel@tonic-gate fssnap_dip = NULL; 461*7c478bd9Sstevel@tonic-gate 462*7c478bd9Sstevel@tonic-gate ASSERT((snap_ctl.sid_flags & SID_CHAR_BUSY) == 0); 463*7c478bd9Sstevel@tonic-gate rw_destroy(&snap_ctl.sid_rwlock); 464*7c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 465*7c478bd9Sstevel@tonic-gate 466*7c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 467*7c478bd9Sstevel@tonic-gate 468*7c478bd9Sstevel@tonic-gate default: 469*7c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 470*7c478bd9Sstevel@tonic-gate } 471*7c478bd9Sstevel@tonic-gate } 472*7c478bd9Sstevel@tonic-gate 473*7c478bd9Sstevel@tonic-gate /* 474*7c478bd9Sstevel@tonic-gate * snap_open() - snapshot driver open(9E) routine 475*7c478bd9Sstevel@tonic-gate * 476*7c478bd9Sstevel@tonic-gate * marks the snapshot id as busy so it will not be recycled when deleted 477*7c478bd9Sstevel@tonic-gate * until the snapshot is closed. 
478*7c478bd9Sstevel@tonic-gate */ 479*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 480*7c478bd9Sstevel@tonic-gate static int 481*7c478bd9Sstevel@tonic-gate snap_open(dev_t *devp, int flag, int otyp, cred_t *cred) 482*7c478bd9Sstevel@tonic-gate { 483*7c478bd9Sstevel@tonic-gate minor_t minor; 484*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp; 485*7c478bd9Sstevel@tonic-gate 486*7c478bd9Sstevel@tonic-gate /* snapshots are read-only */ 487*7c478bd9Sstevel@tonic-gate if (flag & FWRITE) 488*7c478bd9Sstevel@tonic-gate return (EROFS); 489*7c478bd9Sstevel@tonic-gate 490*7c478bd9Sstevel@tonic-gate minor = getminor(*devp); 491*7c478bd9Sstevel@tonic-gate 492*7c478bd9Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) { 493*7c478bd9Sstevel@tonic-gate /* control device must be opened exclusively */ 494*7c478bd9Sstevel@tonic-gate if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) 495*7c478bd9Sstevel@tonic-gate return (EINVAL); 496*7c478bd9Sstevel@tonic-gate 497*7c478bd9Sstevel@tonic-gate rw_enter(&snap_ctl.sid_rwlock, RW_WRITER); 498*7c478bd9Sstevel@tonic-gate if ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0) { 499*7c478bd9Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 500*7c478bd9Sstevel@tonic-gate return (EBUSY); 501*7c478bd9Sstevel@tonic-gate } 502*7c478bd9Sstevel@tonic-gate 503*7c478bd9Sstevel@tonic-gate snap_ctl.sid_flags |= SID_CHAR_BUSY; 504*7c478bd9Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 505*7c478bd9Sstevel@tonic-gate 506*7c478bd9Sstevel@tonic-gate return (0); 507*7c478bd9Sstevel@tonic-gate } 508*7c478bd9Sstevel@tonic-gate 509*7c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 510*7c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) 511*7c478bd9Sstevel@tonic-gate return (ENXIO); 512*7c478bd9Sstevel@tonic-gate sidp = *sidpp; 513*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 514*7c478bd9Sstevel@tonic-gate 515*7c478bd9Sstevel@tonic-gate if ((flag & FEXCL) && SID_BUSY(sidp)) { 516*7c478bd9Sstevel@tonic-gate 
rw_exit(&sidp->sid_rwlock); 517*7c478bd9Sstevel@tonic-gate return (EAGAIN); 518*7c478bd9Sstevel@tonic-gate } 519*7c478bd9Sstevel@tonic-gate 520*7c478bd9Sstevel@tonic-gate ASSERT(sidpp != NULL && sidp != NULL); 521*7c478bd9Sstevel@tonic-gate /* check to see if this snapshot has been killed on us */ 522*7c478bd9Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 523*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_open: snapshot %d does not exist.", 524*7c478bd9Sstevel@tonic-gate minor); 525*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 526*7c478bd9Sstevel@tonic-gate return (ENXIO); 527*7c478bd9Sstevel@tonic-gate } 528*7c478bd9Sstevel@tonic-gate 529*7c478bd9Sstevel@tonic-gate switch (otyp) { 530*7c478bd9Sstevel@tonic-gate case OTYP_CHR: 531*7c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_CHAR_BUSY; 532*7c478bd9Sstevel@tonic-gate break; 533*7c478bd9Sstevel@tonic-gate case OTYP_BLK: 534*7c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_BLOCK_BUSY; 535*7c478bd9Sstevel@tonic-gate break; 536*7c478bd9Sstevel@tonic-gate default: 537*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 538*7c478bd9Sstevel@tonic-gate return (EINVAL); 539*7c478bd9Sstevel@tonic-gate } 540*7c478bd9Sstevel@tonic-gate 541*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 542*7c478bd9Sstevel@tonic-gate 543*7c478bd9Sstevel@tonic-gate /* 544*7c478bd9Sstevel@tonic-gate * at this point if a valid snapshot was found then it has 545*7c478bd9Sstevel@tonic-gate * been marked busy and we can use it. 546*7c478bd9Sstevel@tonic-gate */ 547*7c478bd9Sstevel@tonic-gate return (0); 548*7c478bd9Sstevel@tonic-gate } 549*7c478bd9Sstevel@tonic-gate 550*7c478bd9Sstevel@tonic-gate /* 551*7c478bd9Sstevel@tonic-gate * snap_close() - snapshot driver close(9E) routine 552*7c478bd9Sstevel@tonic-gate * 553*7c478bd9Sstevel@tonic-gate * unsets the busy bits in the snapshot id. 
If the snapshot has been 554*7c478bd9Sstevel@tonic-gate * deleted while the snapshot device was open, the close call will clean 555*7c478bd9Sstevel@tonic-gate * up the remaining state information. 556*7c478bd9Sstevel@tonic-gate */ 557*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 558*7c478bd9Sstevel@tonic-gate static int 559*7c478bd9Sstevel@tonic-gate snap_close(dev_t dev, int flag, int otyp, cred_t *cred) 560*7c478bd9Sstevel@tonic-gate { 561*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp; 562*7c478bd9Sstevel@tonic-gate minor_t minor; 563*7c478bd9Sstevel@tonic-gate char name[20]; 564*7c478bd9Sstevel@tonic-gate 565*7c478bd9Sstevel@tonic-gate minor = getminor(dev); 566*7c478bd9Sstevel@tonic-gate 567*7c478bd9Sstevel@tonic-gate /* if this is the control device, close it and return */ 568*7c478bd9Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) { 569*7c478bd9Sstevel@tonic-gate rw_enter(&snap_ctl.sid_rwlock, RW_WRITER); 570*7c478bd9Sstevel@tonic-gate snap_ctl.sid_flags &= ~(SID_CHAR_BUSY); 571*7c478bd9Sstevel@tonic-gate rw_exit(&snap_ctl.sid_rwlock); 572*7c478bd9Sstevel@tonic-gate return (0); 573*7c478bd9Sstevel@tonic-gate } 574*7c478bd9Sstevel@tonic-gate 575*7c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 576*7c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 577*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_close: could not find state for " 578*7c478bd9Sstevel@tonic-gate "snapshot %d.", minor); 579*7c478bd9Sstevel@tonic-gate return (ENXIO); 580*7c478bd9Sstevel@tonic-gate } 581*7c478bd9Sstevel@tonic-gate sidp = *sidpp; 582*7c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 583*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 584*7c478bd9Sstevel@tonic-gate 585*7c478bd9Sstevel@tonic-gate /* Mark the snapshot as not being busy anymore */ 586*7c478bd9Sstevel@tonic-gate switch (otyp) { 587*7c478bd9Sstevel@tonic-gate case OTYP_CHR: 588*7c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_CHAR_BUSY); 
589*7c478bd9Sstevel@tonic-gate break; 590*7c478bd9Sstevel@tonic-gate case OTYP_BLK: 591*7c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_BLOCK_BUSY); 592*7c478bd9Sstevel@tonic-gate break; 593*7c478bd9Sstevel@tonic-gate default: 594*7c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 595*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 596*7c478bd9Sstevel@tonic-gate return (EINVAL); 597*7c478bd9Sstevel@tonic-gate } 598*7c478bd9Sstevel@tonic-gate 599*7c478bd9Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) { 600*7c478bd9Sstevel@tonic-gate /* 601*7c478bd9Sstevel@tonic-gate * if this is the last close on a snapshot that has been 602*7c478bd9Sstevel@tonic-gate * deleted, then free up the soft state. The snapdelete 603*7c478bd9Sstevel@tonic-gate * ioctl does not free this when the device is in use so 604*7c478bd9Sstevel@tonic-gate * we do it here after the last reference goes away. 605*7c478bd9Sstevel@tonic-gate */ 606*7c478bd9Sstevel@tonic-gate 607*7c478bd9Sstevel@tonic-gate /* remove the device nodes */ 608*7c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 609*7c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", 610*7c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 611*7c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 612*7c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", 613*7c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 614*7c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 615*7c478bd9Sstevel@tonic-gate 616*7c478bd9Sstevel@tonic-gate /* delete the state structure */ 617*7c478bd9Sstevel@tonic-gate ddi_soft_state_free(statep, sidp->sid_snapnumber); 618*7c478bd9Sstevel@tonic-gate num_snapshots--; 619*7c478bd9Sstevel@tonic-gate } 620*7c478bd9Sstevel@tonic-gate 621*7c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 622*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 623*7c478bd9Sstevel@tonic-gate 624*7c478bd9Sstevel@tonic-gate return (0); 625*7c478bd9Sstevel@tonic-gate 
} 626*7c478bd9Sstevel@tonic-gate 627*7c478bd9Sstevel@tonic-gate /* 628*7c478bd9Sstevel@tonic-gate * snap_read() - snapshot driver read(9E) routine 629*7c478bd9Sstevel@tonic-gate * 630*7c478bd9Sstevel@tonic-gate * reads data from the snapshot by calling snap_strategy() through physio() 631*7c478bd9Sstevel@tonic-gate */ 632*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 633*7c478bd9Sstevel@tonic-gate static int 634*7c478bd9Sstevel@tonic-gate snap_read(dev_t dev, struct uio *uiop, cred_t *credp) 635*7c478bd9Sstevel@tonic-gate { 636*7c478bd9Sstevel@tonic-gate minor_t minor; 637*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 638*7c478bd9Sstevel@tonic-gate 639*7c478bd9Sstevel@tonic-gate minor = getminor(dev); 640*7c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 641*7c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 642*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 643*7c478bd9Sstevel@tonic-gate "snap_read: could not find state for snapshot %d.", minor); 644*7c478bd9Sstevel@tonic-gate return (ENXIO); 645*7c478bd9Sstevel@tonic-gate } 646*7c478bd9Sstevel@tonic-gate return (physio(snap_strategy, NULL, dev, B_READ, minphys, uiop)); 647*7c478bd9Sstevel@tonic-gate } 648*7c478bd9Sstevel@tonic-gate 649*7c478bd9Sstevel@tonic-gate /* 650*7c478bd9Sstevel@tonic-gate * snap_strategy() - snapshot driver strategy(9E) routine 651*7c478bd9Sstevel@tonic-gate * 652*7c478bd9Sstevel@tonic-gate * cycles through each chunk in the requested buffer and calls 653*7c478bd9Sstevel@tonic-gate * snap_getchunk() on each chunk to retrieve it from the appropriate 654*7c478bd9Sstevel@tonic-gate * place. Once all of the parts are put together the requested buffer 655*7c478bd9Sstevel@tonic-gate * is returned. The snapshot driver is read-only, so a write is invalid. 
656*7c478bd9Sstevel@tonic-gate */ 657*7c478bd9Sstevel@tonic-gate static int 658*7c478bd9Sstevel@tonic-gate snap_strategy(struct buf *bp) 659*7c478bd9Sstevel@tonic-gate { 660*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp; 661*7c478bd9Sstevel@tonic-gate minor_t minor; 662*7c478bd9Sstevel@tonic-gate chunknumber_t chunk; 663*7c478bd9Sstevel@tonic-gate int off, len; 664*7c478bd9Sstevel@tonic-gate u_longlong_t reqptr; 665*7c478bd9Sstevel@tonic-gate int error = 0; 666*7c478bd9Sstevel@tonic-gate size_t chunksz; 667*7c478bd9Sstevel@tonic-gate caddr_t buf; 668*7c478bd9Sstevel@tonic-gate 669*7c478bd9Sstevel@tonic-gate /* snapshot device is read-only */ 670*7c478bd9Sstevel@tonic-gate if (bp->b_flags & B_WRITE) { 671*7c478bd9Sstevel@tonic-gate bioerror(bp, EROFS); 672*7c478bd9Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 673*7c478bd9Sstevel@tonic-gate biodone(bp); 674*7c478bd9Sstevel@tonic-gate return (0); 675*7c478bd9Sstevel@tonic-gate } 676*7c478bd9Sstevel@tonic-gate 677*7c478bd9Sstevel@tonic-gate minor = getminor(bp->b_edev); 678*7c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 679*7c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 680*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 681*7c478bd9Sstevel@tonic-gate "snap_strategy: could not find state for snapshot %d.", 682*7c478bd9Sstevel@tonic-gate minor); 683*7c478bd9Sstevel@tonic-gate bioerror(bp, ENXIO); 684*7c478bd9Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 685*7c478bd9Sstevel@tonic-gate biodone(bp); 686*7c478bd9Sstevel@tonic-gate return (0); 687*7c478bd9Sstevel@tonic-gate } 688*7c478bd9Sstevel@tonic-gate sidp = *sidpp; 689*7c478bd9Sstevel@tonic-gate ASSERT(sidp); 690*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 691*7c478bd9Sstevel@tonic-gate 692*7c478bd9Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 693*7c478bd9Sstevel@tonic-gate bioerror(bp, ENXIO); 694*7c478bd9Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 695*7c478bd9Sstevel@tonic-gate 
biodone(bp); 696*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 697*7c478bd9Sstevel@tonic-gate return (0); 698*7c478bd9Sstevel@tonic-gate } 699*7c478bd9Sstevel@tonic-gate 700*7c478bd9Sstevel@tonic-gate if (bp->b_flags & (B_PAGEIO|B_PHYS)) 701*7c478bd9Sstevel@tonic-gate bp_mapin(bp); 702*7c478bd9Sstevel@tonic-gate 703*7c478bd9Sstevel@tonic-gate bp->b_resid = bp->b_bcount; 704*7c478bd9Sstevel@tonic-gate ASSERT(bp->b_un.b_addr); 705*7c478bd9Sstevel@tonic-gate buf = bp->b_un.b_addr; 706*7c478bd9Sstevel@tonic-gate 707*7c478bd9Sstevel@tonic-gate chunksz = sidp->sid_cowinfo->cow_map.cmap_chunksz; 708*7c478bd9Sstevel@tonic-gate 709*7c478bd9Sstevel@tonic-gate /* reqptr is the current DEV_BSIZE offset into the device */ 710*7c478bd9Sstevel@tonic-gate /* chunk is the chunk containing reqptr */ 711*7c478bd9Sstevel@tonic-gate /* len is the length of the request (in the current chunk) in bytes */ 712*7c478bd9Sstevel@tonic-gate /* off is the byte offset into the current chunk */ 713*7c478bd9Sstevel@tonic-gate reqptr = bp->b_lblkno; 714*7c478bd9Sstevel@tonic-gate while (bp->b_resid > 0) { 715*7c478bd9Sstevel@tonic-gate chunk = dbtocowchunk(&sidp->sid_cowinfo->cow_map, reqptr); 716*7c478bd9Sstevel@tonic-gate off = (reqptr % (chunksz >> DEV_BSHIFT)) << DEV_BSHIFT; 717*7c478bd9Sstevel@tonic-gate len = min(chunksz - off, bp->b_resid); 718*7c478bd9Sstevel@tonic-gate ASSERT((off + len) <= chunksz); 719*7c478bd9Sstevel@tonic-gate 720*7c478bd9Sstevel@tonic-gate if ((error = snap_getchunk(sidp, chunk, off, len, buf)) != 0) { 721*7c478bd9Sstevel@tonic-gate /* 722*7c478bd9Sstevel@tonic-gate * EINVAL means the user tried to go out of range. 723*7c478bd9Sstevel@tonic-gate * Anything else means it's likely that we're 724*7c478bd9Sstevel@tonic-gate * confused. 
725*7c478bd9Sstevel@tonic-gate */ 726*7c478bd9Sstevel@tonic-gate if (error != EINVAL) { 727*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_strategy: error " 728*7c478bd9Sstevel@tonic-gate "calling snap_getchunk, chunk = %llu, " 729*7c478bd9Sstevel@tonic-gate "offset = %d, len = %d, resid = %lu, " 730*7c478bd9Sstevel@tonic-gate "error = %d.", 731*7c478bd9Sstevel@tonic-gate chunk, off, len, bp->b_resid, error); 732*7c478bd9Sstevel@tonic-gate } 733*7c478bd9Sstevel@tonic-gate bioerror(bp, error); 734*7c478bd9Sstevel@tonic-gate biodone(bp); 735*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 736*7c478bd9Sstevel@tonic-gate return (0); 737*7c478bd9Sstevel@tonic-gate } 738*7c478bd9Sstevel@tonic-gate bp->b_resid -= len; 739*7c478bd9Sstevel@tonic-gate reqptr += (len >> DEV_BSHIFT); 740*7c478bd9Sstevel@tonic-gate buf += len; 741*7c478bd9Sstevel@tonic-gate } 742*7c478bd9Sstevel@tonic-gate 743*7c478bd9Sstevel@tonic-gate ASSERT(bp->b_resid == 0); 744*7c478bd9Sstevel@tonic-gate biodone(bp); 745*7c478bd9Sstevel@tonic-gate 746*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 747*7c478bd9Sstevel@tonic-gate return (0); 748*7c478bd9Sstevel@tonic-gate } 749*7c478bd9Sstevel@tonic-gate 750*7c478bd9Sstevel@tonic-gate /* 751*7c478bd9Sstevel@tonic-gate * snap_getchunk() - helper function for snap_strategy() 752*7c478bd9Sstevel@tonic-gate * 753*7c478bd9Sstevel@tonic-gate * gets the requested data from the appropriate place and fills in the 754*7c478bd9Sstevel@tonic-gate * buffer. chunk is the chunk number of the request, offset is the 755*7c478bd9Sstevel@tonic-gate * offset into that chunk and must be less than the chunk size. len is 756*7c478bd9Sstevel@tonic-gate * the length of the request starting at offset, and must not exceed a 757*7c478bd9Sstevel@tonic-gate * chunk boundary. buffer is the address to copy the data to. len 758*7c478bd9Sstevel@tonic-gate * bytes are copied into the buffer starting at the location specified. 
759*7c478bd9Sstevel@tonic-gate * 760*7c478bd9Sstevel@tonic-gate * A chunk is located according to the following algorithm: 761*7c478bd9Sstevel@tonic-gate * - If the chunk does not have a translation or is not a candidate 762*7c478bd9Sstevel@tonic-gate * for translation, it is read straight from the master device. 763*7c478bd9Sstevel@tonic-gate * - If the chunk does have a translation, then it is either on 764*7c478bd9Sstevel@tonic-gate * disk or in memory: 765*7c478bd9Sstevel@tonic-gate * o If it is in memory the requested data is simply copied out 766*7c478bd9Sstevel@tonic-gate * of the in-memory buffer. 767*7c478bd9Sstevel@tonic-gate * o If it is in the backing store, it is read from there. 768*7c478bd9Sstevel@tonic-gate * 769*7c478bd9Sstevel@tonic-gate * This function does the real work of the snapshot driver. 770*7c478bd9Sstevel@tonic-gate */ 771*7c478bd9Sstevel@tonic-gate static int 772*7c478bd9Sstevel@tonic-gate snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, int offset, 773*7c478bd9Sstevel@tonic-gate int len, char *buffer) 774*7c478bd9Sstevel@tonic-gate { 775*7c478bd9Sstevel@tonic-gate cow_map_t *cmap = &sidp->sid_cowinfo->cow_map; 776*7c478bd9Sstevel@tonic-gate cow_map_node_t *cmn; 777*7c478bd9Sstevel@tonic-gate struct buf *snapbuf; 778*7c478bd9Sstevel@tonic-gate int error = 0; 779*7c478bd9Sstevel@tonic-gate char *newbuffer; 780*7c478bd9Sstevel@tonic-gate int newlen = 0; 781*7c478bd9Sstevel@tonic-gate int partial = 0; 782*7c478bd9Sstevel@tonic-gate 783*7c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&sidp->sid_rwlock)); 784*7c478bd9Sstevel@tonic-gate ASSERT(offset + len <= cmap->cmap_chunksz); 785*7c478bd9Sstevel@tonic-gate 786*7c478bd9Sstevel@tonic-gate /* 787*7c478bd9Sstevel@tonic-gate * Check if the chunk number is out of range and if so bail out 788*7c478bd9Sstevel@tonic-gate */ 789*7c478bd9Sstevel@tonic-gate if (chunk >= (cmap->cmap_bmsize * NBBY)) { 790*7c478bd9Sstevel@tonic-gate return (EINVAL); 791*7c478bd9Sstevel@tonic-gate } 
792*7c478bd9Sstevel@tonic-gate 793*7c478bd9Sstevel@tonic-gate /* 794*7c478bd9Sstevel@tonic-gate * If the chunk is not a candidate for translation, then the chunk 795*7c478bd9Sstevel@tonic-gate * was not allocated when the snapshot was taken. Since it does 796*7c478bd9Sstevel@tonic-gate * not contain data associated with this snapshot, just return a 797*7c478bd9Sstevel@tonic-gate * zero buffer instead. 798*7c478bd9Sstevel@tonic-gate */ 799*7c478bd9Sstevel@tonic-gate if (isclr(cmap->cmap_candidate, chunk)) { 800*7c478bd9Sstevel@tonic-gate bzero(buffer, len); 801*7c478bd9Sstevel@tonic-gate return (0); 802*7c478bd9Sstevel@tonic-gate } 803*7c478bd9Sstevel@tonic-gate 804*7c478bd9Sstevel@tonic-gate /* 805*7c478bd9Sstevel@tonic-gate * if the chunk is a candidate for translation but a 806*7c478bd9Sstevel@tonic-gate * translation does not exist, then read through to the 807*7c478bd9Sstevel@tonic-gate * original file system. The rwlock is held until the read 808*7c478bd9Sstevel@tonic-gate * completes if it hasn't been translated to make sure the 809*7c478bd9Sstevel@tonic-gate * file system does not translate the block before we 810*7c478bd9Sstevel@tonic-gate * access it. If it has already been translated we don't 811*7c478bd9Sstevel@tonic-gate * need the lock, because the translation will never go away. 812*7c478bd9Sstevel@tonic-gate */ 813*7c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_READER); 814*7c478bd9Sstevel@tonic-gate if (isclr(cmap->cmap_hastrans, chunk)) { 815*7c478bd9Sstevel@tonic-gate snapbuf = getrbuf(KM_SLEEP); 816*7c478bd9Sstevel@tonic-gate /* 817*7c478bd9Sstevel@tonic-gate * Reading into the buffer saves having to do a copy, 818*7c478bd9Sstevel@tonic-gate * but gets tricky if the request size is not a 819*7c478bd9Sstevel@tonic-gate * multiple of DEV_BSIZE. However, we are filling the 820*7c478bd9Sstevel@tonic-gate * buffer left to right, so future reads will write 821*7c478bd9Sstevel@tonic-gate * over any extra data we might have read. 
822*7c478bd9Sstevel@tonic-gate */ 823*7c478bd9Sstevel@tonic-gate 824*7c478bd9Sstevel@tonic-gate partial = len % DEV_BSIZE; 825*7c478bd9Sstevel@tonic-gate 826*7c478bd9Sstevel@tonic-gate snapbuf->b_bcount = len; 827*7c478bd9Sstevel@tonic-gate snapbuf->b_lblkno = lbtodb(chunk * cmap->cmap_chunksz + offset); 828*7c478bd9Sstevel@tonic-gate snapbuf->b_un.b_addr = buffer; 829*7c478bd9Sstevel@tonic-gate 830*7c478bd9Sstevel@tonic-gate snapbuf->b_iodone = NULL; 831*7c478bd9Sstevel@tonic-gate snapbuf->b_proc = NULL; /* i.e. the kernel */ 832*7c478bd9Sstevel@tonic-gate snapbuf->b_flags = B_READ | B_BUSY; 833*7c478bd9Sstevel@tonic-gate snapbuf->b_edev = sidp->sid_fvp->v_vfsp->vfs_dev; 834*7c478bd9Sstevel@tonic-gate 835*7c478bd9Sstevel@tonic-gate if (partial) { 836*7c478bd9Sstevel@tonic-gate /* 837*7c478bd9Sstevel@tonic-gate * Partial block read in progress. 838*7c478bd9Sstevel@tonic-gate * This is bad as modules further down the line 839*7c478bd9Sstevel@tonic-gate * assume buf's are exact multiples of DEV_BSIZE 840*7c478bd9Sstevel@tonic-gate * and we end up with fewer, or zero, bytes read. 841*7c478bd9Sstevel@tonic-gate * To get round this we need to round up to the 842*7c478bd9Sstevel@tonic-gate * nearest full block read and then return only 843*7c478bd9Sstevel@tonic-gate * len bytes. 
844*7c478bd9Sstevel@tonic-gate */ 845*7c478bd9Sstevel@tonic-gate newlen = (len - partial) + DEV_BSIZE; 846*7c478bd9Sstevel@tonic-gate newbuffer = kmem_alloc(newlen, KM_SLEEP); 847*7c478bd9Sstevel@tonic-gate 848*7c478bd9Sstevel@tonic-gate snapbuf->b_bcount = newlen; 849*7c478bd9Sstevel@tonic-gate snapbuf->b_un.b_addr = newbuffer; 850*7c478bd9Sstevel@tonic-gate } 851*7c478bd9Sstevel@tonic-gate 852*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(snapbuf); 853*7c478bd9Sstevel@tonic-gate (void) biowait(snapbuf); 854*7c478bd9Sstevel@tonic-gate 855*7c478bd9Sstevel@tonic-gate error = geterror(snapbuf); 856*7c478bd9Sstevel@tonic-gate 857*7c478bd9Sstevel@tonic-gate if (partial) { 858*7c478bd9Sstevel@tonic-gate /* 859*7c478bd9Sstevel@tonic-gate * Partial block read. Now we need to bcopy the 860*7c478bd9Sstevel@tonic-gate * correct number of bytes back into the 861*7c478bd9Sstevel@tonic-gate * supplied buffer, and tidy up our temp 862*7c478bd9Sstevel@tonic-gate * buffer. 863*7c478bd9Sstevel@tonic-gate */ 864*7c478bd9Sstevel@tonic-gate bcopy(newbuffer, buffer, len); 865*7c478bd9Sstevel@tonic-gate kmem_free(newbuffer, newlen); 866*7c478bd9Sstevel@tonic-gate } 867*7c478bd9Sstevel@tonic-gate 868*7c478bd9Sstevel@tonic-gate freerbuf(snapbuf); 869*7c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 870*7c478bd9Sstevel@tonic-gate 871*7c478bd9Sstevel@tonic-gate return (error); 872*7c478bd9Sstevel@tonic-gate } 873*7c478bd9Sstevel@tonic-gate 874*7c478bd9Sstevel@tonic-gate /* 875*7c478bd9Sstevel@tonic-gate * finally, if the chunk is a candidate for translation and it 876*7c478bd9Sstevel@tonic-gate * has been translated, then we clone the chunk of the buffer 877*7c478bd9Sstevel@tonic-gate * that was copied aside by the file system. 878*7c478bd9Sstevel@tonic-gate * The cmap_rwlock does not need to be held after we know the 879*7c478bd9Sstevel@tonic-gate * data has already been copied. 
Once a chunk has been copied 880*7c478bd9Sstevel@tonic-gate * to the backing file, it is stable read only data. 881*7c478bd9Sstevel@tonic-gate */ 882*7c478bd9Sstevel@tonic-gate cmn = transtbl_get(cmap, chunk); 883*7c478bd9Sstevel@tonic-gate 884*7c478bd9Sstevel@tonic-gate /* check whether the data is in memory or in the backing file */ 885*7c478bd9Sstevel@tonic-gate if (cmn != NULL) { 886*7c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_buf); 887*7c478bd9Sstevel@tonic-gate /* already in memory */ 888*7c478bd9Sstevel@tonic-gate bcopy(cmn->cmn_buf + offset, buffer, len); 889*7c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 890*7c478bd9Sstevel@tonic-gate } else { 891*7c478bd9Sstevel@tonic-gate ssize_t resid = len; 892*7c478bd9Sstevel@tonic-gate int bf_index; 893*7c478bd9Sstevel@tonic-gate /* 894*7c478bd9Sstevel@tonic-gate * can cause deadlock with writer if we don't drop the 895*7c478bd9Sstevel@tonic-gate * cmap_rwlock before trying to get the backing store file 896*7c478bd9Sstevel@tonic-gate * vnode rwlock. 
897*7c478bd9Sstevel@tonic-gate */ 898*7c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 899*7c478bd9Sstevel@tonic-gate 900*7c478bd9Sstevel@tonic-gate bf_index = chunk / cmap->cmap_chunksperbf; 901*7c478bd9Sstevel@tonic-gate 902*7c478bd9Sstevel@tonic-gate /* read buffer from backing file */ 903*7c478bd9Sstevel@tonic-gate error = vn_rdwr(UIO_READ, 904*7c478bd9Sstevel@tonic-gate (sidp->sid_cowinfo->cow_backfile_array)[bf_index], 905*7c478bd9Sstevel@tonic-gate buffer, len, ((chunk % cmap->cmap_chunksperbf) * 906*7c478bd9Sstevel@tonic-gate cmap->cmap_chunksz) + offset, UIO_SYSSPACE, 0, 907*7c478bd9Sstevel@tonic-gate RLIM64_INFINITY, kcred, &resid); 908*7c478bd9Sstevel@tonic-gate } 909*7c478bd9Sstevel@tonic-gate 910*7c478bd9Sstevel@tonic-gate return (error); 911*7c478bd9Sstevel@tonic-gate } 912*7c478bd9Sstevel@tonic-gate 913*7c478bd9Sstevel@tonic-gate /* 914*7c478bd9Sstevel@tonic-gate * snap_print() - snapshot driver print(9E) routine 915*7c478bd9Sstevel@tonic-gate * 916*7c478bd9Sstevel@tonic-gate * prints the device identification string. 
917*7c478bd9Sstevel@tonic-gate */ 918*7c478bd9Sstevel@tonic-gate static int 919*7c478bd9Sstevel@tonic-gate snap_print(dev_t dev, char *str) 920*7c478bd9Sstevel@tonic-gate { 921*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 922*7c478bd9Sstevel@tonic-gate minor_t minor; 923*7c478bd9Sstevel@tonic-gate 924*7c478bd9Sstevel@tonic-gate minor = getminor(dev); 925*7c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 926*7c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 927*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 928*7c478bd9Sstevel@tonic-gate "snap_print: could not find state for snapshot %d.", minor); 929*7c478bd9Sstevel@tonic-gate return (ENXIO); 930*7c478bd9Sstevel@tonic-gate } 931*7c478bd9Sstevel@tonic-gate 932*7c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "snap_print: snapshot %d: %s", minor, str); 933*7c478bd9Sstevel@tonic-gate 934*7c478bd9Sstevel@tonic-gate return (0); 935*7c478bd9Sstevel@tonic-gate } 936*7c478bd9Sstevel@tonic-gate 937*7c478bd9Sstevel@tonic-gate /* 938*7c478bd9Sstevel@tonic-gate * snap_prop_op() - snapshot driver prop_op(9E) routine 939*7c478bd9Sstevel@tonic-gate * 940*7c478bd9Sstevel@tonic-gate * get 32-bit and 64-bit values for size (character driver) and nblocks 941*7c478bd9Sstevel@tonic-gate * (block driver). 
942*7c478bd9Sstevel@tonic-gate */ 943*7c478bd9Sstevel@tonic-gate static int 944*7c478bd9Sstevel@tonic-gate snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 945*7c478bd9Sstevel@tonic-gate int flags, char *name, caddr_t valuep, int *lengthp) 946*7c478bd9Sstevel@tonic-gate { 947*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 948*7c478bd9Sstevel@tonic-gate int length, km_flags; 949*7c478bd9Sstevel@tonic-gate int nblocks, size; 950*7c478bd9Sstevel@tonic-gate uint64_t Size, Nblocks; 951*7c478bd9Sstevel@tonic-gate caddr_t buffer; 952*7c478bd9Sstevel@tonic-gate int minor; 953*7c478bd9Sstevel@tonic-gate dev_t mdev; 954*7c478bd9Sstevel@tonic-gate 955*7c478bd9Sstevel@tonic-gate minor = getminor(dev); 956*7c478bd9Sstevel@tonic-gate length = *lengthp; /* Get callers length */ 957*7c478bd9Sstevel@tonic-gate 958*7c478bd9Sstevel@tonic-gate /* if this is the control device just check for .conf properties */ 959*7c478bd9Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) 960*7c478bd9Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 961*7c478bd9Sstevel@tonic-gate valuep, lengthp)); 962*7c478bd9Sstevel@tonic-gate /* check to see if there is a master device plumbed */ 963*7c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, minor); 964*7c478bd9Sstevel@tonic-gate if (sidpp == NULL || *sidpp == NULL) { 965*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 966*7c478bd9Sstevel@tonic-gate "snap_prop_op: could not find state for " 967*7c478bd9Sstevel@tonic-gate "snapshot %d.", minor); 968*7c478bd9Sstevel@tonic-gate return (DDI_PROP_NOT_FOUND); 969*7c478bd9Sstevel@tonic-gate } 970*7c478bd9Sstevel@tonic-gate 971*7c478bd9Sstevel@tonic-gate if (((*sidpp)->sid_fvp == NULL) || ((*sidpp)->sid_fvp->v_vfsp == NULL)) 972*7c478bd9Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 973*7c478bd9Sstevel@tonic-gate valuep, lengthp)); 974*7c478bd9Sstevel@tonic-gate mdev = (*sidpp)->sid_fvp->v_vfsp->vfs_dev; 975*7c478bd9Sstevel@tonic-gate 
976*7c478bd9Sstevel@tonic-gate /* get size information from the master device. */ 977*7c478bd9Sstevel@tonic-gate 978*7c478bd9Sstevel@tonic-gate if (strcmp(name, "nblocks") == 0) { 979*7c478bd9Sstevel@tonic-gate nblocks = bdev_size(mdev); 980*7c478bd9Sstevel@tonic-gate *lengthp = sizeof (nblocks); /* Set callers length */ 981*7c478bd9Sstevel@tonic-gate } else if (strcmp(name, "Nblocks") == 0) { 982*7c478bd9Sstevel@tonic-gate Nblocks = bdev_Size(mdev); 983*7c478bd9Sstevel@tonic-gate *lengthp = sizeof (Nblocks); /* Set callers length */ 984*7c478bd9Sstevel@tonic-gate } else if (strcmp(name, "size") == 0) { 985*7c478bd9Sstevel@tonic-gate size = cdev_size(mdev); 986*7c478bd9Sstevel@tonic-gate *lengthp = sizeof (size); /* Set callers length */ 987*7c478bd9Sstevel@tonic-gate } else if (strcmp(name, "Size") == 0) { 988*7c478bd9Sstevel@tonic-gate Size = cdev_Size(mdev); 989*7c478bd9Sstevel@tonic-gate *lengthp = sizeof (Size); /* Set callers length */ 990*7c478bd9Sstevel@tonic-gate } else { /* not for us */ 991*7c478bd9Sstevel@tonic-gate return (ddi_prop_op(dev, dip, prop_op, flags, name, 992*7c478bd9Sstevel@tonic-gate valuep, lengthp)); 993*7c478bd9Sstevel@tonic-gate } 994*7c478bd9Sstevel@tonic-gate 995*7c478bd9Sstevel@tonic-gate /* 996*7c478bd9Sstevel@tonic-gate * If length only request, just return the length. 997*7c478bd9Sstevel@tonic-gate */ 998*7c478bd9Sstevel@tonic-gate if (prop_op == PROP_LEN) { 999*7c478bd9Sstevel@tonic-gate return (DDI_PROP_SUCCESS); 1000*7c478bd9Sstevel@tonic-gate } 1001*7c478bd9Sstevel@tonic-gate 1002*7c478bd9Sstevel@tonic-gate /* 1003*7c478bd9Sstevel@tonic-gate * Allocate buffer, if required. Either way, set `buffer' variable. 
1004*7c478bd9Sstevel@tonic-gate */ 1005*7c478bd9Sstevel@tonic-gate switch (prop_op) { 1006*7c478bd9Sstevel@tonic-gate case PROP_LEN_AND_VAL_ALLOC: 1007*7c478bd9Sstevel@tonic-gate 1008*7c478bd9Sstevel@tonic-gate km_flags = KM_NOSLEEP; 1009*7c478bd9Sstevel@tonic-gate 1010*7c478bd9Sstevel@tonic-gate if (flags & DDI_PROP_CANSLEEP) 1011*7c478bd9Sstevel@tonic-gate km_flags = KM_SLEEP; 1012*7c478bd9Sstevel@tonic-gate 1013*7c478bd9Sstevel@tonic-gate buffer = kmem_alloc(*lengthp, km_flags); 1014*7c478bd9Sstevel@tonic-gate if (buffer == NULL) { 1015*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_get_prop: no mem for " 1016*7c478bd9Sstevel@tonic-gate "property %s.", name); 1017*7c478bd9Sstevel@tonic-gate return (DDI_PROP_NO_MEMORY); 1018*7c478bd9Sstevel@tonic-gate } 1019*7c478bd9Sstevel@tonic-gate *(caddr_t *)valuep = buffer; /* Set callers buf ptr */ 1020*7c478bd9Sstevel@tonic-gate break; 1021*7c478bd9Sstevel@tonic-gate 1022*7c478bd9Sstevel@tonic-gate case PROP_LEN_AND_VAL_BUF: 1023*7c478bd9Sstevel@tonic-gate 1024*7c478bd9Sstevel@tonic-gate if (*lengthp > length) 1025*7c478bd9Sstevel@tonic-gate return (DDI_PROP_BUF_TOO_SMALL); 1026*7c478bd9Sstevel@tonic-gate 1027*7c478bd9Sstevel@tonic-gate buffer = valuep; /* get callers buf ptr */ 1028*7c478bd9Sstevel@tonic-gate break; 1029*7c478bd9Sstevel@tonic-gate } 1030*7c478bd9Sstevel@tonic-gate 1031*7c478bd9Sstevel@tonic-gate if (strcmp(name, "nblocks") == 0) { 1032*7c478bd9Sstevel@tonic-gate *((uint_t *)buffer) = nblocks; 1033*7c478bd9Sstevel@tonic-gate } else if (strcmp(name, "Nblocks") == 0) { 1034*7c478bd9Sstevel@tonic-gate *((uint64_t *)buffer) = Nblocks; 1035*7c478bd9Sstevel@tonic-gate } else if (strcmp(name, "size") == 0) { 1036*7c478bd9Sstevel@tonic-gate *((uint_t *)buffer) = size; 1037*7c478bd9Sstevel@tonic-gate } else if (strcmp(name, "Size") == 0) { 1038*7c478bd9Sstevel@tonic-gate *((uint64_t *)buffer) = Size; 1039*7c478bd9Sstevel@tonic-gate } 1040*7c478bd9Sstevel@tonic-gate 1041*7c478bd9Sstevel@tonic-gate return 
(DDI_PROP_SUCCESS); 1042*7c478bd9Sstevel@tonic-gate } 1043*7c478bd9Sstevel@tonic-gate 1044*7c478bd9Sstevel@tonic-gate /* 1045*7c478bd9Sstevel@tonic-gate * snap_ioctl() - snapshot driver ioctl(9E) routine 1046*7c478bd9Sstevel@tonic-gate * 1047*7c478bd9Sstevel@tonic-gate * only applies to the control device. The control device accepts two 1048*7c478bd9Sstevel@tonic-gate * ioctl requests: create a snapshot or delete a snapshot. In either 1049*7c478bd9Sstevel@tonic-gate * case, the vnode for the requested file system is extracted, and the 1050*7c478bd9Sstevel@tonic-gate * request is passed on to the file system via the same ioctl. The file 1051*7c478bd9Sstevel@tonic-gate * system is responsible for doing the things necessary for creating or 1052*7c478bd9Sstevel@tonic-gate * destroying a snapshot, including any file system specific operations 1053*7c478bd9Sstevel@tonic-gate * that must be performed as well as setting up and deleting the snapshot 1054*7c478bd9Sstevel@tonic-gate * state through the fssnap interfaces. 
1055*7c478bd9Sstevel@tonic-gate */ 1056*7c478bd9Sstevel@tonic-gate static int 1057*7c478bd9Sstevel@tonic-gate snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1058*7c478bd9Sstevel@tonic-gate int *rvalp) 1059*7c478bd9Sstevel@tonic-gate { 1060*7c478bd9Sstevel@tonic-gate minor_t minor; 1061*7c478bd9Sstevel@tonic-gate int error = 0; 1062*7c478bd9Sstevel@tonic-gate 1063*7c478bd9Sstevel@tonic-gate minor = getminor(dev); 1064*7c478bd9Sstevel@tonic-gate 1065*7c478bd9Sstevel@tonic-gate if (minor != SNAP_CTL_MINOR) { 1066*7c478bd9Sstevel@tonic-gate return (EINVAL); 1067*7c478bd9Sstevel@tonic-gate } 1068*7c478bd9Sstevel@tonic-gate 1069*7c478bd9Sstevel@tonic-gate switch (cmd) { 1070*7c478bd9Sstevel@tonic-gate case _FIOSNAPSHOTCREATE: 1071*7c478bd9Sstevel@tonic-gate { 1072*7c478bd9Sstevel@tonic-gate struct fiosnapcreate fc; 1073*7c478bd9Sstevel@tonic-gate struct file *fp; 1074*7c478bd9Sstevel@tonic-gate struct vnode *vp; 1075*7c478bd9Sstevel@tonic-gate 1076*7c478bd9Sstevel@tonic-gate if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode)) 1077*7c478bd9Sstevel@tonic-gate return (EFAULT); 1078*7c478bd9Sstevel@tonic-gate 1079*7c478bd9Sstevel@tonic-gate /* get vnode for file system mount point */ 1080*7c478bd9Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == NULL) 1081*7c478bd9Sstevel@tonic-gate return (EBADF); 1082*7c478bd9Sstevel@tonic-gate 1083*7c478bd9Sstevel@tonic-gate ASSERT(fp->f_vnode); 1084*7c478bd9Sstevel@tonic-gate vp = fp->f_vnode; 1085*7c478bd9Sstevel@tonic-gate VN_HOLD(vp); 1086*7c478bd9Sstevel@tonic-gate releasef(fc.rootfiledesc); 1087*7c478bd9Sstevel@tonic-gate 1088*7c478bd9Sstevel@tonic-gate /* pass ioctl request to file system */ 1089*7c478bd9Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 1090*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 1091*7c478bd9Sstevel@tonic-gate break; 1092*7c478bd9Sstevel@tonic-gate } 1093*7c478bd9Sstevel@tonic-gate case _FIOSNAPSHOTCREATE_MULTI: 1094*7c478bd9Sstevel@tonic-gate { 
1095*7c478bd9Sstevel@tonic-gate struct fiosnapcreate_multi fc; 1096*7c478bd9Sstevel@tonic-gate struct file *fp; 1097*7c478bd9Sstevel@tonic-gate struct vnode *vp; 1098*7c478bd9Sstevel@tonic-gate 1099*7c478bd9Sstevel@tonic-gate if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode)) 1100*7c478bd9Sstevel@tonic-gate return (EFAULT); 1101*7c478bd9Sstevel@tonic-gate 1102*7c478bd9Sstevel@tonic-gate /* get vnode for file system mount point */ 1103*7c478bd9Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == NULL) 1104*7c478bd9Sstevel@tonic-gate return (EBADF); 1105*7c478bd9Sstevel@tonic-gate 1106*7c478bd9Sstevel@tonic-gate ASSERT(fp->f_vnode); 1107*7c478bd9Sstevel@tonic-gate vp = fp->f_vnode; 1108*7c478bd9Sstevel@tonic-gate VN_HOLD(vp); 1109*7c478bd9Sstevel@tonic-gate releasef(fc.rootfiledesc); 1110*7c478bd9Sstevel@tonic-gate 1111*7c478bd9Sstevel@tonic-gate /* pass ioctl request to file system */ 1112*7c478bd9Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 1113*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 1114*7c478bd9Sstevel@tonic-gate break; 1115*7c478bd9Sstevel@tonic-gate } 1116*7c478bd9Sstevel@tonic-gate case _FIOSNAPSHOTDELETE: 1117*7c478bd9Sstevel@tonic-gate { 1118*7c478bd9Sstevel@tonic-gate major_t major; 1119*7c478bd9Sstevel@tonic-gate struct fiosnapdelete fc; 1120*7c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = NULL; 1121*7c478bd9Sstevel@tonic-gate snapshot_id_t *sidnextp = NULL; 1122*7c478bd9Sstevel@tonic-gate struct file *fp = NULL; 1123*7c478bd9Sstevel@tonic-gate struct vnode *vp = NULL; 1124*7c478bd9Sstevel@tonic-gate struct vfs *vfsp = NULL; 1125*7c478bd9Sstevel@tonic-gate vfsops_t *vfsops = EIO_vfsops; 1126*7c478bd9Sstevel@tonic-gate 1127*7c478bd9Sstevel@tonic-gate if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode)) 1128*7c478bd9Sstevel@tonic-gate return (EFAULT); 1129*7c478bd9Sstevel@tonic-gate 1130*7c478bd9Sstevel@tonic-gate /* get vnode for file system mount point */ 1131*7c478bd9Sstevel@tonic-gate if ((fp = getf(fc.rootfiledesc)) == 
NULL) 1132*7c478bd9Sstevel@tonic-gate return (EBADF); 1133*7c478bd9Sstevel@tonic-gate 1134*7c478bd9Sstevel@tonic-gate ASSERT(fp->f_vnode); 1135*7c478bd9Sstevel@tonic-gate vp = fp->f_vnode; 1136*7c478bd9Sstevel@tonic-gate VN_HOLD(vp); 1137*7c478bd9Sstevel@tonic-gate releasef(fc.rootfiledesc); 1138*7c478bd9Sstevel@tonic-gate /* 1139*7c478bd9Sstevel@tonic-gate * Test for two formats of delete and set correct minor/vp: 1140*7c478bd9Sstevel@tonic-gate * pseudo device: 1141*7c478bd9Sstevel@tonic-gate * fssnap -d [/dev/fssnap/x] 1142*7c478bd9Sstevel@tonic-gate * or 1143*7c478bd9Sstevel@tonic-gate * mount point: 1144*7c478bd9Sstevel@tonic-gate * fssnap -d [/mntpt] 1145*7c478bd9Sstevel@tonic-gate * Note that minor is verified to be equal to SNAP_CTL_MINOR 1146*7c478bd9Sstevel@tonic-gate * at this point which is an invalid minor number. 1147*7c478bd9Sstevel@tonic-gate */ 1148*7c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 1149*7c478bd9Sstevel@tonic-gate major = ddi_driver_major(fssnap_dip); 1150*7c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 1151*7c478bd9Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidnextp) { 1152*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1153*7c478bd9Sstevel@tonic-gate sidnextp = sidp->sid_next; 1154*7c478bd9Sstevel@tonic-gate /* pseudo device: */ 1155*7c478bd9Sstevel@tonic-gate if (major == getmajor(vp->v_rdev)) { 1156*7c478bd9Sstevel@tonic-gate minor = getminor(vp->v_rdev); 1157*7c478bd9Sstevel@tonic-gate if (sidp->sid_snapnumber == (uint_t)minor && 1158*7c478bd9Sstevel@tonic-gate sidp->sid_fvp) { 1159*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 1160*7c478bd9Sstevel@tonic-gate vp = sidp->sid_fvp; 1161*7c478bd9Sstevel@tonic-gate VN_HOLD(vp); 1162*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1163*7c478bd9Sstevel@tonic-gate break; 1164*7c478bd9Sstevel@tonic-gate } 1165*7c478bd9Sstevel@tonic-gate /* Mount point: */ 1166*7c478bd9Sstevel@tonic-gate } else { 1167*7c478bd9Sstevel@tonic-gate if 
(sidp->sid_fvp == vp) { 1168*7c478bd9Sstevel@tonic-gate minor = sidp->sid_snapnumber; 1169*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1170*7c478bd9Sstevel@tonic-gate break; 1171*7c478bd9Sstevel@tonic-gate } 1172*7c478bd9Sstevel@tonic-gate } 1173*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1174*7c478bd9Sstevel@tonic-gate } 1175*7c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 1176*7c478bd9Sstevel@tonic-gate /* Verify minor got set correctly above */ 1177*7c478bd9Sstevel@tonic-gate if (minor == SNAP_CTL_MINOR) { 1178*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 1179*7c478bd9Sstevel@tonic-gate return (EINVAL); 1180*7c478bd9Sstevel@tonic-gate } 1181*7c478bd9Sstevel@tonic-gate dev = makedevice(major, minor); 1182*7c478bd9Sstevel@tonic-gate /* 1183*7c478bd9Sstevel@tonic-gate * Create dummy vfs entry 1184*7c478bd9Sstevel@tonic-gate * to use as a locking semaphore across the IOCTL 1185*7c478bd9Sstevel@tonic-gate * for mount in progress cases... 1186*7c478bd9Sstevel@tonic-gate */ 1187*7c478bd9Sstevel@tonic-gate vfsp = kmem_alloc(sizeof (vfs_t), KM_SLEEP); 1188*7c478bd9Sstevel@tonic-gate VFS_INIT(vfsp, vfsops, NULL); 1189*7c478bd9Sstevel@tonic-gate vfs_addmip(dev, vfsp); 1190*7c478bd9Sstevel@tonic-gate if ((vfs_devmounting(dev, vfsp)) || 1191*7c478bd9Sstevel@tonic-gate (vfs_devismounted(dev))) { 1192*7c478bd9Sstevel@tonic-gate vfs_delmip(vfsp); 1193*7c478bd9Sstevel@tonic-gate kmem_free(vfsp, sizeof (struct vfs)); 1194*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 1195*7c478bd9Sstevel@tonic-gate return (EBUSY); 1196*7c478bd9Sstevel@tonic-gate } 1197*7c478bd9Sstevel@tonic-gate /* 1198*7c478bd9Sstevel@tonic-gate * Nobody mounted but do not release mount in progress lock 1199*7c478bd9Sstevel@tonic-gate * until IOCTL complete to prohibit a mount sneaking 1200*7c478bd9Sstevel@tonic-gate * in 1201*7c478bd9Sstevel@tonic-gate */ 1202*7c478bd9Sstevel@tonic-gate error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp); 1203*7c478bd9Sstevel@tonic-gate vfs_delmip(vfsp); 
1204*7c478bd9Sstevel@tonic-gate kmem_free(vfsp, sizeof (struct vfs)); 1205*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 1206*7c478bd9Sstevel@tonic-gate break; 1207*7c478bd9Sstevel@tonic-gate } 1208*7c478bd9Sstevel@tonic-gate default: 1209*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: Invalid ioctl cmd %d, minor %d.", 1210*7c478bd9Sstevel@tonic-gate cmd, minor); 1211*7c478bd9Sstevel@tonic-gate return (EINVAL); 1212*7c478bd9Sstevel@tonic-gate } 1213*7c478bd9Sstevel@tonic-gate 1214*7c478bd9Sstevel@tonic-gate return (error); 1215*7c478bd9Sstevel@tonic-gate } 1216*7c478bd9Sstevel@tonic-gate 1217*7c478bd9Sstevel@tonic-gate 1218*7c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 1219*7c478bd9Sstevel@tonic-gate 1220*7c478bd9Sstevel@tonic-gate /* 1221*7c478bd9Sstevel@tonic-gate * Translation Table Routines 1222*7c478bd9Sstevel@tonic-gate * 1223*7c478bd9Sstevel@tonic-gate * These support routines implement a simple doubly linked list 1224*7c478bd9Sstevel@tonic-gate * to keep track of chunks that are currently in memory. The maximum 1225*7c478bd9Sstevel@tonic-gate * size of the list is determined by the fssnap_max_mem_chunks variable. 1226*7c478bd9Sstevel@tonic-gate * The cmap_rwlock is used to protect the linkage of the list. 1227*7c478bd9Sstevel@tonic-gate */ 1228*7c478bd9Sstevel@tonic-gate 1229*7c478bd9Sstevel@tonic-gate /* 1230*7c478bd9Sstevel@tonic-gate * transtbl_add() - add a node to the translation table 1231*7c478bd9Sstevel@tonic-gate * 1232*7c478bd9Sstevel@tonic-gate * allocates a new node and points it at the buffer passed in. The node 1233*7c478bd9Sstevel@tonic-gate * is added to the beginning of the doubly linked list and the head of 1234*7c478bd9Sstevel@tonic-gate * the list is moved. The cmap_rwlock must be held as a writer through 1235*7c478bd9Sstevel@tonic-gate * this operation. 
1236*7c478bd9Sstevel@tonic-gate */ 1237*7c478bd9Sstevel@tonic-gate static cow_map_node_t * 1238*7c478bd9Sstevel@tonic-gate transtbl_add(cow_map_t *cmap, chunknumber_t chunk, caddr_t buf) 1239*7c478bd9Sstevel@tonic-gate { 1240*7c478bd9Sstevel@tonic-gate cow_map_node_t *cmnode; 1241*7c478bd9Sstevel@tonic-gate 1242*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 1243*7c478bd9Sstevel@tonic-gate 1244*7c478bd9Sstevel@tonic-gate cmnode = kmem_alloc(sizeof (cow_map_node_t), KM_SLEEP); 1245*7c478bd9Sstevel@tonic-gate 1246*7c478bd9Sstevel@tonic-gate /* 1247*7c478bd9Sstevel@tonic-gate * insert new translations at the beginning so cmn_table is always 1248*7c478bd9Sstevel@tonic-gate * the first node. 1249*7c478bd9Sstevel@tonic-gate */ 1250*7c478bd9Sstevel@tonic-gate cmnode->cmn_chunk = chunk; 1251*7c478bd9Sstevel@tonic-gate cmnode->cmn_buf = buf; 1252*7c478bd9Sstevel@tonic-gate cmnode->cmn_prev = NULL; 1253*7c478bd9Sstevel@tonic-gate cmnode->cmn_next = cmap->cmap_table; 1254*7c478bd9Sstevel@tonic-gate if (cmnode->cmn_next) 1255*7c478bd9Sstevel@tonic-gate cmnode->cmn_next->cmn_prev = cmnode; 1256*7c478bd9Sstevel@tonic-gate cmap->cmap_table = cmnode; 1257*7c478bd9Sstevel@tonic-gate 1258*7c478bd9Sstevel@tonic-gate return (cmnode); 1259*7c478bd9Sstevel@tonic-gate } 1260*7c478bd9Sstevel@tonic-gate 1261*7c478bd9Sstevel@tonic-gate /* 1262*7c478bd9Sstevel@tonic-gate * transtbl_get() - look up a node in the translation table 1263*7c478bd9Sstevel@tonic-gate * 1264*7c478bd9Sstevel@tonic-gate * called by the snapshot driver to find data that has been translated. 1265*7c478bd9Sstevel@tonic-gate * The lookup is done by the chunk number, and the node is returned. 1266*7c478bd9Sstevel@tonic-gate * If the node was not found, NULL is returned. 
1267*7c478bd9Sstevel@tonic-gate */ 1268*7c478bd9Sstevel@tonic-gate static cow_map_node_t * 1269*7c478bd9Sstevel@tonic-gate transtbl_get(cow_map_t *cmap, chunknumber_t chunk) 1270*7c478bd9Sstevel@tonic-gate { 1271*7c478bd9Sstevel@tonic-gate cow_map_node_t *cmn; 1272*7c478bd9Sstevel@tonic-gate 1273*7c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&cmap->cmap_rwlock)); 1274*7c478bd9Sstevel@tonic-gate ASSERT(cmap); 1275*7c478bd9Sstevel@tonic-gate 1276*7c478bd9Sstevel@tonic-gate /* search the translation table */ 1277*7c478bd9Sstevel@tonic-gate for (cmn = cmap->cmap_table; cmn != NULL; cmn = cmn->cmn_next) { 1278*7c478bd9Sstevel@tonic-gate if (cmn->cmn_chunk == chunk) 1279*7c478bd9Sstevel@tonic-gate return (cmn); 1280*7c478bd9Sstevel@tonic-gate } 1281*7c478bd9Sstevel@tonic-gate 1282*7c478bd9Sstevel@tonic-gate /* not found */ 1283*7c478bd9Sstevel@tonic-gate return (NULL); 1284*7c478bd9Sstevel@tonic-gate } 1285*7c478bd9Sstevel@tonic-gate 1286*7c478bd9Sstevel@tonic-gate /* 1287*7c478bd9Sstevel@tonic-gate * transtbl_delete() - delete a node from the translation table 1288*7c478bd9Sstevel@tonic-gate * 1289*7c478bd9Sstevel@tonic-gate * called when a node's data has been written out to disk. The 1290*7c478bd9Sstevel@tonic-gate * cmap_rwlock must be held as a writer for this operation. If the node 1291*7c478bd9Sstevel@tonic-gate * being deleted is the head of the list, then the head is moved to the 1292*7c478bd9Sstevel@tonic-gate * next node. Both the node's data and the node itself are freed. 
1293*7c478bd9Sstevel@tonic-gate */ 1294*7c478bd9Sstevel@tonic-gate static void 1295*7c478bd9Sstevel@tonic-gate transtbl_delete(cow_map_t *cmap, cow_map_node_t *cmn) 1296*7c478bd9Sstevel@tonic-gate { 1297*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 1298*7c478bd9Sstevel@tonic-gate ASSERT(cmn); 1299*7c478bd9Sstevel@tonic-gate ASSERT(cmap->cmap_table); 1300*7c478bd9Sstevel@tonic-gate 1301*7c478bd9Sstevel@tonic-gate /* if the head of the list is being deleted, then move the head up */ 1302*7c478bd9Sstevel@tonic-gate if (cmap->cmap_table == cmn) { 1303*7c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_prev == NULL); 1304*7c478bd9Sstevel@tonic-gate cmap->cmap_table = cmn->cmn_next; 1305*7c478bd9Sstevel@tonic-gate } 1306*7c478bd9Sstevel@tonic-gate 1307*7c478bd9Sstevel@tonic-gate 1308*7c478bd9Sstevel@tonic-gate /* make previous node's next pointer skip over current node */ 1309*7c478bd9Sstevel@tonic-gate if (cmn->cmn_prev != NULL) { 1310*7c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_prev->cmn_next == cmn); 1311*7c478bd9Sstevel@tonic-gate cmn->cmn_prev->cmn_next = cmn->cmn_next; 1312*7c478bd9Sstevel@tonic-gate } 1313*7c478bd9Sstevel@tonic-gate 1314*7c478bd9Sstevel@tonic-gate /* make next node's previous pointer skip over current node */ 1315*7c478bd9Sstevel@tonic-gate if (cmn->cmn_next != NULL) { 1316*7c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_next->cmn_prev == cmn); 1317*7c478bd9Sstevel@tonic-gate cmn->cmn_next->cmn_prev = cmn->cmn_prev; 1318*7c478bd9Sstevel@tonic-gate } 1319*7c478bd9Sstevel@tonic-gate 1320*7c478bd9Sstevel@tonic-gate /* free the data and the node */ 1321*7c478bd9Sstevel@tonic-gate ASSERT(cmn->cmn_buf); 1322*7c478bd9Sstevel@tonic-gate kmem_free(cmn->cmn_buf, cmap->cmap_chunksz); 1323*7c478bd9Sstevel@tonic-gate kmem_free(cmn, sizeof (cow_map_node_t)); 1324*7c478bd9Sstevel@tonic-gate } 1325*7c478bd9Sstevel@tonic-gate 1326*7c478bd9Sstevel@tonic-gate /* 1327*7c478bd9Sstevel@tonic-gate * transtbl_free() - free the entire translation table 
1328*7c478bd9Sstevel@tonic-gate * 1329*7c478bd9Sstevel@tonic-gate * called when the snapshot is deleted. This frees all of the nodes in 1330*7c478bd9Sstevel@tonic-gate * the translation table (but not the bitmaps). 1331*7c478bd9Sstevel@tonic-gate */ 1332*7c478bd9Sstevel@tonic-gate static void 1333*7c478bd9Sstevel@tonic-gate transtbl_free(cow_map_t *cmap) 1334*7c478bd9Sstevel@tonic-gate { 1335*7c478bd9Sstevel@tonic-gate cow_map_node_t *curnode; 1336*7c478bd9Sstevel@tonic-gate cow_map_node_t *tempnode; 1337*7c478bd9Sstevel@tonic-gate 1338*7c478bd9Sstevel@tonic-gate for (curnode = cmap->cmap_table; curnode != NULL; curnode = tempnode) { 1339*7c478bd9Sstevel@tonic-gate tempnode = curnode->cmn_next; 1340*7c478bd9Sstevel@tonic-gate 1341*7c478bd9Sstevel@tonic-gate kmem_free(curnode->cmn_buf, cmap->cmap_chunksz); 1342*7c478bd9Sstevel@tonic-gate kmem_free(curnode, sizeof (cow_map_node_t)); 1343*7c478bd9Sstevel@tonic-gate } 1344*7c478bd9Sstevel@tonic-gate } 1345*7c478bd9Sstevel@tonic-gate 1346*7c478bd9Sstevel@tonic-gate 1347*7c478bd9Sstevel@tonic-gate /* ************************************************************************ */ 1348*7c478bd9Sstevel@tonic-gate 1349*7c478bd9Sstevel@tonic-gate /* 1350*7c478bd9Sstevel@tonic-gate * Interface Implementation Routines 1351*7c478bd9Sstevel@tonic-gate * 1352*7c478bd9Sstevel@tonic-gate * The following functions implement snapshot interface routines that are 1353*7c478bd9Sstevel@tonic-gate * called by the file system to create, delete, and use a snapshot. The 1354*7c478bd9Sstevel@tonic-gate * interfaces are defined in fssnap_if.c and are filled in by this driver 1355*7c478bd9Sstevel@tonic-gate * when it is loaded. This technique allows the file system to depend on 1356*7c478bd9Sstevel@tonic-gate * the interface module without having to load the full implementation and 1357*7c478bd9Sstevel@tonic-gate * snapshot device drivers. 
*/

/*
 * fssnap_strategy_impl() - strategy routine called by the file system
 *
 * called by the file system to handle copy-on-write when necessary. All
 * reads and writes that the file system performs should go through this
 * function. If the file system calls the underlying device's strategy
 * routine without going through fssnap_strategy() (eg. by calling
 * bdev_strategy()), the snapshot may not be consistent.
 *
 * This function starts by doing significant sanity checking to ensure
 * the snapshot was not deleted out from under it or deleted and then
 * recreated. To do this, it checks the actual pointer passed into it
 * (ie. the handle held by the file system). NOTE that the parameter is
 * a POINTER TO A POINTER to the snapshot id. Once the snapshot id is
 * locked, it knows things are ok and that this snapshot is really for
 * this file system.
 *
 * If the request is a write, fssnap_translate() is called to determine
 * whether a copy-on-write is required. If it is a read, the read is
 * simply passed on to the underlying device.
1380*7c478bd9Sstevel@tonic-gate */ 1381*7c478bd9Sstevel@tonic-gate static void 1382*7c478bd9Sstevel@tonic-gate fssnap_strategy_impl(void *snapshot_id, buf_t *bp) 1383*7c478bd9Sstevel@tonic-gate { 1384*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp; 1385*7c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 1386*7c478bd9Sstevel@tonic-gate int error; 1387*7c478bd9Sstevel@tonic-gate 1388*7c478bd9Sstevel@tonic-gate /* read requests are always passed through */ 1389*7c478bd9Sstevel@tonic-gate if (bp->b_flags & B_READ) { 1390*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 1391*7c478bd9Sstevel@tonic-gate return; 1392*7c478bd9Sstevel@tonic-gate } 1393*7c478bd9Sstevel@tonic-gate 1394*7c478bd9Sstevel@tonic-gate /* 1395*7c478bd9Sstevel@tonic-gate * Because we were not able to take the snapshot read lock BEFORE 1396*7c478bd9Sstevel@tonic-gate * checking for a snapshot back in the file system, things may have 1397*7c478bd9Sstevel@tonic-gate * drastically changed out from under us. For instance, the snapshot 1398*7c478bd9Sstevel@tonic-gate * may have been deleted, deleted and recreated, or worse yet, deleted 1399*7c478bd9Sstevel@tonic-gate * for this file system but now the snapshot number is in use by another 1400*7c478bd9Sstevel@tonic-gate * file system. 1401*7c478bd9Sstevel@tonic-gate * 1402*7c478bd9Sstevel@tonic-gate * Having a pointer to the file system's snapshot id pointer allows us 1403*7c478bd9Sstevel@tonic-gate * to sanity check most of this, though it assumes the file system is 1404*7c478bd9Sstevel@tonic-gate * keeping track of a pointer to the snapshot_id somewhere. 1405*7c478bd9Sstevel@tonic-gate */ 1406*7c478bd9Sstevel@tonic-gate sidpp = (struct snapshot_id **)snapshot_id; 1407*7c478bd9Sstevel@tonic-gate sidp = *sidpp; 1408*7c478bd9Sstevel@tonic-gate 1409*7c478bd9Sstevel@tonic-gate /* 1410*7c478bd9Sstevel@tonic-gate * if this file system's snapshot was disabled, just pass the 1411*7c478bd9Sstevel@tonic-gate * request through. 
1412*7c478bd9Sstevel@tonic-gate */ 1413*7c478bd9Sstevel@tonic-gate if (sidp == NULL) { 1414*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 1415*7c478bd9Sstevel@tonic-gate return; 1416*7c478bd9Sstevel@tonic-gate } 1417*7c478bd9Sstevel@tonic-gate 1418*7c478bd9Sstevel@tonic-gate /* 1419*7c478bd9Sstevel@tonic-gate * Once we have the reader lock the snapshot will not magically go 1420*7c478bd9Sstevel@tonic-gate * away. But things may have changed on us before this so double check. 1421*7c478bd9Sstevel@tonic-gate */ 1422*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1423*7c478bd9Sstevel@tonic-gate 1424*7c478bd9Sstevel@tonic-gate /* 1425*7c478bd9Sstevel@tonic-gate * if an error was founds somewhere the DELETE flag will be 1426*7c478bd9Sstevel@tonic-gate * set to indicate the snapshot should be deleted and no new 1427*7c478bd9Sstevel@tonic-gate * translations should occur. 1428*7c478bd9Sstevel@tonic-gate */ 1429*7c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_DELETE) { 1430*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1431*7c478bd9Sstevel@tonic-gate (void) fssnap_delete_impl(sidpp); 1432*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 1433*7c478bd9Sstevel@tonic-gate return; 1434*7c478bd9Sstevel@tonic-gate } 1435*7c478bd9Sstevel@tonic-gate 1436*7c478bd9Sstevel@tonic-gate /* 1437*7c478bd9Sstevel@tonic-gate * If the file system is no longer pointing to the snapshot we were 1438*7c478bd9Sstevel@tonic-gate * called with, then it should not attempt to translate this buffer as 1439*7c478bd9Sstevel@tonic-gate * it may be going to a snapshot for a different file system. 1440*7c478bd9Sstevel@tonic-gate * Even if the file system snapshot pointer is still the same, the 1441*7c478bd9Sstevel@tonic-gate * snapshot may have been disabled before we got the reader lock. 
1442*7c478bd9Sstevel@tonic-gate */ 1443*7c478bd9Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 1444*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1445*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 1446*7c478bd9Sstevel@tonic-gate return; 1447*7c478bd9Sstevel@tonic-gate } 1448*7c478bd9Sstevel@tonic-gate 1449*7c478bd9Sstevel@tonic-gate /* 1450*7c478bd9Sstevel@tonic-gate * At this point we're sure the snapshot will not go away while the 1451*7c478bd9Sstevel@tonic-gate * reader lock is held, and we are reasonably certain that we are 1452*7c478bd9Sstevel@tonic-gate * writing to the correct snapshot. 1453*7c478bd9Sstevel@tonic-gate */ 1454*7c478bd9Sstevel@tonic-gate if ((error = fssnap_translate(sidpp, bp)) != 0) { 1455*7c478bd9Sstevel@tonic-gate /* 1456*7c478bd9Sstevel@tonic-gate * fssnap_translate can release the reader lock if it 1457*7c478bd9Sstevel@tonic-gate * has to wait for a semaphore. In this case it is possible 1458*7c478bd9Sstevel@tonic-gate * for the snapshot to be deleted in this time frame. If this 1459*7c478bd9Sstevel@tonic-gate * happens just sent the buf thru to the filesystems device. 1460*7c478bd9Sstevel@tonic-gate */ 1461*7c478bd9Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 1462*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1463*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 1464*7c478bd9Sstevel@tonic-gate return; 1465*7c478bd9Sstevel@tonic-gate } 1466*7c478bd9Sstevel@tonic-gate bioerror(bp, error); 1467*7c478bd9Sstevel@tonic-gate biodone(bp); 1468*7c478bd9Sstevel@tonic-gate } 1469*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1470*7c478bd9Sstevel@tonic-gate } 1471*7c478bd9Sstevel@tonic-gate 1472*7c478bd9Sstevel@tonic-gate /* 1473*7c478bd9Sstevel@tonic-gate * fssnap_translate() - helper function for fssnap_strategy() 1474*7c478bd9Sstevel@tonic-gate * 1475*7c478bd9Sstevel@tonic-gate * performs the actual copy-on-write for write requests, if required. 
1476*7c478bd9Sstevel@tonic-gate * This function does the real work of the file system side of things. 1477*7c478bd9Sstevel@tonic-gate * 1478*7c478bd9Sstevel@tonic-gate * It first checks the candidate bitmap to quickly determine whether any 1479*7c478bd9Sstevel@tonic-gate * action is necessary. If the candidate bitmap indicates the chunk was 1480*7c478bd9Sstevel@tonic-gate * allocated when the snapshot was created, then it checks to see whether 1481*7c478bd9Sstevel@tonic-gate * a translation already exists. If a translation already exists then no 1482*7c478bd9Sstevel@tonic-gate * action is required. If the chunk is a candidate for copy-on-write, 1483*7c478bd9Sstevel@tonic-gate * and a translation does not already exist, then the chunk is read in 1484*7c478bd9Sstevel@tonic-gate * and a node is added to the translation table. 1485*7c478bd9Sstevel@tonic-gate * 1486*7c478bd9Sstevel@tonic-gate * Once all of the chunks in the request range have been copied (if they 1487*7c478bd9Sstevel@tonic-gate * needed to be), then the original request can be satisfied and the old 1488*7c478bd9Sstevel@tonic-gate * data can be overwritten. 
1489*7c478bd9Sstevel@tonic-gate */ 1490*7c478bd9Sstevel@tonic-gate static int 1491*7c478bd9Sstevel@tonic-gate fssnap_translate(struct snapshot_id **sidpp, struct buf *wbp) 1492*7c478bd9Sstevel@tonic-gate { 1493*7c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = *sidpp; 1494*7c478bd9Sstevel@tonic-gate struct buf *oldbp; /* buffer to store old data in */ 1495*7c478bd9Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 1496*7c478bd9Sstevel@tonic-gate cow_map_t *cmap = &cowp->cow_map; 1497*7c478bd9Sstevel@tonic-gate cow_map_node_t *cmn; 1498*7c478bd9Sstevel@tonic-gate chunknumber_t cowchunk, startchunk, endchunk; 1499*7c478bd9Sstevel@tonic-gate int error; 1500*7c478bd9Sstevel@tonic-gate int throttle_write = 0; 1501*7c478bd9Sstevel@tonic-gate 1502*7c478bd9Sstevel@tonic-gate /* make sure the snapshot is active */ 1503*7c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&sidp->sid_rwlock)); 1504*7c478bd9Sstevel@tonic-gate 1505*7c478bd9Sstevel@tonic-gate startchunk = dbtocowchunk(cmap, wbp->b_lblkno); 1506*7c478bd9Sstevel@tonic-gate endchunk = dbtocowchunk(cmap, wbp->b_lblkno + 1507*7c478bd9Sstevel@tonic-gate ((wbp->b_bcount-1) >> DEV_BSHIFT)); 1508*7c478bd9Sstevel@tonic-gate 1509*7c478bd9Sstevel@tonic-gate /* 1510*7c478bd9Sstevel@tonic-gate * Do not throttle the writes of the fssnap taskq thread and 1511*7c478bd9Sstevel@tonic-gate * the log roll (trans_roll) thread. Furthermore the writes to 1512*7c478bd9Sstevel@tonic-gate * the on-disk log are also not subject to throttling. 1513*7c478bd9Sstevel@tonic-gate * The fssnap_write_taskq thread's write can block on the throttling 1514*7c478bd9Sstevel@tonic-gate * semaphore which leads to self-deadlock as this same thread 1515*7c478bd9Sstevel@tonic-gate * releases the throttling semaphore after completing the IO. 
1516*7c478bd9Sstevel@tonic-gate * If the trans_roll thread's write is throttled then we can deadlock 1517*7c478bd9Sstevel@tonic-gate * because the fssnap_taskq_thread which releases the throttling 1518*7c478bd9Sstevel@tonic-gate * semaphore can block waiting for log space which can only be 1519*7c478bd9Sstevel@tonic-gate * released by the trans_roll thread. 1520*7c478bd9Sstevel@tonic-gate */ 1521*7c478bd9Sstevel@tonic-gate 1522*7c478bd9Sstevel@tonic-gate throttle_write = !(taskq_member(cowp->cow_taskq, curthread) || 1523*7c478bd9Sstevel@tonic-gate tsd_get(bypass_snapshot_throttle_key)); 1524*7c478bd9Sstevel@tonic-gate 1525*7c478bd9Sstevel@tonic-gate /* 1526*7c478bd9Sstevel@tonic-gate * Iterate through all chunks covered by this write and perform the 1527*7c478bd9Sstevel@tonic-gate * copy-aside if necessary. Once all chunks have been safely 1528*7c478bd9Sstevel@tonic-gate * stowed away, the new data may be written in a single sweep. 1529*7c478bd9Sstevel@tonic-gate * 1530*7c478bd9Sstevel@tonic-gate * For each chunk in the range, the following sequence is performed: 1531*7c478bd9Sstevel@tonic-gate * - Is the chunk a candidate for translation? 1532*7c478bd9Sstevel@tonic-gate * o If not, then no translation is necessary, continue 1533*7c478bd9Sstevel@tonic-gate * - If it is a candidate, then does it already have a translation? 1534*7c478bd9Sstevel@tonic-gate * o If so, then no translation is necessary, continue 1535*7c478bd9Sstevel@tonic-gate * - If it is a candidate, but does not yet have a translation, 1536*7c478bd9Sstevel@tonic-gate * then read the old data and schedule an asynchronous taskq 1537*7c478bd9Sstevel@tonic-gate * to write the old data to the backing file. 1538*7c478bd9Sstevel@tonic-gate * 1539*7c478bd9Sstevel@tonic-gate * Once this has been performed over the entire range of chunks, then 1540*7c478bd9Sstevel@tonic-gate * it is safe to overwrite the data that is there. 
1541*7c478bd9Sstevel@tonic-gate * 1542*7c478bd9Sstevel@tonic-gate * Note that no lock is required to check the candidate bitmap because 1543*7c478bd9Sstevel@tonic-gate * it never changes once the snapshot is created. The reader lock is 1544*7c478bd9Sstevel@tonic-gate * taken to check the hastrans bitmap since it may change. If it 1545*7c478bd9Sstevel@tonic-gate * turns out a copy is required, then the lock is upgraded to a 1546*7c478bd9Sstevel@tonic-gate * writer, and the bitmap is re-checked as it may have changed while 1547*7c478bd9Sstevel@tonic-gate * the lock was released. Finally, the write lock is held while 1548*7c478bd9Sstevel@tonic-gate * reading the old data to make sure it is not translated out from 1549*7c478bd9Sstevel@tonic-gate * under us. 1550*7c478bd9Sstevel@tonic-gate * 1551*7c478bd9Sstevel@tonic-gate * This locking mechanism should be sufficient to handle multiple 1552*7c478bd9Sstevel@tonic-gate * threads writing to overlapping chunks simultaneously. 1553*7c478bd9Sstevel@tonic-gate */ 1554*7c478bd9Sstevel@tonic-gate for (cowchunk = startchunk; cowchunk <= endchunk; cowchunk++) { 1555*7c478bd9Sstevel@tonic-gate /* 1556*7c478bd9Sstevel@tonic-gate * If the cowchunk is outside of the range of our 1557*7c478bd9Sstevel@tonic-gate * candidate maps, then simply break out of the 1558*7c478bd9Sstevel@tonic-gate * loop and pass the I/O through to bdev_strategy. 1559*7c478bd9Sstevel@tonic-gate * This would occur if the file system has grown 1560*7c478bd9Sstevel@tonic-gate * larger since the snapshot was taken. 1561*7c478bd9Sstevel@tonic-gate */ 1562*7c478bd9Sstevel@tonic-gate if (cowchunk >= (cmap->cmap_bmsize * NBBY)) 1563*7c478bd9Sstevel@tonic-gate break; 1564*7c478bd9Sstevel@tonic-gate 1565*7c478bd9Sstevel@tonic-gate /* 1566*7c478bd9Sstevel@tonic-gate * If no disk blocks were allocated in this chunk when the 1567*7c478bd9Sstevel@tonic-gate * snapshot was created then no copy-on-write will be 1568*7c478bd9Sstevel@tonic-gate * required. 
Since this bitmap is read-only no locks are 1569*7c478bd9Sstevel@tonic-gate * necessary. 1570*7c478bd9Sstevel@tonic-gate */ 1571*7c478bd9Sstevel@tonic-gate if (isclr(cmap->cmap_candidate, cowchunk)) { 1572*7c478bd9Sstevel@tonic-gate continue; 1573*7c478bd9Sstevel@tonic-gate } 1574*7c478bd9Sstevel@tonic-gate 1575*7c478bd9Sstevel@tonic-gate /* 1576*7c478bd9Sstevel@tonic-gate * If a translation already exists, the data can be written 1577*7c478bd9Sstevel@tonic-gate * through since the old data has already been saved off. 1578*7c478bd9Sstevel@tonic-gate */ 1579*7c478bd9Sstevel@tonic-gate if (isset(cmap->cmap_hastrans, cowchunk)) { 1580*7c478bd9Sstevel@tonic-gate continue; 1581*7c478bd9Sstevel@tonic-gate } 1582*7c478bd9Sstevel@tonic-gate 1583*7c478bd9Sstevel@tonic-gate 1584*7c478bd9Sstevel@tonic-gate /* 1585*7c478bd9Sstevel@tonic-gate * Throttle translations if there are too many outstanding 1586*7c478bd9Sstevel@tonic-gate * chunks in memory. The semaphore is sema_v'd by the taskq. 1587*7c478bd9Sstevel@tonic-gate * 1588*7c478bd9Sstevel@tonic-gate * You can't keep the sid_rwlock if you would go to sleep. 1589*7c478bd9Sstevel@tonic-gate * This will result in deadlock when someone tries to delete 1590*7c478bd9Sstevel@tonic-gate * the snapshot (wants the sid_rwlock as a writer, but can't 1591*7c478bd9Sstevel@tonic-gate * get it). 
1592*7c478bd9Sstevel@tonic-gate */ 1593*7c478bd9Sstevel@tonic-gate if (throttle_write) { 1594*7c478bd9Sstevel@tonic-gate if (sema_tryp(&cmap->cmap_throttle_sem) == 0) { 1595*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1596*7c478bd9Sstevel@tonic-gate atomic_add_32(&cmap->cmap_waiters, 1); 1597*7c478bd9Sstevel@tonic-gate sema_p(&cmap->cmap_throttle_sem); 1598*7c478bd9Sstevel@tonic-gate atomic_add_32(&cmap->cmap_waiters, -1); 1599*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1600*7c478bd9Sstevel@tonic-gate 1601*7c478bd9Sstevel@tonic-gate /* 1602*7c478bd9Sstevel@tonic-gate * Now since we released the sid_rwlock the state may 1603*7c478bd9Sstevel@tonic-gate * have transitioned underneath us. so check that again. 1604*7c478bd9Sstevel@tonic-gate */ 1605*7c478bd9Sstevel@tonic-gate if (sidp != *sidpp || SID_INACTIVE(sidp)) { 1606*7c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1607*7c478bd9Sstevel@tonic-gate return (ENXIO); 1608*7c478bd9Sstevel@tonic-gate } 1609*7c478bd9Sstevel@tonic-gate } 1610*7c478bd9Sstevel@tonic-gate } 1611*7c478bd9Sstevel@tonic-gate 1612*7c478bd9Sstevel@tonic-gate /* 1613*7c478bd9Sstevel@tonic-gate * Acquire the lock as a writer and check to see if a 1614*7c478bd9Sstevel@tonic-gate * translation has been added in the meantime. 1615*7c478bd9Sstevel@tonic-gate */ 1616*7c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 1617*7c478bd9Sstevel@tonic-gate if (isset(cmap->cmap_hastrans, cowchunk)) { 1618*7c478bd9Sstevel@tonic-gate if (throttle_write) 1619*7c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1620*7c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 1621*7c478bd9Sstevel@tonic-gate continue; /* go to the next chunk */ 1622*7c478bd9Sstevel@tonic-gate } 1623*7c478bd9Sstevel@tonic-gate 1624*7c478bd9Sstevel@tonic-gate /* 1625*7c478bd9Sstevel@tonic-gate * read a full chunk of data from the requested offset rounded 1626*7c478bd9Sstevel@tonic-gate * down to the nearest chunk size. 
1627*7c478bd9Sstevel@tonic-gate */ 1628*7c478bd9Sstevel@tonic-gate oldbp = getrbuf(KM_SLEEP); 1629*7c478bd9Sstevel@tonic-gate oldbp->b_lblkno = cowchunktodb(cmap, cowchunk); 1630*7c478bd9Sstevel@tonic-gate oldbp->b_edev = wbp->b_edev; 1631*7c478bd9Sstevel@tonic-gate oldbp->b_bcount = cmap->cmap_chunksz; 1632*7c478bd9Sstevel@tonic-gate oldbp->b_bufsize = cmap->cmap_chunksz; 1633*7c478bd9Sstevel@tonic-gate oldbp->b_iodone = NULL; 1634*7c478bd9Sstevel@tonic-gate oldbp->b_proc = NULL; 1635*7c478bd9Sstevel@tonic-gate oldbp->b_flags = B_READ; 1636*7c478bd9Sstevel@tonic-gate oldbp->b_un.b_addr = kmem_alloc(cmap->cmap_chunksz, KM_SLEEP); 1637*7c478bd9Sstevel@tonic-gate 1638*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(oldbp); 1639*7c478bd9Sstevel@tonic-gate (void) biowait(oldbp); 1640*7c478bd9Sstevel@tonic-gate 1641*7c478bd9Sstevel@tonic-gate /* 1642*7c478bd9Sstevel@tonic-gate * It's ok to bail in the middle of translating the range 1643*7c478bd9Sstevel@tonic-gate * because the extra copy-asides will not hurt anything 1644*7c478bd9Sstevel@tonic-gate * (except by using extra space in the backing store). 
1645*7c478bd9Sstevel@tonic-gate */ 1646*7c478bd9Sstevel@tonic-gate if ((error = geterror(oldbp)) != 0) { 1647*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_translate: error reading " 1648*7c478bd9Sstevel@tonic-gate "old data for snapshot %d, chunk %llu, disk block " 1649*7c478bd9Sstevel@tonic-gate "%lld, size %lu, error %d.", sidp->sid_snapnumber, 1650*7c478bd9Sstevel@tonic-gate cowchunk, oldbp->b_lblkno, oldbp->b_bcount, error); 1651*7c478bd9Sstevel@tonic-gate kmem_free(oldbp->b_un.b_addr, cmap->cmap_chunksz); 1652*7c478bd9Sstevel@tonic-gate freerbuf(oldbp); 1653*7c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 1654*7c478bd9Sstevel@tonic-gate if (throttle_write) 1655*7c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1656*7c478bd9Sstevel@tonic-gate return (error); 1657*7c478bd9Sstevel@tonic-gate } 1658*7c478bd9Sstevel@tonic-gate 1659*7c478bd9Sstevel@tonic-gate /* 1660*7c478bd9Sstevel@tonic-gate * add the node to the translation table and save a reference 1661*7c478bd9Sstevel@tonic-gate * to pass to the taskq for writing out to the backing file 1662*7c478bd9Sstevel@tonic-gate */ 1663*7c478bd9Sstevel@tonic-gate cmn = transtbl_add(cmap, cowchunk, oldbp->b_un.b_addr); 1664*7c478bd9Sstevel@tonic-gate freerbuf(oldbp); 1665*7c478bd9Sstevel@tonic-gate 1666*7c478bd9Sstevel@tonic-gate /* 1667*7c478bd9Sstevel@tonic-gate * Add a reference to the snapshot id so the lower level 1668*7c478bd9Sstevel@tonic-gate * processing (ie. the taskq) can get back to the state 1669*7c478bd9Sstevel@tonic-gate * information. 
1670*7c478bd9Sstevel@tonic-gate */ 1671*7c478bd9Sstevel@tonic-gate cmn->cmn_sid = sidp; 1672*7c478bd9Sstevel@tonic-gate cmn->release_sem = throttle_write; 1673*7c478bd9Sstevel@tonic-gate setbit(cmap->cmap_hastrans, cowchunk); 1674*7c478bd9Sstevel@tonic-gate 1675*7c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 1676*7c478bd9Sstevel@tonic-gate 1677*7c478bd9Sstevel@tonic-gate /* 1678*7c478bd9Sstevel@tonic-gate * schedule the asynchronous write to the backing file 1679*7c478bd9Sstevel@tonic-gate */ 1680*7c478bd9Sstevel@tonic-gate if (cowp->cow_backfile_array != NULL) 1681*7c478bd9Sstevel@tonic-gate (void) taskq_dispatch(cowp->cow_taskq, 1682*7c478bd9Sstevel@tonic-gate fssnap_write_taskq, cmn, TQ_SLEEP); 1683*7c478bd9Sstevel@tonic-gate } 1684*7c478bd9Sstevel@tonic-gate 1685*7c478bd9Sstevel@tonic-gate /* 1686*7c478bd9Sstevel@tonic-gate * Write new data in place of the old data. At this point all of the 1687*7c478bd9Sstevel@tonic-gate * chunks touched by this write have been copied aside and so the new 1688*7c478bd9Sstevel@tonic-gate * data can be written out all at once. 1689*7c478bd9Sstevel@tonic-gate */ 1690*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(wbp); 1691*7c478bd9Sstevel@tonic-gate 1692*7c478bd9Sstevel@tonic-gate return (0); 1693*7c478bd9Sstevel@tonic-gate } 1694*7c478bd9Sstevel@tonic-gate 1695*7c478bd9Sstevel@tonic-gate /* 1696*7c478bd9Sstevel@tonic-gate * fssnap_write_taskq() - write in-memory translations to the backing file 1697*7c478bd9Sstevel@tonic-gate * 1698*7c478bd9Sstevel@tonic-gate * writes in-memory translations to the backing file asynchronously. A 1699*7c478bd9Sstevel@tonic-gate * task is dispatched each time a new translation is created. The task 1700*7c478bd9Sstevel@tonic-gate * writes the data to the backing file and removes it from the memory 1701*7c478bd9Sstevel@tonic-gate * list. The throttling semaphore is released only if the particular 1702*7c478bd9Sstevel@tonic-gate * translation was throttled in fssnap_translate. 
1703*7c478bd9Sstevel@tonic-gate */ 1704*7c478bd9Sstevel@tonic-gate static void 1705*7c478bd9Sstevel@tonic-gate fssnap_write_taskq(void *arg) 1706*7c478bd9Sstevel@tonic-gate { 1707*7c478bd9Sstevel@tonic-gate cow_map_node_t *cmn = (cow_map_node_t *)arg; 1708*7c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = cmn->cmn_sid; 1709*7c478bd9Sstevel@tonic-gate cow_info_t *cowp = sidp->sid_cowinfo; 1710*7c478bd9Sstevel@tonic-gate cow_map_t *cmap = &cowp->cow_map; 1711*7c478bd9Sstevel@tonic-gate int error; 1712*7c478bd9Sstevel@tonic-gate int bf_index; 1713*7c478bd9Sstevel@tonic-gate int release_sem = cmn->release_sem; 1714*7c478bd9Sstevel@tonic-gate 1715*7c478bd9Sstevel@tonic-gate /* 1716*7c478bd9Sstevel@tonic-gate * The sid_rwlock does not need to be held here because the taskqs 1717*7c478bd9Sstevel@tonic-gate * are destroyed explicitly by fssnap_delete (with the sid_rwlock 1718*7c478bd9Sstevel@tonic-gate * held as a writer). taskq_destroy() will flush all of the tasks 1719*7c478bd9Sstevel@tonic-gate * out before fssnap_delete frees up all of the structures. 1720*7c478bd9Sstevel@tonic-gate */ 1721*7c478bd9Sstevel@tonic-gate 1722*7c478bd9Sstevel@tonic-gate /* if the snapshot was disabled from under us, drop the request. 
*/ 1723*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1724*7c478bd9Sstevel@tonic-gate if (SID_INACTIVE(sidp)) { 1725*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1726*7c478bd9Sstevel@tonic-gate if (release_sem) 1727*7c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1728*7c478bd9Sstevel@tonic-gate return; 1729*7c478bd9Sstevel@tonic-gate } 1730*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1731*7c478bd9Sstevel@tonic-gate 1732*7c478bd9Sstevel@tonic-gate atomic_add_64((uint64_t *)&cmap->cmap_nchunks, 1); 1733*7c478bd9Sstevel@tonic-gate 1734*7c478bd9Sstevel@tonic-gate if ((cmap->cmap_maxsize != 0) && 1735*7c478bd9Sstevel@tonic-gate ((cmap->cmap_nchunks * cmap->cmap_chunksz) > cmap->cmap_maxsize)) { 1736*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_write_taskq: snapshot %d (%s) has " 1737*7c478bd9Sstevel@tonic-gate "reached the maximum backing file size specified (%llu " 1738*7c478bd9Sstevel@tonic-gate "bytes) and will be deleted.", sidp->sid_snapnumber, 1739*7c478bd9Sstevel@tonic-gate (char *)cowp->cow_kstat_mntpt->ks_data, 1740*7c478bd9Sstevel@tonic-gate cmap->cmap_maxsize); 1741*7c478bd9Sstevel@tonic-gate if (release_sem) 1742*7c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1743*7c478bd9Sstevel@tonic-gate atomic_or_uint(&sidp->sid_flags, SID_DELETE); 1744*7c478bd9Sstevel@tonic-gate return; 1745*7c478bd9Sstevel@tonic-gate } 1746*7c478bd9Sstevel@tonic-gate 1747*7c478bd9Sstevel@tonic-gate /* perform the write */ 1748*7c478bd9Sstevel@tonic-gate bf_index = cmn->cmn_chunk / cmap->cmap_chunksperbf; 1749*7c478bd9Sstevel@tonic-gate 1750*7c478bd9Sstevel@tonic-gate if (error = vn_rdwr(UIO_WRITE, (cowp->cow_backfile_array)[bf_index], 1751*7c478bd9Sstevel@tonic-gate cmn->cmn_buf, cmap->cmap_chunksz, 1752*7c478bd9Sstevel@tonic-gate (cmn->cmn_chunk % cmap->cmap_chunksperbf) * cmap->cmap_chunksz, 1753*7c478bd9Sstevel@tonic-gate UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, (ssize_t *)NULL)) { 
1754*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_write_taskq: error writing to " 1755*7c478bd9Sstevel@tonic-gate "backing file. DELETING SNAPSHOT %d, backing file path " 1756*7c478bd9Sstevel@tonic-gate "%s, offset %llu bytes, error %d.", sidp->sid_snapnumber, 1757*7c478bd9Sstevel@tonic-gate (char *)cowp->cow_kstat_bfname->ks_data, 1758*7c478bd9Sstevel@tonic-gate cmn->cmn_chunk * cmap->cmap_chunksz, error); 1759*7c478bd9Sstevel@tonic-gate if (release_sem) 1760*7c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1761*7c478bd9Sstevel@tonic-gate atomic_or_uint(&sidp->sid_flags, SID_DELETE); 1762*7c478bd9Sstevel@tonic-gate return; 1763*7c478bd9Sstevel@tonic-gate } 1764*7c478bd9Sstevel@tonic-gate 1765*7c478bd9Sstevel@tonic-gate /* 1766*7c478bd9Sstevel@tonic-gate * now remove the node and buffer from memory 1767*7c478bd9Sstevel@tonic-gate */ 1768*7c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 1769*7c478bd9Sstevel@tonic-gate transtbl_delete(cmap, cmn); 1770*7c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 1771*7c478bd9Sstevel@tonic-gate 1772*7c478bd9Sstevel@tonic-gate /* Allow more translations */ 1773*7c478bd9Sstevel@tonic-gate if (release_sem) 1774*7c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 1775*7c478bd9Sstevel@tonic-gate 1776*7c478bd9Sstevel@tonic-gate } 1777*7c478bd9Sstevel@tonic-gate 1778*7c478bd9Sstevel@tonic-gate /* 1779*7c478bd9Sstevel@tonic-gate * fssnap_create_impl() - called from the file system to create a new snapshot 1780*7c478bd9Sstevel@tonic-gate * 1781*7c478bd9Sstevel@tonic-gate * allocates and initializes the structures needed for a new snapshot. 1782*7c478bd9Sstevel@tonic-gate * This is called by the file system when it receives an ioctl request to 1783*7c478bd9Sstevel@tonic-gate * create a new snapshot. 
An unused snapshot identifier is either found 1784*7c478bd9Sstevel@tonic-gate * or created, and eventually returned as the opaque handle the file 1785*7c478bd9Sstevel@tonic-gate * system will use to identify this snapshot. The snapshot number 1786*7c478bd9Sstevel@tonic-gate * associated with the snapshot identifier is the same as the minor 1787*7c478bd9Sstevel@tonic-gate * number for the snapshot device that is used to access that snapshot. 1788*7c478bd9Sstevel@tonic-gate * 1789*7c478bd9Sstevel@tonic-gate * The snapshot can not be used until the candidate bitmap is populated 1790*7c478bd9Sstevel@tonic-gate * by the file system (see fssnap_set_candidate_impl()), and the file 1791*7c478bd9Sstevel@tonic-gate * system finishes the setup process by calling fssnap_create_done(). 1792*7c478bd9Sstevel@tonic-gate * Nearly all of the snapshot locks are held for the duration of the 1793*7c478bd9Sstevel@tonic-gate * create, and are not released until fssnap_create_done is called(). 1794*7c478bd9Sstevel@tonic-gate */ 1795*7c478bd9Sstevel@tonic-gate static void * 1796*7c478bd9Sstevel@tonic-gate fssnap_create_impl(chunknumber_t nchunks, uint_t chunksz, u_offset_t maxsize, 1797*7c478bd9Sstevel@tonic-gate struct vnode *fsvp, int backfilecount, struct vnode **bfvpp, char *backpath, 1798*7c478bd9Sstevel@tonic-gate u_offset_t max_backfile_size) 1799*7c478bd9Sstevel@tonic-gate { 1800*7c478bd9Sstevel@tonic-gate refstr_t *mountpoint; 1801*7c478bd9Sstevel@tonic-gate char taskqname[50]; 1802*7c478bd9Sstevel@tonic-gate struct cow_info *cowp; 1803*7c478bd9Sstevel@tonic-gate struct cow_map *cmap; 1804*7c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 1805*7c478bd9Sstevel@tonic-gate int lastsnap; 1806*7c478bd9Sstevel@tonic-gate 1807*7c478bd9Sstevel@tonic-gate /* 1808*7c478bd9Sstevel@tonic-gate * Sanity check the parameters we care about 1809*7c478bd9Sstevel@tonic-gate * (we don't care about the informational parameters) 1810*7c478bd9Sstevel@tonic-gate */ 1811*7c478bd9Sstevel@tonic-gate if 
((nchunks == 0) || 1812*7c478bd9Sstevel@tonic-gate ((chunksz % DEV_BSIZE) != 0) || 1813*7c478bd9Sstevel@tonic-gate (bfvpp == NULL)) { 1814*7c478bd9Sstevel@tonic-gate return (NULL); 1815*7c478bd9Sstevel@tonic-gate } 1816*7c478bd9Sstevel@tonic-gate 1817*7c478bd9Sstevel@tonic-gate /* 1818*7c478bd9Sstevel@tonic-gate * Look for unused snapshot identifiers. Snapshot ids are never 1819*7c478bd9Sstevel@tonic-gate * freed, but deleted snapshot ids will be recycled as needed. 1820*7c478bd9Sstevel@tonic-gate */ 1821*7c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 1822*7c478bd9Sstevel@tonic-gate 1823*7c478bd9Sstevel@tonic-gate findagain: 1824*7c478bd9Sstevel@tonic-gate lastsnap = 0; 1825*7c478bd9Sstevel@tonic-gate for (sidp = snapshot; sidp != NULL; sidp = sidp->sid_next) { 1826*7c478bd9Sstevel@tonic-gate if (sidp->sid_snapnumber > lastsnap) 1827*7c478bd9Sstevel@tonic-gate lastsnap = sidp->sid_snapnumber; 1828*7c478bd9Sstevel@tonic-gate 1829*7c478bd9Sstevel@tonic-gate /* 1830*7c478bd9Sstevel@tonic-gate * The sid_rwlock is taken as a reader initially so that 1831*7c478bd9Sstevel@tonic-gate * activity on each snapshot is not stalled while searching 1832*7c478bd9Sstevel@tonic-gate * for a free snapshot id. 1833*7c478bd9Sstevel@tonic-gate */ 1834*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_READER); 1835*7c478bd9Sstevel@tonic-gate 1836*7c478bd9Sstevel@tonic-gate /* 1837*7c478bd9Sstevel@tonic-gate * If the snapshot has been deleted and nobody is using the 1838*7c478bd9Sstevel@tonic-gate * snapshot device than we can reuse this snapshot_id. If 1839*7c478bd9Sstevel@tonic-gate * the snapshot is marked to be deleted (SID_DELETE), then 1840*7c478bd9Sstevel@tonic-gate * it hasn't been deleted yet so don't reuse it. 
1841*7c478bd9Sstevel@tonic-gate */ 1842*7c478bd9Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) 1843*7c478bd9Sstevel@tonic-gate break; /* This spot is unused, so take it */ 1844*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1845*7c478bd9Sstevel@tonic-gate } 1846*7c478bd9Sstevel@tonic-gate 1847*7c478bd9Sstevel@tonic-gate /* 1848*7c478bd9Sstevel@tonic-gate * add a new snapshot identifier if there are no deleted 1849*7c478bd9Sstevel@tonic-gate * entries. Since it doesn't matter what order the entries 1850*7c478bd9Sstevel@tonic-gate * are in we can just add it to the beginning of the list. 1851*7c478bd9Sstevel@tonic-gate */ 1852*7c478bd9Sstevel@tonic-gate if (sidp) { 1853*7c478bd9Sstevel@tonic-gate if (rw_tryupgrade(&sidp->sid_rwlock) == 0) { 1854*7c478bd9Sstevel@tonic-gate /* someone else grabbed it as a writer, try again */ 1855*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 1856*7c478bd9Sstevel@tonic-gate goto findagain; 1857*7c478bd9Sstevel@tonic-gate } 1858*7c478bd9Sstevel@tonic-gate } else { 1859*7c478bd9Sstevel@tonic-gate /* Create a new node if we didn't find an unused one */ 1860*7c478bd9Sstevel@tonic-gate sidp = kmem_alloc(sizeof (struct snapshot_id), KM_SLEEP); 1861*7c478bd9Sstevel@tonic-gate rw_init(&sidp->sid_rwlock, NULL, RW_DEFAULT, NULL); 1862*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 1863*7c478bd9Sstevel@tonic-gate sidp->sid_snapnumber = (snapshot == NULL) ? 
0 : lastsnap + 1; 1864*7c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = NULL; 1865*7c478bd9Sstevel@tonic-gate sidp->sid_flags = 0; 1866*7c478bd9Sstevel@tonic-gate sidp->sid_next = snapshot; 1867*7c478bd9Sstevel@tonic-gate snapshot = sidp; 1868*7c478bd9Sstevel@tonic-gate } 1869*7c478bd9Sstevel@tonic-gate 1870*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 1871*7c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_cowinfo == NULL); 1872*7c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_snapnumber <= (lastsnap + 1)); 1873*7c478bd9Sstevel@tonic-gate 1874*7c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_CREATING; 1875*7c478bd9Sstevel@tonic-gate /* The root vnode is held until snap_delete_impl() is called */ 1876*7c478bd9Sstevel@tonic-gate VN_HOLD(fsvp); 1877*7c478bd9Sstevel@tonic-gate sidp->sid_fvp = fsvp; 1878*7c478bd9Sstevel@tonic-gate num_snapshots++; 1879*7c478bd9Sstevel@tonic-gate 1880*7c478bd9Sstevel@tonic-gate /* allocate and initialize structures */ 1881*7c478bd9Sstevel@tonic-gate 1882*7c478bd9Sstevel@tonic-gate cowp = kmem_zalloc(sizeof (struct cow_info), KM_SLEEP); 1883*7c478bd9Sstevel@tonic-gate 1884*7c478bd9Sstevel@tonic-gate cowp->cow_backfile_array = bfvpp; 1885*7c478bd9Sstevel@tonic-gate cowp->cow_backcount = backfilecount; 1886*7c478bd9Sstevel@tonic-gate cowp->cow_backfile_sz = max_backfile_size; 1887*7c478bd9Sstevel@tonic-gate 1888*7c478bd9Sstevel@tonic-gate /* 1889*7c478bd9Sstevel@tonic-gate * Initialize task queues for this snapshot. Only a small number 1890*7c478bd9Sstevel@tonic-gate * of threads are required because they will be serialized on the 1891*7c478bd9Sstevel@tonic-gate * backing file's reader/writer lock anyway. 
1892*7c478bd9Sstevel@tonic-gate */ 1893*7c478bd9Sstevel@tonic-gate (void) snprintf(taskqname, sizeof (taskqname), "%s_taskq_%d", snapname, 1894*7c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 1895*7c478bd9Sstevel@tonic-gate cowp->cow_taskq = taskq_create(taskqname, fssnap_taskq_nthreads, 1896*7c478bd9Sstevel@tonic-gate minclsyspri, 1, fssnap_taskq_maxtasks, 0); 1897*7c478bd9Sstevel@tonic-gate 1898*7c478bd9Sstevel@tonic-gate /* don't allow tasks to start until after everything is ready */ 1899*7c478bd9Sstevel@tonic-gate taskq_suspend(cowp->cow_taskq); 1900*7c478bd9Sstevel@tonic-gate 1901*7c478bd9Sstevel@tonic-gate /* initialize translation table */ 1902*7c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 1903*7c478bd9Sstevel@tonic-gate rw_init(&cmap->cmap_rwlock, NULL, RW_DEFAULT, NULL); 1904*7c478bd9Sstevel@tonic-gate rw_enter(&cmap->cmap_rwlock, RW_WRITER); 1905*7c478bd9Sstevel@tonic-gate 1906*7c478bd9Sstevel@tonic-gate sema_init(&cmap->cmap_throttle_sem, fssnap_max_mem_chunks, NULL, 1907*7c478bd9Sstevel@tonic-gate SEMA_DEFAULT, NULL); 1908*7c478bd9Sstevel@tonic-gate 1909*7c478bd9Sstevel@tonic-gate cmap->cmap_chunksz = chunksz; 1910*7c478bd9Sstevel@tonic-gate cmap->cmap_maxsize = maxsize; 1911*7c478bd9Sstevel@tonic-gate cmap->cmap_chunksperbf = max_backfile_size / chunksz; 1912*7c478bd9Sstevel@tonic-gate 1913*7c478bd9Sstevel@tonic-gate /* 1914*7c478bd9Sstevel@tonic-gate * allocate one bit per chunk for the bitmaps, round up 1915*7c478bd9Sstevel@tonic-gate */ 1916*7c478bd9Sstevel@tonic-gate cmap->cmap_bmsize = (nchunks + (NBBY - 1)) / NBBY; 1917*7c478bd9Sstevel@tonic-gate cmap->cmap_hastrans = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP); 1918*7c478bd9Sstevel@tonic-gate cmap->cmap_candidate = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP); 1919*7c478bd9Sstevel@tonic-gate 1920*7c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = cowp; 1921*7c478bd9Sstevel@tonic-gate 1922*7c478bd9Sstevel@tonic-gate /* initialize kstats for this snapshot */ 1923*7c478bd9Sstevel@tonic-gate mountpoint 
= vfs_getmntpoint(fsvp->v_vfsp); 1924*7c478bd9Sstevel@tonic-gate fssnap_create_kstats(sidp, sidp->sid_snapnumber, 1925*7c478bd9Sstevel@tonic-gate refstr_value(mountpoint), backpath); 1926*7c478bd9Sstevel@tonic-gate refstr_rele(mountpoint); 1927*7c478bd9Sstevel@tonic-gate 1928*7c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 1929*7c478bd9Sstevel@tonic-gate 1930*7c478bd9Sstevel@tonic-gate /* 1931*7c478bd9Sstevel@tonic-gate * return with snapshot id rwlock held as a writer until 1932*7c478bd9Sstevel@tonic-gate * fssnap_create_done is called 1933*7c478bd9Sstevel@tonic-gate */ 1934*7c478bd9Sstevel@tonic-gate return (sidp); 1935*7c478bd9Sstevel@tonic-gate } 1936*7c478bd9Sstevel@tonic-gate 1937*7c478bd9Sstevel@tonic-gate /* 1938*7c478bd9Sstevel@tonic-gate * fssnap_set_candidate_impl() - mark a chunk as a candidate for copy-on-write 1939*7c478bd9Sstevel@tonic-gate * 1940*7c478bd9Sstevel@tonic-gate * sets a bit in the candidate bitmap that indicates that a chunk is a 1941*7c478bd9Sstevel@tonic-gate * candidate for copy-on-write. Typically, chunks that are allocated on 1942*7c478bd9Sstevel@tonic-gate * the file system at the time the snapshot is taken are candidates, 1943*7c478bd9Sstevel@tonic-gate * while chunks that have no allocated data do not need to be copied. 1944*7c478bd9Sstevel@tonic-gate * Chunks containing metadata must be marked as candidates as well. 
1945*7c478bd9Sstevel@tonic-gate */ 1946*7c478bd9Sstevel@tonic-gate static void 1947*7c478bd9Sstevel@tonic-gate fssnap_set_candidate_impl(void *snapshot_id, chunknumber_t chunknumber) 1948*7c478bd9Sstevel@tonic-gate { 1949*7c478bd9Sstevel@tonic-gate struct snapshot_id *sid = snapshot_id; 1950*7c478bd9Sstevel@tonic-gate struct cow_info *cowp = sid->sid_cowinfo; 1951*7c478bd9Sstevel@tonic-gate struct cow_map *cmap = &cowp->cow_map; 1952*7c478bd9Sstevel@tonic-gate 1953*7c478bd9Sstevel@tonic-gate /* simple bitmap operation for now */ 1954*7c478bd9Sstevel@tonic-gate ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY)); 1955*7c478bd9Sstevel@tonic-gate setbit(cmap->cmap_candidate, chunknumber); 1956*7c478bd9Sstevel@tonic-gate } 1957*7c478bd9Sstevel@tonic-gate 1958*7c478bd9Sstevel@tonic-gate /* 1959*7c478bd9Sstevel@tonic-gate * fssnap_is_candidate_impl() - check whether a chunk is a candidate 1960*7c478bd9Sstevel@tonic-gate * 1961*7c478bd9Sstevel@tonic-gate * returns 0 if the chunk is not a candidate and 1 if the chunk is a 1962*7c478bd9Sstevel@tonic-gate * candidate. This can be used by the file system to change behavior for 1963*7c478bd9Sstevel@tonic-gate * chunks that might induce a copy-on-write. The offset is specified in 1964*7c478bd9Sstevel@tonic-gate * bytes since the chunk size may not be known by the file system. 
1965*7c478bd9Sstevel@tonic-gate */ 1966*7c478bd9Sstevel@tonic-gate static int 1967*7c478bd9Sstevel@tonic-gate fssnap_is_candidate_impl(void *snapshot_id, u_offset_t off) 1968*7c478bd9Sstevel@tonic-gate { 1969*7c478bd9Sstevel@tonic-gate struct snapshot_id *sid = snapshot_id; 1970*7c478bd9Sstevel@tonic-gate struct cow_info *cowp = sid->sid_cowinfo; 1971*7c478bd9Sstevel@tonic-gate struct cow_map *cmap = &cowp->cow_map; 1972*7c478bd9Sstevel@tonic-gate ulong_t chunknumber = off / cmap->cmap_chunksz; 1973*7c478bd9Sstevel@tonic-gate 1974*7c478bd9Sstevel@tonic-gate /* simple bitmap operation for now */ 1975*7c478bd9Sstevel@tonic-gate ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY)); 1976*7c478bd9Sstevel@tonic-gate return (isset(cmap->cmap_candidate, chunknumber)); 1977*7c478bd9Sstevel@tonic-gate } 1978*7c478bd9Sstevel@tonic-gate 1979*7c478bd9Sstevel@tonic-gate /* 1980*7c478bd9Sstevel@tonic-gate * fssnap_create_done_impl() - complete the snapshot setup process 1981*7c478bd9Sstevel@tonic-gate * 1982*7c478bd9Sstevel@tonic-gate * called when the file system is done populating the candidate bitmap 1983*7c478bd9Sstevel@tonic-gate * and it is ready to start using the snapshot. This routine releases 1984*7c478bd9Sstevel@tonic-gate * the snapshot locks, allows taskq tasks to start processing, and 1985*7c478bd9Sstevel@tonic-gate * creates the device minor nodes associated with the snapshot. 
1986*7c478bd9Sstevel@tonic-gate */ 1987*7c478bd9Sstevel@tonic-gate static int 1988*7c478bd9Sstevel@tonic-gate fssnap_create_done_impl(void *snapshot_id) 1989*7c478bd9Sstevel@tonic-gate { 1990*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp, *sidp = snapshot_id; 1991*7c478bd9Sstevel@tonic-gate struct cow_info *cowp; 1992*7c478bd9Sstevel@tonic-gate struct cow_map *cmap; 1993*7c478bd9Sstevel@tonic-gate int snapnumber = -1; 1994*7c478bd9Sstevel@tonic-gate char name[20]; 1995*7c478bd9Sstevel@tonic-gate 1996*7c478bd9Sstevel@tonic-gate /* sid rwlock and cmap rwlock should be taken from fssnap_create */ 1997*7c478bd9Sstevel@tonic-gate ASSERT(sidp); 1998*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 1999*7c478bd9Sstevel@tonic-gate ASSERT(sidp->sid_cowinfo); 2000*7c478bd9Sstevel@tonic-gate 2001*7c478bd9Sstevel@tonic-gate cowp = sidp->sid_cowinfo; 2002*7c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 2003*7c478bd9Sstevel@tonic-gate 2004*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock)); 2005*7c478bd9Sstevel@tonic-gate 2006*7c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_CREATING | SID_DISABLED); 2007*7c478bd9Sstevel@tonic-gate snapnumber = sidp->sid_snapnumber; 2008*7c478bd9Sstevel@tonic-gate 2009*7c478bd9Sstevel@tonic-gate /* allocate state structure and find new snapshot id */ 2010*7c478bd9Sstevel@tonic-gate if (ddi_soft_state_zalloc(statep, snapnumber) != DDI_SUCCESS) { 2011*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 2012*7c478bd9Sstevel@tonic-gate "snap_ioctl: create: could not allocate " 2013*7c478bd9Sstevel@tonic-gate "state for snapshot %d.", snapnumber); 2014*7c478bd9Sstevel@tonic-gate snapnumber = -1; 2015*7c478bd9Sstevel@tonic-gate goto out; 2016*7c478bd9Sstevel@tonic-gate } 2017*7c478bd9Sstevel@tonic-gate 2018*7c478bd9Sstevel@tonic-gate sidpp = ddi_get_soft_state(statep, snapnumber); 2019*7c478bd9Sstevel@tonic-gate *sidpp = sidp; 2020*7c478bd9Sstevel@tonic-gate 2021*7c478bd9Sstevel@tonic-gate /* create minor 
node based on snapshot number */ 2022*7c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 2023*7c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", snapnumber); 2024*7c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(fssnap_dip, name, S_IFBLK, 2025*7c478bd9Sstevel@tonic-gate snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) { 2026*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: could not create " 2027*7c478bd9Sstevel@tonic-gate "block minor node for snapshot %d.", snapnumber); 2028*7c478bd9Sstevel@tonic-gate snapnumber = -1; 2029*7c478bd9Sstevel@tonic-gate goto out; 2030*7c478bd9Sstevel@tonic-gate } 2031*7c478bd9Sstevel@tonic-gate 2032*7c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", snapnumber); 2033*7c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(fssnap_dip, name, S_IFCHR, 2034*7c478bd9Sstevel@tonic-gate snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) { 2035*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "snap_ioctl: could not create " 2036*7c478bd9Sstevel@tonic-gate "character minor node for snapshot %d.", snapnumber); 2037*7c478bd9Sstevel@tonic-gate snapnumber = -1; 2038*7c478bd9Sstevel@tonic-gate } 2039*7c478bd9Sstevel@tonic-gate 2040*7c478bd9Sstevel@tonic-gate out: 2041*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2042*7c478bd9Sstevel@tonic-gate rw_exit(&cmap->cmap_rwlock); 2043*7c478bd9Sstevel@tonic-gate 2044*7c478bd9Sstevel@tonic-gate /* let the taskq threads start processing */ 2045*7c478bd9Sstevel@tonic-gate taskq_resume(cowp->cow_taskq); 2046*7c478bd9Sstevel@tonic-gate 2047*7c478bd9Sstevel@tonic-gate return (snapnumber); 2048*7c478bd9Sstevel@tonic-gate } 2049*7c478bd9Sstevel@tonic-gate 2050*7c478bd9Sstevel@tonic-gate /* 2051*7c478bd9Sstevel@tonic-gate * fssnap_delete_impl() - delete a snapshot 2052*7c478bd9Sstevel@tonic-gate * 2053*7c478bd9Sstevel@tonic-gate * used when a snapshot is no longer needed. 
This is called by the file 2054*7c478bd9Sstevel@tonic-gate * system when it receives an ioctl request to delete a snapshot. It is 2055*7c478bd9Sstevel@tonic-gate * also called internally when error conditions such as disk full, errors 2056*7c478bd9Sstevel@tonic-gate * writing to the backing file, or backing file maxsize exceeded occur. 2057*7c478bd9Sstevel@tonic-gate * If the snapshot device is busy when the delete request is received, 2058*7c478bd9Sstevel@tonic-gate * all state will be deleted except for the soft state and device files 2059*7c478bd9Sstevel@tonic-gate * associated with the snapshot; they will be deleted when the snapshot 2060*7c478bd9Sstevel@tonic-gate * device is closed. 2061*7c478bd9Sstevel@tonic-gate * 2062*7c478bd9Sstevel@tonic-gate * NOTE this function takes a POINTER TO A POINTER to the snapshot id, 2063*7c478bd9Sstevel@tonic-gate * and expects to be able to set the handle held by the file system to 2064*7c478bd9Sstevel@tonic-gate * NULL. This depends on the file system checking that variable for NULL 2065*7c478bd9Sstevel@tonic-gate * before calling fssnap_strategy(). 2066*7c478bd9Sstevel@tonic-gate */ 2067*7c478bd9Sstevel@tonic-gate static int 2068*7c478bd9Sstevel@tonic-gate fssnap_delete_impl(void *snapshot_id) 2069*7c478bd9Sstevel@tonic-gate { 2070*7c478bd9Sstevel@tonic-gate struct snapshot_id **sidpp = (struct snapshot_id **)snapshot_id; 2071*7c478bd9Sstevel@tonic-gate struct snapshot_id *sidp; 2072*7c478bd9Sstevel@tonic-gate struct snapshot_id **statesidpp; 2073*7c478bd9Sstevel@tonic-gate struct cow_info *cowp; 2074*7c478bd9Sstevel@tonic-gate struct cow_map *cmap; 2075*7c478bd9Sstevel@tonic-gate char name[20]; 2076*7c478bd9Sstevel@tonic-gate int snapnumber = -1; 2077*7c478bd9Sstevel@tonic-gate vnode_t **vpp; 2078*7c478bd9Sstevel@tonic-gate 2079*7c478bd9Sstevel@tonic-gate /* 2080*7c478bd9Sstevel@tonic-gate * sidp is guaranteed to be valid if sidpp is valid because 2081*7c478bd9Sstevel@tonic-gate * the snapshot list is append-only. 
2082*7c478bd9Sstevel@tonic-gate */ 2083*7c478bd9Sstevel@tonic-gate if (sidpp == NULL) { 2084*7c478bd9Sstevel@tonic-gate return (-1); 2085*7c478bd9Sstevel@tonic-gate } 2086*7c478bd9Sstevel@tonic-gate 2087*7c478bd9Sstevel@tonic-gate sidp = *sidpp; 2088*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 2089*7c478bd9Sstevel@tonic-gate 2090*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock)); 2091*7c478bd9Sstevel@tonic-gate 2092*7c478bd9Sstevel@tonic-gate /* 2093*7c478bd9Sstevel@tonic-gate * double check that the snapshot is still valid for THIS file system 2094*7c478bd9Sstevel@tonic-gate */ 2095*7c478bd9Sstevel@tonic-gate if (*sidpp == NULL) { 2096*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2097*7c478bd9Sstevel@tonic-gate return (-1); 2098*7c478bd9Sstevel@tonic-gate } 2099*7c478bd9Sstevel@tonic-gate 2100*7c478bd9Sstevel@tonic-gate /* 2101*7c478bd9Sstevel@tonic-gate * Now we know the snapshot is still valid and will not go away 2102*7c478bd9Sstevel@tonic-gate * because we have the write lock. Once the state is transitioned 2103*7c478bd9Sstevel@tonic-gate * to "disabling", the sid_rwlock can be released. Any pending I/O 2104*7c478bd9Sstevel@tonic-gate * waiting for the lock as a reader will check for this state and 2105*7c478bd9Sstevel@tonic-gate * abort without touching data that may be getting freed. 2106*7c478bd9Sstevel@tonic-gate */ 2107*7c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_DISABLING; 2108*7c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_DELETE) { 2109*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "Snapshot %d automatically deleted.", 2110*7c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 2111*7c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_DELETE); 2112*7c478bd9Sstevel@tonic-gate } 2113*7c478bd9Sstevel@tonic-gate 2114*7c478bd9Sstevel@tonic-gate 2115*7c478bd9Sstevel@tonic-gate /* 2116*7c478bd9Sstevel@tonic-gate * This is pointing into file system specific data! 
The assumption is 2117*7c478bd9Sstevel@tonic-gate * that fssnap_strategy() gets called from the file system based on 2118*7c478bd9Sstevel@tonic-gate * whether this reference to the snapshot_id is NULL or not. So 2119*7c478bd9Sstevel@tonic-gate * setting this to NULL should disable snapshots for the file system. 2120*7c478bd9Sstevel@tonic-gate */ 2121*7c478bd9Sstevel@tonic-gate *sidpp = NULL; 2122*7c478bd9Sstevel@tonic-gate 2123*7c478bd9Sstevel@tonic-gate /* remove cowinfo */ 2124*7c478bd9Sstevel@tonic-gate cowp = sidp->sid_cowinfo; 2125*7c478bd9Sstevel@tonic-gate if (cowp == NULL) { 2126*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2127*7c478bd9Sstevel@tonic-gate return (-1); 2128*7c478bd9Sstevel@tonic-gate } 2129*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2130*7c478bd9Sstevel@tonic-gate 2131*7c478bd9Sstevel@tonic-gate /* destroy task queues first so they don't reference freed data. */ 2132*7c478bd9Sstevel@tonic-gate if (cowp->cow_taskq) { 2133*7c478bd9Sstevel@tonic-gate taskq_destroy(cowp->cow_taskq); 2134*7c478bd9Sstevel@tonic-gate cowp->cow_taskq = NULL; 2135*7c478bd9Sstevel@tonic-gate } 2136*7c478bd9Sstevel@tonic-gate 2137*7c478bd9Sstevel@tonic-gate if (cowp->cow_backfile_array != NULL) { 2138*7c478bd9Sstevel@tonic-gate for (vpp = cowp->cow_backfile_array; *vpp; vpp++) 2139*7c478bd9Sstevel@tonic-gate VN_RELE(*vpp); 2140*7c478bd9Sstevel@tonic-gate kmem_free(cowp->cow_backfile_array, 2141*7c478bd9Sstevel@tonic-gate (cowp->cow_backcount + 1) * sizeof (vnode_t *)); 2142*7c478bd9Sstevel@tonic-gate cowp->cow_backfile_array = NULL; 2143*7c478bd9Sstevel@tonic-gate } 2144*7c478bd9Sstevel@tonic-gate 2145*7c478bd9Sstevel@tonic-gate sidp->sid_cowinfo = NULL; 2146*7c478bd9Sstevel@tonic-gate 2147*7c478bd9Sstevel@tonic-gate /* remove cmap */ 2148*7c478bd9Sstevel@tonic-gate cmap = &cowp->cow_map; 2149*7c478bd9Sstevel@tonic-gate ASSERT(cmap); 2150*7c478bd9Sstevel@tonic-gate 2151*7c478bd9Sstevel@tonic-gate if (cmap->cmap_candidate) 
2152*7c478bd9Sstevel@tonic-gate kmem_free(cmap->cmap_candidate, cmap->cmap_bmsize); 2153*7c478bd9Sstevel@tonic-gate 2154*7c478bd9Sstevel@tonic-gate if (cmap->cmap_hastrans) 2155*7c478bd9Sstevel@tonic-gate kmem_free(cmap->cmap_hastrans, cmap->cmap_bmsize); 2156*7c478bd9Sstevel@tonic-gate 2157*7c478bd9Sstevel@tonic-gate if (cmap->cmap_table) 2158*7c478bd9Sstevel@tonic-gate transtbl_free(&cowp->cow_map); 2159*7c478bd9Sstevel@tonic-gate 2160*7c478bd9Sstevel@tonic-gate rw_destroy(&cmap->cmap_rwlock); 2161*7c478bd9Sstevel@tonic-gate 2162*7c478bd9Sstevel@tonic-gate while (cmap->cmap_waiters) { 2163*7c478bd9Sstevel@tonic-gate sema_p(&cmap->cmap_throttle_sem); 2164*7c478bd9Sstevel@tonic-gate sema_v(&cmap->cmap_throttle_sem); 2165*7c478bd9Sstevel@tonic-gate } 2166*7c478bd9Sstevel@tonic-gate sema_destroy(&cmap->cmap_throttle_sem); 2167*7c478bd9Sstevel@tonic-gate 2168*7c478bd9Sstevel@tonic-gate /* remove kstats */ 2169*7c478bd9Sstevel@tonic-gate fssnap_delete_kstats(cowp); 2170*7c478bd9Sstevel@tonic-gate 2171*7c478bd9Sstevel@tonic-gate kmem_free(cowp, sizeof (struct cow_info)); 2172*7c478bd9Sstevel@tonic-gate 2173*7c478bd9Sstevel@tonic-gate statesidpp = ddi_get_soft_state(statep, sidp->sid_snapnumber); 2174*7c478bd9Sstevel@tonic-gate if (statesidpp == NULL || *statesidpp == NULL) { 2175*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 2176*7c478bd9Sstevel@tonic-gate "fssnap_delete_impl: could not find state for snapshot %d.", 2177*7c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 2178*7c478bd9Sstevel@tonic-gate } 2179*7c478bd9Sstevel@tonic-gate ASSERT(*statesidpp == sidp); 2180*7c478bd9Sstevel@tonic-gate 2181*7c478bd9Sstevel@tonic-gate /* 2182*7c478bd9Sstevel@tonic-gate * Leave the node in the list marked DISABLED so it can be reused 2183*7c478bd9Sstevel@tonic-gate * and avoid many race conditions. Return the snapshot number 2184*7c478bd9Sstevel@tonic-gate * that was deleted. 
2185*7c478bd9Sstevel@tonic-gate */ 2186*7c478bd9Sstevel@tonic-gate mutex_enter(&snapshot_mutex); 2187*7c478bd9Sstevel@tonic-gate rw_enter(&sidp->sid_rwlock, RW_WRITER); 2188*7c478bd9Sstevel@tonic-gate sidp->sid_flags &= ~(SID_DISABLING); 2189*7c478bd9Sstevel@tonic-gate sidp->sid_flags |= SID_DISABLED; 2190*7c478bd9Sstevel@tonic-gate VN_RELE(sidp->sid_fvp); 2191*7c478bd9Sstevel@tonic-gate sidp->sid_fvp = NULL; 2192*7c478bd9Sstevel@tonic-gate snapnumber = sidp->sid_snapnumber; 2193*7c478bd9Sstevel@tonic-gate 2194*7c478bd9Sstevel@tonic-gate /* 2195*7c478bd9Sstevel@tonic-gate * If the snapshot is not busy, free the device info now. Otherwise 2196*7c478bd9Sstevel@tonic-gate * the device nodes are freed in snap_close() when the device is 2197*7c478bd9Sstevel@tonic-gate * closed. The sid will not be reused until the device is not busy. 2198*7c478bd9Sstevel@tonic-gate */ 2199*7c478bd9Sstevel@tonic-gate if (SID_AVAILABLE(sidp)) { 2200*7c478bd9Sstevel@tonic-gate /* remove the device nodes */ 2201*7c478bd9Sstevel@tonic-gate ASSERT(fssnap_dip != NULL); 2202*7c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", 2203*7c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 2204*7c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 2205*7c478bd9Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d,raw", 2206*7c478bd9Sstevel@tonic-gate sidp->sid_snapnumber); 2207*7c478bd9Sstevel@tonic-gate ddi_remove_minor_node(fssnap_dip, name); 2208*7c478bd9Sstevel@tonic-gate 2209*7c478bd9Sstevel@tonic-gate /* delete the state structure */ 2210*7c478bd9Sstevel@tonic-gate ddi_soft_state_free(statep, sidp->sid_snapnumber); 2211*7c478bd9Sstevel@tonic-gate num_snapshots--; 2212*7c478bd9Sstevel@tonic-gate } 2213*7c478bd9Sstevel@tonic-gate 2214*7c478bd9Sstevel@tonic-gate mutex_exit(&snapshot_mutex); 2215*7c478bd9Sstevel@tonic-gate rw_exit(&sidp->sid_rwlock); 2216*7c478bd9Sstevel@tonic-gate 2217*7c478bd9Sstevel@tonic-gate return (snapnumber); 2218*7c478bd9Sstevel@tonic-gate 
} 2219*7c478bd9Sstevel@tonic-gate 2220*7c478bd9Sstevel@tonic-gate /* 2221*7c478bd9Sstevel@tonic-gate * fssnap_create_kstats() - allocate and initialize snapshot kstats 2222*7c478bd9Sstevel@tonic-gate * 2223*7c478bd9Sstevel@tonic-gate */ 2224*7c478bd9Sstevel@tonic-gate static void 2225*7c478bd9Sstevel@tonic-gate fssnap_create_kstats(snapshot_id_t *sidp, int snapnum, 2226*7c478bd9Sstevel@tonic-gate const char *mountpoint, const char *backfilename) 2227*7c478bd9Sstevel@tonic-gate { 2228*7c478bd9Sstevel@tonic-gate kstat_t *num, *mntpoint, *bfname; 2229*7c478bd9Sstevel@tonic-gate kstat_named_t *hw; 2230*7c478bd9Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 2231*7c478bd9Sstevel@tonic-gate struct cow_kstat_num *stats; 2232*7c478bd9Sstevel@tonic-gate 2233*7c478bd9Sstevel@tonic-gate /* update the high water mark */ 2234*7c478bd9Sstevel@tonic-gate if (fssnap_highwater_kstat == NULL) { 2235*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to lookup " 2236*7c478bd9Sstevel@tonic-gate "high water mark kstat."); 2237*7c478bd9Sstevel@tonic-gate return; 2238*7c478bd9Sstevel@tonic-gate } 2239*7c478bd9Sstevel@tonic-gate 2240*7c478bd9Sstevel@tonic-gate hw = (kstat_named_t *)fssnap_highwater_kstat->ks_data; 2241*7c478bd9Sstevel@tonic-gate if (hw->value.ui32 < snapnum) 2242*7c478bd9Sstevel@tonic-gate hw->value.ui32 = snapnum; 2243*7c478bd9Sstevel@tonic-gate 2244*7c478bd9Sstevel@tonic-gate /* initialize the mount point kstat */ 2245*7c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_MNTPT); 2246*7c478bd9Sstevel@tonic-gate 2247*7c478bd9Sstevel@tonic-gate if (mountpoint != NULL) { 2248*7c478bd9Sstevel@tonic-gate mntpoint = kstat_create(snapname, snapnum, FSSNAP_KSTAT_MNTPT, 2249*7c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_RAW, strlen(mountpoint) + 1, 0); 2250*7c478bd9Sstevel@tonic-gate if (mntpoint == NULL) { 2251*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 2252*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 
"fssnap_create_kstats: failed to " 2253*7c478bd9Sstevel@tonic-gate "create mount point kstat"); 2254*7c478bd9Sstevel@tonic-gate } else { 2255*7c478bd9Sstevel@tonic-gate (void) strncpy(mntpoint->ks_data, mountpoint, 2256*7c478bd9Sstevel@tonic-gate strlen(mountpoint)); 2257*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = mntpoint; 2258*7c478bd9Sstevel@tonic-gate kstat_install(mntpoint); 2259*7c478bd9Sstevel@tonic-gate } 2260*7c478bd9Sstevel@tonic-gate } else { 2261*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 2262*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: mount point not " 2263*7c478bd9Sstevel@tonic-gate "specified."); 2264*7c478bd9Sstevel@tonic-gate } 2265*7c478bd9Sstevel@tonic-gate 2266*7c478bd9Sstevel@tonic-gate /* initialize the backing file kstat */ 2267*7c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_BFNAME); 2268*7c478bd9Sstevel@tonic-gate 2269*7c478bd9Sstevel@tonic-gate if (backfilename == NULL) { 2270*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 2271*7c478bd9Sstevel@tonic-gate } else { 2272*7c478bd9Sstevel@tonic-gate bfname = kstat_create(snapname, snapnum, FSSNAP_KSTAT_BFNAME, 2273*7c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_RAW, strlen(backfilename) + 1, 0); 2274*7c478bd9Sstevel@tonic-gate if (bfname != NULL) { 2275*7c478bd9Sstevel@tonic-gate (void) strncpy(bfname->ks_data, backfilename, 2276*7c478bd9Sstevel@tonic-gate strlen(backfilename)); 2277*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = bfname; 2278*7c478bd9Sstevel@tonic-gate kstat_install(bfname); 2279*7c478bd9Sstevel@tonic-gate } else { 2280*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 2281*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to " 2282*7c478bd9Sstevel@tonic-gate "create backing file name kstat"); 2283*7c478bd9Sstevel@tonic-gate } 2284*7c478bd9Sstevel@tonic-gate } 2285*7c478bd9Sstevel@tonic-gate 2286*7c478bd9Sstevel@tonic-gate /* initialize numeric kstats */ 
2287*7c478bd9Sstevel@tonic-gate kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_NUM); 2288*7c478bd9Sstevel@tonic-gate 2289*7c478bd9Sstevel@tonic-gate num = kstat_create(snapname, snapnum, FSSNAP_KSTAT_NUM, 2290*7c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_NAMED, 2291*7c478bd9Sstevel@tonic-gate sizeof (struct cow_kstat_num) / sizeof (kstat_named_t), 2292*7c478bd9Sstevel@tonic-gate 0); 2293*7c478bd9Sstevel@tonic-gate if (num == NULL) { 2294*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "fssnap_create_kstats: failed to create " 2295*7c478bd9Sstevel@tonic-gate "numeric kstats"); 2296*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = NULL; 2297*7c478bd9Sstevel@tonic-gate return; 2298*7c478bd9Sstevel@tonic-gate } 2299*7c478bd9Sstevel@tonic-gate 2300*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = num; 2301*7c478bd9Sstevel@tonic-gate stats = num->ks_data; 2302*7c478bd9Sstevel@tonic-gate num->ks_update = fssnap_update_kstat_num; 2303*7c478bd9Sstevel@tonic-gate num->ks_private = sidp; 2304*7c478bd9Sstevel@tonic-gate 2305*7c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_state, FSSNAP_KSTAT_NUM_STATE, 2306*7c478bd9Sstevel@tonic-gate KSTAT_DATA_INT32); 2307*7c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_bfsize, FSSNAP_KSTAT_NUM_BFSIZE, 2308*7c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT64); 2309*7c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_maxsize, FSSNAP_KSTAT_NUM_MAXSIZE, 2310*7c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT64); 2311*7c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_createtime, FSSNAP_KSTAT_NUM_CREATETIME, 2312*7c478bd9Sstevel@tonic-gate KSTAT_DATA_LONG); 2313*7c478bd9Sstevel@tonic-gate kstat_named_init(&stats->ckn_chunksize, FSSNAP_KSTAT_NUM_CHUNKSIZE, 2314*7c478bd9Sstevel@tonic-gate KSTAT_DATA_UINT32); 2315*7c478bd9Sstevel@tonic-gate 2316*7c478bd9Sstevel@tonic-gate /* initialize the static kstats */ 2317*7c478bd9Sstevel@tonic-gate stats->ckn_chunksize.value.ui32 = cowp->cow_map.cmap_chunksz; 2318*7c478bd9Sstevel@tonic-gate 
stats->ckn_maxsize.value.ui64 = cowp->cow_map.cmap_maxsize; 2319*7c478bd9Sstevel@tonic-gate stats->ckn_createtime.value.l = gethrestime_sec(); 2320*7c478bd9Sstevel@tonic-gate 2321*7c478bd9Sstevel@tonic-gate kstat_install(num); 2322*7c478bd9Sstevel@tonic-gate } 2323*7c478bd9Sstevel@tonic-gate 2324*7c478bd9Sstevel@tonic-gate /* 2325*7c478bd9Sstevel@tonic-gate * fssnap_update_kstat_num() - update a numerical snapshot kstat value 2326*7c478bd9Sstevel@tonic-gate * 2327*7c478bd9Sstevel@tonic-gate */ 2328*7c478bd9Sstevel@tonic-gate int 2329*7c478bd9Sstevel@tonic-gate fssnap_update_kstat_num(kstat_t *ksp, int rw) 2330*7c478bd9Sstevel@tonic-gate { 2331*7c478bd9Sstevel@tonic-gate snapshot_id_t *sidp = (snapshot_id_t *)ksp->ks_private; 2332*7c478bd9Sstevel@tonic-gate struct cow_info *cowp = sidp->sid_cowinfo; 2333*7c478bd9Sstevel@tonic-gate struct cow_kstat_num *stats = ksp->ks_data; 2334*7c478bd9Sstevel@tonic-gate 2335*7c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) 2336*7c478bd9Sstevel@tonic-gate return (EACCES); 2337*7c478bd9Sstevel@tonic-gate 2338*7c478bd9Sstevel@tonic-gate /* state */ 2339*7c478bd9Sstevel@tonic-gate if (sidp->sid_flags & SID_CREATING) 2340*7c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_CREATING; 2341*7c478bd9Sstevel@tonic-gate else if (SID_INACTIVE(sidp)) 2342*7c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_DISABLED; 2343*7c478bd9Sstevel@tonic-gate else if (SID_BUSY(sidp)) 2344*7c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_ACTIVE; 2345*7c478bd9Sstevel@tonic-gate else 2346*7c478bd9Sstevel@tonic-gate stats->ckn_state.value.i32 = COWSTATE_IDLE; 2347*7c478bd9Sstevel@tonic-gate 2348*7c478bd9Sstevel@tonic-gate /* bfsize */ 2349*7c478bd9Sstevel@tonic-gate stats->ckn_bfsize.value.ui64 = cowp->cow_map.cmap_nchunks * 2350*7c478bd9Sstevel@tonic-gate cowp->cow_map.cmap_chunksz; 2351*7c478bd9Sstevel@tonic-gate 2352*7c478bd9Sstevel@tonic-gate return (0); 2353*7c478bd9Sstevel@tonic-gate } 
2354*7c478bd9Sstevel@tonic-gate 2355*7c478bd9Sstevel@tonic-gate /* 2356*7c478bd9Sstevel@tonic-gate * fssnap_delete_kstats() - deallocate snapshot kstats 2357*7c478bd9Sstevel@tonic-gate * 2358*7c478bd9Sstevel@tonic-gate */ 2359*7c478bd9Sstevel@tonic-gate void 2360*7c478bd9Sstevel@tonic-gate fssnap_delete_kstats(struct cow_info *cowp) 2361*7c478bd9Sstevel@tonic-gate { 2362*7c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_num != NULL) { 2363*7c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_num); 2364*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_num = NULL; 2365*7c478bd9Sstevel@tonic-gate } 2366*7c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_mntpt != NULL) { 2367*7c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_mntpt); 2368*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_mntpt = NULL; 2369*7c478bd9Sstevel@tonic-gate } 2370*7c478bd9Sstevel@tonic-gate if (cowp->cow_kstat_bfname != NULL) { 2371*7c478bd9Sstevel@tonic-gate kstat_delete(cowp->cow_kstat_bfname); 2372*7c478bd9Sstevel@tonic-gate cowp->cow_kstat_bfname = NULL; 2373*7c478bd9Sstevel@tonic-gate } 2374*7c478bd9Sstevel@tonic-gate } 2375