/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * ZFS control directory (a.k.a. ".zfs")
 *
 * This directory provides a common location for all ZFS meta-objects.
 * Currently, this is only the 'snapshot' directory, but this may expand in
 * the future.  The elements are built using the GFS primitives, as the
 * hierarchy does not actually exist on disk.
 *
 * For 'snapshot', we don't want to have all snapshots always mounted,
 * because this would take up a huge amount of space in /etc/mnttab.  We have
 * three types of objects:
 *
 *	ctldir ------> snapshotdir -------> snapshot
 *	                                        |
 *	                                        |
 *	                                        V
 *	                                    mounted fs
 *
 * The 'snapshot' node contains just enough information to look up '..' and
 * act as a mountpoint for the snapshot.  Whenever we look up a specific
 * snapshot, we perform an automount of the underlying filesystem and return
 * the corresponding vnode.
 *
 * All mounts are handled automatically by the kernel, but unmounts are
 * (currently) handled from userland.  The main reason is that there is no
 * reliable way to auto-unmount the filesystem when it's "no longer in use".
 * When the user unmounts a filesystem, we call zfsctl_umount_snapshots(),
 * which unmounts any snapshots within the snapshot directory.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (i.e., '.zfs/snapshot/<snapname>') are all GFS nodes and
 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
 * (i.e., snapshots) are ZFS nodes and have their own unique vfs_t.
 * However, vnodes within these mounted file systems have their v_vfsp
 * fields set to the head filesystem to make NFS happy (see
 * zfsctl_snapdir_lookup()).  We VFS_HOLD the head filesystem's vfs_t
 * so that it cannot be freed until all snapshots have been unmounted.
 */
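
/*
 * Illustrative example of the hierarchy described above (not part of the
 * original comment), assuming a hypothetical dataset "tank/home" mounted at
 * /tank/home with a snapshot named "monday":
 *
 *	/tank/home/.zfs			GFS node, shares tank/home's vfs_t
 *	/tank/home/.zfs/snapshot	GFS node (the snapshot directory)
 *	/tank/home/.zfs/snapshot/monday	GFS node acting as a mountpoint;
 *					automounted on lookup, after which a
 *					separate vfs_t for the snapshot
 *					covers it
 */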

#include <fs/fs_subr.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>
#include <sys/vfs_opreg.h>
#include <sys/gfs.h>
#include <sys/stat.h>
#include <sys/dmu.h>
#include <sys/dsl_deleg.h>
#include <sys/mount.h>
#include <sys/sunddi.h>

#include "zfs_namecheck.h"

typedef struct zfsctl_node {
	gfs_dir_t	zc_gfs_private;
	uint64_t	zc_id;
	timestruc_t	zc_cmtime;	/* ctime and mtime, always the same */
} zfsctl_node_t;

typedef struct zfsctl_snapdir {
	zfsctl_node_t	sd_node;
	kmutex_t	sd_lock;
	avl_tree_t	sd_snaps;
} zfsctl_snapdir_t;

typedef struct {
	char		*se_name;
	vnode_t		*se_root;
	avl_node_t	se_node;
} zfs_snapentry_t;

static int
snapentry_compare(const void *a, const void *b)
{
	const zfs_snapentry_t *sa = a;
	const zfs_snapentry_t *sb = b;
	int ret = strcmp(sa->se_name, sb->se_name);

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}

vnodeops_t *zfsctl_ops_root;
vnodeops_t *zfsctl_ops_snapdir;
vnodeops_t *zfsctl_ops_snapshot;

static const fs_operation_def_t zfsctl_tops_root[];
static const fs_operation_def_t zfsctl_tops_snapdir[];
static const fs_operation_def_t zfsctl_tops_snapshot[];

static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);

static gfs_opsvec_t zfsctl_opsvec[] = {
	{ ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
	{ ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
	{ ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
	{ NULL }
};

/*
 * Root directory elements.  We have only a single static entry, 'snapshot'.
 */
static gfs_dirent_t zfsctl_root_entries[] = {
	{ "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
	{ NULL }
};

/* include . and .. in the calculation */
#define	NROOT_ENTRIES	((sizeof (zfsctl_root_entries) / \
    sizeof (gfs_dirent_t)) + 1)


/*
 * Initialize the various GFS pieces we'll need to create and manipulate .zfs
 * directories.  This is called from the ZFS init routine, and initializes the
 * vnode ops vectors that we'll be using.
 */
void
zfsctl_init(void)
{
	VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
}

void
zfsctl_fini(void)
{
	/*
	 * Remove zfsctl vnode ops
	 */
	if (zfsctl_ops_root)
		vn_freevnodeops(zfsctl_ops_root);
	if (zfsctl_ops_snapdir)
		vn_freevnodeops(zfsctl_ops_snapdir);
	if (zfsctl_ops_snapshot)
		vn_freevnodeops(zfsctl_ops_snapshot);

	zfsctl_ops_root = NULL;
	zfsctl_ops_snapdir = NULL;
	zfsctl_ops_snapshot = NULL;
}

/*
 * Return the inode number associated with the 'snapshot' directory.
 */
/* ARGSUSED */
static ino64_t
zfsctl_root_inode_cb(vnode_t *vp, int index)
{
	ASSERT(index == 0);
	return (ZFSCTL_INO_SNAPDIR);
}

/*
 * Create the '.zfs' directory.  This directory is cached as part of the VFS
 * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
 * therefore checks against a vfs_count of 2 instead of 1.  This reference
 * is removed when the ctldir is destroyed in the unmount.
 */
void
zfsctl_create(zfsvfs_t *zfsvfs)
{
	vnode_t *vp, *rvp;
	zfsctl_node_t *zcp;

	ASSERT(zfsvfs->z_ctldir == NULL);

	vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
	    zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
	    zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
	zcp = vp->v_data;
	zcp->zc_id = ZFSCTL_INO_ROOT;

	VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
	ZFS_TIME_DECODE(&zcp->zc_cmtime, VTOZ(rvp)->z_phys->zp_crtime);
	VN_RELE(rvp);

	/*
	 * We're only faking the fact that we have a root of a filesystem for
	 * the sake of the GFS interfaces.  Undo the flag manipulation it did
	 * for us.
	 */
	vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);

	zfsvfs->z_ctldir = vp;
}

/*
 * Destroy the '.zfs' directory.  Only called when the filesystem is
 * unmounted.  There might still be more references if we were force
 * unmounted, but only new zfs_inactive() calls can occur and they don't
 * reference .zfs.
 */
void
zfsctl_destroy(zfsvfs_t *zfsvfs)
{
	VN_RELE(zfsvfs->z_ctldir);
	zfsvfs->z_ctldir = NULL;
}

/*
 * Given a root znode, retrieve the associated .zfs directory.
 * Add a hold to the vnode and return it.
 */
vnode_t *
zfsctl_root(znode_t *zp)
{
	ASSERT(zfs_has_ctldir(zp));
	VN_HOLD(zp->z_zfsvfs->z_ctldir);
	return (zp->z_zfsvfs->z_ctldir);
}

/*
 * Common open routine.  Disallow any write access.
 */
/* ARGSUSED */
static int
zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
{
	if (flags & FWRITE)
		return (EACCES);

	return (0);
}

/*
 * Common close routine.  Nothing to do here.
 */
/* ARGSUSED */
static int
zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
    cred_t *cr, caller_context_t *ct)
{
	return (0);
}

/*
 * Common access routine.  Disallow writes.
 */
/* ARGSUSED */
static int
zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	if (flags & V_ACE_MASK) {
		if (mode & ACE_ALL_WRITE_PERMS)
			return (EACCES);
	} else {
		if (mode & VWRITE)
			return (EACCES);
	}

	return (0);
}

/*
 * Common getattr function.  Fill in basic information.
 */
static void
zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
{
	zfsctl_node_t *zcp = vp->v_data;
	timestruc_t now;

	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_rdev = 0;
	/*
	 * We are a purely virtual object, so we have no
	 * blocksize or allocated blocks.
	 */
	vap->va_blksize = 0;
	vap->va_nblocks = 0;
	vap->va_seq = 0;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
	    S_IROTH | S_IXOTH;
	vap->va_type = VDIR;
	/*
	 * We live in the now (for atime).
	 */
	gethrestime(&now);
	vap->va_atime = now;
	vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
}
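
/*
 * Illustrative note (not part of the original source): given the attributes
 * filled in above, a stat(2) of a hypothetical /tank/home/.zfs would report
 * a root-owned directory with mode r-xr-xr-x, mtime and ctime fixed at the
 * filesystem's creation time, and an atime that always reads as "now".
 */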

/*ARGSUSED*/
static int
zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_node_t *zcp = vp->v_data;
	uint64_t object = zcp->zc_id;
	zfid_short_t *zfid;
	int i;

	ZFS_ENTER(zfsvfs);

	if (fidp->fid_len < SHORT_FID_LEN) {
		fidp->fid_len = SHORT_FID_LEN;
		ZFS_EXIT(zfsvfs);
		return (ENOSPC);
	}

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = SHORT_FID_LEN;

	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* .zfs znodes always have a generation number of 0 */
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = 0;

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * .zfs inode namespace
 *
 * We need to generate unique inode numbers for all files and directories
 * within the .zfs pseudo-filesystem.  We use the following scheme:
 *
 *	ENTRY			ZFSCTL_INODE
 *	.zfs			1
 *	.zfs/snapshot		2
 *	.zfs/snapshot/<snap>	objectid(snap)
 */

#define	ZFSCTL_INO_SNAP(id)	(id)

/*
 * Get root directory attributes.
 */
/* ARGSUSED */
static int
zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;

	ZFS_ENTER(zfsvfs);
	vap->va_nodeid = ZFSCTL_INO_ROOT;
	vap->va_nlink = vap->va_size = NROOT_ENTRIES;

	zfsctl_common_getattr(vp, vap);
	ZFS_EXIT(zfsvfs);

	return (0);
}

/*
 * Special case the handling of "..".
 */
/* ARGSUSED */
int
zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	int err;

	/*
	 * No extended attributes allowed under .zfs
	 */
	if (flags & LOOKUP_XATTR)
		return (EINVAL);

	ZFS_ENTER(zfsvfs);

	if (strcmp(nm, "..") == 0) {
		err = VFS_ROOT(dvp->v_vfsp, vpp);
	} else {
		err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
		    cr, ct, direntflags, realpnp);
	}

	ZFS_EXIT(zfsvfs);

	return (err);
}

static int
zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	/*
	 * We only care about ACL_ENABLED so that libsec can
	 * display ACLs correctly and not default to POSIX draft.
	 */
	if (cmd == _PC_ACL_ENABLED) {
		*valp = _ACL_ACE_ENABLED;
		return (0);
	}

	return (fs_pathconf(vp, cmd, valp, cr, ct));
}

static const fs_operation_def_t zfsctl_tops_root[] = {
	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close } },
	{ VOPNAME_IOCTL,	{ .error = fs_inval } },
	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_root_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access } },
	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } },
	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_root_lookup } },
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek } },
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive } },
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = zfsctl_pathconf } },
	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid } },
	{ NULL }
};

static int
zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
{
	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;

	if (snapshot_namecheck(name, NULL, NULL) != 0)
		return (EILSEQ);
	dmu_objset_name(os, zname);
	if (strlen(zname) + 1 + strlen(name) >= len)
		return (ENAMETOOLONG);
	(void) strcat(zname, "@");
	(void) strcat(zname, name);
	return (0);
}
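
/*
 * Illustrative example (not part of the original source): for a .zfs
 * directory belonging to a hypothetical dataset "tank/home", a lookup of the
 * snapshot name "monday" has zfsctl_snapshot_zname() compose the full
 * dataset name "tank/home@monday" in zname.  It returns EILSEQ for an
 * invalid snapshot name and ENAMETOOLONG if the composed name does not fit
 * in len bytes.
 */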

static int
zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
{
	vnode_t *svp = sep->se_root;
	int error;

	ASSERT(vn_ismntpt(svp));

	/* this will be dropped by dounmount() */
	if ((error = vn_vfswlock(svp)) != 0)
		return (error);

	VN_HOLD(svp);
	error = dounmount(vn_mountedvfs(svp), fflags, cr);
	if (error) {
		VN_RELE(svp);
		return (error);
	}
	VFS_RELE(svp->v_vfsp);
	/*
	 * We can't use VN_RELE(), as that will try to invoke
	 * zfsctl_snapdir_inactive(), which would cause us to destroy
	 * the sd_lock mutex held by our caller.
	 */
	ASSERT(svp->v_count == 1);
	gfs_vop_inactive(svp, cr, NULL);

	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
	kmem_free(sep, sizeof (zfs_snapentry_t));

	return (0);
}

static void
zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
{
	avl_index_t where;
	vfs_t *vfsp;
	refstr_t *pathref;
	char newpath[MAXNAMELEN];
	char *tail;

	ASSERT(MUTEX_HELD(&sdp->sd_lock));
	ASSERT(sep != NULL);

	vfsp = vn_mountedvfs(sep->se_root);
	ASSERT(vfsp != NULL);

	vfs_lock_wait(vfsp);

	/*
	 * Change the name in the AVL tree.
	 */
	avl_remove(&sdp->sd_snaps, sep);
	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
	(void) strcpy(sep->se_name, nm);
	VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
	avl_insert(&sdp->sd_snaps, sep, where);

	/*
	 * Change the current mountpoint info:
	 *	- update the tail of the mntpoint path
	 *	- update the tail of the resource path
	 */
	pathref = vfs_getmntpoint(vfsp);
	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
	VERIFY((tail = strrchr(newpath, '/')) != NULL);
	*(tail+1) = '\0';
	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
	(void) strcat(newpath, nm);
	refstr_rele(pathref);
	vfs_setmntpoint(vfsp, newpath);

	pathref = vfs_getresource(vfsp);
	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
	VERIFY((tail = strrchr(newpath, '@')) != NULL);
	*(tail+1) = '\0';
	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
	(void) strcat(newpath, nm);
	refstr_rele(pathref);
	vfs_setresource(vfsp, newpath);

	vfs_unlock(vfsp);
}

/*ARGSUSED*/
static int
zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	zfsctl_snapdir_t *sdp = sdvp->v_data;
	zfs_snapentry_t search, *sep;
	zfsvfs_t *zfsvfs;
	avl_index_t where;
	char from[MAXNAMELEN], to[MAXNAMELEN];
	char real[MAXNAMELEN];
	int err;

	zfsvfs = sdvp->v_vfsp->vfs_data;
	ZFS_ENTER(zfsvfs);

	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
		err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
		    MAXNAMELEN, NULL);
		if (err == 0) {
			snm = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	ZFS_EXIT(zfsvfs);

	err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
	if (!err)
		err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
	if (!err)
		err = zfs_secpolicy_rename_perms(from, to, cr);
	if (err)
		return (err);

	/*
	 * Cannot move snapshots out of the snapdir.
	 */
	if (sdvp != tdvp)
		return (EINVAL);

	if (strcmp(snm, tnm) == 0)
		return (0);

	mutex_enter(&sdp->sd_lock);

	search.se_name = (char *)snm;
	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
		mutex_exit(&sdp->sd_lock);
		return (ENOENT);
	}

	err = dmu_objset_rename(from, to, B_FALSE);
	if (err == 0)
		zfsctl_rename_snap(sdp, sep, tnm);

	mutex_exit(&sdp->sd_lock);

	return (err);
}

/* ARGSUSED */
static int
zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    caller_context_t *ct, int flags)
{
	zfsctl_snapdir_t *sdp = dvp->v_data;
	zfs_snapentry_t *sep;
	zfs_snapentry_t search;
	zfsvfs_t *zfsvfs;
	char snapname[MAXNAMELEN];
	char real[MAXNAMELEN];
	int err;

	zfsvfs = dvp->v_vfsp->vfs_data;
	ZFS_ENTER(zfsvfs);

	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
		err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
		    MAXNAMELEN, NULL);
		if (err == 0) {
			name = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	ZFS_EXIT(zfsvfs);

	err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
	if (!err)
		err = zfs_secpolicy_destroy_perms(snapname, cr);
	if (err)
		return (err);

	mutex_enter(&sdp->sd_lock);

	search.se_name = name;
	sep = avl_find(&sdp->sd_snaps, &search, NULL);
	if (sep) {
		avl_remove(&sdp->sd_snaps, sep);
		err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
		if (err)
			avl_add(&sdp->sd_snaps, sep);
		else
			err = dmu_objset_destroy(snapname);
	} else {
		err = ENOENT;
	}

	mutex_exit(&sdp->sd_lock);

	return (err);
}

/*
 * This creates a snapshot under '.zfs/snapshot'.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
    cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
{
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	char name[MAXNAMELEN];
	int err;
	static enum symfollow follow = NO_FOLLOW;
	static enum uio_seg seg = UIO_SYSSPACE;

	if (snapshot_namecheck(dirname, NULL, NULL) != 0)
		return (EILSEQ);

	dmu_objset_name(zfsvfs->z_os, name);

	*vpp = NULL;

	err = zfs_secpolicy_snapshot_perms(name, cr);
	if (err)
		return (err);

	if (err == 0) {
		err = dmu_objset_snapshot(name, dirname, B_FALSE);
		if (err)
			return (err);
		err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
	}

	return (err);
}
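
/*
 * Illustrative example (not part of the original source): for a hypothetical
 * dataset "tank/home", a "mkdir /tank/home/.zfs/snapshot/tuesday" issued by
 * a suitably privileged user is handled by zfsctl_snapdir_mkdir() above and
 * is roughly equivalent to "zfs snapshot tank/home@tuesday", followed by a
 * lookup of the newly created directory entry.
 */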

/*
 * Lookup entry point for the 'snapshot' directory.  Try to open the
 * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
 * Perform a mount of the associated dataset on top of the vnode.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	zfsctl_snapdir_t *sdp = dvp->v_data;
	objset_t *snap;
	char snapname[MAXNAMELEN];
	char real[MAXNAMELEN];
	char *mountpoint;
	zfs_snapentry_t *sep, search;
	struct mounta margs;
	vfs_t *vfsp;
	size_t mountpoint_len;
	avl_index_t where;
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	int err;

	/*
	 * No extended attributes allowed under .zfs
	 */
	if (flags & LOOKUP_XATTR)
		return (EINVAL);

	ASSERT(dvp->v_type == VDIR);

	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0)
		return (0);

	/*
	 * If we get a recursive call, that means we got called
	 * from the domount() code while it was trying to look up the
	 * spec (which looks like a local path for zfs).  We need to
	 * add some flag to domount() to tell it not to do this lookup.
	 */
	if (MUTEX_HELD(&sdp->sd_lock))
		return (ENOENT);

	ZFS_ENTER(zfsvfs);

	if (flags & FIGNORECASE) {
		boolean_t conflict = B_FALSE;

		err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
		    MAXNAMELEN, &conflict);
		if (err == 0) {
			nm = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
		if (realpnp)
			(void) strlcpy(realpnp->pn_buf, nm,
			    realpnp->pn_bufsize);
		if (conflict && direntflags)
			*direntflags = ED_CASE_CONFLICT;
	}

	mutex_enter(&sdp->sd_lock);
	search.se_name = (char *)nm;
	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
		*vpp = sep->se_root;
		VN_HOLD(*vpp);
		err = traverse(vpp);
		if (err) {
			VN_RELE(*vpp);
			*vpp = NULL;
		} else if (*vpp == sep->se_root) {
			/*
			 * The snapshot was unmounted behind our backs,
			 * try to remount it.
			 */
			goto domount;
		} else {
			/*
			 * VROOT was set during the traverse call.  We need
			 * to clear it since we're pretending to be part
			 * of our parent's vfs.
			 */
			(*vpp)->v_flag &= ~VROOT;
		}
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		return (err);
	}

	/*
	 * The requested snapshot is not currently mounted, look it up.
	 */
	err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
	if (err) {
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		/*
		 * Handle "ls *" or "?" gracefully by mapping EILSEQ to
		 * ENOENT, since the shell ultimately passes "*" or "?"
		 * to lookup as a literal name.
		 */
		return (err == EILSEQ ? ENOENT : err);
	}
	if (dmu_objset_open(snapname, DMU_OST_ZFS,
	    DS_MODE_USER | DS_MODE_READONLY, &snap) != 0) {
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		return (ENOENT);
	}

	sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
	(void) strcpy(sep->se_name, nm);
	*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
	avl_insert(&sdp->sd_snaps, sep, where);

	dmu_objset_close(snap);
domount:
	mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
	    strlen("/.zfs/snapshot/") + strlen(nm) + 1;
	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
	(void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
	    refstr_value(dvp->v_vfsp->vfs_mntpt), nm);

	margs.spec = snapname;
	margs.dir = mountpoint;
	margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
	margs.fstype = "zfs";
	margs.dataptr = NULL;
	margs.datalen = 0;
	margs.optptr = NULL;
	margs.optlen = 0;

	err = domount("zfs", &margs, *vpp, kcred, &vfsp);
	kmem_free(mountpoint, mountpoint_len);

	if (err == 0) {
		/*
		 * Return the mounted root rather than the covered mount
		 * point.  Takes the GFS vnode at .zfs/snapshot/<snapname>
		 * and returns the ZFS vnode mounted on top of the GFS node.
		 * This ZFS vnode is the root of the newly created vfsp.
		 */
		VFS_RELE(vfsp);
		err = traverse(vpp);
	}

	if (err == 0) {
		/*
		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
		 *
		 * This is where we lie about our v_vfsp in order to
		 * make .zfs/snapshot/<snapname> accessible over NFS
		 * without requiring manual mounts of <snapname>.
		 */
		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
		(*vpp)->v_vfsp = zfsvfs->z_vfs;
		(*vpp)->v_flag &= ~VROOT;
	}
	mutex_exit(&sdp->sd_lock);
	ZFS_EXIT(zfsvfs);

	/*
	 * If we had an error, drop our hold on the vnode and
	 * zfsctl_snapshot_inactive() will clean up.
	 */
	if (err) {
		VN_RELE(*vpp);
		*vpp = NULL;
	}
	return (err);
}

/* ARGSUSED */
static int
zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
    offset_t *offp, offset_t *nextp, void *data, int flags)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	char snapname[MAXNAMELEN];
	uint64_t id, cookie;
	boolean_t case_conflict;
	int error;

	ZFS_ENTER(zfsvfs);

	cookie = *offp;
	error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
	    &cookie, &case_conflict);
	if (error) {
		ZFS_EXIT(zfsvfs);
		if (error == ENOENT) {
			*eofp = 1;
			return (0);
		}
		return (error);
	}

	if (flags & V_RDDIR_ENTFLAGS) {
		edirent_t *eodp = dp;

		(void) strcpy(eodp->ed_name, snapname);
		eodp->ed_ino = ZFSCTL_INO_SNAP(id);
		eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
	} else {
		struct dirent64 *odp = dp;

		(void) strcpy(odp->d_name, snapname);
		odp->d_ino = ZFSCTL_INO_SNAP(id);
	}
	*nextp = cookie;

	ZFS_EXIT(zfsvfs);

	return (0);
}
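
/*
 * Illustrative note (not part of the original source): the callback above is
 * what lets a plain "ls /tank/home/.zfs/snapshot" (hypothetical path) list
 * every snapshot by name without mounting any of them.  Each call advances
 * the dmu_snapshot_list_next() cookie, which doubles as the directory
 * offset, and ENOENT from the DMU is translated into end-of-directory.
 */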

/*
 * pvp is the '.zfs' directory (zfsctl_node_t).
 * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
 *
 * This function is the callback to create a GFS vnode for '.zfs/snapshot'
 * when a lookup is performed on .zfs for "snapshot".
 */
vnode_t *
zfsctl_mknode_snapdir(vnode_t *pvp)
{
	vnode_t *vp;
	zfsctl_snapdir_t *sdp;

	vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
	    zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
	    zfsctl_snapdir_readdir_cb, NULL);
	sdp = vp->v_data;
	sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
	sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
	mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&sdp->sd_snaps, snapentry_compare,
	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
	return (vp);
}

/* ARGSUSED */
static int
zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_snapdir_t *sdp = vp->v_data;

	ZFS_ENTER(zfsvfs);
	zfsctl_common_getattr(vp, vap);
	vap->va_nodeid = gfs_file_inode(vp);
	vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
	ZFS_EXIT(zfsvfs);

	return (0);
}

/* ARGSUSED */
static void
zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	zfsctl_snapdir_t *sdp = vp->v_data;
	void *private;

	private = gfs_dir_inactive(vp);
	if (private != NULL) {
		ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
		mutex_destroy(&sdp->sd_lock);
		avl_destroy(&sdp->sd_snaps);
		kmem_free(private, sizeof (zfsctl_snapdir_t));
	}
}

static const fs_operation_def_t zfsctl_tops_snapdir[] = {
	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close } },
	{ VOPNAME_IOCTL,	{ .error = fs_inval } },
	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_snapdir_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access } },
	{ VOPNAME_RENAME,	{ .vop_rename = zfsctl_snapdir_rename } },
	{ VOPNAME_RMDIR,	{ .vop_rmdir = zfsctl_snapdir_remove } },
	{ VOPNAME_MKDIR,	{ .vop_mkdir = zfsctl_snapdir_mkdir } },
	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } },
	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_snapdir_lookup } },
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek } },
	{ VOPNAME_INACTIVE,	{ .vop_inactive = zfsctl_snapdir_inactive } },
	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid } },
	{ NULL }
};
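
/*
 * Illustrative note (not part of the original source): the table above is
 * what makes ordinary directory operations on '.zfs/snapshot' act on
 * snapshots themselves; mkdir creates a snapshot, rmdir destroys one, and
 * rename renames one, each subject to the zfs_secpolicy_*() permission
 * checks performed in the handlers above.
 */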

/*
 * pvp is the GFS vnode '.zfs/snapshot'.
 *
 * This creates a GFS node under '.zfs/snapshot' representing each
 * snapshot.  This newly created GFS node is what we mount snapshot
 * vfs_t's on top of.
 */
static vnode_t *
zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
{
	vnode_t *vp;
	zfsctl_node_t *zcp;

	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
	    zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
	zcp = vp->v_data;
	zcp->zc_id = objset;
	VFS_HOLD(vp->v_vfsp);

	return (vp);
}

static void
zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	zfsctl_snapdir_t *sdp;
	zfs_snapentry_t *sep, *next;
	vnode_t *dvp;

	VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
	sdp = dvp->v_data;

	mutex_enter(&sdp->sd_lock);

	if (vp->v_count > 1) {
		mutex_exit(&sdp->sd_lock);
		return;
	}
	ASSERT(!vn_ismntpt(vp));

	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		next = AVL_NEXT(&sdp->sd_snaps, sep);

		if (sep->se_root == vp) {
			avl_remove(&sdp->sd_snaps, sep);
			kmem_free(sep->se_name, strlen(sep->se_name) + 1);
			kmem_free(sep, sizeof (zfs_snapentry_t));
			break;
		}
		sep = next;
	}
	ASSERT(sep != NULL);

	mutex_exit(&sdp->sd_lock);
	VN_RELE(dvp);
	VFS_RELE(vp->v_vfsp);

	/*
	 * Dispose of the vnode for the snapshot mount point.
	 * This is safe to do because once this entry has been removed
	 * from the AVL tree, it can't be found again, so cannot become
	 * "active".  If we look up the same name again we will end up
	 * creating a new vnode.
	 */
	gfs_vop_inactive(vp, cr, ct);
}


/*
 * These VP's should never see the light of day.  They should always
 * be covered.
 */
static const fs_operation_def_t zfsctl_tops_snapshot[] = {
	{ VOPNAME_INACTIVE,	{ .vop_inactive = zfsctl_snapshot_inactive } },
	{ NULL, NULL }
};

int
zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	vnode_t *dvp, *vp;
	zfsctl_snapdir_t *sdp;
	zfsctl_node_t *zcp;
	zfs_snapentry_t *sep;
	int error;

	ASSERT(zfsvfs->z_ctldir != NULL);
	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
	if (error != 0)
		return (error);
	sdp = dvp->v_data;

	mutex_enter(&sdp->sd_lock);
	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		vp = sep->se_root;
		zcp = vp->v_data;
		if (zcp->zc_id == objsetid)
			break;

		sep = AVL_NEXT(&sdp->sd_snaps, sep);
	}

	if (sep != NULL) {
		VN_HOLD(vp);
		/*
		 * Return the mounted root rather than the covered mount
		 * point.  Takes the GFS vnode at
		 * .zfs/snapshot/<snapshot objsetid> and returns the ZFS
		 * vnode mounted on top of the GFS node.  This ZFS vnode is
		 * the root of the vfs for objset 'objsetid'.
		 */
		error = traverse(&vp);
		if (error == 0) {
			if (vp == sep->se_root)
				error = EINVAL;
			else
				*zfsvfsp = VTOZ(vp)->z_zfsvfs;
		}
		mutex_exit(&sdp->sd_lock);
		VN_RELE(vp);
	} else {
		error = EINVAL;
		mutex_exit(&sdp->sd_lock);
	}

	VN_RELE(dvp);

	return (error);
}
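
/*
 * Illustrative note (not part of the original source): zfsctl_lookup_objset()
 * presumably serves callers that hold only a snapshot's objset id (for
 * example, file-handle style lookups) and need the corresponding mounted
 * snapshot's zfsvfs_t.  It walks the snapshot AVL tree under sd_lock and
 * returns EINVAL if no matching snapshot is currently mounted.
 */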

/*
 * Unmount any snapshots for the given filesystem.  This is called from
 * zfs_umount() - if we have a ctldir, then go through and unmount all the
 * snapshots.
 */
int
zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	vnode_t *dvp;
	zfsctl_snapdir_t *sdp;
	zfs_snapentry_t *sep, *next;
	int error;

	ASSERT(zfsvfs->z_ctldir != NULL);
	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
	    NULL, 0, NULL, cr, NULL, NULL, NULL);
	if (error != 0)
		return (error);
	sdp = dvp->v_data;

	mutex_enter(&sdp->sd_lock);

	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		next = AVL_NEXT(&sdp->sd_snaps, sep);

		/*
		 * If this snapshot is not mounted, then it must
		 * have just been unmounted by somebody else, and
		 * will be cleaned up by zfsctl_snapdir_inactive().
		 */
		if (vn_ismntpt(sep->se_root)) {
			avl_remove(&sdp->sd_snaps, sep);
			error = zfsctl_unmount_snap(sep, fflags, cr);
			if (error) {
				avl_add(&sdp->sd_snaps, sep);
				break;
			}
		}
		sep = next;
	}

	mutex_exit(&sdp->sd_lock);
	VN_RELE(dvp);

	return (error);
}