/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * ZFS control directory (a.k.a. ".zfs")
 *
 * This directory provides a common location for all ZFS meta-objects.
 * Currently, this is only the 'snapshot' directory, but this may expand in the
 * future.  The elements are built using the GFS primitives, as the hierarchy
 * does not actually exist on disk.
 *
 * For 'snapshot', we don't want to have all snapshots always mounted, because
 * this would take up a huge amount of space in /etc/mnttab.  We have three
 * types of objects:
 *
 *	ctldir ------> snapshotdir -------> snapshot
 *	                                       |
 *	                                       |
 *	                                       V
 *	                                   mounted fs
 *
 * The 'snapshot' node contains just enough information to look up '..' and
 * act as a mountpoint for the snapshot.  Whenever we look up a specific
 * snapshot, we perform an automount of the underlying filesystem and return
 * the corresponding vnode.
 *
 * All mounts are handled automatically by the kernel, but unmounts are
 * (currently) handled from user land.  The main reason is that there is no
 * reliable way to auto-unmount the filesystem when it's "no longer in use".
 * When the user unmounts a filesystem, we call zfsctl_unmount(), which
 * unmounts any snapshots within the snapshot directory.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
 * However, vnodes within these mounted-on file systems have their v_vfsp
 * fields set to the head filesystem to make NFS happy (see
 * zfsctl_snapdir_lookup()).  We VFS_HOLD the head filesystem's vfs_t
 * so that it cannot be freed until all snapshots have been unmounted.
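 *
 * For example (the path here is hypothetical), a filesystem mounted at
 * /tank/fs exposes a snapshot named 'monday' as
 * /tank/fs/.zfs/snapshot/monday; the first lookup of 'monday' triggers the
 * automount described above, and the mount is later torn down from
 * user land.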
 */

#include <fs/fs_subr.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>
#include <sys/vfs_opreg.h>
#include <sys/gfs.h>
#include <sys/stat.h>
#include <sys/dmu.h>
#include <sys/dsl_deleg.h>
#include <sys/mount.h>
#include <sys/sunddi.h>

typedef struct zfsctl_node {
        gfs_dir_t       zc_gfs_private;
        uint64_t        zc_id;
        timestruc_t     zc_cmtime;      /* ctime and mtime, always the same */
} zfsctl_node_t;

typedef struct zfsctl_snapdir {
        zfsctl_node_t   sd_node;
        kmutex_t        sd_lock;
        avl_tree_t      sd_snaps;
} zfsctl_snapdir_t;

typedef struct {
        char            *se_name;
        vnode_t         *se_root;
        avl_node_t      se_node;
} zfs_snapentry_t;

static int
snapentry_compare(const void *a, const void *b)
{
        const zfs_snapentry_t *sa = a;
        const zfs_snapentry_t *sb = b;
        int ret = strcmp(sa->se_name, sb->se_name);

        if (ret < 0)
                return (-1);
        else if (ret > 0)
                return (1);
        else
                return (0);
}

vnodeops_t *zfsctl_ops_root;
vnodeops_t *zfsctl_ops_snapdir;
vnodeops_t *zfsctl_ops_snapshot;

static const fs_operation_def_t zfsctl_tops_root[];
static const fs_operation_def_t zfsctl_tops_snapdir[];
static const fs_operation_def_t zfsctl_tops_snapshot[];

static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);

static gfs_opsvec_t zfsctl_opsvec[] = {
        { ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
        { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
        { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
        { NULL }
};

/*
 * Root directory elements.  We have only a single static entry, 'snapshot'.
 */
static gfs_dirent_t zfsctl_root_entries[] = {
        { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
        { NULL }
};

/* include . and .. in the calculation */
#define NROOT_ENTRIES   ((sizeof (zfsctl_root_entries) / \
    sizeof (gfs_dirent_t)) + 1)


/*
 * Initialize the various GFS pieces we'll need to create and manipulate .zfs
 * directories.  This is called from the ZFS init routine, and initializes the
 * vnode ops vectors that we'll be using.
 */
void
zfsctl_init(void)
{
        VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
}

void
zfsctl_fini(void)
{
        /*
         * Remove zfsctl vnode ops
         */
        if (zfsctl_ops_root)
                vn_freevnodeops(zfsctl_ops_root);
        if (zfsctl_ops_snapdir)
                vn_freevnodeops(zfsctl_ops_snapdir);
        if (zfsctl_ops_snapshot)
                vn_freevnodeops(zfsctl_ops_snapshot);

        zfsctl_ops_root = NULL;
        zfsctl_ops_snapdir = NULL;
        zfsctl_ops_snapshot = NULL;
}

/*
 * Return the inode number associated with the 'snapshot' directory.
 */
/* ARGSUSED */
static ino64_t
zfsctl_root_inode_cb(vnode_t *vp, int index)
{
        ASSERT(index == 0);
        return (ZFSCTL_INO_SNAPDIR);
}

/*
 * Create the '.zfs' directory.  This directory is cached as part of the VFS
 * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
 * therefore checks against a vfs_count of 2 instead of 1.  This reference
 * is removed when the ctldir is destroyed in the unmount.
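 * (zfsctl_create() is invoked from the ZFS mount path; see zfs_domount()
 * in zfs_vfsops.c.)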
 */
void
zfsctl_create(zfsvfs_t *zfsvfs)
{
        vnode_t *vp, *rvp;
        zfsctl_node_t *zcp;

        ASSERT(zfsvfs->z_ctldir == NULL);

        vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
            zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
            zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
        zcp = vp->v_data;
        zcp->zc_id = ZFSCTL_INO_ROOT;

        VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
        ZFS_TIME_DECODE(&zcp->zc_cmtime, VTOZ(rvp)->z_phys->zp_crtime);
        VN_RELE(rvp);

        /*
         * We're only faking the fact that we have a root of a filesystem for
         * the sake of the GFS interfaces.  Undo the flag manipulation it did
         * for us.
         */
        vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);

        zfsvfs->z_ctldir = vp;
}

/*
 * Destroy the '.zfs' directory.  Only called when the filesystem is
 * unmounted.  There might still be more references if we were force
 * unmounted, but only new zfs_inactive() calls can occur and they don't
 * reference .zfs
 */
void
zfsctl_destroy(zfsvfs_t *zfsvfs)
{
        VN_RELE(zfsvfs->z_ctldir);
        zfsvfs->z_ctldir = NULL;
}

/*
 * Given a root znode, retrieve the associated .zfs directory.
 * Add a hold to the vnode and return it.
 */
vnode_t *
zfsctl_root(znode_t *zp)
{
        ASSERT(zfs_has_ctldir(zp));
        VN_HOLD(zp->z_zfsvfs->z_ctldir);
        return (zp->z_zfsvfs->z_ctldir);
}

/*
 * Common open routine.  Disallow any write access.
 */
/* ARGSUSED */
static int
zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
{
        if (flags & FWRITE)
                return (EACCES);

        return (0);
}

/*
 * Common close routine.  Nothing to do here.
 */
/* ARGSUSED */
static int
zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
    cred_t *cr, caller_context_t *ct)
{
        return (0);
}

/*
 * Common access routine.  Disallow writes.
 */
/* ARGSUSED */
static int
zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
        if (mode & VWRITE)
                return (EACCES);

        return (0);
}

/*
 * Common getattr function.  Fill in basic information.
 */
static void
zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
{
        zfsctl_node_t *zcp = vp->v_data;
        timestruc_t now;

        vap->va_uid = 0;
        vap->va_gid = 0;
        vap->va_rdev = 0;
        /*
         * We are a purely virtual object, so we have no
         * blocksize or allocated blocks.
         */
        vap->va_blksize = 0;
        vap->va_nblocks = 0;
        vap->va_seq = 0;
        vap->va_fsid = vp->v_vfsp->vfs_dev;
        vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
            S_IROTH | S_IXOTH;
        vap->va_type = VDIR;
        /*
         * We live in the now (for atime).
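         * A purely virtual directory has no stored access time, so we
         * simply report the current time whenever we are asked.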
         */
        gethrestime(&now);
        vap->va_atime = now;
        vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
}

/*ARGSUSED*/
static int
zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
        zfsvfs_t        *zfsvfs = vp->v_vfsp->vfs_data;
        zfsctl_node_t   *zcp = vp->v_data;
        uint64_t        object = zcp->zc_id;
        zfid_short_t    *zfid;
        int             i;

        ZFS_ENTER(zfsvfs);

        if (fidp->fid_len < SHORT_FID_LEN) {
                fidp->fid_len = SHORT_FID_LEN;
                ZFS_EXIT(zfsvfs);
                return (ENOSPC);
        }

        zfid = (zfid_short_t *)fidp;

        zfid->zf_len = SHORT_FID_LEN;

        for (i = 0; i < sizeof (zfid->zf_object); i++)
                zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

        /* .zfs znodes always have a generation number of 0 */
        for (i = 0; i < sizeof (zfid->zf_gen); i++)
                zfid->zf_gen[i] = 0;

        ZFS_EXIT(zfsvfs);
        return (0);
}

/*
 * .zfs inode namespace
 *
 * We need to generate unique inode numbers for all files and directories
 * within the .zfs pseudo-filesystem.  We use the following scheme:
 *
 *	ENTRY			ZFSCTL_INODE
 *	.zfs			1
 *	.zfs/snapshot		2
 *	.zfs/snapshot/<snap>	objectid(snap)
 */

#define ZFSCTL_INO_SNAP(id)     (id)

/*
 * Get root directory attributes.
 */
/* ARGSUSED */
static int
zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
        zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;

        ZFS_ENTER(zfsvfs);
        vap->va_nodeid = ZFSCTL_INO_ROOT;
        vap->va_nlink = vap->va_size = NROOT_ENTRIES;

        zfsctl_common_getattr(vp, vap);
        ZFS_EXIT(zfsvfs);

        return (0);
}

/*
 * Special case the handling of "..".
 */
/* ARGSUSED */
int
zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
        zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
        int err;

        /*
         * No extended attributes allowed under .zfs
         */
        if (flags & LOOKUP_XATTR)
                return (EINVAL);

        ZFS_ENTER(zfsvfs);

        if (strcmp(nm, "..") == 0) {
                err = VFS_ROOT(dvp->v_vfsp, vpp);
        } else {
                err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
                    cr, ct, direntflags, realpnp);
        }

        ZFS_EXIT(zfsvfs);

        return (err);
}

static const fs_operation_def_t zfsctl_tops_root[] = {
        { VOPNAME_OPEN,         { .vop_open = zfsctl_common_open } },
        { VOPNAME_CLOSE,        { .vop_close = zfsctl_common_close } },
        { VOPNAME_IOCTL,        { .error = fs_inval } },
        { VOPNAME_GETATTR,      { .vop_getattr = zfsctl_root_getattr } },
        { VOPNAME_ACCESS,       { .vop_access = zfsctl_common_access } },
        { VOPNAME_READDIR,      { .vop_readdir = gfs_vop_readdir } },
        { VOPNAME_LOOKUP,       { .vop_lookup = zfsctl_root_lookup } },
        { VOPNAME_SEEK,         { .vop_seek = fs_seek } },
        { VOPNAME_INACTIVE,     { .vop_inactive = gfs_vop_inactive } },
        { VOPNAME_FID,          { .vop_fid = zfsctl_common_fid } },
        { NULL }
};

static int
zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
{
        objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;

        dmu_objset_name(os, zname);
        if (strlen(zname) + 1 + strlen(name) >= len)
                return (ENAMETOOLONG);
        (void) strcat(zname, "@");
        (void) strcat(zname, name);
        return (0);
}
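
/*
 * Unmount the snapshot mounted on sep->se_root and free its snap entry.
 * Callers hold the snapdir's sd_lock; see the comment below for why the
 * final release must bypass VN_RELE().
 */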
static int
zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
{
        vnode_t *svp = sep->se_root;
        int error;

        ASSERT(vn_ismntpt(svp));

        /* this will be dropped by dounmount() */
        if ((error = vn_vfswlock(svp)) != 0)
                return (error);

        VN_HOLD(svp);
        error = dounmount(vn_mountedvfs(svp), fflags, cr);
        if (error) {
                VN_RELE(svp);
                return (error);
        }
        VFS_RELE(svp->v_vfsp);
        /*
         * We can't use VN_RELE(), as that will try to invoke
         * zfsctl_snapdir_inactive(), which would cause us to destroy
         * the sd_lock mutex held by our caller.
         */
        ASSERT(svp->v_count == 1);
        gfs_vop_inactive(svp, cr, NULL);

        kmem_free(sep->se_name, strlen(sep->se_name) + 1);
        kmem_free(sep, sizeof (zfs_snapentry_t));

        return (0);
}

static void
zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
{
        avl_index_t where;
        vfs_t *vfsp;
        refstr_t *pathref;
        char newpath[MAXNAMELEN];
        char *tail;

        ASSERT(MUTEX_HELD(&sdp->sd_lock));
        ASSERT(sep != NULL);

        vfsp = vn_mountedvfs(sep->se_root);
        ASSERT(vfsp != NULL);

        vfs_lock_wait(vfsp);

        /*
         * Change the name in the AVL tree.
         */
        avl_remove(&sdp->sd_snaps, sep);
        kmem_free(sep->se_name, strlen(sep->se_name) + 1);
        sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
        (void) strcpy(sep->se_name, nm);
        VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
        avl_insert(&sdp->sd_snaps, sep, where);

        /*
         * Change the current mountpoint info:
         *	- update the tail of the mntpoint path
         *	- update the tail of the resource path
         */
        pathref = vfs_getmntpoint(vfsp);
        (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
        VERIFY((tail = strrchr(newpath, '/')) != NULL);
        *(tail+1) = '\0';
        ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
        (void) strcat(newpath, nm);
        refstr_rele(pathref);
        vfs_setmntpoint(vfsp, newpath);

        pathref = vfs_getresource(vfsp);
        (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
        VERIFY((tail = strrchr(newpath, '@')) != NULL);
        *(tail+1) = '\0';
        ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
        (void) strcat(newpath, nm);
        refstr_rele(pathref);
        vfs_setresource(vfsp, newpath);

        vfs_unlock(vfsp);
}

/*ARGSUSED*/
static int
zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
        zfsctl_snapdir_t *sdp = sdvp->v_data;
        zfs_snapentry_t search, *sep;
        zfsvfs_t *zfsvfs;
        avl_index_t where;
        char from[MAXNAMELEN], to[MAXNAMELEN];
        char real[MAXNAMELEN];
        int err;

        zfsvfs = sdvp->v_vfsp->vfs_data;
        ZFS_ENTER(zfsvfs);

        if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
                err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
                    MAXNAMELEN, NULL);
                if (err == 0) {
                        snm = real;
                } else if (err != ENOTSUP) {
                        ZFS_EXIT(zfsvfs);
                        return (err);
                }
        }

        ZFS_EXIT(zfsvfs);

        err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
        if (!err)
                err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
        if (!err)
                err = zfs_secpolicy_rename_perms(from, to, cr);
        if (err)
                return (err);

        /*
         * Cannot move snapshots out of the snapdir.
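         * The underlying rename is dataset@old -> dataset@new, so a
         * snapshot's name can only change within its own snapshot
         * directory.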
564 */ 565 if (sdvp != tdvp) 566 return (EINVAL); 567 568 if (strcmp(snm, tnm) == 0) 569 return (0); 570 571 mutex_enter(&sdp->sd_lock); 572 573 search.se_name = (char *)snm; 574 if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) { 575 mutex_exit(&sdp->sd_lock); 576 return (ENOENT); 577 } 578 579 err = dmu_objset_rename(from, to, B_FALSE); 580 if (err == 0) 581 zfsctl_rename_snap(sdp, sep, tnm); 582 583 mutex_exit(&sdp->sd_lock); 584 585 return (err); 586 } 587 588 /* ARGSUSED */ 589 static int 590 zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 591 caller_context_t *ct, int flags) 592 { 593 zfsctl_snapdir_t *sdp = dvp->v_data; 594 zfs_snapentry_t *sep; 595 zfs_snapentry_t search; 596 zfsvfs_t *zfsvfs; 597 char snapname[MAXNAMELEN]; 598 char real[MAXNAMELEN]; 599 int err; 600 601 zfsvfs = dvp->v_vfsp->vfs_data; 602 ZFS_ENTER(zfsvfs); 603 604 if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 605 606 err = dmu_snapshot_realname(zfsvfs->z_os, name, real, 607 MAXNAMELEN, NULL); 608 if (err == 0) { 609 name = real; 610 } else if (err != ENOTSUP) { 611 ZFS_EXIT(zfsvfs); 612 return (err); 613 } 614 } 615 616 ZFS_EXIT(zfsvfs); 617 618 err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname); 619 if (!err) 620 err = zfs_secpolicy_destroy_perms(snapname, cr); 621 if (err) 622 return (err); 623 624 mutex_enter(&sdp->sd_lock); 625 626 search.se_name = name; 627 sep = avl_find(&sdp->sd_snaps, &search, NULL); 628 if (sep) { 629 avl_remove(&sdp->sd_snaps, sep); 630 err = zfsctl_unmount_snap(sep, MS_FORCE, cr); 631 if (err) 632 avl_add(&sdp->sd_snaps, sep); 633 else 634 err = dmu_objset_destroy(snapname); 635 } else { 636 err = ENOENT; 637 } 638 639 mutex_exit(&sdp->sd_lock); 640 641 return (err); 642 } 643 644 /* 645 * This creates a snapshot under '.zfs/snapshot'. 646 */ 647 /* ARGSUSED */ 648 static int 649 zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, 650 cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp) 651 { 652 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 653 char name[MAXNAMELEN]; 654 int err; 655 static enum symfollow follow = NO_FOLLOW; 656 static enum uio_seg seg = UIO_SYSSPACE; 657 658 dmu_objset_name(zfsvfs->z_os, name); 659 660 *vpp = NULL; 661 662 err = zfs_secpolicy_snapshot_perms(name, cr); 663 if (err) 664 return (err); 665 666 if (err == 0) { 667 err = dmu_objset_snapshot(name, dirname, B_FALSE); 668 if (err) 669 return (err); 670 err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp); 671 } 672 673 return (err); 674 } 675 676 /* 677 * Lookup entry point for the 'snapshot' directory. Try to open the 678 * snapshot if it exist, creating the pseudo filesystem vnode as necessary. 679 * Perform a mount of the associated dataset on top of the vnode. 
 */
/* ARGSUSED */
static int
zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
        zfsctl_snapdir_t *sdp = dvp->v_data;
        objset_t *snap;
        char snapname[MAXNAMELEN];
        char real[MAXNAMELEN];
        char *mountpoint;
        zfs_snapentry_t *sep, search;
        struct mounta margs;
        vfs_t *vfsp;
        size_t mountpoint_len;
        avl_index_t where;
        zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
        int err;

        /*
         * No extended attributes allowed under .zfs
         */
        if (flags & LOOKUP_XATTR)
                return (EINVAL);

        ASSERT(dvp->v_type == VDIR);

        if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0)
                return (0);

        /*
         * If we get a recursive call, that means we got called
         * from the domount() code while it was trying to look up the
         * spec (which looks like a local path for zfs).  We need to
         * add some flag to domount() to tell it not to do this lookup.
         */
        if (MUTEX_HELD(&sdp->sd_lock))
                return (ENOENT);

        ZFS_ENTER(zfsvfs);

        if (flags & FIGNORECASE) {
                boolean_t conflict = B_FALSE;

                err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
                    MAXNAMELEN, &conflict);
                if (err == 0) {
                        nm = real;
                } else if (err != ENOTSUP) {
                        ZFS_EXIT(zfsvfs);
                        return (err);
                }
                if (realpnp)
                        (void) strlcpy(realpnp->pn_buf, nm,
                            realpnp->pn_bufsize);
                if (conflict && direntflags)
                        *direntflags = ED_CASE_CONFLICT;
        }

        mutex_enter(&sdp->sd_lock);
        search.se_name = (char *)nm;
        if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
                *vpp = sep->se_root;
                VN_HOLD(*vpp);
                err = traverse(vpp);
                if (err) {
                        VN_RELE(*vpp);
                        *vpp = NULL;
                } else if (*vpp == sep->se_root) {
                        /*
                         * The snapshot was unmounted behind our backs,
                         * try to remount it.
                         */
                        goto domount;
                } else {
                        /*
                         * VROOT was set during the traverse call.  We need
                         * to clear it since we're pretending to be part
                         * of our parent's vfs.
                         */
                        (*vpp)->v_flag &= ~VROOT;
                }
                mutex_exit(&sdp->sd_lock);
                ZFS_EXIT(zfsvfs);
                return (err);
        }

        /*
         * The requested snapshot is not currently mounted, look it up.
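         * Build the full dataset@snapshot name, verify that the snapshot
         * exists by opening its objset, and create the snap entry and its
         * mount-point vnode before falling through to the mount code below.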
         */
        err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
        if (err) {
                mutex_exit(&sdp->sd_lock);
                ZFS_EXIT(zfsvfs);
                return (err);
        }
        if (dmu_objset_open(snapname, DMU_OST_ZFS,
            DS_MODE_STANDARD | DS_MODE_READONLY, &snap) != 0) {
                mutex_exit(&sdp->sd_lock);
                ZFS_EXIT(zfsvfs);
                return (ENOENT);
        }

        sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
        sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
        (void) strcpy(sep->se_name, nm);
        *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
        avl_insert(&sdp->sd_snaps, sep, where);

        dmu_objset_close(snap);
domount:
        mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
            strlen("/.zfs/snapshot/") + strlen(nm) + 1;
        mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
        (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
            refstr_value(dvp->v_vfsp->vfs_mntpt), nm);

        margs.spec = snapname;
        margs.dir = mountpoint;
        margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
        margs.fstype = "zfs";
        margs.dataptr = NULL;
        margs.datalen = 0;
        margs.optptr = NULL;
        margs.optlen = 0;

        err = domount("zfs", &margs, *vpp, kcred, &vfsp);
        kmem_free(mountpoint, mountpoint_len);

        if (err == 0) {
                /*
                 * Return the mounted root rather than the covered mount
                 * point.  Takes the GFS vnode at .zfs/snapshot/<snapname>
                 * and returns the ZFS vnode mounted on top of the GFS node.
                 * This ZFS vnode is the root of the newly created vfsp.
                 */
                VFS_RELE(vfsp);
                err = traverse(vpp);
        }

        if (err == 0) {
                /*
                 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
                 *
                 * This is where we lie about our v_vfsp in order to
                 * make .zfs/snapshot/<snapname> accessible over NFS
                 * without requiring manual mounts of <snapname>.
                 */
                ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
                VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
                (*vpp)->v_vfsp = zfsvfs->z_vfs;
                (*vpp)->v_flag &= ~VROOT;
        }
        mutex_exit(&sdp->sd_lock);
        ZFS_EXIT(zfsvfs);

        /*
         * If we had an error, drop our hold on the vnode and
         * zfsctl_snapshot_inactive() will clean up.
         */
        if (err) {
                VN_RELE(*vpp);
                *vpp = NULL;
        }
        return (err);
}

/* ARGSUSED */
static int
zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
    offset_t *offp, offset_t *nextp, void *data, int flags)
{
        zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
        char snapname[MAXNAMELEN];
        uint64_t id, cookie;
        boolean_t case_conflict;
        int error;

        ZFS_ENTER(zfsvfs);

        cookie = *offp;
        error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
            &cookie, &case_conflict);
        if (error) {
                ZFS_EXIT(zfsvfs);
                if (error == ENOENT) {
                        *eofp = 1;
                        return (0);
                }
                return (error);
        }

        if (flags & V_RDDIR_ENTFLAGS) {
                edirent_t *eodp = dp;

                (void) strcpy(eodp->ed_name, snapname);
                eodp->ed_ino = ZFSCTL_INO_SNAP(id);
                eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
        } else {
                struct dirent64 *odp = dp;

                (void) strcpy(odp->d_name, snapname);
                odp->d_ino = ZFSCTL_INO_SNAP(id);
        }
        *nextp = cookie;

        ZFS_EXIT(zfsvfs);

        return (0);
}

/*
 * pvp is the '.zfs' directory (zfsctl_node_t).
 * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
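 * (zfsctl_mknode_snapdir() is the constructor named for the 'snapshot'
 * entry in zfsctl_root_entries[]; the GFS_CACHE_VNODE flag asks GFS to
 * cache the resulting vnode rather than recreate it on every lookup.)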
895 * 896 * This function is the callback to create a GFS vnode for '.zfs/snapshot' 897 * when a lookup is performed on .zfs for "snapshot". 898 */ 899 vnode_t * 900 zfsctl_mknode_snapdir(vnode_t *pvp) 901 { 902 vnode_t *vp; 903 zfsctl_snapdir_t *sdp; 904 905 vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp, 906 zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN, 907 zfsctl_snapdir_readdir_cb, NULL); 908 sdp = vp->v_data; 909 sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR; 910 sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime; 911 mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL); 912 avl_create(&sdp->sd_snaps, snapentry_compare, 913 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node)); 914 return (vp); 915 } 916 917 /* ARGSUSED */ 918 static int 919 zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 920 caller_context_t *ct) 921 { 922 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 923 zfsctl_snapdir_t *sdp = vp->v_data; 924 925 ZFS_ENTER(zfsvfs); 926 zfsctl_common_getattr(vp, vap); 927 vap->va_nodeid = gfs_file_inode(vp); 928 vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2; 929 ZFS_EXIT(zfsvfs); 930 931 return (0); 932 } 933 934 /* ARGSUSED */ 935 static void 936 zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 937 { 938 zfsctl_snapdir_t *sdp = vp->v_data; 939 void *private; 940 941 private = gfs_dir_inactive(vp); 942 if (private != NULL) { 943 ASSERT(avl_numnodes(&sdp->sd_snaps) == 0); 944 mutex_destroy(&sdp->sd_lock); 945 avl_destroy(&sdp->sd_snaps); 946 kmem_free(private, sizeof (zfsctl_snapdir_t)); 947 } 948 } 949 950 static const fs_operation_def_t zfsctl_tops_snapdir[] = { 951 { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } }, 952 { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } }, 953 { VOPNAME_IOCTL, { .error = fs_inval } }, 954 { VOPNAME_GETATTR, { .vop_getattr = zfsctl_snapdir_getattr } }, 955 { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } }, 956 { VOPNAME_RENAME, { .vop_rename = zfsctl_snapdir_rename } }, 957 { VOPNAME_RMDIR, { .vop_rmdir = zfsctl_snapdir_remove } }, 958 { VOPNAME_MKDIR, { .vop_mkdir = zfsctl_snapdir_mkdir } }, 959 { VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } }, 960 { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_snapdir_lookup } }, 961 { VOPNAME_SEEK, { .vop_seek = fs_seek } }, 962 { VOPNAME_INACTIVE, { .vop_inactive = zfsctl_snapdir_inactive } }, 963 { VOPNAME_FID, { .vop_fid = zfsctl_common_fid } }, 964 { NULL } 965 }; 966 967 /* 968 * pvp is the GFS vnode '.zfs/snapshot'. 969 * 970 * This creates a GFS node under '.zfs/snapshot' representing each 971 * snapshot. This newly created GFS node is what we mount snapshot 972 * vfs_t's ontop of. 
 */
static vnode_t *
zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
{
        vnode_t *vp;
        zfsctl_node_t *zcp;

        vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
            zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
        zcp = vp->v_data;
        zcp->zc_id = objset;
        VFS_HOLD(vp->v_vfsp);

        return (vp);
}

static void
zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
        zfsctl_snapdir_t *sdp;
        zfs_snapentry_t *sep, *next;
        vnode_t *dvp;

        VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
        sdp = dvp->v_data;

        mutex_enter(&sdp->sd_lock);

        if (vp->v_count > 1) {
                mutex_exit(&sdp->sd_lock);
                return;
        }
        ASSERT(!vn_ismntpt(vp));

        sep = avl_first(&sdp->sd_snaps);
        while (sep != NULL) {
                next = AVL_NEXT(&sdp->sd_snaps, sep);

                if (sep->se_root == vp) {
                        avl_remove(&sdp->sd_snaps, sep);
                        kmem_free(sep->se_name, strlen(sep->se_name) + 1);
                        kmem_free(sep, sizeof (zfs_snapentry_t));
                        break;
                }
                sep = next;
        }
        ASSERT(sep != NULL);

        mutex_exit(&sdp->sd_lock);
        VN_RELE(dvp);
        VFS_RELE(vp->v_vfsp);

        /*
         * Dispose of the vnode for the snapshot mount point.
         * This is safe to do because once this entry has been removed
         * from the AVL tree, it can't be found again, so cannot become
         * "active".  If we lookup the same name again we will end up
         * creating a new vnode.
         */
        gfs_vop_inactive(vp, cr, ct);
}


/*
 * These vnodes should never see the light of day.  They should always
 * be covered.
 */
static const fs_operation_def_t zfsctl_tops_snapshot[] = {
        { VOPNAME_INACTIVE,     { .vop_inactive = zfsctl_snapshot_inactive } },
        { NULL }
};

int
zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
{
        zfsvfs_t *zfsvfs = vfsp->vfs_data;
        vnode_t *dvp, *vp;
        zfsctl_snapdir_t *sdp;
        zfsctl_node_t *zcp;
        zfs_snapentry_t *sep;
        int error;

        ASSERT(zfsvfs->z_ctldir != NULL);
        error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
            NULL, 0, NULL, kcred, NULL, NULL, NULL);
        if (error != 0)
                return (error);
        sdp = dvp->v_data;

        mutex_enter(&sdp->sd_lock);
        sep = avl_first(&sdp->sd_snaps);
        while (sep != NULL) {
                vp = sep->se_root;
                zcp = vp->v_data;
                if (zcp->zc_id == objsetid)
                        break;

                sep = AVL_NEXT(&sdp->sd_snaps, sep);
        }

        if (sep != NULL) {
                VN_HOLD(vp);
                /*
                 * Return the mounted root rather than the covered mount
                 * point.  Takes the GFS vnode at
                 * .zfs/snapshot/<snapshot objsetid> and returns the ZFS
                 * vnode mounted on top of the GFS node.  This ZFS vnode is
                 * the root of the vfs for objset 'objsetid'.
                 */
                error = traverse(&vp);
                if (error == 0) {
                        if (vp == sep->se_root)
                                error = EINVAL;
                        else
                                *zfsvfsp = VTOZ(vp)->z_zfsvfs;
                }
                mutex_exit(&sdp->sd_lock);
                VN_RELE(vp);
        } else {
                error = EINVAL;
                mutex_exit(&sdp->sd_lock);
        }

        VN_RELE(dvp);

        return (error);
}

/*
 * Unmount any snapshots for the given filesystem.  This is called from
 * zfs_umount() - if we have a ctldir, then go through and unmount all the
 * snapshots.
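 * An error from any individual unmount stops the walk, re-inserts that
 * snap entry, and is returned to the caller.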
 */
int
zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
{
        zfsvfs_t *zfsvfs = vfsp->vfs_data;
        vnode_t *dvp;
        zfsctl_snapdir_t *sdp;
        zfs_snapentry_t *sep, *next;
        int error;

        ASSERT(zfsvfs->z_ctldir != NULL);
        error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
            NULL, 0, NULL, cr, NULL, NULL, NULL);
        if (error != 0)
                return (error);
        sdp = dvp->v_data;

        mutex_enter(&sdp->sd_lock);

        sep = avl_first(&sdp->sd_snaps);
        while (sep != NULL) {
                next = AVL_NEXT(&sdp->sd_snaps, sep);

                /*
                 * If this snapshot is not mounted, then it must
                 * have just been unmounted by somebody else, and
                 * will be cleaned up by zfsctl_snapdir_inactive().
                 */
                if (vn_ismntpt(sep->se_root)) {
                        avl_remove(&sdp->sd_snaps, sep);
                        error = zfsctl_unmount_snap(sep, fflags, cr);
                        if (error) {
                                avl_add(&sdp->sd_snaps, sep);
                                break;
                        }
                }
                sep = next;
        }

        mutex_exit(&sdp->sd_lock);
        VN_RELE(dvp);

        return (error);
}