1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * 24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 25 * Copyright (C) 2011 Lawrence Livermore National Security, LLC. 26 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 27 * LLNL-CODE-403049. 28 * Rewritten for Linux by: 29 * Rohan Puri <rohan.puri15@gmail.com> 30 * Brian Behlendorf <behlendorf1@llnl.gov> 31 * Copyright (c) 2013 by Delphix. All rights reserved. 32 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. 33 * Copyright (c) 2018 George Melikov. All Rights Reserved. 34 * Copyright (c) 2019 Datto, Inc. All rights reserved. 35 * Copyright (c) 2020 The MathWorks, Inc. All rights reserved. 36 */ 37 38 /* 39 * ZFS control directory (a.k.a. ".zfs") 40 * 41 * This directory provides a common location for all ZFS meta-objects. 42 * Currently, this is only the 'snapshot' and 'shares' directory, but this may 43 * expand in the future. The elements are built dynamically, as the hierarchy 44 * does not actually exist on disk. 
 *
 * For 'snapshot', we don't want to have all snapshots always mounted, because
 * this would take up a huge amount of space in /etc/mnttab. We have three
 * types of objects:
 *
 *	ctldir ------> snapshotdir -------> snapshot
 *                                             |
 *                                             |
 *                                             V
 *                                         mounted fs
 *
 * The 'snapshot' node contains just enough information to lookup '..' and act
 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
 * perform an automount of the underlying filesystem and return the
 * corresponding inode.
 *
 * All mounts are handled automatically by a user mode helper which invokes
 * the mount procedure. Unmounts are handled by allowing the mount
 * point to expire so the kernel may automatically unmount it.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
 * zfsvfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
 * (ie: snapshots) are complete ZFS filesystems and have their own unique
 * zfsvfs_t. However, the fsid reported by these mounts will be the same
 * as that used by the parent zfsvfs_t to make NFS happy.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/pathname.h>
#include <sys/vfs.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/stat.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_deleg.h>
#include <sys/zpl.h>
#include <sys/mntent.h>
#include "zfs_namecheck.h"

/*
 * Two AVL trees are maintained which contain all currently automounted
 * snapshots.
 * Every automounted snapshot maps to a single zfs_snapentry_t
 * entry which MUST:
 *
 *   - be attached to both trees, and
 *   - be unique, no duplicate entries are allowed.
 *
 * The zfs_snapshots_by_name tree is indexed by the full dataset name
 * while the zfs_snapshots_by_objsetid tree is indexed by the unique
 * objsetid. This allows for fast lookups either by name or objsetid.
 *
 * zfs_snapshot_lock guards both trees: the add/remove helpers below assert
 * that it is held as writer, the find helpers that it is held at all.
 */
static avl_tree_t zfs_snapshots_by_name;
static avl_tree_t zfs_snapshots_by_objsetid;
static krwlock_t zfs_snapshot_lock;

/*
 * Control Directory Tunables (.zfs)
 *
 * zfs_expire_snapshot: delay, in seconds, used by snapentry_expire() when
 *	it reschedules the automatic unmount; when it is <= 0 the expiry
 *	task drops its reference without attempting the unmount.
 * zfs_admin_snapshot: when zero, rename/remove/mkdir under '.zfs/snapshot'
 *	fail with EACCES.
 * zfs_snapshot_no_setuid: not referenced in the part of the file reviewed
 *	here; presumably controls setuid handling for automounted
 *	snapshots -- TODO confirm against its user.
 */
int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
static int zfs_admin_snapshot = 0;
static int zfs_snapshot_no_setuid = 0;

/*
 * Bookkeeping for one automounted snapshot. se_name and se_path are
 * private copies made by zfsctl_snapshot_alloc() and released by
 * zfsctl_snapshot_free().
 */
typedef struct {
	char		*se_name;	/* full snapshot name */
	char		*se_path;	/* full mount path */
	spa_t		*se_spa;	/* pool spa */
	uint64_t	se_objsetid;	/* snapshot objset id */
	struct dentry	*se_root_dentry; /* snapshot root dentry */
	krwlock_t	se_taskqid_lock; /* scheduled unmount taskqid lock */
	taskqid_t	se_taskqid;	/* scheduled unmount taskqid */
	avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */
	avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */
	zfs_refcount_t	se_refcount;	/* reference count */
} zfs_snapentry_t;

/* Forward declaration: snapentry_expire() reschedules itself through this. */
static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);

/*
 * Allocate a new zfs_snapentry_t being careful to make a copy of the
 * snapshot name and provided mount point. No reference is taken.
135 */ 136 static zfs_snapentry_t * 137 zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa, 138 uint64_t objsetid, struct dentry *root_dentry) 139 { 140 zfs_snapentry_t *se; 141 142 se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); 143 144 se->se_name = kmem_strdup(full_name); 145 se->se_path = kmem_strdup(full_path); 146 se->se_spa = spa; 147 se->se_objsetid = objsetid; 148 se->se_root_dentry = root_dentry; 149 se->se_taskqid = TASKQID_INVALID; 150 rw_init(&se->se_taskqid_lock, NULL, RW_DEFAULT, NULL); 151 152 zfs_refcount_create(&se->se_refcount); 153 154 return (se); 155 } 156 157 /* 158 * Free a zfs_snapentry_t the caller must ensure there are no active 159 * references. 160 */ 161 static void 162 zfsctl_snapshot_free(zfs_snapentry_t *se) 163 { 164 zfs_refcount_destroy(&se->se_refcount); 165 kmem_strfree(se->se_name); 166 kmem_strfree(se->se_path); 167 rw_destroy(&se->se_taskqid_lock); 168 169 kmem_free(se, sizeof (zfs_snapentry_t)); 170 } 171 172 /* 173 * Hold a reference on the zfs_snapentry_t. 174 */ 175 static void 176 zfsctl_snapshot_hold(zfs_snapentry_t *se) 177 { 178 zfs_refcount_add(&se->se_refcount, NULL); 179 } 180 181 /* 182 * Release a reference on the zfs_snapentry_t. When the number of 183 * references drops to zero the structure will be freed. 184 */ 185 static void 186 zfsctl_snapshot_rele(zfs_snapentry_t *se) 187 { 188 if (zfs_refcount_remove(&se->se_refcount, NULL) == 0) 189 zfsctl_snapshot_free(se); 190 } 191 192 /* 193 * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and 194 * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part 195 * of the trees a reference is held. 
196 */ 197 static void 198 zfsctl_snapshot_add(zfs_snapentry_t *se) 199 { 200 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 201 zfsctl_snapshot_hold(se); 202 avl_add(&zfs_snapshots_by_name, se); 203 avl_add(&zfs_snapshots_by_objsetid, se); 204 } 205 206 /* 207 * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and 208 * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped, 209 * this can result in the structure being freed if that was the last 210 * remaining reference. 211 */ 212 static void 213 zfsctl_snapshot_remove(zfs_snapentry_t *se) 214 { 215 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 216 avl_remove(&zfs_snapshots_by_name, se); 217 avl_remove(&zfs_snapshots_by_objsetid, se); 218 zfsctl_snapshot_rele(se); 219 } 220 221 /* 222 * Snapshot name comparison function for the zfs_snapshots_by_name. 223 */ 224 static int 225 snapentry_compare_by_name(const void *a, const void *b) 226 { 227 const zfs_snapentry_t *se_a = a; 228 const zfs_snapentry_t *se_b = b; 229 int ret; 230 231 ret = strcmp(se_a->se_name, se_b->se_name); 232 233 if (ret < 0) 234 return (-1); 235 else if (ret > 0) 236 return (1); 237 else 238 return (0); 239 } 240 241 /* 242 * Snapshot name comparison function for the zfs_snapshots_by_objsetid. 243 */ 244 static int 245 snapentry_compare_by_objsetid(const void *a, const void *b) 246 { 247 const zfs_snapentry_t *se_a = a; 248 const zfs_snapentry_t *se_b = b; 249 250 if (se_a->se_spa != se_b->se_spa) 251 return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1); 252 253 if (se_a->se_objsetid < se_b->se_objsetid) 254 return (-1); 255 else if (se_a->se_objsetid > se_b->se_objsetid) 256 return (1); 257 else 258 return (0); 259 } 260 261 /* 262 * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname 263 * is found a pointer to the zfs_snapentry_t is returned and a reference 264 * taken on the structure. The caller is responsible for dropping the 265 * reference with zfsctl_snapshot_rele(). 
If the snapname is not found 266 * NULL will be returned. 267 */ 268 static zfs_snapentry_t * 269 zfsctl_snapshot_find_by_name(const char *snapname) 270 { 271 zfs_snapentry_t *se, search; 272 273 ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); 274 275 search.se_name = (char *)snapname; 276 se = avl_find(&zfs_snapshots_by_name, &search, NULL); 277 if (se) 278 zfsctl_snapshot_hold(se); 279 280 return (se); 281 } 282 283 /* 284 * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id 285 * rather than the snapname. In all other respects it behaves the same 286 * as zfsctl_snapshot_find_by_name(). 287 */ 288 static zfs_snapentry_t * 289 zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) 290 { 291 zfs_snapentry_t *se, search; 292 293 ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); 294 295 search.se_spa = spa; 296 search.se_objsetid = objsetid; 297 se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); 298 if (se) 299 zfsctl_snapshot_hold(se); 300 301 return (se); 302 } 303 304 /* 305 * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is 306 * removed, renamed, and added back to the new correct location in the tree. 307 */ 308 static int 309 zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname) 310 { 311 zfs_snapentry_t *se; 312 313 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 314 315 se = zfsctl_snapshot_find_by_name(old_snapname); 316 if (se == NULL) 317 return (SET_ERROR(ENOENT)); 318 319 zfsctl_snapshot_remove(se); 320 kmem_strfree(se->se_name); 321 se->se_name = kmem_strdup(new_snapname); 322 zfsctl_snapshot_add(se); 323 zfsctl_snapshot_rele(se); 324 325 return (0); 326 } 327 328 /* 329 * Delayed task responsible for unmounting an expired automounted snapshot. 
330 */ 331 static void 332 snapentry_expire(void *data) 333 { 334 zfs_snapentry_t *se = (zfs_snapentry_t *)data; 335 spa_t *spa = se->se_spa; 336 uint64_t objsetid = se->se_objsetid; 337 338 if (zfs_expire_snapshot <= 0) { 339 zfsctl_snapshot_rele(se); 340 return; 341 } 342 343 rw_enter(&se->se_taskqid_lock, RW_WRITER); 344 se->se_taskqid = TASKQID_INVALID; 345 rw_exit(&se->se_taskqid_lock); 346 (void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE); 347 zfsctl_snapshot_rele(se); 348 349 /* 350 * Reschedule the unmount if the zfs_snapentry_t wasn't removed. 351 * This can occur when the snapshot is busy. 352 */ 353 rw_enter(&zfs_snapshot_lock, RW_READER); 354 if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { 355 zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); 356 zfsctl_snapshot_rele(se); 357 } 358 rw_exit(&zfs_snapshot_lock); 359 } 360 361 /* 362 * Cancel an automatic unmount of a snapname. This callback is responsible 363 * for dropping the reference on the zfs_snapentry_t which was taken when 364 * during dispatch. 365 */ 366 static void 367 zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se) 368 { 369 int err = 0; 370 rw_enter(&se->se_taskqid_lock, RW_WRITER); 371 err = taskq_cancel_id(system_delay_taskq, se->se_taskqid); 372 /* 373 * if we get ENOENT, the taskq couldn't be found to be 374 * canceled, so we can just mark it as invalid because 375 * it's already gone. If we got EBUSY, then we already 376 * blocked until it was gone _anyway_, so we don't care. 377 */ 378 se->se_taskqid = TASKQID_INVALID; 379 rw_exit(&se->se_taskqid_lock); 380 if (err == 0) { 381 zfsctl_snapshot_rele(se); 382 } 383 } 384 385 /* 386 * Dispatch the unmount task for delayed handling with a hold protecting it. 
387 */ 388 static void 389 zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay) 390 { 391 392 if (delay <= 0) 393 return; 394 395 zfsctl_snapshot_hold(se); 396 rw_enter(&se->se_taskqid_lock, RW_WRITER); 397 /* 398 * If this condition happens, we managed to: 399 * - dispatch once 400 * - want to dispatch _again_ before it returned 401 * 402 * So let's just return - if that task fails at unmounting, 403 * we'll eventually dispatch again, and if it succeeds, 404 * no problem. 405 */ 406 if (se->se_taskqid != TASKQID_INVALID) { 407 rw_exit(&se->se_taskqid_lock); 408 zfsctl_snapshot_rele(se); 409 return; 410 } 411 se->se_taskqid = taskq_dispatch_delay(system_delay_taskq, 412 snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ); 413 rw_exit(&se->se_taskqid_lock); 414 } 415 416 /* 417 * Schedule an automatic unmount of objset id to occur in delay seconds from 418 * now. Any previous delayed unmount will be cancelled in favor of the 419 * updated deadline. A reference is taken by zfsctl_snapshot_find_by_name() 420 * and held until the outstanding task is handled or cancelled. 421 */ 422 int 423 zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay) 424 { 425 zfs_snapentry_t *se; 426 int error = ENOENT; 427 428 rw_enter(&zfs_snapshot_lock, RW_READER); 429 if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { 430 zfsctl_snapshot_unmount_cancel(se); 431 zfsctl_snapshot_unmount_delay_impl(se, delay); 432 zfsctl_snapshot_rele(se); 433 error = 0; 434 } 435 rw_exit(&zfs_snapshot_lock); 436 437 return (error); 438 } 439 440 /* 441 * Check if snapname is currently mounted. Returned non-zero when mounted 442 * and zero when unmounted. 
443 */ 444 static boolean_t 445 zfsctl_snapshot_ismounted(const char *snapname) 446 { 447 zfs_snapentry_t *se; 448 boolean_t ismounted = B_FALSE; 449 450 rw_enter(&zfs_snapshot_lock, RW_READER); 451 if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) { 452 zfsctl_snapshot_rele(se); 453 ismounted = B_TRUE; 454 } 455 rw_exit(&zfs_snapshot_lock); 456 457 return (ismounted); 458 } 459 460 /* 461 * Check if the given inode is a part of the virtual .zfs directory. 462 */ 463 boolean_t 464 zfsctl_is_node(struct inode *ip) 465 { 466 return (ITOZ(ip)->z_is_ctldir); 467 } 468 469 /* 470 * Check if the given inode is a .zfs/snapshots/snapname directory. 471 */ 472 boolean_t 473 zfsctl_is_snapdir(struct inode *ip) 474 { 475 return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); 476 } 477 478 /* 479 * Allocate a new inode with the passed id and ops. 480 */ 481 static struct inode * 482 zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, 483 const struct file_operations *fops, const struct inode_operations *ops, 484 uint64_t creation) 485 { 486 struct inode *ip; 487 znode_t *zp; 488 inode_timespec_t now = {.tv_sec = creation}; 489 490 ip = new_inode(zfsvfs->z_sb); 491 if (ip == NULL) 492 return (NULL); 493 494 if (!creation) 495 now = current_time(ip); 496 zp = ITOZ(ip); 497 ASSERT0P(zp->z_dirlocks); 498 ASSERT0P(zp->z_acl_cached); 499 ASSERT0P(zp->z_xattr_cached); 500 zp->z_id = id; 501 zp->z_unlinked = B_FALSE; 502 zp->z_atime_dirty = B_FALSE; 503 zp->z_zn_prefetch = B_FALSE; 504 zp->z_is_sa = B_FALSE; 505 zp->z_is_ctldir = B_TRUE; 506 zp->z_sa_hdl = NULL; 507 zp->z_blksz = 0; 508 zp->z_seq = 0; 509 zp->z_mapcnt = 0; 510 zp->z_size = 0; 511 zp->z_pflags = 0; 512 zp->z_mode = 0; 513 zp->z_sync_cnt = 0; 514 ip->i_generation = 0; 515 ip->i_ino = id; 516 ip->i_mode = (S_IFDIR | S_IRWXUGO); 517 ip->i_uid = SUID_TO_KUID(0); 518 ip->i_gid = SGID_TO_KGID(0); 519 ip->i_blkbits = SPA_MINBLOCKSHIFT; 520 zpl_inode_set_atime_to_ts(ip, now); 521 
zpl_inode_set_mtime_to_ts(ip, now); 522 zpl_inode_set_ctime_to_ts(ip, now); 523 ip->i_fop = fops; 524 ip->i_op = ops; 525 #if defined(IOP_XATTR) 526 ip->i_opflags &= ~IOP_XATTR; 527 #endif 528 529 if (insert_inode_locked(ip)) { 530 unlock_new_inode(ip); 531 iput(ip); 532 return (NULL); 533 } 534 535 mutex_enter(&zfsvfs->z_znodes_lock); 536 list_insert_tail(&zfsvfs->z_all_znodes, zp); 537 membar_producer(); 538 mutex_exit(&zfsvfs->z_znodes_lock); 539 540 unlock_new_inode(ip); 541 542 return (ip); 543 } 544 545 /* 546 * Lookup the inode with given id, it will be allocated if needed. 547 */ 548 static struct inode * 549 zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id, 550 const struct file_operations *fops, const struct inode_operations *ops) 551 { 552 struct inode *ip = NULL; 553 uint64_t creation = 0; 554 dsl_dataset_t *snap_ds; 555 dsl_pool_t *pool; 556 557 while (ip == NULL) { 558 ip = ilookup(zfsvfs->z_sb, (unsigned long)id); 559 if (ip) 560 break; 561 562 if (id <= ZFSCTL_INO_SNAPDIRS && !creation) { 563 pool = dmu_objset_pool(zfsvfs->z_os); 564 dsl_pool_config_enter(pool, FTAG); 565 if (!dsl_dataset_hold_obj(pool, 566 ZFSCTL_INO_SNAPDIRS - id, FTAG, &snap_ds)) { 567 creation = dsl_get_creation(snap_ds); 568 dsl_dataset_rele(snap_ds, FTAG); 569 } 570 dsl_pool_config_exit(pool, FTAG); 571 } 572 573 /* May fail due to concurrent zfsctl_inode_alloc() */ 574 ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops, creation); 575 } 576 577 return (ip); 578 } 579 580 /* 581 * Create the '.zfs' directory. This directory is cached as part of the VFS 582 * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount() 583 * therefore checks against a vfs_count of 2 instead of 1. This reference 584 * is removed when the ctldir is destroyed in the unmount. All other entities 585 * under the '.zfs' directory are created dynamically as needed. 
586 * 587 * Because the dynamically created '.zfs' directory entries assume the use 588 * of 64-bit inode numbers this support must be disabled on 32-bit systems. 589 */ 590 int 591 zfsctl_create(zfsvfs_t *zfsvfs) 592 { 593 ASSERT0P(zfsvfs->z_ctldir); 594 595 zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT, 596 &zpl_fops_root, &zpl_ops_root, 0); 597 if (zfsvfs->z_ctldir == NULL) 598 return (SET_ERROR(ENOENT)); 599 600 return (0); 601 } 602 603 /* 604 * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name. 605 * Only called when the filesystem is unmounted. 606 */ 607 void 608 zfsctl_destroy(zfsvfs_t *zfsvfs) 609 { 610 if (zfsvfs->z_issnap) { 611 zfs_snapentry_t *se; 612 spa_t *spa = zfsvfs->z_os->os_spa; 613 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 614 615 rw_enter(&zfs_snapshot_lock, RW_WRITER); 616 se = zfsctl_snapshot_find_by_objsetid(spa, objsetid); 617 if (se != NULL) 618 zfsctl_snapshot_remove(se); 619 rw_exit(&zfs_snapshot_lock); 620 if (se != NULL) { 621 zfsctl_snapshot_unmount_cancel(se); 622 zfsctl_snapshot_rele(se); 623 } 624 } else if (zfsvfs->z_ctldir) { 625 iput(zfsvfs->z_ctldir); 626 zfsvfs->z_ctldir = NULL; 627 } 628 } 629 630 /* 631 * Given a root znode, retrieve the associated .zfs directory. 632 * Add a hold to the vnode and return it. 633 */ 634 struct inode * 635 zfsctl_root(znode_t *zp) 636 { 637 ASSERT(zfs_has_ctldir(zp)); 638 /* Must have an existing ref, so igrab() cannot return NULL */ 639 VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL); 640 return (ZTOZSB(zp)->z_ctldir); 641 } 642 643 /* 644 * Generate a long fid to indicate a snapdir. We encode whether snapdir is 645 * already mounted in gen field. We do this because nfsd lookup will not 646 * trigger automount. Next time the nfsd does fh_to_dentry, we will notice 647 * this and do automount and return ESTALE to force nfsd revalidate and follow 648 * mount. 
649 */ 650 static int 651 zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp) 652 { 653 zfid_short_t *zfid = (zfid_short_t *)fidp; 654 zfid_long_t *zlfid = (zfid_long_t *)fidp; 655 uint32_t gen = 0; 656 uint64_t object; 657 uint64_t objsetid; 658 int i; 659 struct dentry *dentry; 660 661 if (fidp->fid_len < LONG_FID_LEN) { 662 fidp->fid_len = LONG_FID_LEN; 663 return (SET_ERROR(ENOSPC)); 664 } 665 666 object = ip->i_ino; 667 objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino; 668 zfid->zf_len = LONG_FID_LEN; 669 670 dentry = d_obtain_alias(igrab(ip)); 671 if (!IS_ERR(dentry)) { 672 gen = !!d_mountpoint(dentry); 673 dput(dentry); 674 } 675 676 for (i = 0; i < sizeof (zfid->zf_object); i++) 677 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 678 679 for (i = 0; i < sizeof (zfid->zf_gen); i++) 680 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 681 682 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 683 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 684 685 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 686 zlfid->zf_setgen[i] = 0; 687 688 return (0); 689 } 690 691 /* 692 * Generate an appropriate fid for an entry in the .zfs directory. 
693 */ 694 int 695 zfsctl_fid(struct inode *ip, fid_t *fidp) 696 { 697 znode_t *zp = ITOZ(ip); 698 zfsvfs_t *zfsvfs = ITOZSB(ip); 699 uint64_t object = zp->z_id; 700 zfid_short_t *zfid; 701 int i; 702 int error; 703 704 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 705 return (error); 706 707 if (zfsctl_is_snapdir(ip)) { 708 zfs_exit(zfsvfs, FTAG); 709 return (zfsctl_snapdir_fid(ip, fidp)); 710 } 711 712 if (fidp->fid_len < SHORT_FID_LEN) { 713 fidp->fid_len = SHORT_FID_LEN; 714 zfs_exit(zfsvfs, FTAG); 715 return (SET_ERROR(ENOSPC)); 716 } 717 718 zfid = (zfid_short_t *)fidp; 719 720 zfid->zf_len = SHORT_FID_LEN; 721 722 for (i = 0; i < sizeof (zfid->zf_object); i++) 723 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 724 725 /* .zfs znodes always have a generation number of 0 */ 726 for (i = 0; i < sizeof (zfid->zf_gen); i++) 727 zfid->zf_gen[i] = 0; 728 729 zfs_exit(zfsvfs, FTAG); 730 return (0); 731 } 732 733 /* 734 * Construct a full dataset name in full_name: "pool/dataset@snap_name" 735 */ 736 static int 737 zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, 738 char *full_name) 739 { 740 objset_t *os = zfsvfs->z_os; 741 742 if (zfs_component_namecheck(snap_name, NULL, NULL) != 0) 743 return (SET_ERROR(EILSEQ)); 744 745 dmu_objset_name(os, full_name); 746 if ((strlen(full_name) + 1 + strlen(snap_name)) >= len) 747 return (SET_ERROR(ENAMETOOLONG)); 748 749 (void) strcat(full_name, "@"); 750 (void) strcat(full_name, snap_name); 751 752 return (0); 753 } 754 755 /* 756 * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" 757 */ 758 static int 759 zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid, 760 int path_len, char *full_path) 761 { 762 objset_t *os = zfsvfs->z_os; 763 fstrans_cookie_t cookie; 764 char *snapname; 765 boolean_t case_conflict; 766 uint64_t id, pos = 0; 767 int error = 0; 768 769 cookie = spl_fstrans_mark(); 770 snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 771 772 while 
(error == 0) { 773 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 774 error = dmu_snapshot_list_next(zfsvfs->z_os, 775 ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos, 776 &case_conflict); 777 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 778 if (error) 779 goto out; 780 781 if (id == objsetid) 782 break; 783 } 784 785 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 786 if (zfsvfs->z_vfs->vfs_mntpoint != NULL) { 787 snprintf(full_path, path_len, "%s/.zfs/snapshot/%s", 788 zfsvfs->z_vfs->vfs_mntpoint, snapname); 789 } else 790 error = SET_ERROR(ENOENT); 791 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 792 793 out: 794 kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); 795 spl_fstrans_unmark(cookie); 796 797 return (error); 798 } 799 800 /* 801 * Special case the handling of "..". 802 */ 803 int 804 zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp, 805 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 806 { 807 zfsvfs_t *zfsvfs = ITOZSB(dip); 808 int error = 0; 809 810 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 811 return (error); 812 813 if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) { 814 *ipp = NULL; 815 } else if (strcmp(name, "..") == 0) { 816 *ipp = dip->i_sb->s_root->d_inode; 817 } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { 818 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, 819 &zpl_fops_snapdir, &zpl_ops_snapdir); 820 } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { 821 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES, 822 &zpl_fops_shares, &zpl_ops_shares); 823 } else { 824 *ipp = NULL; 825 } 826 827 if (*ipp == NULL) 828 error = SET_ERROR(ENOENT); 829 830 zfs_exit(zfsvfs, FTAG); 831 832 return (error); 833 } 834 835 /* 836 * Lookup entry point for the 'snapshot' directory. Try to open the 837 * snapshot if it exist, creating the pseudo filesystem inode as necessary. 
838 */ 839 int 840 zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp, 841 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 842 { 843 zfsvfs_t *zfsvfs = ITOZSB(dip); 844 uint64_t id; 845 int error; 846 847 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 848 return (error); 849 850 error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id); 851 if (error) { 852 zfs_exit(zfsvfs, FTAG); 853 return (error); 854 } 855 856 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id, 857 &simple_dir_operations, &simple_dir_inode_operations); 858 if (*ipp == NULL) 859 error = SET_ERROR(ENOENT); 860 861 zfs_exit(zfsvfs, FTAG); 862 863 return (error); 864 } 865 866 /* 867 * Renaming a directory under '.zfs/snapshot' will automatically trigger 868 * a rename of the snapshot to the new given name. The rename is confined 869 * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. 870 */ 871 int 872 zfsctl_snapdir_rename(struct inode *sdip, const char *snm, 873 struct inode *tdip, const char *tnm, cred_t *cr, int flags) 874 { 875 zfsvfs_t *zfsvfs = ITOZSB(sdip); 876 char *to, *from, *real, *fsname; 877 int error; 878 879 if (!zfs_admin_snapshot) 880 return (SET_ERROR(EACCES)); 881 882 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 883 return (error); 884 885 to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 886 from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 887 real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 888 fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 889 890 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 891 error = dmu_snapshot_realname(zfsvfs->z_os, snm, real, 892 ZFS_MAX_DATASET_NAME_LEN, NULL); 893 if (error == 0) { 894 snm = real; 895 } else if (error != ENOTSUP) { 896 goto out; 897 } 898 } 899 900 dmu_objset_name(zfsvfs->z_os, fsname); 901 902 error = zfsctl_snapshot_name(ITOZSB(sdip), snm, 903 ZFS_MAX_DATASET_NAME_LEN, from); 904 if (error == 0) 905 error = zfsctl_snapshot_name(ITOZSB(tdip), 
tnm, 906 ZFS_MAX_DATASET_NAME_LEN, to); 907 if (error == 0) 908 error = zfs_secpolicy_rename_perms(from, to, cr); 909 if (error != 0) 910 goto out; 911 912 /* 913 * Cannot move snapshots out of the snapdir. 914 */ 915 if (sdip != tdip) { 916 error = SET_ERROR(EINVAL); 917 goto out; 918 } 919 920 /* 921 * No-op when names are identical. 922 */ 923 if (strcmp(snm, tnm) == 0) { 924 error = 0; 925 goto out; 926 } 927 928 rw_enter(&zfs_snapshot_lock, RW_WRITER); 929 930 error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); 931 if (error == 0) 932 (void) zfsctl_snapshot_rename(snm, tnm); 933 934 rw_exit(&zfs_snapshot_lock); 935 out: 936 kmem_free(from, ZFS_MAX_DATASET_NAME_LEN); 937 kmem_free(to, ZFS_MAX_DATASET_NAME_LEN); 938 kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); 939 kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); 940 941 zfs_exit(zfsvfs, FTAG); 942 943 return (error); 944 } 945 946 /* 947 * Removing a directory under '.zfs/snapshot' will automatically trigger 948 * the removal of the snapshot with the given name. 
949 */ 950 int 951 zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr, 952 int flags) 953 { 954 zfsvfs_t *zfsvfs = ITOZSB(dip); 955 char *snapname, *real; 956 int error; 957 958 if (!zfs_admin_snapshot) 959 return (SET_ERROR(EACCES)); 960 961 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 962 return (error); 963 964 snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 965 real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 966 967 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 968 error = dmu_snapshot_realname(zfsvfs->z_os, name, real, 969 ZFS_MAX_DATASET_NAME_LEN, NULL); 970 if (error == 0) { 971 name = real; 972 } else if (error != ENOTSUP) { 973 goto out; 974 } 975 } 976 977 error = zfsctl_snapshot_name(ITOZSB(dip), name, 978 ZFS_MAX_DATASET_NAME_LEN, snapname); 979 if (error == 0) 980 error = zfs_secpolicy_destroy_perms(snapname, cr); 981 if (error != 0) 982 goto out; 983 984 error = zfsctl_snapshot_unmount(snapname, MNT_FORCE); 985 if ((error == 0) || (error == ENOENT)) 986 error = dsl_destroy_snapshot(snapname, B_FALSE); 987 out: 988 kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); 989 kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); 990 991 zfs_exit(zfsvfs, FTAG); 992 993 return (error); 994 } 995 996 /* 997 * Creating a directory under '.zfs/snapshot' will automatically trigger 998 * the creation of a new snapshot with the given name. 
999 */ 1000 int 1001 zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname, vattr_t *vap, 1002 struct inode **ipp, cred_t *cr, int flags) 1003 { 1004 zfsvfs_t *zfsvfs = ITOZSB(dip); 1005 char *dsname; 1006 int error; 1007 1008 if (!zfs_admin_snapshot) 1009 return (SET_ERROR(EACCES)); 1010 1011 dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 1012 1013 if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { 1014 error = SET_ERROR(EILSEQ); 1015 goto out; 1016 } 1017 1018 dmu_objset_name(zfsvfs->z_os, dsname); 1019 1020 error = zfs_secpolicy_snapshot_perms(dsname, cr); 1021 if (error != 0) 1022 goto out; 1023 1024 if (error == 0) { 1025 error = dmu_objset_snapshot_one(dsname, dirname); 1026 if (error != 0) 1027 goto out; 1028 1029 error = zfsctl_snapdir_lookup(dip, dirname, ipp, 1030 0, cr, NULL, NULL); 1031 } 1032 out: 1033 kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); 1034 1035 return (error); 1036 } 1037 1038 /* 1039 * Flush everything out of the kernel's export table and such. 1040 * This is needed as once the snapshot is used over NFS, its 1041 * entries in svc_export and svc_expkey caches hold reference 1042 * to the snapshot mount point. There is no known way of flushing 1043 * only the entries related to the snapshot. 1044 */ 1045 static void 1046 exportfs_flush(void) 1047 { 1048 char *argv[] = { "/usr/sbin/exportfs", "-f", NULL }; 1049 char *envp[] = { NULL }; 1050 1051 (void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1052 } 1053 1054 /* 1055 * Returns the path in char format for given struct path. Uses 1056 * d_path exported by kernel to convert struct path to char 1057 * format. Returns the correct path for mountpoints and chroot 1058 * environments. 1059 * 1060 * If chroot environment has directories that are mounted with 1061 * --bind or --rbind flag, d_path returns the complete path inside 1062 * chroot environment but does not return the absolute path, i.e. 1063 * the path to chroot environment is missing. 
1064 */ 1065 static int 1066 get_root_path(struct path *path, char *buff, int len) 1067 { 1068 char *path_buffer, *path_ptr; 1069 int error = 0; 1070 1071 path_get(path); 1072 path_buffer = kmem_zalloc(len, KM_SLEEP); 1073 path_ptr = d_path(path, path_buffer, len); 1074 if (IS_ERR(path_ptr)) 1075 error = SET_ERROR(-PTR_ERR(path_ptr)); 1076 else 1077 strcpy(buff, path_ptr); 1078 1079 kmem_free(path_buffer, len); 1080 path_put(path); 1081 return (error); 1082 } 1083 1084 /* 1085 * Returns if the current process root is chrooted or not. Linux 1086 * kernel exposes the task_struct for current process and init. 1087 * Since init process root points to actual root filesystem when 1088 * Linux runtime is reached, we can compare the current process 1089 * root with init process root to determine if root of the current 1090 * process is different from init, which can reliably determine if 1091 * current process is in chroot context or not. 1092 */ 1093 static int 1094 is_current_chrooted(void) 1095 { 1096 struct task_struct *curr = current, *global = &init_task; 1097 struct path cr_root, gl_root; 1098 1099 task_lock(curr); 1100 get_fs_root(curr->fs, &cr_root); 1101 task_unlock(curr); 1102 1103 task_lock(global); 1104 get_fs_root(global->fs, &gl_root); 1105 task_unlock(global); 1106 1107 int chrooted = !path_equal(&cr_root, &gl_root); 1108 path_put(&gl_root); 1109 path_put(&cr_root); 1110 1111 return (chrooted); 1112 } 1113 1114 /* 1115 * Attempt to unmount a snapshot by making a call to user space. 1116 * There is no assurance that this can or will succeed, is just a 1117 * best effort. In the case where it does fail, perhaps because 1118 * it's in use, the unmount will fail harmlessly. 
1119 */ 1120 int 1121 zfsctl_snapshot_unmount(const char *snapname, int flags) 1122 { 1123 char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL, 1124 NULL }; 1125 char *envp[] = { NULL }; 1126 zfs_snapentry_t *se; 1127 int error; 1128 1129 rw_enter(&zfs_snapshot_lock, RW_READER); 1130 if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) { 1131 rw_exit(&zfs_snapshot_lock); 1132 return (SET_ERROR(ENOENT)); 1133 } 1134 rw_exit(&zfs_snapshot_lock); 1135 1136 exportfs_flush(); 1137 1138 if (flags & MNT_FORCE) 1139 argv[4] = "-fn"; 1140 argv[5] = se->se_path; 1141 dprintf("unmount; path=%s\n", se->se_path); 1142 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1143 zfsctl_snapshot_rele(se); 1144 1145 1146 /* 1147 * The umount system utility will return 256 on error. We must 1148 * assume this error is because the file system is busy so it is 1149 * converted to the more sensible EBUSY. 1150 */ 1151 if (error) 1152 error = SET_ERROR(EBUSY); 1153 1154 return (error); 1155 } 1156 1157 int 1158 zfsctl_snapshot_mount(struct path *path, int flags) 1159 { 1160 struct dentry *dentry = path->dentry; 1161 struct inode *ip = dentry->d_inode; 1162 zfsvfs_t *zfsvfs; 1163 zfsvfs_t *snap_zfsvfs; 1164 zfs_snapentry_t *se; 1165 char *full_name, *full_path, *options; 1166 char *argv[] = { "/usr/bin/env", "mount", "-i", "-t", "zfs", "-n", 1167 "-o", NULL, NULL, NULL, NULL }; 1168 char *envp[] = { NULL }; 1169 int error; 1170 struct path spath; 1171 1172 if (ip == NULL) 1173 return (SET_ERROR(EISDIR)); 1174 1175 zfsvfs = ITOZSB(ip); 1176 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1177 return (error); 1178 1179 full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 1180 full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1181 options = kmem_zalloc(7, KM_SLEEP); 1182 1183 error = zfsctl_snapshot_name(zfsvfs, dname(dentry), 1184 ZFS_MAX_DATASET_NAME_LEN, full_name); 1185 if (error) 1186 goto error; 1187 1188 if (is_current_chrooted() == 0) { 1189 /* 1190 * 
Current process is not in chroot context 1191 */ 1192 1193 char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1194 struct path mnt_path; 1195 mnt_path.mnt = path->mnt; 1196 mnt_path.dentry = path->mnt->mnt_root; 1197 1198 /* 1199 * Get path to current mountpoint 1200 */ 1201 error = get_root_path(&mnt_path, m, MAXPATHLEN); 1202 if (error != 0) { 1203 kmem_free(m, MAXPATHLEN); 1204 goto error; 1205 } 1206 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 1207 if (zfsvfs->z_vfs->vfs_mntpoint != NULL) { 1208 /* 1209 * If current mnountpoint and vfs_mntpoint are not same, 1210 * store current mountpoint in vfs_mntpoint. 1211 */ 1212 if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) { 1213 kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint); 1214 zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m); 1215 } 1216 } else 1217 zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m); 1218 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 1219 kmem_free(m, MAXPATHLEN); 1220 } 1221 1222 /* 1223 * Construct a mount point path from sb of the ctldir inode and dirent 1224 * name, instead of from d_path(), so that chroot'd process doesn't fail 1225 * on mount.zfs(8). 1226 */ 1227 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 1228 snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", 1229 zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "", 1230 dname(dentry)); 1231 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 1232 1233 snprintf(options, 7, "%s", 1234 zfs_snapshot_no_setuid ? "nosuid" : "suid"); 1235 1236 /* 1237 * Multiple concurrent automounts of a snapshot are never allowed. 1238 * The snapshot may be manually mounted as many times as desired. 1239 */ 1240 if (zfsctl_snapshot_ismounted(full_name)) { 1241 error = 0; 1242 goto error; 1243 } 1244 1245 /* 1246 * Attempt to mount the snapshot from user space. Normally this 1247 * would be done using the vfs_kern_mount() function, however that 1248 * function is marked GPL-only and cannot be used. 
On error we 1249 * careful to log the real error to the console and return EISDIR 1250 * to safely abort the automount. This should be very rare. 1251 * 1252 * If the user mode helper happens to return EBUSY, a concurrent 1253 * mount is already in progress in which case the error is ignored. 1254 * Take note that if the program was executed successfully the return 1255 * value from call_usermodehelper() will be (exitcode << 8 + signal). 1256 */ 1257 dprintf("mount; name=%s path=%s\n", full_name, full_path); 1258 argv[7] = options; 1259 argv[8] = full_name; 1260 argv[9] = full_path; 1261 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1262 if (error) { 1263 if (!(error & MOUNT_BUSY << 8)) { 1264 zfs_dbgmsg("Unable to automount %s error=%d", 1265 full_path, error); 1266 error = SET_ERROR(EISDIR); 1267 } else { 1268 /* 1269 * EBUSY, this could mean a concurrent mount, or the 1270 * snapshot has already been mounted at completely 1271 * different place. We return 0 so VFS will retry. For 1272 * the latter case the VFS will retry several times 1273 * and return ELOOP, which is probably not a very good 1274 * behavior. 1275 */ 1276 error = 0; 1277 } 1278 goto error; 1279 } 1280 1281 /* 1282 * Follow down in to the mounted snapshot and set MNT_SHRINKABLE 1283 * to identify this as an automounted filesystem. 
1284 */ 1285 spath = *path; 1286 path_get(&spath); 1287 if (follow_down_one(&spath)) { 1288 snap_zfsvfs = ITOZSB(spath.dentry->d_inode); 1289 snap_zfsvfs->z_parent = zfsvfs; 1290 dentry = spath.dentry; 1291 spath.mnt->mnt_flags |= MNT_SHRINKABLE; 1292 1293 rw_enter(&zfs_snapshot_lock, RW_WRITER); 1294 se = zfsctl_snapshot_alloc(full_name, full_path, 1295 snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os), 1296 dentry); 1297 zfsctl_snapshot_add(se); 1298 zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); 1299 rw_exit(&zfs_snapshot_lock); 1300 } 1301 path_put(&spath); 1302 error: 1303 kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); 1304 kmem_free(full_path, MAXPATHLEN); 1305 1306 zfs_exit(zfsvfs, FTAG); 1307 1308 return (error); 1309 } 1310 1311 /* 1312 * Get the snapdir inode from fid 1313 */ 1314 int 1315 zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen, 1316 struct inode **ipp) 1317 { 1318 int error; 1319 struct path path; 1320 char *mnt; 1321 struct dentry *dentry; 1322 1323 mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1324 1325 error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid, 1326 MAXPATHLEN, mnt); 1327 if (error) 1328 goto out; 1329 1330 /* Trigger automount */ 1331 error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); 1332 if (error) 1333 goto out; 1334 1335 path_put(&path); 1336 /* 1337 * Get the snapdir inode. Note, we don't want to use the above 1338 * path because it contains the root of the snapshot rather 1339 * than the snapdir. 
1340 */ 1341 *ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid); 1342 if (*ipp == NULL) { 1343 error = SET_ERROR(ENOENT); 1344 goto out; 1345 } 1346 1347 /* check gen, see zfsctl_snapdir_fid */ 1348 dentry = d_obtain_alias(igrab(*ipp)); 1349 if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) { 1350 iput(*ipp); 1351 *ipp = NULL; 1352 error = SET_ERROR(ENOENT); 1353 } 1354 if (!IS_ERR(dentry)) 1355 dput(dentry); 1356 out: 1357 kmem_free(mnt, MAXPATHLEN); 1358 return (error); 1359 } 1360 1361 int 1362 zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, 1363 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 1364 { 1365 zfsvfs_t *zfsvfs = ITOZSB(dip); 1366 znode_t *zp; 1367 znode_t *dzp; 1368 int error; 1369 1370 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1371 return (error); 1372 1373 if (zfsvfs->z_shares_dir == 0) { 1374 zfs_exit(zfsvfs, FTAG); 1375 return (SET_ERROR(ENOTSUP)); 1376 } 1377 1378 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { 1379 error = zfs_lookup(dzp, name, &zp, 0, cr, NULL, NULL); 1380 zrele(dzp); 1381 } 1382 1383 zfs_exit(zfsvfs, FTAG); 1384 1385 return (error); 1386 } 1387 1388 /* 1389 * Initialize the various pieces we'll need to create and manipulate .zfs 1390 * directories. Currently this is unused but available. 1391 */ 1392 void 1393 zfsctl_init(void) 1394 { 1395 avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name, 1396 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, 1397 se_node_name)); 1398 avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid, 1399 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, 1400 se_node_objsetid)); 1401 rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL); 1402 } 1403 1404 /* 1405 * Cleanup the various pieces we needed for .zfs directories. In particular 1406 * ensure the expiry timer is canceled safely. 
1407 */ 1408 void 1409 zfsctl_fini(void) 1410 { 1411 avl_destroy(&zfs_snapshots_by_name); 1412 avl_destroy(&zfs_snapshots_by_objsetid); 1413 rw_destroy(&zfs_snapshot_lock); 1414 } 1415 1416 module_param(zfs_admin_snapshot, int, 0644); 1417 MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot"); 1418 1419 module_param(zfs_expire_snapshot, int, 0644); 1420 MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); 1421 1422 module_param(zfs_snapshot_no_setuid, int, 0644); 1423 MODULE_PARM_DESC(zfs_snapshot_no_setuid, 1424 "Disable setuid/setgid for automounts in .zfs/snapshot"); 1425