1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * 24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 25 * Copyright (C) 2011 Lawrence Livermore National Security, LLC. 26 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 27 * LLNL-CODE-403049. 28 * Rewritten for Linux by: 29 * Rohan Puri <rohan.puri15@gmail.com> 30 * Brian Behlendorf <behlendorf1@llnl.gov> 31 * Copyright (c) 2013 by Delphix. All rights reserved. 32 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. 33 * Copyright (c) 2018 George Melikov. All Rights Reserved. 34 * Copyright (c) 2019 Datto, Inc. All rights reserved. 35 * Copyright (c) 2020 The MathWorks, Inc. All rights reserved. 36 */ 37 38 /* 39 * ZFS control directory (a.k.a. ".zfs") 40 * 41 * This directory provides a common location for all ZFS meta-objects. 42 * Currently, this is only the 'snapshot' and 'shares' directory, but this may 43 * expand in the future. The elements are built dynamically, as the hierarchy 44 * does not actually exist on disk. 
45 * 46 * For 'snapshot', we don't want to have all snapshots always mounted, because 47 * this would take up a huge amount of space in /etc/mnttab. We have three 48 * types of objects: 49 * 50 * ctldir ------> snapshotdir -------> snapshot 51 * | 52 * | 53 * V 54 * mounted fs 55 * 56 * The 'snapshot' node contains just enough information to lookup '..' and act 57 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we 58 * perform an automount of the underlying filesystem and return the 59 * corresponding inode. 60 * 61 * All mounts are handled automatically by an user mode helper which invokes 62 * the mount procedure. Unmounts are handled by allowing the mount 63 * point to expire so the kernel may automatically unmount it. 64 * 65 * The '.zfs', '.zfs/snapshot', and all directories created under 66 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same 67 * zfsvfs_t as the head filesystem (what '.zfs' lives under). 68 * 69 * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths 70 * (ie: snapshots) are complete ZFS filesystems and have their own unique 71 * zfsvfs_t. However, the fsid reported by these mounts will be the same 72 * as that used by the parent zfsvfs_t to make NFS happy. 73 */ 74 75 #include <sys/types.h> 76 #include <sys/param.h> 77 #include <sys/time.h> 78 #include <sys/sysmacros.h> 79 #include <sys/pathname.h> 80 #include <sys/vfs.h> 81 #include <sys/zfs_ctldir.h> 82 #include <sys/zfs_ioctl.h> 83 #include <sys/zfs_vfsops.h> 84 #include <sys/zfs_vnops.h> 85 #include <sys/stat.h> 86 #include <sys/dmu.h> 87 #include <sys/dmu_objset.h> 88 #include <sys/dsl_destroy.h> 89 #include <sys/dsl_deleg.h> 90 #include <sys/zpl.h> 91 #include <sys/mntent.h> 92 #include "zfs_namecheck.h" 93 94 /* 95 * Two AVL trees are maintained which contain all currently automounted 96 * snapshots. 
Every automounted snapshot maps to a single zfs_snapentry_t
 * entry which MUST:
 *
 * - be attached to both trees, and
 * - be unique, no duplicate entries are allowed.
 *
 * The zfs_snapshots_by_name tree is indexed by the full dataset name
 * while the zfs_snapshots_by_objsetid tree is indexed by the unique
 * objsetid. This allows for fast lookups either by name or objsetid.
 *
 * Both trees are protected by zfs_snapshot_lock: lookups assert that the
 * lock is held (reader or writer), insertions and removals assert that it
 * is held as writer.
 */
static avl_tree_t zfs_snapshots_by_name;
static avl_tree_t zfs_snapshots_by_objsetid;
static krwlock_t zfs_snapshot_lock;

/*
 * Control Directory Tunables (.zfs)
 *
 * zfs_expire_snapshot    - delay, in seconds, before an automounted
 *                          snapshot is unmounted; a value <= 0 makes the
 *                          expiry task return without unmounting.
 * zfs_admin_snapshot     - when zero, rename/rmdir/mkdir requests under
 *                          '.zfs/snapshot' fail with EACCES.
 * zfs_snapshot_no_setuid - NOTE(review): not referenced in this part of
 *                          the file; presumably consumed by the snapshot
 *                          mount path - confirm.
 */
int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
static int zfs_admin_snapshot = 0;
static int zfs_snapshot_no_setuid = 0;

/*
 * Book-keeping for one automounted snapshot. se_name and se_path are
 * private copies owned by the entry. se_taskqid is TASKQID_INVALID while
 * no delayed unmount task is scheduled and must only be read or written
 * with se_taskqid_lock held.
 */
typedef struct {
	char		*se_name;	/* full snapshot name */
	char		*se_path;	/* full mount path */
	spa_t		*se_spa;	/* pool spa */
	uint64_t	se_objsetid;	/* snapshot objset id */
	struct dentry   *se_root_dentry; /* snapshot root dentry */
	krwlock_t	se_taskqid_lock; /* scheduled unmount taskqid lock */
	taskqid_t	se_taskqid;	/* scheduled unmount taskqid */
	avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */
	avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */
	zfs_refcount_t	se_refcount;	/* reference count */
} zfs_snapentry_t;

/* Forward declaration: used by snapentry_expire() before its definition. */
static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);

/*
 * Allocate a new zfs_snapentry_t being careful to make a copy of
 * the snapshot name and provided mount point. No reference is taken.
135 */ 136 static zfs_snapentry_t * 137 zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa, 138 uint64_t objsetid, struct dentry *root_dentry) 139 { 140 zfs_snapentry_t *se; 141 142 se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); 143 144 se->se_name = kmem_strdup(full_name); 145 se->se_path = kmem_strdup(full_path); 146 se->se_spa = spa; 147 se->se_objsetid = objsetid; 148 se->se_root_dentry = root_dentry; 149 se->se_taskqid = TASKQID_INVALID; 150 rw_init(&se->se_taskqid_lock, NULL, RW_DEFAULT, NULL); 151 152 zfs_refcount_create(&se->se_refcount); 153 154 return (se); 155 } 156 157 /* 158 * Free a zfs_snapentry_t the caller must ensure there are no active 159 * references. 160 */ 161 static void 162 zfsctl_snapshot_free(zfs_snapentry_t *se) 163 { 164 zfs_refcount_destroy(&se->se_refcount); 165 kmem_strfree(se->se_name); 166 kmem_strfree(se->se_path); 167 rw_destroy(&se->se_taskqid_lock); 168 169 kmem_free(se, sizeof (zfs_snapentry_t)); 170 } 171 172 /* 173 * Hold a reference on the zfs_snapentry_t. 174 */ 175 static void 176 zfsctl_snapshot_hold(zfs_snapentry_t *se) 177 { 178 zfs_refcount_add(&se->se_refcount, NULL); 179 } 180 181 /* 182 * Release a reference on the zfs_snapentry_t. When the number of 183 * references drops to zero the structure will be freed. 184 */ 185 static void 186 zfsctl_snapshot_rele(zfs_snapentry_t *se) 187 { 188 if (zfs_refcount_remove(&se->se_refcount, NULL) == 0) 189 zfsctl_snapshot_free(se); 190 } 191 192 /* 193 * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and 194 * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part 195 * of the trees a reference is held. 
196 */ 197 static void 198 zfsctl_snapshot_add(zfs_snapentry_t *se) 199 { 200 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 201 zfsctl_snapshot_hold(se); 202 avl_add(&zfs_snapshots_by_name, se); 203 avl_add(&zfs_snapshots_by_objsetid, se); 204 } 205 206 /* 207 * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and 208 * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped, 209 * this can result in the structure being freed if that was the last 210 * remaining reference. 211 */ 212 static void 213 zfsctl_snapshot_remove(zfs_snapentry_t *se) 214 { 215 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 216 avl_remove(&zfs_snapshots_by_name, se); 217 avl_remove(&zfs_snapshots_by_objsetid, se); 218 zfsctl_snapshot_rele(se); 219 } 220 221 /* 222 * Snapshot name comparison function for the zfs_snapshots_by_name. 223 */ 224 static int 225 snapentry_compare_by_name(const void *a, const void *b) 226 { 227 const zfs_snapentry_t *se_a = a; 228 const zfs_snapentry_t *se_b = b; 229 int ret; 230 231 ret = strcmp(se_a->se_name, se_b->se_name); 232 233 if (ret < 0) 234 return (-1); 235 else if (ret > 0) 236 return (1); 237 else 238 return (0); 239 } 240 241 /* 242 * Snapshot name comparison function for the zfs_snapshots_by_objsetid. 243 */ 244 static int 245 snapentry_compare_by_objsetid(const void *a, const void *b) 246 { 247 const zfs_snapentry_t *se_a = a; 248 const zfs_snapentry_t *se_b = b; 249 250 if (se_a->se_spa != se_b->se_spa) 251 return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1); 252 253 if (se_a->se_objsetid < se_b->se_objsetid) 254 return (-1); 255 else if (se_a->se_objsetid > se_b->se_objsetid) 256 return (1); 257 else 258 return (0); 259 } 260 261 /* 262 * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname 263 * is found a pointer to the zfs_snapentry_t is returned and a reference 264 * taken on the structure. The caller is responsible for dropping the 265 * reference with zfsctl_snapshot_rele(). 
If the snapname is not found 266 * NULL will be returned. 267 */ 268 static zfs_snapentry_t * 269 zfsctl_snapshot_find_by_name(const char *snapname) 270 { 271 zfs_snapentry_t *se, search; 272 273 ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); 274 275 search.se_name = (char *)snapname; 276 se = avl_find(&zfs_snapshots_by_name, &search, NULL); 277 if (se) 278 zfsctl_snapshot_hold(se); 279 280 return (se); 281 } 282 283 /* 284 * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id 285 * rather than the snapname. In all other respects it behaves the same 286 * as zfsctl_snapshot_find_by_name(). 287 */ 288 static zfs_snapentry_t * 289 zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) 290 { 291 zfs_snapentry_t *se, search; 292 293 ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); 294 295 search.se_spa = spa; 296 search.se_objsetid = objsetid; 297 se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); 298 if (se) 299 zfsctl_snapshot_hold(se); 300 301 return (se); 302 } 303 304 /* 305 * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is 306 * removed, renamed, and added back to the new correct location in the tree. 307 */ 308 static int 309 zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname) 310 { 311 zfs_snapentry_t *se; 312 313 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 314 315 se = zfsctl_snapshot_find_by_name(old_snapname); 316 if (se == NULL) 317 return (SET_ERROR(ENOENT)); 318 319 zfsctl_snapshot_remove(se); 320 kmem_strfree(se->se_name); 321 se->se_name = kmem_strdup(new_snapname); 322 zfsctl_snapshot_add(se); 323 zfsctl_snapshot_rele(se); 324 325 return (0); 326 } 327 328 /* 329 * Delayed task responsible for unmounting an expired automounted snapshot. 
*/
static void
snapentry_expire(void *data)
{
	zfs_snapentry_t *se = (zfs_snapentry_t *)data;
	/*
	 * Remember the lookup key now: 'se' must not be dereferenced after
	 * the zfsctl_snapshot_rele() below, which may free it.
	 */
	spa_t *spa = se->se_spa;
	uint64_t objsetid = se->se_objsetid;

	/*
	 * Expiry has been disabled; just drop the reference taken when the
	 * task was dispatched.
	 * NOTE(review): se_taskqid is left untouched on this path.
	 */
	if (zfs_expire_snapshot <= 0) {
		zfsctl_snapshot_rele(se);
		return;
	}

	/* This task is running, so there is no longer a pending taskqid. */
	rw_enter(&se->se_taskqid_lock, RW_WRITER);
	se->se_taskqid = TASKQID_INVALID;
	rw_exit(&se->se_taskqid_lock);
	/* Best effort; failure is handled by the reschedule below. */
	(void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
	/* Drop the reference taken when the task was dispatched. */
	zfsctl_snapshot_rele(se);

	/*
	 * Reschedule the unmount if the zfs_snapentry_t wasn't removed.
	 * This can occur when the snapshot is busy.
	 */
	rw_enter(&zfs_snapshot_lock, RW_READER);
	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
		/* Drop the lookup hold; the new task has its own hold. */
		zfsctl_snapshot_rele(se);
	}
	rw_exit(&zfs_snapshot_lock);
}

/*
 * Cancel an automatic unmount of a snapname. This function is responsible
 * for dropping the reference on the zfs_snapentry_t which was taken
 * during dispatch. The reference is only dropped when taskq_cancel_id()
 * returns 0.
 */
static void
zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
{
	int err = 0;
	rw_enter(&se->se_taskqid_lock, RW_WRITER);
	err = taskq_cancel_id(system_delay_taskq, se->se_taskqid);
	/*
	 * if we get ENOENT, the taskq couldn't be found to be
	 * canceled, so we can just mark it as invalid because
	 * it's already gone. If we got EBUSY, then we already
	 * blocked until it was gone _anyway_, so we don't care.
	 */
	se->se_taskqid = TASKQID_INVALID;
	rw_exit(&se->se_taskqid_lock);
	/*
	 * Only a successful cancel leaves the dispatch-time reference for
	 * us to drop; otherwise snapentry_expire() drops it itself.
	 */
	if (err == 0) {
		zfsctl_snapshot_rele(se);
	}
}

/*
 * Dispatch the unmount task for delayed handling with a hold protecting it.
*/
static void
zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
{

	/* A non-positive delay means "do not schedule anything". */
	if (delay <= 0)
		return;

	/* This hold belongs to the task; it is dropped when the task ends. */
	zfsctl_snapshot_hold(se);
	rw_enter(&se->se_taskqid_lock, RW_WRITER);
	/*
	 * If this condition happens, we managed to:
	 * - dispatch once
	 * - want to dispatch _again_ before it returned
	 *
	 * So let's just return - if that task fails at unmounting,
	 * we'll eventually dispatch again, and if it succeeds,
	 * no problem.
	 */
	if (se->se_taskqid != TASKQID_INVALID) {
		rw_exit(&se->se_taskqid_lock);
		/* Nothing was dispatched, so give the hold back. */
		zfsctl_snapshot_rele(se);
		return;
	}
	/* 'delay' is in seconds; it is added to lbolt as delay * HZ. */
	se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
	    snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
	rw_exit(&se->se_taskqid_lock);
}

/*
 * Schedule an automatic unmount of objset id to occur in delay seconds from
 * now. Any previous delayed unmount will be cancelled in favor of the
 * updated deadline. The reference taken by
 * zfsctl_snapshot_find_by_objsetid() is dropped before returning; the
 * dispatched task is protected by its own hold, which lasts until the
 * outstanding task is handled or cancelled.
 *
 * Returns 0 when an entry for (spa, objsetid) exists and ENOENT otherwise.
 */
int
zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
{
	zfs_snapentry_t *se;
	int error = ENOENT;

	rw_enter(&zfs_snapshot_lock, RW_READER);
	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
		/* Replace any pending task with one using the new deadline. */
		zfsctl_snapshot_unmount_cancel(se);
		zfsctl_snapshot_unmount_delay_impl(se, delay);
		zfsctl_snapshot_rele(se);
		error = 0;
	}
	rw_exit(&zfs_snapshot_lock);

	return (error);
}

/*
 * Check if snapname is currently mounted. Returns B_TRUE when mounted
 * and B_FALSE when unmounted.
443 */ 444 static boolean_t 445 zfsctl_snapshot_ismounted(const char *snapname) 446 { 447 zfs_snapentry_t *se; 448 boolean_t ismounted = B_FALSE; 449 450 rw_enter(&zfs_snapshot_lock, RW_READER); 451 if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) { 452 zfsctl_snapshot_rele(se); 453 ismounted = B_TRUE; 454 } 455 rw_exit(&zfs_snapshot_lock); 456 457 return (ismounted); 458 } 459 460 /* 461 * Check if the given inode is a part of the virtual .zfs directory. 462 */ 463 boolean_t 464 zfsctl_is_node(struct inode *ip) 465 { 466 return (ITOZ(ip)->z_is_ctldir); 467 } 468 469 /* 470 * Check if the given inode is a .zfs/snapshots/snapname directory. 471 */ 472 boolean_t 473 zfsctl_is_snapdir(struct inode *ip) 474 { 475 return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); 476 } 477 478 /* 479 * Allocate a new inode with the passed id and ops. 480 */ 481 static struct inode * 482 zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, 483 const struct file_operations *fops, const struct inode_operations *ops, 484 uint64_t creation) 485 { 486 struct inode *ip; 487 znode_t *zp; 488 inode_timespec_t now = {.tv_sec = creation}; 489 490 ip = new_inode(zfsvfs->z_sb); 491 if (ip == NULL) 492 return (NULL); 493 494 if (!creation) 495 now = current_time(ip); 496 zp = ITOZ(ip); 497 ASSERT3P(zp->z_dirlocks, ==, NULL); 498 ASSERT3P(zp->z_acl_cached, ==, NULL); 499 ASSERT3P(zp->z_xattr_cached, ==, NULL); 500 zp->z_id = id; 501 zp->z_unlinked = B_FALSE; 502 zp->z_atime_dirty = B_FALSE; 503 zp->z_zn_prefetch = B_FALSE; 504 zp->z_is_sa = B_FALSE; 505 zp->z_is_ctldir = B_TRUE; 506 zp->z_sa_hdl = NULL; 507 zp->z_blksz = 0; 508 zp->z_seq = 0; 509 zp->z_mapcnt = 0; 510 zp->z_size = 0; 511 zp->z_pflags = 0; 512 zp->z_mode = 0; 513 zp->z_sync_cnt = 0; 514 zp->z_sync_writes_cnt = 0; 515 zp->z_async_writes_cnt = 0; 516 ip->i_generation = 0; 517 ip->i_ino = id; 518 ip->i_mode = (S_IFDIR | S_IRWXUGO); 519 ip->i_uid = SUID_TO_KUID(0); 520 ip->i_gid = SGID_TO_KGID(0); 521 ip->i_blkbits 
= SPA_MINBLOCKSHIFT; 522 zpl_inode_set_atime_to_ts(ip, now); 523 zpl_inode_set_mtime_to_ts(ip, now); 524 zpl_inode_set_ctime_to_ts(ip, now); 525 ip->i_fop = fops; 526 ip->i_op = ops; 527 #if defined(IOP_XATTR) 528 ip->i_opflags &= ~IOP_XATTR; 529 #endif 530 531 if (insert_inode_locked(ip)) { 532 unlock_new_inode(ip); 533 iput(ip); 534 return (NULL); 535 } 536 537 mutex_enter(&zfsvfs->z_znodes_lock); 538 list_insert_tail(&zfsvfs->z_all_znodes, zp); 539 membar_producer(); 540 mutex_exit(&zfsvfs->z_znodes_lock); 541 542 unlock_new_inode(ip); 543 544 return (ip); 545 } 546 547 /* 548 * Lookup the inode with given id, it will be allocated if needed. 549 */ 550 static struct inode * 551 zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id, 552 const struct file_operations *fops, const struct inode_operations *ops) 553 { 554 struct inode *ip = NULL; 555 uint64_t creation = 0; 556 dsl_dataset_t *snap_ds; 557 dsl_pool_t *pool; 558 559 while (ip == NULL) { 560 ip = ilookup(zfsvfs->z_sb, (unsigned long)id); 561 if (ip) 562 break; 563 564 if (id <= ZFSCTL_INO_SNAPDIRS && !creation) { 565 pool = dmu_objset_pool(zfsvfs->z_os); 566 dsl_pool_config_enter(pool, FTAG); 567 if (!dsl_dataset_hold_obj(pool, 568 ZFSCTL_INO_SNAPDIRS - id, FTAG, &snap_ds)) { 569 creation = dsl_get_creation(snap_ds); 570 dsl_dataset_rele(snap_ds, FTAG); 571 } 572 dsl_pool_config_exit(pool, FTAG); 573 } 574 575 /* May fail due to concurrent zfsctl_inode_alloc() */ 576 ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops, creation); 577 } 578 579 return (ip); 580 } 581 582 /* 583 * Create the '.zfs' directory. This directory is cached as part of the VFS 584 * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount() 585 * therefore checks against a vfs_count of 2 instead of 1. This reference 586 * is removed when the ctldir is destroyed in the unmount. All other entities 587 * under the '.zfs' directory are created dynamically as needed. 
588 * 589 * Because the dynamically created '.zfs' directory entries assume the use 590 * of 64-bit inode numbers this support must be disabled on 32-bit systems. 591 */ 592 int 593 zfsctl_create(zfsvfs_t *zfsvfs) 594 { 595 ASSERT(zfsvfs->z_ctldir == NULL); 596 597 zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT, 598 &zpl_fops_root, &zpl_ops_root, 0); 599 if (zfsvfs->z_ctldir == NULL) 600 return (SET_ERROR(ENOENT)); 601 602 return (0); 603 } 604 605 /* 606 * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name. 607 * Only called when the filesystem is unmounted. 608 */ 609 void 610 zfsctl_destroy(zfsvfs_t *zfsvfs) 611 { 612 if (zfsvfs->z_issnap) { 613 zfs_snapentry_t *se; 614 spa_t *spa = zfsvfs->z_os->os_spa; 615 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 616 617 rw_enter(&zfs_snapshot_lock, RW_WRITER); 618 se = zfsctl_snapshot_find_by_objsetid(spa, objsetid); 619 if (se != NULL) 620 zfsctl_snapshot_remove(se); 621 rw_exit(&zfs_snapshot_lock); 622 if (se != NULL) { 623 zfsctl_snapshot_unmount_cancel(se); 624 zfsctl_snapshot_rele(se); 625 } 626 } else if (zfsvfs->z_ctldir) { 627 iput(zfsvfs->z_ctldir); 628 zfsvfs->z_ctldir = NULL; 629 } 630 } 631 632 /* 633 * Given a root znode, retrieve the associated .zfs directory. 634 * Add a hold to the vnode and return it. 635 */ 636 struct inode * 637 zfsctl_root(znode_t *zp) 638 { 639 ASSERT(zfs_has_ctldir(zp)); 640 /* Must have an existing ref, so igrab() cannot return NULL */ 641 VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL); 642 return (ZTOZSB(zp)->z_ctldir); 643 } 644 645 /* 646 * Generate a long fid to indicate a snapdir. We encode whether snapdir is 647 * already mounted in gen field. We do this because nfsd lookup will not 648 * trigger automount. Next time the nfsd does fh_to_dentry, we will notice 649 * this and do automount and return ESTALE to force nfsd revalidate and follow 650 * mount. 
651 */ 652 static int 653 zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp) 654 { 655 zfid_short_t *zfid = (zfid_short_t *)fidp; 656 zfid_long_t *zlfid = (zfid_long_t *)fidp; 657 uint32_t gen = 0; 658 uint64_t object; 659 uint64_t objsetid; 660 int i; 661 struct dentry *dentry; 662 663 if (fidp->fid_len < LONG_FID_LEN) { 664 fidp->fid_len = LONG_FID_LEN; 665 return (SET_ERROR(ENOSPC)); 666 } 667 668 object = ip->i_ino; 669 objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino; 670 zfid->zf_len = LONG_FID_LEN; 671 672 dentry = d_obtain_alias(igrab(ip)); 673 if (!IS_ERR(dentry)) { 674 gen = !!d_mountpoint(dentry); 675 dput(dentry); 676 } 677 678 for (i = 0; i < sizeof (zfid->zf_object); i++) 679 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 680 681 for (i = 0; i < sizeof (zfid->zf_gen); i++) 682 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 683 684 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 685 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 686 687 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 688 zlfid->zf_setgen[i] = 0; 689 690 return (0); 691 } 692 693 /* 694 * Generate an appropriate fid for an entry in the .zfs directory. 
695 */ 696 int 697 zfsctl_fid(struct inode *ip, fid_t *fidp) 698 { 699 znode_t *zp = ITOZ(ip); 700 zfsvfs_t *zfsvfs = ITOZSB(ip); 701 uint64_t object = zp->z_id; 702 zfid_short_t *zfid; 703 int i; 704 int error; 705 706 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 707 return (error); 708 709 if (zfsctl_is_snapdir(ip)) { 710 zfs_exit(zfsvfs, FTAG); 711 return (zfsctl_snapdir_fid(ip, fidp)); 712 } 713 714 if (fidp->fid_len < SHORT_FID_LEN) { 715 fidp->fid_len = SHORT_FID_LEN; 716 zfs_exit(zfsvfs, FTAG); 717 return (SET_ERROR(ENOSPC)); 718 } 719 720 zfid = (zfid_short_t *)fidp; 721 722 zfid->zf_len = SHORT_FID_LEN; 723 724 for (i = 0; i < sizeof (zfid->zf_object); i++) 725 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 726 727 /* .zfs znodes always have a generation number of 0 */ 728 for (i = 0; i < sizeof (zfid->zf_gen); i++) 729 zfid->zf_gen[i] = 0; 730 731 zfs_exit(zfsvfs, FTAG); 732 return (0); 733 } 734 735 /* 736 * Construct a full dataset name in full_name: "pool/dataset@snap_name" 737 */ 738 static int 739 zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, 740 char *full_name) 741 { 742 objset_t *os = zfsvfs->z_os; 743 744 if (zfs_component_namecheck(snap_name, NULL, NULL) != 0) 745 return (SET_ERROR(EILSEQ)); 746 747 dmu_objset_name(os, full_name); 748 if ((strlen(full_name) + 1 + strlen(snap_name)) >= len) 749 return (SET_ERROR(ENAMETOOLONG)); 750 751 (void) strcat(full_name, "@"); 752 (void) strcat(full_name, snap_name); 753 754 return (0); 755 } 756 757 /* 758 * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" 759 */ 760 static int 761 zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid, 762 int path_len, char *full_path) 763 { 764 objset_t *os = zfsvfs->z_os; 765 fstrans_cookie_t cookie; 766 char *snapname; 767 boolean_t case_conflict; 768 uint64_t id, pos = 0; 769 int error = 0; 770 771 cookie = spl_fstrans_mark(); 772 snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 773 774 while 
(error == 0) { 775 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 776 error = dmu_snapshot_list_next(zfsvfs->z_os, 777 ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos, 778 &case_conflict); 779 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 780 if (error) 781 goto out; 782 783 if (id == objsetid) 784 break; 785 } 786 787 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 788 if (zfsvfs->z_vfs->vfs_mntpoint != NULL) { 789 snprintf(full_path, path_len, "%s/.zfs/snapshot/%s", 790 zfsvfs->z_vfs->vfs_mntpoint, snapname); 791 } else 792 error = SET_ERROR(ENOENT); 793 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 794 795 out: 796 kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); 797 spl_fstrans_unmark(cookie); 798 799 return (error); 800 } 801 802 /* 803 * Special case the handling of "..". 804 */ 805 int 806 zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp, 807 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 808 { 809 zfsvfs_t *zfsvfs = ITOZSB(dip); 810 int error = 0; 811 812 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 813 return (error); 814 815 if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) { 816 *ipp = NULL; 817 } else if (strcmp(name, "..") == 0) { 818 *ipp = dip->i_sb->s_root->d_inode; 819 } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { 820 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, 821 &zpl_fops_snapdir, &zpl_ops_snapdir); 822 } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { 823 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES, 824 &zpl_fops_shares, &zpl_ops_shares); 825 } else { 826 *ipp = NULL; 827 } 828 829 if (*ipp == NULL) 830 error = SET_ERROR(ENOENT); 831 832 zfs_exit(zfsvfs, FTAG); 833 834 return (error); 835 } 836 837 /* 838 * Lookup entry point for the 'snapshot' directory. Try to open the 839 * snapshot if it exist, creating the pseudo filesystem inode as necessary. 
840 */ 841 int 842 zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp, 843 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 844 { 845 zfsvfs_t *zfsvfs = ITOZSB(dip); 846 uint64_t id; 847 int error; 848 849 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 850 return (error); 851 852 error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id); 853 if (error) { 854 zfs_exit(zfsvfs, FTAG); 855 return (error); 856 } 857 858 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id, 859 &simple_dir_operations, &simple_dir_inode_operations); 860 if (*ipp == NULL) 861 error = SET_ERROR(ENOENT); 862 863 zfs_exit(zfsvfs, FTAG); 864 865 return (error); 866 } 867 868 /* 869 * Renaming a directory under '.zfs/snapshot' will automatically trigger 870 * a rename of the snapshot to the new given name. The rename is confined 871 * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. 872 */ 873 int 874 zfsctl_snapdir_rename(struct inode *sdip, const char *snm, 875 struct inode *tdip, const char *tnm, cred_t *cr, int flags) 876 { 877 zfsvfs_t *zfsvfs = ITOZSB(sdip); 878 char *to, *from, *real, *fsname; 879 int error; 880 881 if (!zfs_admin_snapshot) 882 return (SET_ERROR(EACCES)); 883 884 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 885 return (error); 886 887 to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 888 from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 889 real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 890 fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 891 892 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 893 error = dmu_snapshot_realname(zfsvfs->z_os, snm, real, 894 ZFS_MAX_DATASET_NAME_LEN, NULL); 895 if (error == 0) { 896 snm = real; 897 } else if (error != ENOTSUP) { 898 goto out; 899 } 900 } 901 902 dmu_objset_name(zfsvfs->z_os, fsname); 903 904 error = zfsctl_snapshot_name(ITOZSB(sdip), snm, 905 ZFS_MAX_DATASET_NAME_LEN, from); 906 if (error == 0) 907 error = zfsctl_snapshot_name(ITOZSB(tdip), 
tnm, 908 ZFS_MAX_DATASET_NAME_LEN, to); 909 if (error == 0) 910 error = zfs_secpolicy_rename_perms(from, to, cr); 911 if (error != 0) 912 goto out; 913 914 /* 915 * Cannot move snapshots out of the snapdir. 916 */ 917 if (sdip != tdip) { 918 error = SET_ERROR(EINVAL); 919 goto out; 920 } 921 922 /* 923 * No-op when names are identical. 924 */ 925 if (strcmp(snm, tnm) == 0) { 926 error = 0; 927 goto out; 928 } 929 930 rw_enter(&zfs_snapshot_lock, RW_WRITER); 931 932 error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); 933 if (error == 0) 934 (void) zfsctl_snapshot_rename(snm, tnm); 935 936 rw_exit(&zfs_snapshot_lock); 937 out: 938 kmem_free(from, ZFS_MAX_DATASET_NAME_LEN); 939 kmem_free(to, ZFS_MAX_DATASET_NAME_LEN); 940 kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); 941 kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); 942 943 zfs_exit(zfsvfs, FTAG); 944 945 return (error); 946 } 947 948 /* 949 * Removing a directory under '.zfs/snapshot' will automatically trigger 950 * the removal of the snapshot with the given name. 
951 */ 952 int 953 zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr, 954 int flags) 955 { 956 zfsvfs_t *zfsvfs = ITOZSB(dip); 957 char *snapname, *real; 958 int error; 959 960 if (!zfs_admin_snapshot) 961 return (SET_ERROR(EACCES)); 962 963 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 964 return (error); 965 966 snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 967 real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 968 969 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 970 error = dmu_snapshot_realname(zfsvfs->z_os, name, real, 971 ZFS_MAX_DATASET_NAME_LEN, NULL); 972 if (error == 0) { 973 name = real; 974 } else if (error != ENOTSUP) { 975 goto out; 976 } 977 } 978 979 error = zfsctl_snapshot_name(ITOZSB(dip), name, 980 ZFS_MAX_DATASET_NAME_LEN, snapname); 981 if (error == 0) 982 error = zfs_secpolicy_destroy_perms(snapname, cr); 983 if (error != 0) 984 goto out; 985 986 error = zfsctl_snapshot_unmount(snapname, MNT_FORCE); 987 if ((error == 0) || (error == ENOENT)) 988 error = dsl_destroy_snapshot(snapname, B_FALSE); 989 out: 990 kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); 991 kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); 992 993 zfs_exit(zfsvfs, FTAG); 994 995 return (error); 996 } 997 998 /* 999 * Creating a directory under '.zfs/snapshot' will automatically trigger 1000 * the creation of a new snapshot with the given name. 
1001 */ 1002 int 1003 zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname, vattr_t *vap, 1004 struct inode **ipp, cred_t *cr, int flags) 1005 { 1006 zfsvfs_t *zfsvfs = ITOZSB(dip); 1007 char *dsname; 1008 int error; 1009 1010 if (!zfs_admin_snapshot) 1011 return (SET_ERROR(EACCES)); 1012 1013 dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 1014 1015 if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { 1016 error = SET_ERROR(EILSEQ); 1017 goto out; 1018 } 1019 1020 dmu_objset_name(zfsvfs->z_os, dsname); 1021 1022 error = zfs_secpolicy_snapshot_perms(dsname, cr); 1023 if (error != 0) 1024 goto out; 1025 1026 if (error == 0) { 1027 error = dmu_objset_snapshot_one(dsname, dirname); 1028 if (error != 0) 1029 goto out; 1030 1031 error = zfsctl_snapdir_lookup(dip, dirname, ipp, 1032 0, cr, NULL, NULL); 1033 } 1034 out: 1035 kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); 1036 1037 return (error); 1038 } 1039 1040 /* 1041 * Flush everything out of the kernel's export table and such. 1042 * This is needed as once the snapshot is used over NFS, its 1043 * entries in svc_export and svc_expkey caches hold reference 1044 * to the snapshot mount point. There is no known way of flushing 1045 * only the entries related to the snapshot. 1046 */ 1047 static void 1048 exportfs_flush(void) 1049 { 1050 char *argv[] = { "/usr/sbin/exportfs", "-f", NULL }; 1051 char *envp[] = { NULL }; 1052 1053 (void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1054 } 1055 1056 /* 1057 * Returns the path in char format for given struct path. Uses 1058 * d_path exported by kernel to convert struct path to char 1059 * format. Returns the correct path for mountpoints and chroot 1060 * environments. 1061 * 1062 * If chroot environment has directories that are mounted with 1063 * --bind or --rbind flag, d_path returns the complete path inside 1064 * chroot environment but does not return the absolute path, i.e. 1065 * the path to chroot environment is missing. 
1066 */ 1067 static int 1068 get_root_path(struct path *path, char *buff, int len) 1069 { 1070 char *path_buffer, *path_ptr; 1071 int error = 0; 1072 1073 path_get(path); 1074 path_buffer = kmem_zalloc(len, KM_SLEEP); 1075 path_ptr = d_path(path, path_buffer, len); 1076 if (IS_ERR(path_ptr)) 1077 error = SET_ERROR(-PTR_ERR(path_ptr)); 1078 else 1079 strcpy(buff, path_ptr); 1080 1081 kmem_free(path_buffer, len); 1082 path_put(path); 1083 return (error); 1084 } 1085 1086 /* 1087 * Returns if the current process root is chrooted or not. Linux 1088 * kernel exposes the task_struct for current process and init. 1089 * Since init process root points to actual root filesystem when 1090 * Linux runtime is reached, we can compare the current process 1091 * root with init process root to determine if root of the current 1092 * process is different from init, which can reliably determine if 1093 * current process is in chroot context or not. 1094 */ 1095 static int 1096 is_current_chrooted(void) 1097 { 1098 struct task_struct *curr = current, *global = &init_task; 1099 struct path cr_root, gl_root; 1100 1101 task_lock(curr); 1102 get_fs_root(curr->fs, &cr_root); 1103 task_unlock(curr); 1104 1105 task_lock(global); 1106 get_fs_root(global->fs, &gl_root); 1107 task_unlock(global); 1108 1109 int chrooted = !path_equal(&cr_root, &gl_root); 1110 path_put(&gl_root); 1111 path_put(&cr_root); 1112 1113 return (chrooted); 1114 } 1115 1116 /* 1117 * Attempt to unmount a snapshot by making a call to user space. 1118 * There is no assurance that this can or will succeed, is just a 1119 * best effort. In the case where it does fail, perhaps because 1120 * it's in use, the unmount will fail harmlessly. 
1121 */ 1122 int 1123 zfsctl_snapshot_unmount(const char *snapname, int flags) 1124 { 1125 char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL, 1126 NULL }; 1127 char *envp[] = { NULL }; 1128 zfs_snapentry_t *se; 1129 int error; 1130 1131 rw_enter(&zfs_snapshot_lock, RW_READER); 1132 if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) { 1133 rw_exit(&zfs_snapshot_lock); 1134 return (SET_ERROR(ENOENT)); 1135 } 1136 rw_exit(&zfs_snapshot_lock); 1137 1138 exportfs_flush(); 1139 1140 if (flags & MNT_FORCE) 1141 argv[4] = "-fn"; 1142 argv[5] = se->se_path; 1143 dprintf("unmount; path=%s\n", se->se_path); 1144 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1145 zfsctl_snapshot_rele(se); 1146 1147 1148 /* 1149 * The umount system utility will return 256 on error. We must 1150 * assume this error is because the file system is busy so it is 1151 * converted to the more sensible EBUSY. 1152 */ 1153 if (error) 1154 error = SET_ERROR(EBUSY); 1155 1156 return (error); 1157 } 1158 1159 int 1160 zfsctl_snapshot_mount(struct path *path, int flags) 1161 { 1162 struct dentry *dentry = path->dentry; 1163 struct inode *ip = dentry->d_inode; 1164 zfsvfs_t *zfsvfs; 1165 zfsvfs_t *snap_zfsvfs; 1166 zfs_snapentry_t *se; 1167 char *full_name, *full_path, *options; 1168 char *argv[] = { "/usr/bin/env", "mount", "-i", "-t", "zfs", "-n", 1169 "-o", NULL, NULL, NULL, NULL }; 1170 char *envp[] = { NULL }; 1171 int error; 1172 struct path spath; 1173 1174 if (ip == NULL) 1175 return (SET_ERROR(EISDIR)); 1176 1177 zfsvfs = ITOZSB(ip); 1178 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1179 return (error); 1180 1181 full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 1182 full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1183 options = kmem_zalloc(7, KM_SLEEP); 1184 1185 error = zfsctl_snapshot_name(zfsvfs, dname(dentry), 1186 ZFS_MAX_DATASET_NAME_LEN, full_name); 1187 if (error) 1188 goto error; 1189 1190 if (is_current_chrooted() == 0) { 1191 /* 1192 * 
Current process is not in chroot context 1193 */ 1194 1195 char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1196 struct path mnt_path; 1197 mnt_path.mnt = path->mnt; 1198 mnt_path.dentry = path->mnt->mnt_root; 1199 1200 /* 1201 * Get path to current mountpoint 1202 */ 1203 error = get_root_path(&mnt_path, m, MAXPATHLEN); 1204 if (error != 0) { 1205 kmem_free(m, MAXPATHLEN); 1206 goto error; 1207 } 1208 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 1209 if (zfsvfs->z_vfs->vfs_mntpoint != NULL) { 1210 /* 1211 * If current mnountpoint and vfs_mntpoint are not same, 1212 * store current mountpoint in vfs_mntpoint. 1213 */ 1214 if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) { 1215 kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint); 1216 zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m); 1217 } 1218 } else 1219 zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m); 1220 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 1221 kmem_free(m, MAXPATHLEN); 1222 } 1223 1224 /* 1225 * Construct a mount point path from sb of the ctldir inode and dirent 1226 * name, instead of from d_path(), so that chroot'd process doesn't fail 1227 * on mount.zfs(8). 1228 */ 1229 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 1230 snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", 1231 zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "", 1232 dname(dentry)); 1233 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 1234 1235 snprintf(options, 7, "%s", 1236 zfs_snapshot_no_setuid ? "nosuid" : "suid"); 1237 1238 /* 1239 * Multiple concurrent automounts of a snapshot are never allowed. 1240 * The snapshot may be manually mounted as many times as desired. 1241 */ 1242 if (zfsctl_snapshot_ismounted(full_name)) { 1243 error = 0; 1244 goto error; 1245 } 1246 1247 /* 1248 * Attempt to mount the snapshot from user space. Normally this 1249 * would be done using the vfs_kern_mount() function, however that 1250 * function is marked GPL-only and cannot be used. 
On error we 1251 * careful to log the real error to the console and return EISDIR 1252 * to safely abort the automount. This should be very rare. 1253 * 1254 * If the user mode helper happens to return EBUSY, a concurrent 1255 * mount is already in progress in which case the error is ignored. 1256 * Take note that if the program was executed successfully the return 1257 * value from call_usermodehelper() will be (exitcode << 8 + signal). 1258 */ 1259 dprintf("mount; name=%s path=%s\n", full_name, full_path); 1260 argv[7] = options; 1261 argv[8] = full_name; 1262 argv[9] = full_path; 1263 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1264 if (error) { 1265 if (!(error & MOUNT_BUSY << 8)) { 1266 zfs_dbgmsg("Unable to automount %s error=%d", 1267 full_path, error); 1268 error = SET_ERROR(EISDIR); 1269 } else { 1270 /* 1271 * EBUSY, this could mean a concurrent mount, or the 1272 * snapshot has already been mounted at completely 1273 * different place. We return 0 so VFS will retry. For 1274 * the latter case the VFS will retry several times 1275 * and return ELOOP, which is probably not a very good 1276 * behavior. 1277 */ 1278 error = 0; 1279 } 1280 goto error; 1281 } 1282 1283 /* 1284 * Follow down in to the mounted snapshot and set MNT_SHRINKABLE 1285 * to identify this as an automounted filesystem. 
1286 */ 1287 spath = *path; 1288 path_get(&spath); 1289 if (follow_down_one(&spath)) { 1290 snap_zfsvfs = ITOZSB(spath.dentry->d_inode); 1291 snap_zfsvfs->z_parent = zfsvfs; 1292 dentry = spath.dentry; 1293 spath.mnt->mnt_flags |= MNT_SHRINKABLE; 1294 1295 rw_enter(&zfs_snapshot_lock, RW_WRITER); 1296 se = zfsctl_snapshot_alloc(full_name, full_path, 1297 snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os), 1298 dentry); 1299 zfsctl_snapshot_add(se); 1300 zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); 1301 rw_exit(&zfs_snapshot_lock); 1302 } 1303 path_put(&spath); 1304 error: 1305 kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); 1306 kmem_free(full_path, MAXPATHLEN); 1307 1308 zfs_exit(zfsvfs, FTAG); 1309 1310 return (error); 1311 } 1312 1313 /* 1314 * Get the snapdir inode from fid 1315 */ 1316 int 1317 zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen, 1318 struct inode **ipp) 1319 { 1320 int error; 1321 struct path path; 1322 char *mnt; 1323 struct dentry *dentry; 1324 1325 mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1326 1327 error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid, 1328 MAXPATHLEN, mnt); 1329 if (error) 1330 goto out; 1331 1332 /* Trigger automount */ 1333 error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); 1334 if (error) 1335 goto out; 1336 1337 path_put(&path); 1338 /* 1339 * Get the snapdir inode. Note, we don't want to use the above 1340 * path because it contains the root of the snapshot rather 1341 * than the snapdir. 
1342 */ 1343 *ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid); 1344 if (*ipp == NULL) { 1345 error = SET_ERROR(ENOENT); 1346 goto out; 1347 } 1348 1349 /* check gen, see zfsctl_snapdir_fid */ 1350 dentry = d_obtain_alias(igrab(*ipp)); 1351 if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) { 1352 iput(*ipp); 1353 *ipp = NULL; 1354 error = SET_ERROR(ENOENT); 1355 } 1356 if (!IS_ERR(dentry)) 1357 dput(dentry); 1358 out: 1359 kmem_free(mnt, MAXPATHLEN); 1360 return (error); 1361 } 1362 1363 int 1364 zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, 1365 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 1366 { 1367 zfsvfs_t *zfsvfs = ITOZSB(dip); 1368 znode_t *zp; 1369 znode_t *dzp; 1370 int error; 1371 1372 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1373 return (error); 1374 1375 if (zfsvfs->z_shares_dir == 0) { 1376 zfs_exit(zfsvfs, FTAG); 1377 return (SET_ERROR(ENOTSUP)); 1378 } 1379 1380 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { 1381 error = zfs_lookup(dzp, name, &zp, 0, cr, NULL, NULL); 1382 zrele(dzp); 1383 } 1384 1385 zfs_exit(zfsvfs, FTAG); 1386 1387 return (error); 1388 } 1389 1390 /* 1391 * Initialize the various pieces we'll need to create and manipulate .zfs 1392 * directories. Currently this is unused but available. 1393 */ 1394 void 1395 zfsctl_init(void) 1396 { 1397 avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name, 1398 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, 1399 se_node_name)); 1400 avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid, 1401 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, 1402 se_node_objsetid)); 1403 rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL); 1404 } 1405 1406 /* 1407 * Cleanup the various pieces we needed for .zfs directories. In particular 1408 * ensure the expiry timer is canceled safely. 
1409 */ 1410 void 1411 zfsctl_fini(void) 1412 { 1413 avl_destroy(&zfs_snapshots_by_name); 1414 avl_destroy(&zfs_snapshots_by_objsetid); 1415 rw_destroy(&zfs_snapshot_lock); 1416 } 1417 1418 module_param(zfs_admin_snapshot, int, 0644); 1419 MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot"); 1420 1421 module_param(zfs_expire_snapshot, int, 0644); 1422 MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); 1423 1424 module_param(zfs_snapshot_no_setuid, int, 0644); 1425 MODULE_PARM_DESC(zfs_snapshot_no_setuid, 1426 "Disable setuid/setgid for automounts in .zfs/snapshot"); 1427