1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
24 * Copyright (c) 2014 Integros [integros.com] 25 */ 26 27 /* Portions Copyright 2007 Jeremy Teo */ 28 29 #ifdef _KERNEL 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/time.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/resource.h> 36 #include <sys/mntent.h> 37 #include <sys/mkdev.h> 38 #include <sys/u8_textprep.h> 39 #include <sys/dsl_dataset.h> 40 #include <sys/vfs.h> 41 #include <sys/vfs_opreg.h> 42 #include <sys/vnode.h> 43 #include <sys/file.h> 44 #include <sys/kmem.h> 45 #include <sys/errno.h> 46 #include <sys/unistd.h> 47 #include <sys/mode.h> 48 #include <sys/atomic.h> 49 #include <vm/pvn.h> 50 #include "fs/fs_subr.h" 51 #include <sys/zfs_dir.h> 52 #include <sys/zfs_acl.h> 53 #include <sys/zfs_ioctl.h> 54 #include <sys/zfs_rlock.h> 55 #include <sys/zfs_fuid.h> 56 #include <sys/dnode.h> 57 #include <sys/fs/zfs.h> 58 #include <sys/kidmap.h> 59 #endif /* _KERNEL */ 60 61 #include <sys/dmu.h> 62 #include <sys/dmu_objset.h> 63 #include <sys/dmu_tx.h> 64 #include <sys/refcount.h> 65 #include <sys/stat.h> 66 #include <sys/zap.h> 67 #include <sys/zfs_znode.h> 68 #include <sys/sa.h> 69 #include <sys/zfs_sa.h> 70 #include <sys/zfs_stat.h> 71 72 #include "zfs_prop.h" 73 #include "zfs_comutil.h" 74 75 /* 76 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only 77 * turned on when DEBUG is also defined. 78 */ 79 #ifdef DEBUG 80 #define ZNODE_STATS 81 #endif /* DEBUG */ 82 83 #ifdef ZNODE_STATS 84 #define ZNODE_STAT_ADD(stat) ((stat)++) 85 #else 86 #define ZNODE_STAT_ADD(stat) /* nothing */ 87 #endif /* ZNODE_STATS */ 88 89 /* 90 * Functions needed for userland (ie: libzpool) are not put under 91 * #ifdef_KERNEL; the rest of the functions have dependencies 92 * (such as VFS logic) that will not compile easily in userland. 93 */ 94 #ifdef _KERNEL 95 /* 96 * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to 97 * be freed before it can be safely accessed. 
 */
krwlock_t zfsvfs_lock;

/* Backing kmem cache for all znodes; created in zfs_znode_init(). */
static kmem_cache_t *znode_cache = NULL;

/*ARGSUSED*/
static void
znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
{
	/*
	 * We should never drop all dbuf refs without first clearing
	 * the eviction callback.
	 */
	panic("evicting znode %p\n", user_ptr);
}

/*
 * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
 * z_rangelock. It will modify the offset and length of the lock to reflect
 * znode-specific information, and convert RL_APPEND to RL_WRITER. This is
 * called with the rangelock_t's rl_lock held, which avoids races.
 */
static void
zfs_rangelock_cb(locked_range_t *new, void *arg)
{
	znode_t *zp = arg;

	/*
	 * If in append mode, convert to writer and lock starting at the
	 * current end of file.
	 */
	if (new->lr_type == RL_APPEND) {
		new->lr_offset = zp->z_size;
		new->lr_type = RL_WRITER;
	}

	/*
	 * If we need to grow the block size then lock the whole file range.
	 * Growing the block size rewrites the entire file, so a partial-range
	 * lock would not be sufficient.
	 */
	uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
	if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
	    zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
		new->lr_offset = 0;
		new->lr_length = UINT64_MAX;
	}
}

/*
 * kmem cache constructor: one-time setup of locks and the embedded vnode.
 * May be called with KM_NOSLEEP via kmflags, in which case vn_alloc() can
 * fail; returning -1 tells kmem the constructor failed.
 */
/*ARGSUSED*/
static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
{
	znode_t *zp = buf;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));

	zp->z_vnode = vn_alloc(kmflags);
	if (zp->z_vnode == NULL) {
		return (-1);
	}
	ZTOV(zp)->v_data = zp;

	list_link_init(&zp->z_link_node);

	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);

	rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);

	zp->z_dirlocks = NULL;
	zp->z_acl_cached = NULL;
	zp->z_moved = 0;
	return (0);
}

/*
 * kmem cache destructor: mirror of the constructor. The ASSERTs verify the
 * znode was properly torn down (no dir locks, no cached ACL) before being
 * returned to the cache.
 */
/*ARGSUSED*/
static void
zfs_znode_cache_destructor(void *buf, void *arg)
{
	znode_t *zp = buf;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	ASSERT(ZTOV(zp)->v_data == zp);
	vn_free(ZTOV(zp));
	ASSERT(!list_link_active(&zp->z_link_node));
	mutex_destroy(&zp->z_lock);
	rw_destroy(&zp->z_parent_lock);
	rw_destroy(&zp->z_name_lock);
	mutex_destroy(&zp->z_acl_lock);
	rangelock_fini(&zp->z_rangelock);

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(zp->z_acl_cached == NULL);
}

#ifdef ZNODE_STATS
/*
 * Counters for each bail-out path in zfs_znode_move(); useful for judging
 * how often znodes are actually movable under memory pressure.
 */
static struct {
	uint64_t zms_zfsvfs_invalid;
	uint64_t zms_zfsvfs_recheck1;
	uint64_t zms_zfsvfs_unmounted;
	uint64_t zms_zfsvfs_recheck2;
	uint64_t zms_obj_held;
	uint64_t zms_vnode_locked;
	uint64_t zms_not_only_dnlc;
} znode_move_stats;
#endif	/* ZNODE_STATS */

/*
 * Copy the contents of ozp into nzp and invalidate ozp, as part of a kmem
 * cache move. Caller (zfs_znode_move()) holds all the locks required to
 * make this transfer safe.
 */
static void
zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
{
	vnode_t *vp;

	/* Copy fields.
 */
	nzp->z_zfsvfs = ozp->z_zfsvfs;

	/* Swap vnodes. */
	vp = nzp->z_vnode;
	nzp->z_vnode = ozp->z_vnode;
	ozp->z_vnode = vp;	/* let destructor free the overwritten vnode */
	ZTOV(ozp)->v_data = ozp;
	ZTOV(nzp)->v_data = nzp;

	nzp->z_id = ozp->z_id;
	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
	nzp->z_unlinked = ozp->z_unlinked;
	nzp->z_atime_dirty = ozp->z_atime_dirty;
	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
	nzp->z_blksz = ozp->z_blksz;
	nzp->z_seq = ozp->z_seq;
	nzp->z_mapcnt = ozp->z_mapcnt;
	nzp->z_gen = ozp->z_gen;
	nzp->z_sync_cnt = ozp->z_sync_cnt;
	nzp->z_is_sa = ozp->z_is_sa;
	nzp->z_sa_hdl = ozp->z_sa_hdl;
	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
	nzp->z_links = ozp->z_links;
	nzp->z_size = ozp->z_size;
	nzp->z_pflags = ozp->z_pflags;
	nzp->z_uid = ozp->z_uid;
	nzp->z_gid = ozp->z_gid;
	nzp->z_mode = ozp->z_mode;

	/*
	 * Since this is just an idle znode and kmem is already dealing with
	 * memory pressure, release any cached ACL.
	 */
	if (ozp->z_acl_cached) {
		zfs_acl_free(ozp->z_acl_cached);
		ozp->z_acl_cached = NULL;
	}

	/* Point the SA handle's user data at the new znode location. */
	sa_set_userp(nzp->z_sa_hdl, nzp);

	/*
	 * Invalidate the original znode by clearing fields that provide a
	 * pointer back to the znode. Set the low bit of the vfs pointer to
	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
	 * subsequent callback.
	 */
	ozp->z_sa_hdl = NULL;
	POINTER_INVALIDATE(&ozp->z_zfsvfs);

	/*
	 * Mark the znode.
	 */
	nzp->z_moved = 1;
	ozp->z_moved = (uint8_t)-1;
}

/*
 * kmem cache move callback: attempt to relocate an idle znode from buf to
 * newbuf to reduce fragmentation. Returns KMEM_CBRC_YES on success,
 * KMEM_CBRC_DONT_KNOW when the buffer cannot be identified as a live znode,
 * and KMEM_CBRC_LATER when it is live but currently in use.
 */
/*ARGSUSED*/
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	znode_t *ozp = buf, *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;

	/*
	 * The znode is on the file system's list of known znodes if the vfs
	 * pointer is valid. We set the low bit of the vfs pointer when freeing
	 * the znode to invalidate it, and the memory patterns written by kmem
	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
	 * created znode sets the vfs pointer last of all to indicate that the
	 * znode is known and in a valid state to be moved by this function.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Close a small window in which it's possible that the filesystem could
	 * be unmounted and freed, and zfsvfs, though valid in the previous
	 * statement, could point to unrelated memory by the time we try to
	 * prevent the filesystem from being unmounted.
	 */
	rw_enter(&zfsvfs_lock, RW_WRITER);
	if (zfsvfs != ozp->z_zfsvfs) {
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * If the znode is still valid, then so is the file system. We know that
	 * no valid file system can be freed while we hold zfsvfs_lock, so we
	 * can safely ensure that the filesystem is not and will not be
	 * unmounted. The next statement is equivalent to ZFS_ENTER().
	 */
	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
	if (zfsvfs->z_unmounted) {
		ZFS_EXIT(zfsvfs);
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}
	rw_exit(&zfsvfs_lock);

	mutex_enter(&zfsvfs->z_znodes_lock);
	/*
	 * Recheck the vfs pointer in case the znode was removed just before
	 * acquiring the lock.
	 */
	if (zfsvfs != ozp->z_zfsvfs) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * At this point we know that as long as we hold z_znodes_lock, the
	 * znode cannot be freed and fields within the znode can be safely
	 * accessed. Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		return (KMEM_CBRC_LATER);
	}

	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		return (KMEM_CBRC_LATER);
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
		mutex_exit(&vp->v_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		return (KMEM_CBRC_LATER);
	}

	/*
	 * The znode is known and in a valid state to move. We're holding the
	 * locks needed to execute the critical section.
	 */
	zfs_znode_move_impl(ozp, nzp);
	mutex_exit(&vp->v_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

	/* Swap the old znode for the new one on the per-fs znode list. */
	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	return (KMEM_CBRC_YES);
}

/*
 * Module init: create the znode kmem cache and register the move callback.
 */
void
zfs_znode_init(void)
{
	/*
	 * Initialize zcache
	 */
	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
	ASSERT(znode_cache == NULL);
	znode_cache = kmem_cache_create("zfs_znode_cache",
	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
	kmem_cache_set_move(znode_cache, zfs_znode_move);
}

/*
 * Module fini: tear down the vnode op tables and destroy the znode cache.
 */
void
zfs_znode_fini(void)
{
	/*
	 * Cleanup vfs & vnode ops
	 */
	zfs_remove_op_tables();

	/*
	 * Cleanup zcache
	 */
	if (znode_cache)
		kmem_cache_destroy(znode_cache);
	znode_cache = NULL;
	rw_destroy(&zfsvfs_lock);
}

/* Vnode operation tables, one per vnode flavor; built by
 * zfs_create_op_tables() and torn down by zfs_remove_op_tables(). */
struct vnodeops *zfs_dvnodeops;
struct vnodeops *zfs_fvnodeops;
struct vnodeops *zfs_symvnodeops;
struct vnodeops *zfs_xdvnodeops;
struct vnodeops *zfs_evnodeops;
struct vnodeops *zfs_sharevnodeops;

void
zfs_remove_op_tables()
{
	/*
	 * Remove vfs ops
	 */
	ASSERT(zfsfstype);
	(void) vfs_freevfsops_by_type(zfsfstype);
	zfsfstype = 0;

	/*
	 * Remove vnode ops
	 */
	if (zfs_dvnodeops)
		vn_freevnodeops(zfs_dvnodeops);
	if (zfs_fvnodeops)
		vn_freevnodeops(zfs_fvnodeops);
	if (zfs_symvnodeops)
		vn_freevnodeops(zfs_symvnodeops);
	if (zfs_xdvnodeops)
		vn_freevnodeops(zfs_xdvnodeops);
	if (zfs_evnodeops)
		vn_freevnodeops(zfs_evnodeops);
	if (zfs_sharevnodeops)
		vn_freevnodeops(zfs_sharevnodeops);

	zfs_dvnodeops = NULL;
	zfs_fvnodeops = NULL;
	zfs_symvnodeops = NULL;
	zfs_xdvnodeops = NULL;
	zfs_evnodeops = NULL;
	zfs_sharevnodeops = NULL;
}

extern const fs_operation_def_t zfs_dvnodeops_template[];
extern
const fs_operation_def_t zfs_fvnodeops_template[];
extern const fs_operation_def_t zfs_xdvnodeops_template[];
extern const fs_operation_def_t zfs_symvnodeops_template[];
extern const fs_operation_def_t zfs_evnodeops_template[];
extern const fs_operation_def_t zfs_sharevnodeops_template[];

/*
 * Build each vnode op table from its template. Returns 0 on success or the
 * first vn_make_ops() error; partially-built tables are left for
 * zfs_remove_op_tables() to clean up.
 */
int
zfs_create_op_tables()
{
	int error;

	/*
	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
	 * due to a failure to remove the 2nd modlinkage (zfs_modldrv).
	 * In this case we just return as the ops vectors are already set up.
	 */
	if (zfs_dvnodeops)
		return (0);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
	    &zfs_dvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
	    &zfs_fvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
	    &zfs_symvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
	    &zfs_xdvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
	    &zfs_evnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
	    &zfs_sharevnodeops);

	return (error);
}

/*
 * Create the hidden ".zfs/shares" directory object and record it in the
 * MASTER_NODE_OBJ zap. The temporary znode is constructed by hand (never
 * linked into the namespace) and freed before returning; returns the
 * zap_add() error code.
 */
int
zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	zfs_acl_ids_t acl_ids;
	vattr_t vattr;
	znode_t *sharezp;
	vnode_t *vp;
	znode_t *zp;
	int error;

	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0555;
	vattr.va_uid = crgetuid(kcred);
	vattr.va_gid = crgetgid(kcred);

	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
	sharezp->z_moved = 0;
	sharezp->z_unlinked = 0;
	sharezp->z_atime_dirty = 0;
	sharezp->z_zfsvfs = zfsvfs;
	sharezp->z_is_sa = zfsvfs->z_use_sa;
	sharezp->z_pflags = 0;

	vp = ZTOV(sharezp);
	vn_reinit(vp);
	vp->v_type = VDIR;

	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
	    kcred, NULL, &acl_ids));
	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, sharezp);
	ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
	zfsvfs->z_shares_dir = sharezp->z_id;

	zfs_acl_ids_free(&acl_ids);
	/* Drop the hold so the cache destructor's ASSERTs are satisfied. */
	ZTOV(sharezp)->v_count = 0;
	sa_handle_destroy(sharezp->z_sa_hdl);
	kmem_cache_free(znode_cache, sharezp);

	return (error);
}

/*
 * define a couple of values we need available
 * for both 64 and 32 bit environments.
 */
#ifndef NBITSMINOR64
#define	NBITSMINOR64	32
#endif
#ifndef MAXMAJ64
#define	MAXMAJ64	0xffffffffUL
#endif
#ifndef MAXMIN64
#define	MAXMIN64	0xffffffffUL
#endif

/*
 * Create special expldev for ZFS private use.
 * Can't use standard expldev since it doesn't do
 * what we want. The standard expldev() takes a
 * dev32_t in LP64 and expands it to a long dev_t.
 * We need an interface that takes a dev32_t in ILP32
 * and expands it to a long dev_t.
 */
static uint64_t
zfs_expldev(dev_t dev)
{
#ifndef _LP64
	major_t major = (major_t)dev >> NBITSMINOR32 & MAXMAJ32;
	return (((uint64_t)major << NBITSMINOR64) |
	    ((minor_t)dev & MAXMIN32));
#else
	return (dev);
#endif
}

/*
 * Special cmpldev for ZFS private use.
 * Can't use standard cmpldev since it takes
 * a long dev_t and compresses it to dev32_t in
 * LP64. We need to do a compaction of a long dev_t
 * to a dev32_t in ILP32.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
#ifndef _LP64
	minor_t minor = (minor_t)dev & MAXMIN64;
	major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;

	/* NODEV32 signals that the on-disk device number doesn't fit. */
	if (major > MAXMAJ32 || minor > MAXMIN32)
		return (NODEV32);

	return (((dev32_t)major << NBITSMINOR32) | minor);
#else
	return (dev);
#endif
}

/*
 * Attach an SA handle to a freshly allocated znode, either adopting the
 * caller-supplied handle or creating a shared one from the bonus dbuf.
 * Caller must hold the per-object mutex (asserted below).
 */
static void
zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
{
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));

	mutex_enter(&zp->z_lock);

	ASSERT(zp->z_sa_hdl == NULL);
	ASSERT(zp->z_acl_cached == NULL);
	if (sa_hdl == NULL) {
		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
		    SA_HDL_SHARED, &zp->z_sa_hdl));
	} else {
		zp->z_sa_hdl = sa_hdl;
		sa_set_userp(sa_hdl, zp);
	}

	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;

	/*
	 * Slap on VROOT if we are the root znode
	 */
	if (zp->z_id == zfsvfs->z_root)
		ZTOV(zp)->v_flag |= VROOT;

	mutex_exit(&zp->z_lock);
	vn_exists(ZTOV(zp));
}

/*
 * Release the znode's SA handle. The ASSERT enumerates the contexts in which
 * this is safe: object mutex held, unlinked znode, or filesystem teardown.
 */
void
zfs_znode_dmu_fini(znode_t *zp)
{
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
	    zp->z_unlinked ||
	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));

	sa_handle_destroy(zp->z_sa_hdl);
	zp->z_sa_hdl = NULL;
}

/*
 * Construct a new znode/vnode and initialize.
 *
 * This does not do a call to dmu_set_user() that is
 * up to the caller to do, in case you don't want to
 * return the znode
 *
 * Returns NULL if the object's SA attributes cannot be read (e.g. a
 * create still in progress), in which case the znode is freed and, if
 * hdl was NULL, the internally created SA handle is destroyed.
 */
static znode_t *
zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
    dmu_object_type_t obj_type, sa_handle_t *hdl)
{
	znode_t	*zp;
	vnode_t *vp;
	uint64_t mode;
	uint64_t parent;
	uint64_t projid = ZFS_DEFAULT_PROJID;
	sa_bulk_attr_t bulk[11];	/* sized above the 9 entries used */
	int count = 0;

	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	zp->z_moved = 0;

	/*
	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
	 * the zfs_znode_move() callback.
	 */
	zp->z_sa_hdl = NULL;
	zp->z_unlinked = 0;
	zp->z_atime_dirty = 0;
	zp->z_mapcnt = 0;
	zp->z_id = db->db_object;
	zp->z_blksz = blksz;
	zp->z_seq = 0x7A4653;
	zp->z_sync_cnt = 0;

	vp = ZTOV(zp);
	vn_reinit(vp);

	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);

	/* Batch-read the common attributes into the in-core znode. */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, 8);

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0 ||
	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
	    (zp->z_pflags & ZFS_PROJID) &&
	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
		if (hdl == NULL)
			sa_handle_destroy(zp->z_sa_hdl);
		kmem_cache_free(znode_cache, zp);
		return (NULL);
	}

	zp->z_projid = projid;
	zp->z_mode = mode;
	vp->v_vfsp = zfsvfs->z_parent->z_vfs;

	vp->v_type = IFTOVT((mode_t)mode);

	/* Select the vnode op table based on the file type. */
	switch (vp->v_type) {
	case VDIR:
		if (zp->z_pflags & ZFS_XATTR) {
			vn_setops(vp, zfs_xdvnodeops);
			vp->v_flag |= V_XATTRDIR;
		} else {
			vn_setops(vp, zfs_dvnodeops);
		}
		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
		break;
	case VBLK:
	case VCHR:
		{
			uint64_t rdev;
			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
			    &rdev, sizeof (rdev)) == 0);

			vp->v_rdev = zfs_cmpldev(rdev);
		}
		/*FALLTHROUGH*/
	case VFIFO:
	case VSOCK:
	case VDOOR:
		vn_setops(vp, zfs_fvnodeops);
		break;
	case VREG:
		vp->v_flag |= VMODSORT;
		if (parent == zfsvfs->z_shares_dir) {
			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
			vn_setops(vp, zfs_sharevnodeops);
		} else {
			vn_setops(vp, zfs_fvnodeops);
		}
		break;
	case VLNK:
		vn_setops(vp, zfs_symvnodeops);
		break;
	default:
		vn_setops(vp, zfs_evnodeops);
		break;
	}

	mutex_enter(&zfsvfs->z_znodes_lock);
	list_insert_tail(&zfsvfs->z_all_znodes, zp);
	membar_producer();
	/*
	 * Everything else must be valid before assigning z_zfsvfs makes the
	 * znode eligible for zfs_znode_move().
	 */
	zp->z_zfsvfs = zfsvfs;
	mutex_exit(&zfsvfs->z_znodes_lock);

	VFS_HOLD(zfsvfs->z_vfs);
	return (zp);
}

/* Shared zeroed scratch for SA templates written by zfs_mknode(). */
static uint64_t empty_xattr;
static uint64_t pad[4];
static zfs_acl_phys_t acl_phys;
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		bonuslen - length of bonus buffer
 *		setaclp - File/Dir initial ACL
 *		fuidp	- Tracks fuid allocation.
 *
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;	/* XXX: never read below; candidate for removal */
	uint64_t	projid = ZFS_DEFAULT_PROJID;
	uint64_t	rdev = 0;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		bonuslen;
	int		dnodesize;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	*sa_attrs;
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/*
	 * During ZIL replay the object id, generation, and dnode size come
	 * from the original create smuggled through the vattr.
	 */
	if (zfsvfs->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
		dnodesize = vap->va_fsid;	/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
	}

	if (dnodesize == 0)
		dnodesize = DNODE_MIN_SIZE;

	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		if (zfsvfs->z_replay) {
			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, dnodesize, tx));
		} else {
			obj = zap_create_norm_dnsize(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, dnodesize, tx);
		}
	} else {
		if (zfsvfs->z_replay) {
			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, dnodesize, tx));
		} else {
			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, dnodesize, tx);
		}
	}

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp->z_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zfsvfs->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (vap->va_type == VDIR) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		rdev = zfs_expldev(vap->va_rdev);
	}

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	if (vap->va_type == VREG || vap->va_type == VDIR) {
		/*
		 * With ZFS_PROJID flag, we can easily know whether there is
		 * project ID stored on disk or not. See zfs_space_delta_cb().
		 */
		if (obj_type != DMU_OT_ZNODE &&
		    dmu_objset_projectquota_enabled(zfsvfs->z_os))
			pflags |= ZFS_PROJID;

		/*
		 * Inherit project ID from parent if required.
		 */
		projid = zfs_inherit_projid(dzp);
		if (dzp->z_pflags & ZFS_PROJINHERIT)
			pflags |= ZFS_PROJINHERIT;
	}

	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file
	 *
	 * order for DMU_OT_ZNODE is critical since it needs to be constructed
	 * in the old znode_phys_t format. Don't change this ordering
	 */
	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
		    NULL, &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
		    NULL, &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
		    &empty_xattr, 8);
	} else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
	    pflags & ZFS_PROJID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs),
		    NULL, &projid, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
		    NULL, &rdev, 8);

	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
		ASSERT(*zpp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
1048 */ 1049 *zpp = dzp; 1050 1051 (*zpp)->z_sa_hdl = sa_hdl; 1052 } 1053 1054 (*zpp)->z_pflags = pflags; 1055 (*zpp)->z_mode = mode; 1056 (*zpp)->z_dnodesize = dnodesize; 1057 (*zpp)->z_projid = projid; 1058 1059 if (vap->va_mask & AT_XVATTR) 1060 zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx); 1061 1062 if (obj_type == DMU_OT_ZNODE || 1063 acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { 1064 VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); 1065 } 1066 kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); 1067 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 1068 } 1069 1070 /* 1071 * Update in-core attributes. It is assumed the caller will be doing an 1072 * sa_bulk_update to push the changes out. 1073 */ 1074 void 1075 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) 1076 { 1077 xoptattr_t *xoap; 1078 1079 xoap = xva_getxoptattr(xvap); 1080 ASSERT(xoap); 1081 1082 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 1083 uint64_t times[2]; 1084 ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); 1085 (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), 1086 ×, sizeof (times), tx); 1087 XVA_SET_RTN(xvap, XAT_CREATETIME); 1088 } 1089 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 1090 ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, 1091 zp->z_pflags, tx); 1092 XVA_SET_RTN(xvap, XAT_READONLY); 1093 } 1094 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 1095 ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, 1096 zp->z_pflags, tx); 1097 XVA_SET_RTN(xvap, XAT_HIDDEN); 1098 } 1099 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 1100 ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, 1101 zp->z_pflags, tx); 1102 XVA_SET_RTN(xvap, XAT_SYSTEM); 1103 } 1104 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 1105 ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, 1106 zp->z_pflags, tx); 1107 XVA_SET_RTN(xvap, XAT_ARCHIVE); 1108 } 1109 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 1110 ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, 1111 zp->z_pflags, tx); 1112 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 1113 } 1114 if 
/*
 * Look up the znode for object obj_num, returning it held in *zpp.
 *
 * Returns 0 with a VN_HOLD'd vnode on success; ENOENT if the object is
 * unlinked or no longer looks like a file; EINVAL if the bonus buffer is
 * neither an SA nor a (full-size) legacy znode_phys_t.
 *
 * ZFS_OBJ_HOLD_ENTER serializes against concurrent create/destroy of the
 * same object number for the duration of the lookup.
 */
int
zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t *db;
	znode_t *zp;
	int err;
	sa_handle_t *hdl;

	*zpp = NULL;

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	/*
	 * Sanity-check the bonus buffer: accept DMU_OT_SA, or a legacy
	 * DMU_OT_ZNODE bonus large enough to hold a znode_phys_t.
	 */
	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		/* Fast path: a cached znode already owns this buffer. */
		zp = sa_get_userdata(hdl);

		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */
		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			/* On the unlinked set: not visible to lookups. */
			err = SET_ERROR(ENOENT);
		} else {
			VN_HOLD(ZTOV(zp));
			*zpp = zp;
			err = 0;
		}
		mutex_exit(&zp->z_lock);
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	/*
	 * Not found create new znode/vnode
	 * but only if file exists.
	 *
	 * There is a small window where zfs_vget() could
	 * find this object while a file create is still in
	 * progress. This is checked for in zfs_znode_alloc()
	 *
	 * if zfs_znode_alloc() fails it will drop the hold on the
	 * bonus buffer.
	 */
	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
	    doi.doi_bonus_type, NULL);
	if (zp == NULL) {
		err = SET_ERROR(ENOENT);
	} else {
		*zpp = zp;
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
	return (err);
}
/*
 * Re-attach an existing in-core znode to its on-disk object, reloading
 * the cached SA attributes.  Used after events that invalidate the DMU
 * state backing a znode (the znode itself stays allocated).
 *
 * Returns 0 on success; EINVAL for a malformed bonus buffer; EIO if the
 * bulk lookup fails or the generation number no longer matches (the
 * object number was recycled).
 */
int
zfs_rezget(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_object_info_t doi;
	dmu_buf_t *db;
	uint64_t obj_num = zp->z_id;
	uint64_t mode;
	sa_bulk_attr_t bulk[10];
	int err;
	int count = 0;
	uint64_t gen;
	uint64_t projid = ZFS_DEFAULT_PROJID;

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	/* Any cached ACL derives from the stale attributes: drop it. */
	mutex_enter(&zp->z_acl_lock);
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	mutex_exit(&zp->z_acl_lock);
	ASSERT(zp->z_sa_hdl == NULL);
	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	/* Same bonus-buffer validation as zfs_zget(). */
	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EINVAL));
	}

	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);

	/* reload cached values */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
	    &gen, sizeof (gen));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, sizeof (zp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, sizeof (zp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, sizeof (zp->z_atime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, sizeof (zp->z_uid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, sizeof (zp->z_gid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
	    &mode, sizeof (mode));

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EIO));
	}

	/* Project id is optional; ENOENT simply means "not set". */
	if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) {
		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs),
		    &projid, 8);
		if (err != 0 && err != ENOENT) {
			zfs_znode_dmu_fini(zp);
			ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
			return (SET_ERROR(err));
		}
	}

	zp->z_projid = projid;
	zp->z_mode = mode;

	/* Generation mismatch: this object number was freed and reused. */
	if (gen != zp->z_gen) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EIO));
	}

	zp->z_blksz = doi.doi_data_block_size;

	/*
	 * If the file has zero links, then it has been unlinked on the send
	 * side and it must be in the received unlinked set.
	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
	 * stale data and to prevent automatical removal of the file in
	 * zfs_zinactive().  The file will be removed either when it is removed
	 * on the send side and the next incremental stream is received or
	 * when the unlinked set gets processed.
	 */
	zp->z_unlinked = (zp->z_links == 0);
	if (zp->z_unlinked)
		zfs_znode_dmu_fini(zp);

	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);

	return (0);
}
1344 */ 1345 zp->z_unlinked = (zp->z_links == 0); 1346 if (zp->z_unlinked) 1347 zfs_znode_dmu_fini(zp); 1348 1349 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 1350 1351 return (0); 1352 } 1353 1354 void 1355 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 1356 { 1357 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1358 objset_t *os = zfsvfs->z_os; 1359 uint64_t obj = zp->z_id; 1360 uint64_t acl_obj = zfs_external_acl(zp); 1361 1362 ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); 1363 if (acl_obj) { 1364 VERIFY(!zp->z_is_sa); 1365 VERIFY(0 == dmu_object_free(os, acl_obj, tx)); 1366 } 1367 VERIFY(0 == dmu_object_free(os, obj, tx)); 1368 zfs_znode_dmu_fini(zp); 1369 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 1370 zfs_znode_free(zp); 1371 } 1372 1373 void 1374 zfs_zinactive(znode_t *zp) 1375 { 1376 vnode_t *vp = ZTOV(zp); 1377 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1378 uint64_t z_id = zp->z_id; 1379 1380 ASSERT(zp->z_sa_hdl); 1381 1382 /* 1383 * Don't allow a zfs_zget() while were trying to release this znode 1384 */ 1385 ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 1386 1387 mutex_enter(&zp->z_lock); 1388 mutex_enter(&vp->v_lock); 1389 VN_RELE_LOCKED(vp); 1390 if (vp->v_count > 0 || vn_has_cached_data(vp)) { 1391 /* 1392 * If the hold count is greater than zero, somebody has 1393 * obtained a new reference on this znode while we were 1394 * processing it here, so we are done. If we still have 1395 * mapped pages then we are also done, since we don't 1396 * want to inactivate the znode until the pages get pushed. 1397 * 1398 * XXX - if vn_has_cached_data(vp) is true, but count == 0, 1399 * this seems like it would leave the znode hanging with 1400 * no chance to go inactive... 1401 */ 1402 mutex_exit(&vp->v_lock); 1403 mutex_exit(&zp->z_lock); 1404 ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 1405 return; 1406 } 1407 mutex_exit(&vp->v_lock); 1408 1409 /* 1410 * If this was the last reference to a file with no links, remove 1411 * the file from the file system unless the file system is mounted 1412 * read-only. 
/*
 * Release an in-core znode: unhook it from the per-filesystem list,
 * drop any cached ACL, return it to the kmem cache, and release the
 * VFS hold it carried.  The vnode must have no remaining holds.
 */
void
zfs_znode_free(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	vn_invalid(ZTOV(zp));

	ASSERT(ZTOV(zp)->v_count == 0);

	mutex_enter(&zfsvfs->z_znodes_lock);
	/* Poison the back-pointer so stale users trip an assertion. */
	POINTER_INVALIDATE(&zp->z_zfsvfs);
	list_remove(&zfsvfs->z_all_znodes, zp);
	mutex_exit(&zfsvfs->z_znodes_lock);

	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	kmem_cache_free(znode_cache, zp);

	/* Drop the hold last: zfsvfs must outlive the znode. */
	VFS_RELE(zfsvfs->z_vfs);
}

/*
 * Encode the current time into the atime/mtime/ctime slots selected by
 * flag.  When have_tx is true the caller is about to sa_bulk_update(),
 * so the in-core atime is considered clean; otherwise it is marked
 * dirty for a later push.  On FUID-capable filesystems, mtime/ctime
 * updates also set the DOS ARCHIVE (and AV_MODIFIED) pflags.
 */
void
zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
    uint64_t ctime[2], boolean_t have_tx)
{
	timestruc_t now;

	gethrestime(&now);

	if (have_tx) {	/* will sa_bulk_update happen really soon? */
		zp->z_atime_dirty = 0;
		zp->z_seq++;
	} else {
		zp->z_atime_dirty = 1;
	}

	if (flag & AT_ATIME) {
		ZFS_TIME_ENCODE(&now, zp->z_atime);
	}

	if (flag & AT_MTIME) {
		ZFS_TIME_ENCODE(&now, mtime);
		if (zp->z_zfsvfs->z_use_fuids) {
			zp->z_pflags |= (ZFS_ARCHIVE |
			    ZFS_AV_MODIFIED);
		}
	}

	if (flag & AT_CTIME) {
		ZFS_TIME_ENCODE(&now, ctime);
		if (zp->z_zfsvfs->z_use_fuids)
			zp->z_pflags |= ZFS_ARCHIVE;
	}
}
1493 * 1494 * IN: zp - znode of file to free data in. 1495 * size - requested block size 1496 * tx - open transaction. 1497 * 1498 * NOTE: this function assumes that the znode is write locked. 1499 */ 1500 void 1501 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 1502 { 1503 int error; 1504 u_longlong_t dummy; 1505 1506 if (size <= zp->z_blksz) 1507 return; 1508 /* 1509 * If the file size is already greater than the current blocksize, 1510 * we will not grow. If there is more than one block in a file, 1511 * the blocksize cannot change. 1512 */ 1513 if (zp->z_blksz && zp->z_size > zp->z_blksz) 1514 return; 1515 1516 error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 1517 size, 0, tx); 1518 1519 if (error == ENOTSUP) 1520 return; 1521 ASSERT0(error); 1522 1523 /* What blocksize did we actually get? */ 1524 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); 1525 } 1526 1527 /* 1528 * This is a dummy interface used when pvn_vplist_dirty() should *not* 1529 * be calling back into the fs for a putpage(). E.g.: when truncating 1530 * a file, the pages being "thrown away* don't need to be written out. 1531 */ 1532 /* ARGSUSED */ 1533 static int 1534 zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, 1535 int flags, cred_t *cr) 1536 { 1537 ASSERT(0); 1538 return (0); 1539 } 1540 1541 /* 1542 * Increase the file length 1543 * 1544 * IN: zp - znode of file to free data in. 1545 * end - new end-of-file 1546 * 1547 * RETURN: 0 on success, error code on failure 1548 */ 1549 static int 1550 zfs_extend(znode_t *zp, uint64_t end) 1551 { 1552 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1553 dmu_tx_t *tx; 1554 locked_range_t *lr; 1555 uint64_t newblksz; 1556 int error; 1557 1558 /* 1559 * We will change zp_size, lock the whole file. 1560 */ 1561 lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); 1562 1563 /* 1564 * Nothing to do if file already at desired length. 
1565 */ 1566 if (end <= zp->z_size) { 1567 rangelock_exit(lr); 1568 return (0); 1569 } 1570 tx = dmu_tx_create(zfsvfs->z_os); 1571 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1572 zfs_sa_upgrade_txholds(tx, zp); 1573 if (end > zp->z_blksz && 1574 (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 1575 /* 1576 * We are growing the file past the current block size. 1577 */ 1578 if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { 1579 /* 1580 * File's blocksize is already larger than the 1581 * "recordsize" property. Only let it grow to 1582 * the next power of 2. 1583 */ 1584 ASSERT(!ISP2(zp->z_blksz)); 1585 newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); 1586 } else { 1587 newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); 1588 } 1589 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); 1590 } else { 1591 newblksz = 0; 1592 } 1593 1594 error = dmu_tx_assign(tx, TXG_WAIT); 1595 if (error) { 1596 dmu_tx_abort(tx); 1597 rangelock_exit(lr); 1598 return (error); 1599 } 1600 1601 if (newblksz) 1602 zfs_grow_blocksize(zp, newblksz, tx); 1603 1604 zp->z_size = end; 1605 1606 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), 1607 &zp->z_size, sizeof (zp->z_size), tx)); 1608 1609 rangelock_exit(lr); 1610 1611 dmu_tx_commit(tx); 1612 1613 return (0); 1614 } 1615 1616 /* 1617 * Free space in a file. 1618 * 1619 * IN: zp - znode of file to free data in. 1620 * off - start of section to free. 1621 * len - length of section to free. 1622 * 1623 * RETURN: 0 on success, error code on failure 1624 */ 1625 static int 1626 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) 1627 { 1628 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1629 locked_range_t *lr; 1630 int error; 1631 1632 /* 1633 * Lock the range being freed. 1634 */ 1635 lr = rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); 1636 1637 /* 1638 * Nothing to do if file already at desired length. 
1639 */ 1640 if (off >= zp->z_size) { 1641 rangelock_exit(lr); 1642 return (0); 1643 } 1644 1645 if (off + len > zp->z_size) 1646 len = zp->z_size - off; 1647 1648 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); 1649 1650 rangelock_exit(lr); 1651 1652 return (error); 1653 } 1654 1655 /* 1656 * Truncate a file 1657 * 1658 * IN: zp - znode of file to free data in. 1659 * end - new end-of-file. 1660 * 1661 * RETURN: 0 on success, error code on failure 1662 */ 1663 static int 1664 zfs_trunc(znode_t *zp, uint64_t end) 1665 { 1666 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1667 vnode_t *vp = ZTOV(zp); 1668 dmu_tx_t *tx; 1669 locked_range_t *lr; 1670 int error; 1671 sa_bulk_attr_t bulk[2]; 1672 int count = 0; 1673 1674 /* 1675 * We will change zp_size, lock the whole file. 1676 */ 1677 lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); 1678 1679 /* 1680 * Nothing to do if file already at desired length. 1681 */ 1682 if (end >= zp->z_size) { 1683 rangelock_exit(lr); 1684 return (0); 1685 } 1686 1687 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, 1688 DMU_OBJECT_END); 1689 if (error) { 1690 rangelock_exit(lr); 1691 return (error); 1692 } 1693 tx = dmu_tx_create(zfsvfs->z_os); 1694 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1695 zfs_sa_upgrade_txholds(tx, zp); 1696 dmu_tx_mark_netfree(tx); 1697 error = dmu_tx_assign(tx, TXG_WAIT); 1698 if (error) { 1699 dmu_tx_abort(tx); 1700 rangelock_exit(lr); 1701 return (error); 1702 } 1703 1704 zp->z_size = end; 1705 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 1706 NULL, &zp->z_size, sizeof (zp->z_size)); 1707 1708 if (end == 0) { 1709 zp->z_pflags &= ~ZFS_SPARSE; 1710 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 1711 NULL, &zp->z_pflags, 8); 1712 } 1713 VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); 1714 1715 dmu_tx_commit(tx); 1716 1717 /* 1718 * Clear any mapped pages in the truncated region. 
/*
 * Free space in a file
 *
 *	IN:	zp	- znode of file to free data in.
 *		off	- start of range
 *		len	- end of range (0 => EOF)
 *		flag	- current file open mode flags.
 *		log	- TRUE if this action should be logged
 *
 *	RETURN:	0 on success, error code on failure
 */
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
	vnode_t *vp = ZTOV(zp);
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zilog_t *zilog = zfsvfs->z_log;
	uint64_t mode;
	uint64_t mtime[2], ctime[2];
	sa_bulk_attr_t bulk[3];
	int count = 0;
	int error;

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
	    sizeof (mode))) != 0)
		return (error);

	/* Range entirely beyond EOF: this is an extend, not a free. */
	if (off > zp->z_size) {
		error = zfs_extend(zp, off+len);
		if (error == 0 && log)
			goto log;
		else
			return (error);
	}

	/*
	 * Check for any locks in the region to be freed.
	 */
	if (MANDLOCK(vp, (mode_t)mode)) {
		uint64_t length = (len ? len : zp->z_size - off);
		/* NOTE: intentional assignment inside the condition. */
		if (error = chklock(vp, FWRITE, off, length, flag, NULL))
			return (error);
	}

	if (len == 0) {
		/* len == 0 means truncate the file at off. */
		error = zfs_trunc(zp, off);
	} else {
		if ((error = zfs_free_range(zp, off, len)) == 0 &&
		    off + len > zp->z_size)
			error = zfs_extend(zp, off+len);
	}
	if (error || !log)
		return (error);
log:
	/* Record the timestamp update and log the truncate to the ZIL. */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
	    NULL, &zp->z_pflags, 8);
	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);

	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);

	dmu_tx_commit(tx);
	return (0);
}
1855 */ 1856 version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); 1857 elem = NULL; 1858 while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { 1859 /* For the moment we expect all zpl props to be uint64_ts */ 1860 uint64_t val; 1861 char *name; 1862 1863 ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); 1864 VERIFY(nvpair_value_uint64(elem, &val) == 0); 1865 name = nvpair_name(elem); 1866 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { 1867 if (val < version) 1868 version = val; 1869 } else { 1870 error = zap_update(os, moid, name, 8, 1, &val, tx); 1871 } 1872 ASSERT(error == 0); 1873 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) 1874 norm = val; 1875 else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) 1876 sense = val; 1877 } 1878 ASSERT(version != 0); 1879 error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); 1880 1881 /* 1882 * Create zap object used for SA attribute registration 1883 */ 1884 1885 if (version >= ZPL_VERSION_SA) { 1886 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 1887 DMU_OT_NONE, 0, tx); 1888 error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 1889 ASSERT(error == 0); 1890 } else { 1891 sa_obj = 0; 1892 } 1893 /* 1894 * Create a delete queue. 1895 */ 1896 obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 1897 1898 error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); 1899 ASSERT(error == 0); 1900 1901 /* 1902 * Create root znode. Create minimal znode/vnode/zfsvfs 1903 * to allow zfs_mknode to work. 
1904 */ 1905 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 1906 vattr.va_type = VDIR; 1907 vattr.va_mode = S_IFDIR|0755; 1908 vattr.va_uid = crgetuid(cr); 1909 vattr.va_gid = crgetgid(cr); 1910 1911 rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 1912 ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); 1913 rootzp->z_moved = 0; 1914 rootzp->z_unlinked = 0; 1915 rootzp->z_atime_dirty = 0; 1916 rootzp->z_is_sa = USE_SA(version, os); 1917 rootzp->z_pflags = 0; 1918 1919 vp = ZTOV(rootzp); 1920 vn_reinit(vp); 1921 vp->v_type = VDIR; 1922 1923 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 1924 zfsvfs->z_os = os; 1925 zfsvfs->z_parent = zfsvfs; 1926 zfsvfs->z_version = version; 1927 zfsvfs->z_use_fuids = USE_FUIDS(version, os); 1928 zfsvfs->z_use_sa = USE_SA(version, os); 1929 zfsvfs->z_norm = norm; 1930 1931 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 1932 &zfsvfs->z_attr_table); 1933 1934 ASSERT(error == 0); 1935 1936 /* 1937 * Fold case on file systems that are always or sometimes case 1938 * insensitive. 
1939 */ 1940 if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) 1941 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 1942 1943 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1944 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 1945 offsetof(znode_t, z_link_node)); 1946 1947 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1948 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1949 1950 rootzp->z_zfsvfs = zfsvfs; 1951 VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, 1952 cr, NULL, &acl_ids)); 1953 zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); 1954 ASSERT3P(zp, ==, rootzp); 1955 ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */ 1956 error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); 1957 ASSERT(error == 0); 1958 zfs_acl_ids_free(&acl_ids); 1959 POINTER_INVALIDATE(&rootzp->z_zfsvfs); 1960 1961 ZTOV(rootzp)->v_count = 0; 1962 sa_handle_destroy(rootzp->z_sa_hdl); 1963 kmem_cache_free(znode_cache, rootzp); 1964 1965 /* 1966 * Create shares directory 1967 */ 1968 1969 error = zfs_create_share_dir(zfsvfs, tx); 1970 1971 ASSERT(error == 0); 1972 1973 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1974 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1975 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1976 } 1977 1978 #endif /* _KERNEL */ 1979 1980 static int 1981 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) 1982 { 1983 uint64_t sa_obj = 0; 1984 int error; 1985 1986 error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); 1987 if (error != 0 && error != ENOENT) 1988 return (error); 1989 1990 error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); 1991 return (error); 1992 } 1993 1994 static int 1995 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, 1996 dmu_buf_t **db, void *tag) 1997 { 1998 dmu_object_info_t doi; 1999 int error; 2000 2001 if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) 2002 return (error); 2003 2004 dmu_object_info_from_db(*db, &doi); 2005 if 
((doi.doi_bonus_type != DMU_OT_SA && 2006 doi.doi_bonus_type != DMU_OT_ZNODE) || 2007 doi.doi_bonus_type == DMU_OT_ZNODE && 2008 doi.doi_bonus_size < sizeof (znode_phys_t)) { 2009 sa_buf_rele(*db, tag); 2010 return (SET_ERROR(ENOTSUP)); 2011 } 2012 2013 error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); 2014 if (error != 0) { 2015 sa_buf_rele(*db, tag); 2016 return (error); 2017 } 2018 2019 return (0); 2020 } 2021 2022 void 2023 zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) 2024 { 2025 sa_handle_destroy(hdl); 2026 sa_buf_rele(db, tag); 2027 } 2028 2029 /* 2030 * Given an object number, return its parent object number and whether 2031 * or not the object is an extended attribute directory. 2032 */ 2033 static int 2034 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, 2035 uint64_t *pobjp, int *is_xattrdir) 2036 { 2037 uint64_t parent; 2038 uint64_t pflags; 2039 uint64_t mode; 2040 uint64_t parent_mode; 2041 sa_bulk_attr_t bulk[3]; 2042 sa_handle_t *sa_hdl; 2043 dmu_buf_t *sa_db; 2044 int count = 0; 2045 int error; 2046 2047 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, 2048 &parent, sizeof (parent)); 2049 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, 2050 &pflags, sizeof (pflags)); 2051 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 2052 &mode, sizeof (mode)); 2053 2054 if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) 2055 return (error); 2056 2057 /* 2058 * When a link is removed its parent pointer is not changed and will 2059 * be invalid. There are two cases where a link is removed but the 2060 * file stays around, when it goes to the delete queue and when there 2061 * are additional links. 
2062 */ 2063 error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); 2064 if (error != 0) 2065 return (error); 2066 2067 error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); 2068 zfs_release_sa_handle(sa_hdl, sa_db, FTAG); 2069 if (error != 0) 2070 return (error); 2071 2072 *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); 2073 2074 /* 2075 * Extended attributes can be applied to files, directories, etc. 2076 * Otherwise the parent must be a directory. 2077 */ 2078 if (!*is_xattrdir && !S_ISDIR(parent_mode)) 2079 return (SET_ERROR(EINVAL)); 2080 2081 *pobjp = parent; 2082 2083 return (0); 2084 } 2085 2086 /* 2087 * Given an object number, return some zpl level statistics 2088 */ 2089 static int 2090 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, 2091 zfs_stat_t *sb) 2092 { 2093 sa_bulk_attr_t bulk[4]; 2094 int count = 0; 2095 2096 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 2097 &sb->zs_mode, sizeof (sb->zs_mode)); 2098 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, 2099 &sb->zs_gen, sizeof (sb->zs_gen)); 2100 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, 2101 &sb->zs_links, sizeof (sb->zs_links)); 2102 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, 2103 &sb->zs_ctime, sizeof (sb->zs_ctime)); 2104 2105 return (sa_bulk_lookup(hdl, bulk, count)); 2106 } 2107 2108 static int 2109 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, 2110 sa_attr_type_t *sa_table, char *buf, int len) 2111 { 2112 sa_handle_t *sa_hdl; 2113 sa_handle_t *prevhdl = NULL; 2114 dmu_buf_t *prevdb = NULL; 2115 dmu_buf_t *sa_db = NULL; 2116 char *path = buf + len - 1; 2117 int error; 2118 2119 *path = '\0'; 2120 sa_hdl = hdl; 2121 2122 uint64_t deleteq_obj; 2123 VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, 2124 ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); 2125 error = zap_lookup_int(osp, deleteq_obj, obj); 2126 if (error == 0) { 2127 return (ESTALE); 2128 } else if (error != 
/*
 * Reconstruct the path of object obj by walking parent pointers up to
 * the root, building the string right-to-left in buf (length len).
 * Objects found on the unlinked set return ESTALE.  hdl is a held SA
 * handle for obj itself; additional handles acquired while walking are
 * released one step behind the walk (prevhdl/prevdb) so the attributes
 * read in the current iteration stay valid.
 */
static int
zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
    sa_attr_type_t *sa_table, char *buf, int len)
{
	sa_handle_t *sa_hdl;
	sa_handle_t *prevhdl = NULL;
	dmu_buf_t *prevdb = NULL;
	dmu_buf_t *sa_db = NULL;
	char *path = buf + len - 1;
	int error;

	*path = '\0';
	sa_hdl = hdl;

	/* An object on the delete queue has no meaningful path. */
	uint64_t deleteq_obj;
	VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
	error = zap_lookup_int(osp, deleteq_obj, obj);
	if (error == 0) {
		return (ESTALE);
	} else if (error != ENOENT) {
		return (error);
	}
	error = 0;

	for (;;) {
		uint64_t pobj;
		char component[MAXNAMELEN + 2];
		size_t complen;
		int is_xattrdir;

		/* Release the handle from the previous iteration. */
		if (prevdb)
			zfs_release_sa_handle(prevhdl, prevdb, FTAG);

		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
		    &is_xattrdir)) != 0)
			break;

		/* Root reached: it is its own parent. */
		if (pobj == obj) {
			if (path[0] != '/')
				*--path = '/';
			break;
		}

		component[0] = '/';
		if (is_xattrdir) {
			(void) sprintf(component + 1, "<xattrdir>");
		} else {
			/* Find obj's name by scanning its parent's entries. */
			error = zap_value_search(osp, pobj, obj,
			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
			if (error != 0)
				break;
		}

		complen = strlen(component);
		path -= complen;
		ASSERT(path >= buf);
		bcopy(component, path, complen);
		obj = pobj;

		/* Never record the caller's hdl for release. */
		if (sa_hdl != hdl) {
			prevhdl = sa_hdl;
			prevdb = sa_db;
		}
		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
		if (error != 0) {
			sa_hdl = prevhdl;
			sa_db = prevdb;
			break;
		}
	}

	if (sa_hdl != NULL && sa_hdl != hdl) {
		ASSERT(sa_db != NULL);
		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
	}

	/* Shift the right-aligned path to the front of the buffer. */
	if (error == 0)
		(void) memmove(buf, path, buf + len - path);

	return (error);
}

/*
 * Public wrapper: resolve the SA table, grab a handle for obj, and
 * render its path into buf.
 */
int
zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
{
	sa_attr_type_t *sa_table;
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	error = zfs_sa_setup(osp, &sa_table);
	if (error != 0)
		return (error);

	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
	if (error != 0)
		return (error);

	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);

	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}
2218 sa_attr_type_t *sa_table; 2219 sa_handle_t *hdl; 2220 dmu_buf_t *db; 2221 int error; 2222 2223 *path = '\0'; 2224 2225 error = zfs_sa_setup(osp, &sa_table); 2226 if (error != 0) 2227 return (error); 2228 2229 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); 2230 if (error != 0) 2231 return (error); 2232 2233 error = zfs_obj_to_stats_impl(hdl, sa_table, sb); 2234 if (error != 0) { 2235 zfs_release_sa_handle(hdl, db, FTAG); 2236 return (error); 2237 } 2238 2239 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); 2240 2241 zfs_release_sa_handle(hdl, db, FTAG); 2242 return (error); 2243 } 2244