xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_znode.c (revision 12042ab213b3af68474f48555504db816a449211)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24  * Copyright (c) 2014 Integros [integros.com]
25  */
26 
27 /* Portions Copyright 2007 Jeremy Teo */
28 
29 #ifdef _KERNEL
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/time.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/resource.h>
36 #include <sys/mntent.h>
37 #include <sys/mkdev.h>
38 #include <sys/u8_textprep.h>
39 #include <sys/dsl_dataset.h>
40 #include <sys/vfs.h>
41 #include <sys/vfs_opreg.h>
42 #include <sys/vnode.h>
43 #include <sys/file.h>
44 #include <sys/kmem.h>
45 #include <sys/errno.h>
46 #include <sys/unistd.h>
47 #include <sys/mode.h>
48 #include <sys/atomic.h>
49 #include <vm/pvn.h>
50 #include "fs/fs_subr.h"
51 #include <sys/zfs_dir.h>
52 #include <sys/zfs_acl.h>
53 #include <sys/zfs_ioctl.h>
54 #include <sys/zfs_rlock.h>
55 #include <sys/zfs_fuid.h>
56 #include <sys/dnode.h>
57 #include <sys/fs/zfs.h>
58 #include <sys/kidmap.h>
59 #endif /* _KERNEL */
60 
61 #include <sys/dmu.h>
62 #include <sys/dmu_objset.h>
63 #include <sys/dmu_tx.h>
64 #include <sys/refcount.h>
65 #include <sys/stat.h>
66 #include <sys/zap.h>
67 #include <sys/zfs_znode.h>
68 #include <sys/sa.h>
69 #include <sys/zfs_sa.h>
70 #include <sys/zfs_stat.h>
71 
72 #include "zfs_prop.h"
73 #include "zfs_comutil.h"
74 
75 /*
76  * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
77  * turned on when DEBUG is also defined.
78  */
79 #ifdef	DEBUG
80 #define	ZNODE_STATS
81 #endif	/* DEBUG */
82 
83 #ifdef	ZNODE_STATS
84 #define	ZNODE_STAT_ADD(stat)			((stat)++)
85 #else
86 #define	ZNODE_STAT_ADD(stat)			/* nothing */
87 #endif	/* ZNODE_STATS */
88 
/*
 * Functions needed for userland (i.e. libzpool) are not put under
 * #ifdef _KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */
94 #ifdef _KERNEL
/*
 * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
 * be freed before it can be safely accessed.  zfs_znode_move() takes this lock
 * as writer while it re-validates the znode's z_zfsvfs pointer and enters the
 * file system's teardown lock.  Initialized in zfs_znode_init() and destroyed
 * in zfs_znode_fini().
 */
krwlock_t zfsvfs_lock;

/*
 * kmem cache that backs every znode_t allocated in this file.  Created by
 * zfs_znode_init() and destroyed (then reset to NULL) by zfs_znode_fini().
 */
static kmem_cache_t *znode_cache = NULL;

/*
 * This is used by the test suite so that it can delay znodes from being
 * freed in order to inspect the unlinked set.
 */
int zfs_unlink_suspend_progress = 0;
108 
109 /*ARGSUSED*/
110 static void
111 znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
112 {
113 	/*
114 	 * We should never drop all dbuf refs without first clearing
115 	 * the eviction callback.
116 	 */
117 	panic("evicting znode %p\n", user_ptr);
118 }
119 
120 /*
121  * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
122  * z_rangelock. It will modify the offset and length of the lock to reflect
123  * znode-specific information, and convert RL_APPEND to RL_WRITER.  This is
124  * called with the rangelock_t's rl_lock held, which avoids races.
125  */
126 static void
127 zfs_rangelock_cb(locked_range_t *new, void *arg)
128 {
129 	znode_t *zp = arg;
130 
131 	/*
132 	 * If in append mode, convert to writer and lock starting at the
133 	 * current end of file.
134 	 */
135 	if (new->lr_type == RL_APPEND) {
136 		new->lr_offset = zp->z_size;
137 		new->lr_type = RL_WRITER;
138 	}
139 
140 	/*
141 	 * If we need to grow the block size then lock the whole file range.
142 	 */
143 	uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
144 	if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
145 	    zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
146 		new->lr_offset = 0;
147 		new->lr_length = UINT64_MAX;
148 	}
149 }
150 
151 /*ARGSUSED*/
152 static int
153 zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
154 {
155 	znode_t *zp = buf;
156 
157 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
158 
159 	zp->z_vnode = vn_alloc(kmflags);
160 	if (zp->z_vnode == NULL) {
161 		return (-1);
162 	}
163 	ZTOV(zp)->v_data = zp;
164 
165 	list_link_init(&zp->z_link_node);
166 
167 	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
168 	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
169 	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
170 	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
171 
172 	rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);
173 
174 	zp->z_dirlocks = NULL;
175 	zp->z_acl_cached = NULL;
176 	zp->z_moved = 0;
177 	return (0);
178 }
179 
180 /*ARGSUSED*/
181 static void
182 zfs_znode_cache_destructor(void *buf, void *arg)
183 {
184 	znode_t *zp = buf;
185 
186 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
187 	ASSERT(ZTOV(zp)->v_data == zp);
188 	vn_free(ZTOV(zp));
189 	ASSERT(!list_link_active(&zp->z_link_node));
190 	mutex_destroy(&zp->z_lock);
191 	rw_destroy(&zp->z_parent_lock);
192 	rw_destroy(&zp->z_name_lock);
193 	mutex_destroy(&zp->z_acl_lock);
194 	rangelock_fini(&zp->z_rangelock);
195 
196 	ASSERT(zp->z_dirlocks == NULL);
197 	ASSERT(zp->z_acl_cached == NULL);
198 }
199 
#ifdef	ZNODE_STATS
/*
 * Counters bumped (via ZNODE_STAT_ADD) by zfs_znode_move() each time it
 * declines to move a znode.  One counter per bail-out point.
 */
static struct {
	/* z_zfsvfs was not a valid pointer on entry */
	uint64_t zms_zfsvfs_invalid;
	/* z_zfsvfs changed before zfsvfs_lock was acquired */
	uint64_t zms_zfsvfs_recheck1;
	/* the file system was found to be unmounted */
	uint64_t zms_zfsvfs_unmounted;
	/* z_zfsvfs changed before z_znodes_lock was acquired */
	uint64_t zms_zfsvfs_recheck2;
	/* the object hold for z_id could not be taken without blocking */
	uint64_t zms_obj_held;
	/* the vnode's v_lock could not be taken without blocking */
	uint64_t zms_vnode_locked;
	/* the vnode is referenced by something other than just the DNLC */
	uint64_t zms_not_only_dnlc;
} znode_move_stats;
#endif	/* ZNODE_STATS */
211 
212 static void
213 zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
214 {
215 	vnode_t *vp;
216 
217 	/* Copy fields. */
218 	nzp->z_zfsvfs = ozp->z_zfsvfs;
219 
220 	/* Swap vnodes. */
221 	vp = nzp->z_vnode;
222 	nzp->z_vnode = ozp->z_vnode;
223 	ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
224 	ZTOV(ozp)->v_data = ozp;
225 	ZTOV(nzp)->v_data = nzp;
226 
227 	nzp->z_id = ozp->z_id;
228 	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
229 	nzp->z_unlinked = ozp->z_unlinked;
230 	nzp->z_atime_dirty = ozp->z_atime_dirty;
231 	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
232 	nzp->z_blksz = ozp->z_blksz;
233 	nzp->z_seq = ozp->z_seq;
234 	nzp->z_mapcnt = ozp->z_mapcnt;
235 	nzp->z_gen = ozp->z_gen;
236 	nzp->z_sync_cnt = ozp->z_sync_cnt;
237 	nzp->z_is_sa = ozp->z_is_sa;
238 	nzp->z_sa_hdl = ozp->z_sa_hdl;
239 	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
240 	nzp->z_links = ozp->z_links;
241 	nzp->z_size = ozp->z_size;
242 	nzp->z_pflags = ozp->z_pflags;
243 	nzp->z_uid = ozp->z_uid;
244 	nzp->z_gid = ozp->z_gid;
245 	nzp->z_mode = ozp->z_mode;
246 
247 	/*
248 	 * Since this is just an idle znode and kmem is already dealing with
249 	 * memory pressure, release any cached ACL.
250 	 */
251 	if (ozp->z_acl_cached) {
252 		zfs_acl_free(ozp->z_acl_cached);
253 		ozp->z_acl_cached = NULL;
254 	}
255 
256 	sa_set_userp(nzp->z_sa_hdl, nzp);
257 
258 	/*
259 	 * Invalidate the original znode by clearing fields that provide a
260 	 * pointer back to the znode. Set the low bit of the vfs pointer to
261 	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
262 	 * subsequent callback.
263 	 */
264 	ozp->z_sa_hdl = NULL;
265 	POINTER_INVALIDATE(&ozp->z_zfsvfs);
266 
267 	/*
268 	 * Mark the znode.
269 	 */
270 	nzp->z_moved = 1;
271 	ozp->z_moved = (uint8_t)-1;
272 }
273 
274 /*ARGSUSED*/
275 static kmem_cbrc_t
276 zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
277 {
278 	znode_t *ozp = buf, *nzp = newbuf;
279 	zfsvfs_t *zfsvfs;
280 	vnode_t *vp;
281 
282 	/*
283 	 * The znode is on the file system's list of known znodes if the vfs
284 	 * pointer is valid. We set the low bit of the vfs pointer when freeing
285 	 * the znode to invalidate it, and the memory patterns written by kmem
286 	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
287 	 * created znode sets the vfs pointer last of all to indicate that the
288 	 * znode is known and in a valid state to be moved by this function.
289 	 */
290 	zfsvfs = ozp->z_zfsvfs;
291 	if (!POINTER_IS_VALID(zfsvfs)) {
292 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
293 		return (KMEM_CBRC_DONT_KNOW);
294 	}
295 
296 	/*
297 	 * Close a small window in which it's possible that the filesystem could
298 	 * be unmounted and freed, and zfsvfs, though valid in the previous
299 	 * statement, could point to unrelated memory by the time we try to
300 	 * prevent the filesystem from being unmounted.
301 	 */
302 	rw_enter(&zfsvfs_lock, RW_WRITER);
303 	if (zfsvfs != ozp->z_zfsvfs) {
304 		rw_exit(&zfsvfs_lock);
305 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
306 		return (KMEM_CBRC_DONT_KNOW);
307 	}
308 
309 	/*
310 	 * If the znode is still valid, then so is the file system. We know that
311 	 * no valid file system can be freed while we hold zfsvfs_lock, so we
312 	 * can safely ensure that the filesystem is not and will not be
313 	 * unmounted. The next statement is equivalent to ZFS_ENTER().
314 	 */
315 	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
316 	if (zfsvfs->z_unmounted) {
317 		ZFS_EXIT(zfsvfs);
318 		rw_exit(&zfsvfs_lock);
319 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
320 		return (KMEM_CBRC_DONT_KNOW);
321 	}
322 	rw_exit(&zfsvfs_lock);
323 
324 	mutex_enter(&zfsvfs->z_znodes_lock);
325 	/*
326 	 * Recheck the vfs pointer in case the znode was removed just before
327 	 * acquiring the lock.
328 	 */
329 	if (zfsvfs != ozp->z_zfsvfs) {
330 		mutex_exit(&zfsvfs->z_znodes_lock);
331 		ZFS_EXIT(zfsvfs);
332 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
333 		return (KMEM_CBRC_DONT_KNOW);
334 	}
335 
336 	/*
337 	 * At this point we know that as long as we hold z_znodes_lock, the
338 	 * znode cannot be freed and fields within the znode can be safely
339 	 * accessed. Now, prevent a race with zfs_zget().
340 	 */
341 	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
342 		mutex_exit(&zfsvfs->z_znodes_lock);
343 		ZFS_EXIT(zfsvfs);
344 		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
345 		return (KMEM_CBRC_LATER);
346 	}
347 
348 	vp = ZTOV(ozp);
349 	if (mutex_tryenter(&vp->v_lock) == 0) {
350 		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
351 		mutex_exit(&zfsvfs->z_znodes_lock);
352 		ZFS_EXIT(zfsvfs);
353 		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
354 		return (KMEM_CBRC_LATER);
355 	}
356 
357 	/* Only move znodes that are referenced _only_ by the DNLC. */
358 	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
359 		mutex_exit(&vp->v_lock);
360 		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
361 		mutex_exit(&zfsvfs->z_znodes_lock);
362 		ZFS_EXIT(zfsvfs);
363 		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
364 		return (KMEM_CBRC_LATER);
365 	}
366 
367 	/*
368 	 * The znode is known and in a valid state to move. We're holding the
369 	 * locks needed to execute the critical section.
370 	 */
371 	zfs_znode_move_impl(ozp, nzp);
372 	mutex_exit(&vp->v_lock);
373 	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
374 
375 	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
376 	mutex_exit(&zfsvfs->z_znodes_lock);
377 	ZFS_EXIT(zfsvfs);
378 
379 	return (KMEM_CBRC_YES);
380 }
381 
382 void
383 zfs_znode_init(void)
384 {
385 	/*
386 	 * Initialize zcache
387 	 */
388 	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
389 	ASSERT(znode_cache == NULL);
390 	znode_cache = kmem_cache_create("zfs_znode_cache",
391 	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
392 	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
393 	kmem_cache_set_move(znode_cache, zfs_znode_move);
394 }
395 
396 void
397 zfs_znode_fini(void)
398 {
399 	/*
400 	 * Cleanup vfs & vnode ops
401 	 */
402 	zfs_remove_op_tables();
403 
404 	/*
405 	 * Cleanup zcache
406 	 */
407 	if (znode_cache)
408 		kmem_cache_destroy(znode_cache);
409 	znode_cache = NULL;
410 	rw_destroy(&zfsvfs_lock);
411 }
412 
/*
 * Vnode operation vectors.  They are built from the matching *_template
 * arrays by zfs_create_op_tables() and freed and reset to NULL by
 * zfs_remove_op_tables().  zfs_znode_alloc() picks one of them for each
 * vnode according to its type.
 */
struct vnodeops *zfs_dvnodeops;		/* directories */
struct vnodeops *zfs_fvnodeops;		/* files, devices, fifos, sockets, doors */
struct vnodeops *zfs_symvnodeops;	/* symbolic links */
struct vnodeops *zfs_xdvnodeops;	/* extended attribute directories */
struct vnodeops *zfs_evnodeops;		/* any other vnode type */
struct vnodeops *zfs_sharevnodeops;	/* regular files under the shares dir */
419 
420 void
421 zfs_remove_op_tables()
422 {
423 	/*
424 	 * Remove vfs ops
425 	 */
426 	ASSERT(zfsfstype);
427 	(void) vfs_freevfsops_by_type(zfsfstype);
428 	zfsfstype = 0;
429 
430 	/*
431 	 * Remove vnode ops
432 	 */
433 	if (zfs_dvnodeops)
434 		vn_freevnodeops(zfs_dvnodeops);
435 	if (zfs_fvnodeops)
436 		vn_freevnodeops(zfs_fvnodeops);
437 	if (zfs_symvnodeops)
438 		vn_freevnodeops(zfs_symvnodeops);
439 	if (zfs_xdvnodeops)
440 		vn_freevnodeops(zfs_xdvnodeops);
441 	if (zfs_evnodeops)
442 		vn_freevnodeops(zfs_evnodeops);
443 	if (zfs_sharevnodeops)
444 		vn_freevnodeops(zfs_sharevnodeops);
445 
446 	zfs_dvnodeops = NULL;
447 	zfs_fvnodeops = NULL;
448 	zfs_symvnodeops = NULL;
449 	zfs_xdvnodeops = NULL;
450 	zfs_evnodeops = NULL;
451 	zfs_sharevnodeops = NULL;
452 }
453 
/*
 * Operation templates defined outside this file.  zfs_create_op_tables()
 * passes each one to vn_make_ops() to build the vnodeops vector of the
 * corresponding name.
 */
extern const fs_operation_def_t zfs_dvnodeops_template[];
extern const fs_operation_def_t zfs_fvnodeops_template[];
extern const fs_operation_def_t zfs_xdvnodeops_template[];
extern const fs_operation_def_t zfs_symvnodeops_template[];
extern const fs_operation_def_t zfs_evnodeops_template[];
extern const fs_operation_def_t zfs_sharevnodeops_template[];
460 
461 int
462 zfs_create_op_tables()
463 {
464 	int error;
465 
466 	/*
467 	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
468 	 * due to a failure to remove the the 2nd modlinkage (zfs_modldrv).
469 	 * In this case we just return as the ops vectors are already set up.
470 	 */
471 	if (zfs_dvnodeops)
472 		return (0);
473 
474 	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
475 	    &zfs_dvnodeops);
476 	if (error)
477 		return (error);
478 
479 	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
480 	    &zfs_fvnodeops);
481 	if (error)
482 		return (error);
483 
484 	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
485 	    &zfs_symvnodeops);
486 	if (error)
487 		return (error);
488 
489 	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
490 	    &zfs_xdvnodeops);
491 	if (error)
492 		return (error);
493 
494 	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
495 	    &zfs_evnodeops);
496 	if (error)
497 		return (error);
498 
499 	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
500 	    &zfs_sharevnodeops);
501 
502 	return (error);
503 }
504 
505 int
506 zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
507 {
508 	zfs_acl_ids_t acl_ids;
509 	vattr_t vattr;
510 	znode_t *sharezp;
511 	vnode_t *vp;
512 	znode_t *zp;
513 	int error;
514 
515 	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
516 	vattr.va_type = VDIR;
517 	vattr.va_mode = S_IFDIR|0555;
518 	vattr.va_uid = crgetuid(kcred);
519 	vattr.va_gid = crgetgid(kcred);
520 
521 	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
522 	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
523 	sharezp->z_moved = 0;
524 	sharezp->z_unlinked = 0;
525 	sharezp->z_atime_dirty = 0;
526 	sharezp->z_zfsvfs = zfsvfs;
527 	sharezp->z_is_sa = zfsvfs->z_use_sa;
528 	sharezp->z_pflags = 0;
529 
530 	vp = ZTOV(sharezp);
531 	vn_reinit(vp);
532 	vp->v_type = VDIR;
533 
534 	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
535 	    kcred, NULL, &acl_ids));
536 	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
537 	ASSERT3P(zp, ==, sharezp);
538 	ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
539 	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
540 	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
541 	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
542 	zfsvfs->z_shares_dir = sharezp->z_id;
543 
544 	zfs_acl_ids_free(&acl_ids);
545 	ZTOV(sharezp)->v_count = 0;
546 	sa_handle_destroy(sharezp->z_sa_hdl);
547 	kmem_cache_free(znode_cache, sharezp);
548 
549 	return (error);
550 }
551 
/*
 * define a couple of values we need available
 * for both 64 and 32 bit environments.
 *
 * Each value is only defined here when no earlier header has defined it.
 * They are used by the non-_LP64 branches of zfs_expldev() and
 * zfs_cmpldev().
 */
#ifndef NBITSMINOR64
#define	NBITSMINOR64	32		/* bits of minor number in a 64-bit dev */
#endif
#ifndef MAXMAJ64
#define	MAXMAJ64	0xffffffffUL	/* mask for a 64-bit dev's major number */
#endif
#ifndef	MAXMIN64
#define	MAXMIN64	0xffffffffUL	/* mask for a 64-bit dev's minor number */
#endif
565 
/*
 * ZFS-private counterpart of expldev(): expand a device number into the
 * 64-bit form stored on disk.
 *
 * The standard expldev() takes a dev32_t in LP64 and expands it to a long
 * dev_t, which is not what is wanted here; ZFS needs to take a dev32_t in
 * ILP32 and expand it.  In an _LP64 build dev_t is returned unchanged.
 */
static uint64_t
zfs_expldev(dev_t dev)
{
#ifdef _LP64
	return (dev);
#else
	/* Split the 32-bit device number, then re-pack it 64-bit style. */
	major_t maj = (major_t)dev >> NBITSMINOR32 & MAXMAJ32;
	minor_t min = (minor_t)dev & MAXMIN32;

	return (((uint64_t)maj << NBITSMINOR64) | min);
#endif
}
585 
/*
 * ZFS-private counterpart of cmpldev(): turn the 64-bit device number
 * stored on disk back into a dev_t.
 *
 * The standard cmpldev() takes a long dev_t and compresses it to a dev32_t
 * in LP64; ZFS needs that compaction to happen in ILP32 instead.  In an
 * _LP64 build the value is returned unchanged.  In an ILP32 build NODEV32
 * is returned when the major or minor number does not fit in 32-bit form.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
#ifdef _LP64
	return (dev);
#else
	major_t maj = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
	minor_t min = (minor_t)dev & MAXMIN64;

	/* Reject anything a dev32_t cannot represent. */
	if (maj > MAXMAJ32)
		return (NODEV32);
	if (min > MAXMIN32)
		return (NODEV32);

	return (((dev32_t)maj << NBITSMINOR32) | min);
#endif
}
608 
609 static void
610 zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
611     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
612 {
613 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
614 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
615 
616 	mutex_enter(&zp->z_lock);
617 
618 	ASSERT(zp->z_sa_hdl == NULL);
619 	ASSERT(zp->z_acl_cached == NULL);
620 	if (sa_hdl == NULL) {
621 		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
622 		    SA_HDL_SHARED, &zp->z_sa_hdl));
623 	} else {
624 		zp->z_sa_hdl = sa_hdl;
625 		sa_set_userp(sa_hdl, zp);
626 	}
627 
628 	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
629 
630 	/*
631 	 * Slap on VROOT if we are the root znode
632 	 */
633 	if (zp->z_id == zfsvfs->z_root)
634 		ZTOV(zp)->v_flag |= VROOT;
635 
636 	mutex_exit(&zp->z_lock);
637 	vn_exists(ZTOV(zp));
638 }
639 
640 void
641 zfs_znode_dmu_fini(znode_t *zp)
642 {
643 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
644 	    zp->z_unlinked ||
645 	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
646 
647 	sa_handle_destroy(zp->z_sa_hdl);
648 	zp->z_sa_hdl = NULL;
649 }
650 
651 /*
652  * Construct a new znode/vnode and intialize.
653  *
654  * This does not do a call to dmu_set_user() that is
655  * up to the caller to do, in case you don't want to
656  * return the znode
657  */
658 static znode_t *
659 zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
660     dmu_object_type_t obj_type, sa_handle_t *hdl)
661 {
662 	znode_t	*zp;
663 	vnode_t *vp;
664 	uint64_t mode;
665 	uint64_t parent;
666 	uint64_t projid = ZFS_DEFAULT_PROJID;
667 	sa_bulk_attr_t bulk[11];
668 	int count = 0;
669 
670 	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
671 
672 	ASSERT(zp->z_dirlocks == NULL);
673 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
674 	zp->z_moved = 0;
675 
676 	/*
677 	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
678 	 * the zfs_znode_move() callback.
679 	 */
680 	zp->z_sa_hdl = NULL;
681 	zp->z_unlinked = 0;
682 	zp->z_atime_dirty = 0;
683 	zp->z_mapcnt = 0;
684 	zp->z_id = db->db_object;
685 	zp->z_blksz = blksz;
686 	zp->z_seq = 0x7A4653;
687 	zp->z_sync_cnt = 0;
688 
689 	vp = ZTOV(zp);
690 	vn_reinit(vp);
691 
692 	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
693 
694 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
695 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
696 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
697 	    &zp->z_size, 8);
698 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
699 	    &zp->z_links, 8);
700 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
701 	    &zp->z_pflags, 8);
702 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
703 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
704 	    &zp->z_atime, 16);
705 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
706 	    &zp->z_uid, 8);
707 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
708 	    &zp->z_gid, 8);
709 
710 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0 ||
711 	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
712 	    (zp->z_pflags & ZFS_PROJID) &&
713 	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
714 		if (hdl == NULL)
715 			sa_handle_destroy(zp->z_sa_hdl);
716 		kmem_cache_free(znode_cache, zp);
717 		return (NULL);
718 	}
719 
720 	zp->z_projid = projid;
721 	zp->z_mode = mode;
722 	vp->v_vfsp = zfsvfs->z_parent->z_vfs;
723 
724 	vp->v_type = IFTOVT((mode_t)mode);
725 
726 	switch (vp->v_type) {
727 	case VDIR:
728 		if (zp->z_pflags & ZFS_XATTR) {
729 			vn_setops(vp, zfs_xdvnodeops);
730 			vp->v_flag |= V_XATTRDIR;
731 		} else {
732 			vn_setops(vp, zfs_dvnodeops);
733 		}
734 		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
735 		break;
736 	case VBLK:
737 	case VCHR:
738 		{
739 			uint64_t rdev;
740 			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
741 			    &rdev, sizeof (rdev)) == 0);
742 
743 			vp->v_rdev = zfs_cmpldev(rdev);
744 		}
745 		/*FALLTHROUGH*/
746 	case VFIFO:
747 	case VSOCK:
748 	case VDOOR:
749 		vn_setops(vp, zfs_fvnodeops);
750 		break;
751 	case VREG:
752 		vp->v_flag |= VMODSORT;
753 		if (parent == zfsvfs->z_shares_dir) {
754 			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
755 			vn_setops(vp, zfs_sharevnodeops);
756 		} else {
757 			vn_setops(vp, zfs_fvnodeops);
758 		}
759 		break;
760 	case VLNK:
761 		vn_setops(vp, zfs_symvnodeops);
762 		break;
763 	default:
764 		vn_setops(vp, zfs_evnodeops);
765 		break;
766 	}
767 
768 	mutex_enter(&zfsvfs->z_znodes_lock);
769 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
770 	membar_producer();
771 	/*
772 	 * Everything else must be valid before assigning z_zfsvfs makes the
773 	 * znode eligible for zfs_znode_move().
774 	 */
775 	zp->z_zfsvfs = zfsvfs;
776 	mutex_exit(&zfsvfs->z_znodes_lock);
777 
778 	VFS_HOLD(zfsvfs->z_vfs);
779 	return (zp);
780 }
781 
/*
 * Placeholder values that zfs_mknode() stores for the XATTR, PAD and
 * ZNODE_ACL attributes when it builds an object in the old DMU_OT_ZNODE
 * layout.  They have static storage duration and no initializer, so they
 * start out zero-filled.
 * NOTE(review): nothing in the visible part of this file writes to them;
 * confirm there are no other writers.
 */
static uint64_t empty_xattr;
static uint64_t pad[4];
static zfs_acl_phys_t acl_phys;
785 /*
786  * Create a new DMU object to hold a zfs znode.
787  *
788  *	IN:	dzp	- parent directory for new znode
789  *		vap	- file attributes for new znode
790  *		tx	- dmu transaction id for zap operations
791  *		cr	- credentials of caller
792  *		flag	- flags:
793  *			  IS_ROOT_NODE	- new object will be root
794  *			  IS_XATTR	- new object is an attribute
795  *		bonuslen - length of bonus buffer
796  *		setaclp  - File/Dir initial ACL
797  *		fuidp	 - Tracks fuid allocation.
798  *
799  *	OUT:	zpp	- allocated znode
800  *
801  */
802 void
803 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
804     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
805 {
806 	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
807 	uint64_t	mode, size, links, parent, pflags;
808 	uint64_t	dzp_pflags = 0;
809 	uint64_t	projid = ZFS_DEFAULT_PROJID;
810 	uint64_t	rdev = 0;
811 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
812 	dmu_buf_t	*db;
813 	timestruc_t	now;
814 	uint64_t	gen, obj;
815 	int		bonuslen;
816 	int		dnodesize;
817 	sa_handle_t	*sa_hdl;
818 	dmu_object_type_t obj_type;
819 	sa_bulk_attr_t	*sa_attrs;
820 	int		cnt = 0;
821 	zfs_acl_locator_cb_t locate = { 0 };
822 
823 	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
824 
825 	if (zfsvfs->z_replay) {
826 		obj = vap->va_nodeid;
827 		now = vap->va_ctime;		/* see zfs_replay_create() */
828 		gen = vap->va_nblocks;		/* ditto */
829 		dnodesize = vap->va_fsid;	/* ditto */
830 	} else {
831 		obj = 0;
832 		gethrestime(&now);
833 		gen = dmu_tx_get_txg(tx);
834 		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
835 	}
836 
837 	if (dnodesize == 0)
838 		dnodesize = DNODE_MIN_SIZE;
839 
840 	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
841 	bonuslen = (obj_type == DMU_OT_SA) ?
842 	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
843 
844 	/*
845 	 * Create a new DMU object.
846 	 */
847 	/*
848 	 * There's currently no mechanism for pre-reading the blocks that will
849 	 * be needed to allocate a new object, so we accept the small chance
850 	 * that there will be an i/o error and we will fail one of the
851 	 * assertions below.
852 	 */
853 	if (vap->va_type == VDIR) {
854 		if (zfsvfs->z_replay) {
855 			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
856 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
857 			    obj_type, bonuslen, dnodesize, tx));
858 		} else {
859 			obj = zap_create_norm_dnsize(zfsvfs->z_os,
860 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
861 			    obj_type, bonuslen, dnodesize, tx);
862 		}
863 	} else {
864 		if (zfsvfs->z_replay) {
865 			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
866 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
867 			    obj_type, bonuslen, dnodesize, tx));
868 		} else {
869 			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
870 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
871 			    obj_type, bonuslen, dnodesize, tx);
872 		}
873 	}
874 
875 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
876 	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
877 
878 	/*
879 	 * If this is the root, fix up the half-initialized parent pointer
880 	 * to reference the just-allocated physical data area.
881 	 */
882 	if (flag & IS_ROOT_NODE) {
883 		dzp->z_id = obj;
884 	}
885 
886 	/*
887 	 * If parent is an xattr, so am I.
888 	 */
889 	if (dzp->z_pflags & ZFS_XATTR) {
890 		flag |= IS_XATTR;
891 	}
892 
893 	if (zfsvfs->z_use_fuids)
894 		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
895 	else
896 		pflags = 0;
897 
898 	if (vap->va_type == VDIR) {
899 		size = 2;		/* contents ("." and "..") */
900 		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
901 	} else {
902 		size = links = 0;
903 	}
904 
905 	if (vap->va_type == VBLK || vap->va_type == VCHR) {
906 		rdev = zfs_expldev(vap->va_rdev);
907 	}
908 
909 	parent = dzp->z_id;
910 	mode = acl_ids->z_mode;
911 	if (flag & IS_XATTR)
912 		pflags |= ZFS_XATTR;
913 
914 	if (vap->va_type == VREG || vap->va_type == VDIR) {
915 		/*
916 		 * With ZFS_PROJID flag, we can easily know whether there is
917 		 * project ID stored on disk or not. See zfs_space_delta_cb().
918 		 */
919 		if (obj_type != DMU_OT_ZNODE &&
920 		    dmu_objset_projectquota_enabled(zfsvfs->z_os))
921 			pflags |= ZFS_PROJID;
922 
923 		/*
924 		 * Inherit project ID from parent if required.
925 		 */
926 		projid = zfs_inherit_projid(dzp);
927 		if (dzp->z_pflags & ZFS_PROJINHERIT)
928 			pflags |= ZFS_PROJINHERIT;
929 	}
930 
931 	/*
932 	 * No execs denied will be deterimed when zfs_mode_compute() is called.
933 	 */
934 	pflags |= acl_ids->z_aclp->z_hints &
935 	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
936 	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
937 
938 	ZFS_TIME_ENCODE(&now, crtime);
939 	ZFS_TIME_ENCODE(&now, ctime);
940 
941 	if (vap->va_mask & AT_ATIME) {
942 		ZFS_TIME_ENCODE(&vap->va_atime, atime);
943 	} else {
944 		ZFS_TIME_ENCODE(&now, atime);
945 	}
946 
947 	if (vap->va_mask & AT_MTIME) {
948 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
949 	} else {
950 		ZFS_TIME_ENCODE(&now, mtime);
951 	}
952 
953 	/* Now add in all of the "SA" attributes */
954 	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
955 	    &sa_hdl));
956 
957 	/*
958 	 * Setup the array of attributes to be replaced/set on the new file
959 	 *
960 	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
961 	 * in the old znode_phys_t format.  Don't change this ordering
962 	 */
963 	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
964 
965 	if (obj_type == DMU_OT_ZNODE) {
966 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
967 		    NULL, &atime, 16);
968 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
969 		    NULL, &mtime, 16);
970 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
971 		    NULL, &ctime, 16);
972 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
973 		    NULL, &crtime, 16);
974 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
975 		    NULL, &gen, 8);
976 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
977 		    NULL, &mode, 8);
978 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
979 		    NULL, &size, 8);
980 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
981 		    NULL, &parent, 8);
982 	} else {
983 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
984 		    NULL, &mode, 8);
985 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
986 		    NULL, &size, 8);
987 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
988 		    NULL, &gen, 8);
989 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
990 		    NULL, &acl_ids->z_fuid, 8);
991 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
992 		    NULL, &acl_ids->z_fgid, 8);
993 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
994 		    NULL, &parent, 8);
995 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
996 		    NULL, &pflags, 8);
997 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
998 		    NULL, &atime, 16);
999 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
1000 		    NULL, &mtime, 16);
1001 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
1002 		    NULL, &ctime, 16);
1003 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
1004 		    NULL, &crtime, 16);
1005 	}
1006 
1007 	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
1008 
1009 	if (obj_type == DMU_OT_ZNODE) {
1010 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
1011 		    &empty_xattr, 8);
1012 	} else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
1013 	    pflags & ZFS_PROJID) {
1014 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs),
1015 		    NULL, &projid, 8);
1016 	}
1017 	if (obj_type == DMU_OT_ZNODE ||
1018 	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
1019 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
1020 		    NULL, &rdev, 8);
1021 
1022 	}
1023 	if (obj_type == DMU_OT_ZNODE) {
1024 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
1025 		    NULL, &pflags, 8);
1026 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
1027 		    &acl_ids->z_fuid, 8);
1028 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
1029 		    &acl_ids->z_fgid, 8);
1030 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
1031 		    sizeof (uint64_t) * 4);
1032 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
1033 		    &acl_phys, sizeof (zfs_acl_phys_t));
1034 	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
1035 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
1036 		    &acl_ids->z_aclp->z_acl_count, 8);
1037 		locate.cb_aclp = acl_ids->z_aclp;
1038 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
1039 		    zfs_acl_data_locator, &locate,
1040 		    acl_ids->z_aclp->z_acl_bytes);
1041 		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
1042 		    acl_ids->z_fuid, acl_ids->z_fgid);
1043 	}
1044 
1045 	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
1046 
1047 	if (!(flag & IS_ROOT_NODE)) {
1048 		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
1049 		ASSERT(*zpp != NULL);
1050 	} else {
1051 		/*
1052 		 * If we are creating the root node, the "parent" we
1053 		 * passed in is the znode for the root.
1054 		 */
1055 		*zpp = dzp;
1056 
1057 		(*zpp)->z_sa_hdl = sa_hdl;
1058 	}
1059 
1060 	(*zpp)->z_pflags = pflags;
1061 	(*zpp)->z_mode = mode;
1062 	(*zpp)->z_dnodesize = dnodesize;
1063 	(*zpp)->z_projid = projid;
1064 
1065 	if (vap->va_mask & AT_XVATTR)
1066 		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
1067 
1068 	if (obj_type == DMU_OT_ZNODE ||
1069 	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
1070 		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
1071 	}
1072 	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
1073 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
1074 }
1075 
1076 /*
1077  * Update in-core attributes.  It is assumed the caller will be doing an
1078  * sa_bulk_update to push the changes out.
1079  */
1080 void
1081 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
1082 {
1083 	xoptattr_t *xoap;
1084 
1085 	xoap = xva_getxoptattr(xvap);
1086 	ASSERT(xoap);
1087 
1088 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
1089 		uint64_t times[2];
1090 		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
1091 		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
1092 		    &times, sizeof (times), tx);
1093 		XVA_SET_RTN(xvap, XAT_CREATETIME);
1094 	}
1095 	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1096 		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
1097 		    zp->z_pflags, tx);
1098 		XVA_SET_RTN(xvap, XAT_READONLY);
1099 	}
1100 	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1101 		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
1102 		    zp->z_pflags, tx);
1103 		XVA_SET_RTN(xvap, XAT_HIDDEN);
1104 	}
1105 	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1106 		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
1107 		    zp->z_pflags, tx);
1108 		XVA_SET_RTN(xvap, XAT_SYSTEM);
1109 	}
1110 	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1111 		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
1112 		    zp->z_pflags, tx);
1113 		XVA_SET_RTN(xvap, XAT_ARCHIVE);
1114 	}
1115 	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
1116 		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
1117 		    zp->z_pflags, tx);
1118 		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
1119 	}
1120 	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
1121 		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
1122 		    zp->z_pflags, tx);
1123 		XVA_SET_RTN(xvap, XAT_NOUNLINK);
1124 	}
1125 	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
1126 		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
1127 		    zp->z_pflags, tx);
1128 		XVA_SET_RTN(xvap, XAT_APPENDONLY);
1129 	}
1130 	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1131 		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
1132 		    zp->z_pflags, tx);
1133 		XVA_SET_RTN(xvap, XAT_NODUMP);
1134 	}
1135 	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1136 		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
1137 		    zp->z_pflags, tx);
1138 		XVA_SET_RTN(xvap, XAT_OPAQUE);
1139 	}
1140 	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1141 		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
1142 		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
1143 		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1144 	}
1145 	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1146 		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
1147 		    zp->z_pflags, tx);
1148 		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1149 	}
1150 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
1151 		zfs_sa_set_scanstamp(zp, xvap, tx);
1152 		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
1153 	}
1154 	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1155 		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
1156 		    zp->z_pflags, tx);
1157 		XVA_SET_RTN(xvap, XAT_REPARSE);
1158 	}
1159 	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1160 		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
1161 		    zp->z_pflags, tx);
1162 		XVA_SET_RTN(xvap, XAT_OFFLINE);
1163 	}
1164 	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1165 		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
1166 		    zp->z_pflags, tx);
1167 		XVA_SET_RTN(xvap, XAT_SPARSE);
1168 	}
1169 	if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
1170 		ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit,
1171 		    zp->z_pflags, tx);
1172 		XVA_SET_RTN(xvap, XAT_PROJINHERIT);
1173 	}
1174 }
1175 
1176 int
1177 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1178 {
1179 	dmu_object_info_t doi;
1180 	dmu_buf_t	*db;
1181 	znode_t		*zp;
1182 	int err;
1183 	sa_handle_t	*hdl;
1184 
1185 	*zpp = NULL;
1186 
1187 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
1188 
1189 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
1190 	if (err) {
1191 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1192 		return (err);
1193 	}
1194 
1195 	dmu_object_info_from_db(db, &doi);
1196 	if (doi.doi_bonus_type != DMU_OT_SA &&
1197 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
1198 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
1199 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1200 		sa_buf_rele(db, NULL);
1201 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1202 		return (SET_ERROR(EINVAL));
1203 	}
1204 
1205 	hdl = dmu_buf_get_user(db);
1206 	if (hdl != NULL) {
1207 		zp  = sa_get_userdata(hdl);
1208 
1209 
1210 		/*
1211 		 * Since "SA" does immediate eviction we
1212 		 * should never find a sa handle that doesn't
1213 		 * know about the znode.
1214 		 */
1215 
1216 		ASSERT3P(zp, !=, NULL);
1217 
1218 		mutex_enter(&zp->z_lock);
1219 		ASSERT3U(zp->z_id, ==, obj_num);
1220 		if (zp->z_unlinked) {
1221 			err = SET_ERROR(ENOENT);
1222 		} else {
1223 			VN_HOLD(ZTOV(zp));
1224 			*zpp = zp;
1225 			err = 0;
1226 		}
1227 		mutex_exit(&zp->z_lock);
1228 		sa_buf_rele(db, NULL);
1229 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1230 		return (err);
1231 	}
1232 
1233 	/*
1234 	 * Not found create new znode/vnode
1235 	 * but only if file exists.
1236 	 *
1237 	 * There is a small window where zfs_vget() could
1238 	 * find this object while a file create is still in
1239 	 * progress.  This is checked for in zfs_znode_alloc()
1240 	 *
1241 	 * if zfs_znode_alloc() fails it will drop the hold on the
1242 	 * bonus buffer.
1243 	 */
1244 	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
1245 	    doi.doi_bonus_type, NULL);
1246 	if (zp == NULL) {
1247 		err = SET_ERROR(ENOENT);
1248 	} else {
1249 		*zpp = zp;
1250 	}
1251 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1252 	return (err);
1253 }
1254 
1255 int
1256 zfs_rezget(znode_t *zp)
1257 {
1258 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1259 	dmu_object_info_t doi;
1260 	dmu_buf_t *db;
1261 	uint64_t obj_num = zp->z_id;
1262 	uint64_t mode;
1263 	sa_bulk_attr_t bulk[10];
1264 	int err;
1265 	int count = 0;
1266 	uint64_t gen;
1267 	uint64_t projid = ZFS_DEFAULT_PROJID;
1268 
1269 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
1270 
1271 	mutex_enter(&zp->z_acl_lock);
1272 	if (zp->z_acl_cached) {
1273 		zfs_acl_free(zp->z_acl_cached);
1274 		zp->z_acl_cached = NULL;
1275 	}
1276 
1277 	mutex_exit(&zp->z_acl_lock);
1278 	ASSERT(zp->z_sa_hdl == NULL);
1279 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
1280 	if (err) {
1281 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1282 		return (err);
1283 	}
1284 
1285 	dmu_object_info_from_db(db, &doi);
1286 	if (doi.doi_bonus_type != DMU_OT_SA &&
1287 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
1288 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
1289 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1290 		sa_buf_rele(db, NULL);
1291 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1292 		return (SET_ERROR(EINVAL));
1293 	}
1294 
1295 	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
1296 
1297 	/* reload cached values */
1298 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
1299 	    &gen, sizeof (gen));
1300 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
1301 	    &zp->z_size, sizeof (zp->z_size));
1302 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
1303 	    &zp->z_links, sizeof (zp->z_links));
1304 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
1305 	    &zp->z_pflags, sizeof (zp->z_pflags));
1306 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
1307 	    &zp->z_atime, sizeof (zp->z_atime));
1308 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
1309 	    &zp->z_uid, sizeof (zp->z_uid));
1310 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
1311 	    &zp->z_gid, sizeof (zp->z_gid));
1312 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
1313 	    &mode, sizeof (mode));
1314 
1315 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
1316 		zfs_znode_dmu_fini(zp);
1317 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1318 		return (SET_ERROR(EIO));
1319 	}
1320 
1321 	if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) {
1322 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs),
1323 		    &projid, 8);
1324 		if (err != 0 && err != ENOENT) {
1325 			zfs_znode_dmu_fini(zp);
1326 			ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1327 			return (SET_ERROR(err));
1328 		}
1329 	}
1330 
1331 	zp->z_projid = projid;
1332 	zp->z_mode = mode;
1333 
1334 	if (gen != zp->z_gen) {
1335 		zfs_znode_dmu_fini(zp);
1336 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1337 		return (SET_ERROR(EIO));
1338 	}
1339 
1340 	zp->z_blksz = doi.doi_data_block_size;
1341 
1342 	/*
1343 	 * If the file has zero links, then it has been unlinked on the send
1344 	 * side and it must be in the received unlinked set.
1345 	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
1346 	 * stale data and to prevent automatical removal of the file in
1347 	 * zfs_zinactive().  The file will be removed either when it is removed
1348 	 * on the send side and the next incremental stream is received or
1349 	 * when the unlinked set gets processed.
1350 	 */
1351 	zp->z_unlinked = (zp->z_links == 0);
1352 	if (zp->z_unlinked)
1353 		zfs_znode_dmu_fini(zp);
1354 
1355 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1356 
1357 	return (0);
1358 }
1359 
1360 void
1361 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1362 {
1363 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1364 	objset_t *os = zfsvfs->z_os;
1365 	uint64_t obj = zp->z_id;
1366 	uint64_t acl_obj = zfs_external_acl(zp);
1367 
1368 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
1369 	if (acl_obj) {
1370 		VERIFY(!zp->z_is_sa);
1371 		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
1372 	}
1373 	VERIFY(0 == dmu_object_free(os, obj, tx));
1374 	zfs_znode_dmu_fini(zp);
1375 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
1376 	zfs_znode_free(zp);
1377 }
1378 
1379 void
1380 zfs_zinactive(znode_t *zp)
1381 {
1382 	vnode_t	*vp = ZTOV(zp);
1383 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1384 	uint64_t z_id = zp->z_id;
1385 
1386 	ASSERT(zp->z_sa_hdl);
1387 
1388 	/*
1389 	 * Don't allow a zfs_zget() while were trying to release this znode
1390 	 */
1391 	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
1392 
1393 	mutex_enter(&zp->z_lock);
1394 	mutex_enter(&vp->v_lock);
1395 	VN_RELE_LOCKED(vp);
1396 	if (vp->v_count > 0 || vn_has_cached_data(vp)) {
1397 		/*
1398 		 * If the hold count is greater than zero, somebody has
1399 		 * obtained a new reference on this znode while we were
1400 		 * processing it here, so we are done.  If we still have
1401 		 * mapped pages then we are also done, since we don't
1402 		 * want to inactivate the znode until the pages get pushed.
1403 		 *
1404 		 * XXX - if vn_has_cached_data(vp) is true, but count == 0,
1405 		 * this seems like it would leave the znode hanging with
1406 		 * no chance to go inactive...
1407 		 */
1408 		mutex_exit(&vp->v_lock);
1409 		mutex_exit(&zp->z_lock);
1410 		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1411 		return;
1412 	}
1413 	mutex_exit(&vp->v_lock);
1414 
1415 	/*
1416 	 * If this was the last reference to a file with no links, remove
1417 	 * the file from the file system unless the file system is mounted
1418 	 * read-only.  That can happen, for example, if the file system was
1419 	 * originally read-write, the file was opened, then unlinked and
1420 	 * the file system was made read-only before the file was finally
1421 	 * closed.  The file will remain in the unlinked set.
1422 	 */
1423 	if (zp->z_unlinked) {
1424 		ASSERT(!zfsvfs->z_issnap);
1425 		if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0 &&
1426 		    !zfs_unlink_suspend_progress) {
1427 			mutex_exit(&zp->z_lock);
1428 			ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1429 			zfs_rmnode(zp);
1430 			return;
1431 		}
1432 	}
1433 
1434 	mutex_exit(&zp->z_lock);
1435 	zfs_znode_dmu_fini(zp);
1436 	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1437 	zfs_znode_free(zp);
1438 }
1439 
1440 void
1441 zfs_znode_free(znode_t *zp)
1442 {
1443 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1444 
1445 	vn_invalid(ZTOV(zp));
1446 
1447 	ASSERT(ZTOV(zp)->v_count == 0);
1448 
1449 	mutex_enter(&zfsvfs->z_znodes_lock);
1450 	POINTER_INVALIDATE(&zp->z_zfsvfs);
1451 	list_remove(&zfsvfs->z_all_znodes, zp);
1452 	mutex_exit(&zfsvfs->z_znodes_lock);
1453 
1454 	if (zp->z_acl_cached) {
1455 		zfs_acl_free(zp->z_acl_cached);
1456 		zp->z_acl_cached = NULL;
1457 	}
1458 
1459 	kmem_cache_free(znode_cache, zp);
1460 
1461 	VFS_RELE(zfsvfs->z_vfs);
1462 }
1463 
1464 void
1465 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
1466     uint64_t ctime[2], boolean_t have_tx)
1467 {
1468 	timestruc_t	now;
1469 
1470 	gethrestime(&now);
1471 
1472 	if (have_tx) {	/* will sa_bulk_update happen really soon? */
1473 		zp->z_atime_dirty = 0;
1474 		zp->z_seq++;
1475 	} else {
1476 		zp->z_atime_dirty = 1;
1477 	}
1478 
1479 	if (flag & AT_ATIME) {
1480 		ZFS_TIME_ENCODE(&now, zp->z_atime);
1481 	}
1482 
1483 	if (flag & AT_MTIME) {
1484 		ZFS_TIME_ENCODE(&now, mtime);
1485 		if (zp->z_zfsvfs->z_use_fuids) {
1486 			zp->z_pflags |= (ZFS_ARCHIVE |
1487 			    ZFS_AV_MODIFIED);
1488 		}
1489 	}
1490 
1491 	if (flag & AT_CTIME) {
1492 		ZFS_TIME_ENCODE(&now, ctime);
1493 		if (zp->z_zfsvfs->z_use_fuids)
1494 			zp->z_pflags |= ZFS_ARCHIVE;
1495 	}
1496 }
1497 
1498 /*
1499  * Grow the block size for a file.
1500  *
1501  *	IN:	zp	- znode of file to free data in.
1502  *		size	- requested block size
1503  *		tx	- open transaction.
1504  *
1505  * NOTE: this function assumes that the znode is write locked.
1506  */
1507 void
1508 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1509 {
1510 	int		error;
1511 	u_longlong_t	dummy;
1512 
1513 	if (size <= zp->z_blksz)
1514 		return;
1515 	/*
1516 	 * If the file size is already greater than the current blocksize,
1517 	 * we will not grow.  If there is more than one block in a file,
1518 	 * the blocksize cannot change.
1519 	 */
1520 	if (zp->z_blksz && zp->z_size > zp->z_blksz)
1521 		return;
1522 
1523 	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
1524 	    size, 0, tx);
1525 
1526 	if (error == ENOTSUP)
1527 		return;
1528 	ASSERT0(error);
1529 
1530 	/* What blocksize did we actually get? */
1531 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
1532 }
1533 
1534 /*
1535  * This is a dummy interface used when pvn_vplist_dirty() should *not*
1536  * be calling back into the fs for a putpage().  E.g.: when truncating
1537  * a file, the pages being "thrown away* don't need to be written out.
1538  */
1539 /* ARGSUSED */
1540 static int
1541 zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1542     int flags, cred_t *cr)
1543 {
1544 	ASSERT(0);
1545 	return (0);
1546 }
1547 
1548 /*
1549  * Increase the file length
1550  *
1551  *	IN:	zp	- znode of file to free data in.
1552  *		end	- new end-of-file
1553  *
1554  *	RETURN:	0 on success, error code on failure
1555  */
1556 static int
1557 zfs_extend(znode_t *zp, uint64_t end)
1558 {
1559 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1560 	dmu_tx_t *tx;
1561 	locked_range_t *lr;
1562 	uint64_t newblksz;
1563 	int error;
1564 
1565 	/*
1566 	 * We will change zp_size, lock the whole file.
1567 	 */
1568 	lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
1569 
1570 	/*
1571 	 * Nothing to do if file already at desired length.
1572 	 */
1573 	if (end <= zp->z_size) {
1574 		rangelock_exit(lr);
1575 		return (0);
1576 	}
1577 	tx = dmu_tx_create(zfsvfs->z_os);
1578 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1579 	zfs_sa_upgrade_txholds(tx, zp);
1580 	if (end > zp->z_blksz &&
1581 	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
1582 		/*
1583 		 * We are growing the file past the current block size.
1584 		 */
1585 		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
1586 			/*
1587 			 * File's blocksize is already larger than the
1588 			 * "recordsize" property.  Only let it grow to
1589 			 * the next power of 2.
1590 			 */
1591 			ASSERT(!ISP2(zp->z_blksz));
1592 			newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
1593 		} else {
1594 			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
1595 		}
1596 		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
1597 	} else {
1598 		newblksz = 0;
1599 	}
1600 
1601 	error = dmu_tx_assign(tx, TXG_WAIT);
1602 	if (error) {
1603 		dmu_tx_abort(tx);
1604 		rangelock_exit(lr);
1605 		return (error);
1606 	}
1607 
1608 	if (newblksz)
1609 		zfs_grow_blocksize(zp, newblksz, tx);
1610 
1611 	zp->z_size = end;
1612 
1613 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
1614 	    &zp->z_size, sizeof (zp->z_size), tx));
1615 
1616 	rangelock_exit(lr);
1617 
1618 	dmu_tx_commit(tx);
1619 
1620 	return (0);
1621 }
1622 
1623 /*
1624  * Free space in a file.
1625  *
1626  *	IN:	zp	- znode of file to free data in.
1627  *		off	- start of section to free.
1628  *		len	- length of section to free.
1629  *
1630  *	RETURN:	0 on success, error code on failure
1631  */
1632 static int
1633 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1634 {
1635 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1636 	locked_range_t *lr;
1637 	int error;
1638 
1639 	/*
1640 	 * Lock the range being freed.
1641 	 */
1642 	lr = rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);
1643 
1644 	/*
1645 	 * Nothing to do if file already at desired length.
1646 	 */
1647 	if (off >= zp->z_size) {
1648 		rangelock_exit(lr);
1649 		return (0);
1650 	}
1651 
1652 	if (off + len > zp->z_size)
1653 		len = zp->z_size - off;
1654 
1655 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
1656 
1657 	rangelock_exit(lr);
1658 
1659 	return (error);
1660 }
1661 
1662 /*
1663  * Truncate a file
1664  *
1665  *	IN:	zp	- znode of file to free data in.
1666  *		end	- new end-of-file.
1667  *
1668  *	RETURN:	0 on success, error code on failure
1669  */
1670 static int
1671 zfs_trunc(znode_t *zp, uint64_t end)
1672 {
1673 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1674 	vnode_t *vp = ZTOV(zp);
1675 	dmu_tx_t *tx;
1676 	locked_range_t *lr;
1677 	int error;
1678 	sa_bulk_attr_t bulk[2];
1679 	int count = 0;
1680 
1681 	/*
1682 	 * We will change zp_size, lock the whole file.
1683 	 */
1684 	lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
1685 
1686 	/*
1687 	 * Nothing to do if file already at desired length.
1688 	 */
1689 	if (end >= zp->z_size) {
1690 		rangelock_exit(lr);
1691 		return (0);
1692 	}
1693 
1694 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
1695 	    DMU_OBJECT_END);
1696 	if (error) {
1697 		rangelock_exit(lr);
1698 		return (error);
1699 	}
1700 	tx = dmu_tx_create(zfsvfs->z_os);
1701 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1702 	zfs_sa_upgrade_txholds(tx, zp);
1703 	dmu_tx_mark_netfree(tx);
1704 	error = dmu_tx_assign(tx, TXG_WAIT);
1705 	if (error) {
1706 		dmu_tx_abort(tx);
1707 		rangelock_exit(lr);
1708 		return (error);
1709 	}
1710 
1711 	zp->z_size = end;
1712 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
1713 	    NULL, &zp->z_size, sizeof (zp->z_size));
1714 
1715 	if (end == 0) {
1716 		zp->z_pflags &= ~ZFS_SPARSE;
1717 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1718 		    NULL, &zp->z_pflags, 8);
1719 	}
1720 	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
1721 
1722 	dmu_tx_commit(tx);
1723 
1724 	/*
1725 	 * Clear any mapped pages in the truncated region.  This has to
1726 	 * happen outside of the transaction to avoid the possibility of
1727 	 * a deadlock with someone trying to push a page that we are
1728 	 * about to invalidate.
1729 	 */
1730 	if (vn_has_cached_data(vp)) {
1731 		page_t *pp;
1732 		uint64_t start = end & PAGEMASK;
1733 		int poff = end & PAGEOFFSET;
1734 
1735 		if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) {
1736 			/*
1737 			 * We need to zero a partial page.
1738 			 */
1739 			pagezero(pp, poff, PAGESIZE - poff);
1740 			start += PAGESIZE;
1741 			page_unlock(pp);
1742 		}
1743 		error = pvn_vplist_dirty(vp, start, zfs_no_putpage,
1744 		    B_INVAL | B_TRUNC, NULL);
1745 		ASSERT(error == 0);
1746 	}
1747 
1748 	rangelock_exit(lr);
1749 
1750 	return (0);
1751 }
1752 
1753 /*
1754  * Free space in a file
1755  *
1756  *	IN:	zp	- znode of file to free data in.
1757  *		off	- start of range
1758  *		len	- end of range (0 => EOF)
1759  *		flag	- current file open mode flags.
1760  *		log	- TRUE if this action should be logged
1761  *
1762  *	RETURN:	0 on success, error code on failure
1763  */
1764 int
1765 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
1766 {
1767 	vnode_t *vp = ZTOV(zp);
1768 	dmu_tx_t *tx;
1769 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1770 	zilog_t *zilog = zfsvfs->z_log;
1771 	uint64_t mode;
1772 	uint64_t mtime[2], ctime[2];
1773 	sa_bulk_attr_t bulk[3];
1774 	int count = 0;
1775 	int error;
1776 
1777 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
1778 	    sizeof (mode))) != 0)
1779 		return (error);
1780 
1781 	if (off > zp->z_size) {
1782 		error =  zfs_extend(zp, off+len);
1783 		if (error == 0 && log)
1784 			goto log;
1785 		else
1786 			return (error);
1787 	}
1788 
1789 	/*
1790 	 * Check for any locks in the region to be freed.
1791 	 */
1792 
1793 	if (MANDLOCK(vp, (mode_t)mode)) {
1794 		uint64_t length = (len ? len : zp->z_size - off);
1795 		if (error = chklock(vp, FWRITE, off, length, flag, NULL))
1796 			return (error);
1797 	}
1798 
1799 	if (len == 0) {
1800 		error = zfs_trunc(zp, off);
1801 	} else {
1802 		if ((error = zfs_free_range(zp, off, len)) == 0 &&
1803 		    off + len > zp->z_size)
1804 			error = zfs_extend(zp, off+len);
1805 	}
1806 	if (error || !log)
1807 		return (error);
1808 log:
1809 	tx = dmu_tx_create(zfsvfs->z_os);
1810 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1811 	zfs_sa_upgrade_txholds(tx, zp);
1812 	error = dmu_tx_assign(tx, TXG_WAIT);
1813 	if (error) {
1814 		dmu_tx_abort(tx);
1815 		return (error);
1816 	}
1817 
1818 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
1819 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
1820 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1821 	    NULL, &zp->z_pflags, 8);
1822 	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
1823 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1824 	ASSERT(error == 0);
1825 
1826 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
1827 
1828 	dmu_tx_commit(tx);
1829 	return (0);
1830 }
1831 
1832 void
1833 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
1834 {
1835 	uint64_t	moid, obj, sa_obj, version;
1836 	uint64_t	sense = ZFS_CASE_SENSITIVE;
1837 	uint64_t	norm = 0;
1838 	nvpair_t	*elem;
1839 	int		error;
1840 	int		i;
1841 	znode_t		*rootzp = NULL;
1842 	zfsvfs_t	*zfsvfs;
1843 	vnode_t		*vp;
1844 	vattr_t		vattr;
1845 	znode_t		*zp;
1846 	zfs_acl_ids_t	acl_ids;
1847 
1848 	/*
1849 	 * First attempt to create master node.
1850 	 */
1851 	/*
1852 	 * In an empty objset, there are no blocks to read and thus
1853 	 * there can be no i/o errors (which we assert below).
1854 	 */
1855 	moid = MASTER_NODE_OBJ;
1856 	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
1857 	    DMU_OT_NONE, 0, tx);
1858 	ASSERT(error == 0);
1859 
1860 	/*
1861 	 * Set starting attributes.
1862 	 */
1863 	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
1864 	elem = NULL;
1865 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
1866 		/* For the moment we expect all zpl props to be uint64_ts */
1867 		uint64_t val;
1868 		char *name;
1869 
1870 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
1871 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
1872 		name = nvpair_name(elem);
1873 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
1874 			if (val < version)
1875 				version = val;
1876 		} else {
1877 			error = zap_update(os, moid, name, 8, 1, &val, tx);
1878 		}
1879 		ASSERT(error == 0);
1880 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
1881 			norm = val;
1882 		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
1883 			sense = val;
1884 	}
1885 	ASSERT(version != 0);
1886 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
1887 
1888 	/*
1889 	 * Create zap object used for SA attribute registration
1890 	 */
1891 
1892 	if (version >= ZPL_VERSION_SA) {
1893 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
1894 		    DMU_OT_NONE, 0, tx);
1895 		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
1896 		ASSERT(error == 0);
1897 	} else {
1898 		sa_obj = 0;
1899 	}
1900 	/*
1901 	 * Create a delete queue.
1902 	 */
1903 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
1904 
1905 	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
1906 	ASSERT(error == 0);
1907 
1908 	/*
1909 	 * Create root znode.  Create minimal znode/vnode/zfsvfs
1910 	 * to allow zfs_mknode to work.
1911 	 */
1912 	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
1913 	vattr.va_type = VDIR;
1914 	vattr.va_mode = S_IFDIR|0755;
1915 	vattr.va_uid = crgetuid(cr);
1916 	vattr.va_gid = crgetgid(cr);
1917 
1918 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
1919 	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
1920 	rootzp->z_moved = 0;
1921 	rootzp->z_unlinked = 0;
1922 	rootzp->z_atime_dirty = 0;
1923 	rootzp->z_is_sa = USE_SA(version, os);
1924 	rootzp->z_pflags = 0;
1925 
1926 	vp = ZTOV(rootzp);
1927 	vn_reinit(vp);
1928 	vp->v_type = VDIR;
1929 
1930 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
1931 	zfsvfs->z_os = os;
1932 	zfsvfs->z_parent = zfsvfs;
1933 	zfsvfs->z_version = version;
1934 	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
1935 	zfsvfs->z_use_sa = USE_SA(version, os);
1936 	zfsvfs->z_norm = norm;
1937 
1938 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
1939 	    &zfsvfs->z_attr_table);
1940 
1941 	ASSERT(error == 0);
1942 
1943 	/*
1944 	 * Fold case on file systems that are always or sometimes case
1945 	 * insensitive.
1946 	 */
1947 	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
1948 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
1949 
1950 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1951 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
1952 	    offsetof(znode_t, z_link_node));
1953 
1954 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1955 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
1956 
1957 	rootzp->z_zfsvfs = zfsvfs;
1958 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
1959 	    cr, NULL, &acl_ids));
1960 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
1961 	ASSERT3P(zp, ==, rootzp);
1962 	ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */
1963 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
1964 	ASSERT(error == 0);
1965 	zfs_acl_ids_free(&acl_ids);
1966 	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
1967 
1968 	ZTOV(rootzp)->v_count = 0;
1969 	sa_handle_destroy(rootzp->z_sa_hdl);
1970 	kmem_cache_free(znode_cache, rootzp);
1971 
1972 	/*
1973 	 * Create shares directory
1974 	 */
1975 
1976 	error = zfs_create_share_dir(zfsvfs, tx);
1977 
1978 	ASSERT(error == 0);
1979 
1980 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1981 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1982 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1983 }
1984 
1985 #endif /* _KERNEL */
1986 
1987 static int
1988 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
1989 {
1990 	uint64_t sa_obj = 0;
1991 	int error;
1992 
1993 	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
1994 	if (error != 0 && error != ENOENT)
1995 		return (error);
1996 
1997 	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
1998 	return (error);
1999 }
2000 
2001 static int
2002 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
2003     dmu_buf_t **db, void *tag)
2004 {
2005 	dmu_object_info_t doi;
2006 	int error;
2007 
2008 	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
2009 		return (error);
2010 
2011 	dmu_object_info_from_db(*db, &doi);
2012 	if ((doi.doi_bonus_type != DMU_OT_SA &&
2013 	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
2014 	    doi.doi_bonus_type == DMU_OT_ZNODE &&
2015 	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
2016 		sa_buf_rele(*db, tag);
2017 		return (SET_ERROR(ENOTSUP));
2018 	}
2019 
2020 	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
2021 	if (error != 0) {
2022 		sa_buf_rele(*db, tag);
2023 		return (error);
2024 	}
2025 
2026 	return (0);
2027 }
2028 
2029 void
2030 zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
2031 {
2032 	sa_handle_destroy(hdl);
2033 	sa_buf_rele(db, tag);
2034 }
2035 
2036 /*
2037  * Given an object number, return its parent object number and whether
2038  * or not the object is an extended attribute directory.
2039  */
2040 static int
2041 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
2042     uint64_t *pobjp, int *is_xattrdir)
2043 {
2044 	uint64_t parent;
2045 	uint64_t pflags;
2046 	uint64_t mode;
2047 	uint64_t parent_mode;
2048 	sa_bulk_attr_t bulk[3];
2049 	sa_handle_t *sa_hdl;
2050 	dmu_buf_t *sa_db;
2051 	int count = 0;
2052 	int error;
2053 
2054 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
2055 	    &parent, sizeof (parent));
2056 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
2057 	    &pflags, sizeof (pflags));
2058 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2059 	    &mode, sizeof (mode));
2060 
2061 	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
2062 		return (error);
2063 
2064 	/*
2065 	 * When a link is removed its parent pointer is not changed and will
2066 	 * be invalid.  There are two cases where a link is removed but the
2067 	 * file stays around, when it goes to the delete queue and when there
2068 	 * are additional links.
2069 	 */
2070 	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
2071 	if (error != 0)
2072 		return (error);
2073 
2074 	error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
2075 	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
2076 	if (error != 0)
2077 		return (error);
2078 
2079 	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
2080 
2081 	/*
2082 	 * Extended attributes can be applied to files, directories, etc.
2083 	 * Otherwise the parent must be a directory.
2084 	 */
2085 	if (!*is_xattrdir && !S_ISDIR(parent_mode))
2086 		return (SET_ERROR(EINVAL));
2087 
2088 	*pobjp = parent;
2089 
2090 	return (0);
2091 }
2092 
2093 /*
2094  * Given an object number, return some zpl level statistics
2095  */
2096 static int
2097 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
2098     zfs_stat_t *sb)
2099 {
2100 	sa_bulk_attr_t bulk[4];
2101 	int count = 0;
2102 
2103 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2104 	    &sb->zs_mode, sizeof (sb->zs_mode));
2105 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
2106 	    &sb->zs_gen, sizeof (sb->zs_gen));
2107 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
2108 	    &sb->zs_links, sizeof (sb->zs_links));
2109 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
2110 	    &sb->zs_ctime, sizeof (sb->zs_ctime));
2111 
2112 	return (sa_bulk_lookup(hdl, bulk, count));
2113 }
2114 
2115 static int
2116 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
2117     sa_attr_type_t *sa_table, char *buf, int len)
2118 {
2119 	sa_handle_t *sa_hdl;
2120 	sa_handle_t *prevhdl = NULL;
2121 	dmu_buf_t *prevdb = NULL;
2122 	dmu_buf_t *sa_db = NULL;
2123 	char *path = buf + len - 1;
2124 	int error;
2125 
2126 	*path = '\0';
2127 	sa_hdl = hdl;
2128 
2129 	uint64_t deleteq_obj;
2130 	VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
2131 	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
2132 	error = zap_lookup_int(osp, deleteq_obj, obj);
2133 	if (error == 0) {
2134 		return (ESTALE);
2135 	} else if (error != ENOENT) {
2136 		return (error);
2137 	}
2138 	error = 0;
2139 
2140 	for (;;) {
2141 		uint64_t pobj;
2142 		char component[MAXNAMELEN + 2];
2143 		size_t complen;
2144 		int is_xattrdir;
2145 
2146 		if (prevdb)
2147 			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
2148 
2149 		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
2150 		    &is_xattrdir)) != 0)
2151 			break;
2152 
2153 		if (pobj == obj) {
2154 			if (path[0] != '/')
2155 				*--path = '/';
2156 			break;
2157 		}
2158 
2159 		component[0] = '/';
2160 		if (is_xattrdir) {
2161 			(void) sprintf(component + 1, "<xattrdir>");
2162 		} else {
2163 			error = zap_value_search(osp, pobj, obj,
2164 			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
2165 			if (error != 0)
2166 				break;
2167 		}
2168 
2169 		complen = strlen(component);
2170 		path -= complen;
2171 		ASSERT(path >= buf);
2172 		bcopy(component, path, complen);
2173 		obj = pobj;
2174 
2175 		if (sa_hdl != hdl) {
2176 			prevhdl = sa_hdl;
2177 			prevdb = sa_db;
2178 		}
2179 		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
2180 		if (error != 0) {
2181 			sa_hdl = prevhdl;
2182 			sa_db = prevdb;
2183 			break;
2184 		}
2185 	}
2186 
2187 	if (sa_hdl != NULL && sa_hdl != hdl) {
2188 		ASSERT(sa_db != NULL);
2189 		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
2190 	}
2191 
2192 	if (error == 0)
2193 		(void) memmove(buf, path, buf + len - path);
2194 
2195 	return (error);
2196 }
2197 
2198 int
2199 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
2200 {
2201 	sa_attr_type_t *sa_table;
2202 	sa_handle_t *hdl;
2203 	dmu_buf_t *db;
2204 	int error;
2205 
2206 	error = zfs_sa_setup(osp, &sa_table);
2207 	if (error != 0)
2208 		return (error);
2209 
2210 	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
2211 	if (error != 0)
2212 		return (error);
2213 
2214 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2215 
2216 	zfs_release_sa_handle(hdl, db, FTAG);
2217 	return (error);
2218 }
2219 
2220 int
2221 zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
2222     char *buf, int len)
2223 {
2224 	char *path = buf + len - 1;
2225 	sa_attr_type_t *sa_table;
2226 	sa_handle_t *hdl;
2227 	dmu_buf_t *db;
2228 	int error;
2229 
2230 	*path = '\0';
2231 
2232 	error = zfs_sa_setup(osp, &sa_table);
2233 	if (error != 0)
2234 		return (error);
2235 
2236 	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
2237 	if (error != 0)
2238 		return (error);
2239 
2240 	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
2241 	if (error != 0) {
2242 		zfs_release_sa_handle(hdl, db, FTAG);
2243 		return (error);
2244 	}
2245 
2246 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2247 
2248 	zfs_release_sa_handle(hdl, db, FTAG);
2249 	return (error);
2250 }
2251