xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_znode.c (revision 5328fc53d11d7151861fa272e4fb0248b8f0e145)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24  * Copyright (c) 2014 Integros [integros.com]
25  */
26 
27 /* Portions Copyright 2007 Jeremy Teo */
28 
29 #ifdef _KERNEL
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/time.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/resource.h>
36 #include <sys/mntent.h>
37 #include <sys/mkdev.h>
38 #include <sys/u8_textprep.h>
39 #include <sys/dsl_dataset.h>
40 #include <sys/vfs.h>
41 #include <sys/vfs_opreg.h>
42 #include <sys/vnode.h>
43 #include <sys/file.h>
44 #include <sys/kmem.h>
45 #include <sys/errno.h>
46 #include <sys/unistd.h>
47 #include <sys/mode.h>
48 #include <sys/atomic.h>
49 #include <vm/pvn.h>
50 #include "fs/fs_subr.h"
51 #include <sys/zfs_dir.h>
52 #include <sys/zfs_acl.h>
53 #include <sys/zfs_ioctl.h>
54 #include <sys/zfs_rlock.h>
55 #include <sys/zfs_fuid.h>
56 #include <sys/dnode.h>
57 #include <sys/fs/zfs.h>
58 #include <sys/kidmap.h>
59 #endif /* _KERNEL */
60 
61 #include <sys/dmu.h>
62 #include <sys/dmu_objset.h>
63 #include <sys/dmu_tx.h>
64 #include <sys/refcount.h>
65 #include <sys/stat.h>
66 #include <sys/zap.h>
67 #include <sys/zfs_znode.h>
68 #include <sys/sa.h>
69 #include <sys/zfs_sa.h>
70 #include <sys/zfs_stat.h>
71 
72 #include "zfs_prop.h"
73 #include "zfs_comutil.h"
74 
/*
 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
 * turned on when DEBUG is also defined.
 */
#ifdef	DEBUG
#define	ZNODE_STATS
#endif	/* DEBUG */

/*
 * ZNODE_STAT_ADD(stat) post-increments the given counter when ZNODE_STATS is
 * defined and expands to nothing otherwise.  The increment is a plain "++",
 * not an atomic operation.
 */
#ifdef	ZNODE_STATS
#define	ZNODE_STAT_ADD(stat)			((stat)++)
#else
#define	ZNODE_STAT_ADD(stat)			/* nothing */
#endif	/* ZNODE_STATS */
88 
/*
 * Functions needed for userland (ie: libzpool) are not put under
 * #ifdef _KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */
94 #ifdef _KERNEL
/*
 * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
 * be freed before it can be safely accessed.
 */
krwlock_t zfsvfs_lock;

/*
 * kmem cache from which znode_t objects are allocated.  It is created by
 * zfs_znode_init() and destroyed (and reset to NULL) by zfs_znode_fini().
 */
static kmem_cache_t *znode_cache = NULL;
102 
103 /*ARGSUSED*/
104 static void
105 znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
106 {
107 	/*
108 	 * We should never drop all dbuf refs without first clearing
109 	 * the eviction callback.
110 	 */
111 	panic("evicting znode %p\n", user_ptr);
112 }
113 
114 /*
115  * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
116  * z_rangelock. It will modify the offset and length of the lock to reflect
117  * znode-specific information, and convert RL_APPEND to RL_WRITER.  This is
118  * called with the rangelock_t's rl_lock held, which avoids races.
119  */
120 static void
121 zfs_rangelock_cb(locked_range_t *new, void *arg)
122 {
123 	znode_t *zp = arg;
124 
125 	/*
126 	 * If in append mode, convert to writer and lock starting at the
127 	 * current end of file.
128 	 */
129 	if (new->lr_type == RL_APPEND) {
130 		new->lr_offset = zp->z_size;
131 		new->lr_type = RL_WRITER;
132 	}
133 
134 	/*
135 	 * If we need to grow the block size then lock the whole file range.
136 	 */
137 	uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
138 	if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
139 	    zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
140 		new->lr_offset = 0;
141 		new->lr_length = UINT64_MAX;
142 	}
143 }
144 
145 /*ARGSUSED*/
146 static int
147 zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
148 {
149 	znode_t *zp = buf;
150 
151 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
152 
153 	zp->z_vnode = vn_alloc(kmflags);
154 	if (zp->z_vnode == NULL) {
155 		return (-1);
156 	}
157 	ZTOV(zp)->v_data = zp;
158 
159 	list_link_init(&zp->z_link_node);
160 
161 	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
162 	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
163 	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
164 	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
165 
166 	rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);
167 
168 	zp->z_dirlocks = NULL;
169 	zp->z_acl_cached = NULL;
170 	zp->z_moved = 0;
171 	return (0);
172 }
173 
174 /*ARGSUSED*/
175 static void
176 zfs_znode_cache_destructor(void *buf, void *arg)
177 {
178 	znode_t *zp = buf;
179 
180 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
181 	ASSERT(ZTOV(zp)->v_data == zp);
182 	vn_free(ZTOV(zp));
183 	ASSERT(!list_link_active(&zp->z_link_node));
184 	mutex_destroy(&zp->z_lock);
185 	rw_destroy(&zp->z_parent_lock);
186 	rw_destroy(&zp->z_name_lock);
187 	mutex_destroy(&zp->z_acl_lock);
188 	rangelock_fini(&zp->z_rangelock);
189 
190 	ASSERT(zp->z_dirlocks == NULL);
191 	ASSERT(zp->z_acl_cached == NULL);
192 }
193 
#ifdef	ZNODE_STATS
/*
 * Counters recording why zfs_znode_move() declined to move a znode.  Each one
 * is bumped with ZNODE_STAT_ADD() on the corresponding early-return path.
 */
static struct {
	/* z_zfsvfs was not a valid pointer on entry */
	uint64_t zms_zfsvfs_invalid;
	/* z_zfsvfs changed before zfsvfs_lock was acquired */
	uint64_t zms_zfsvfs_recheck1;
	/* the file system was found to be unmounted */
	uint64_t zms_zfsvfs_unmounted;
	/* z_zfsvfs changed before z_znodes_lock was acquired */
	uint64_t zms_zfsvfs_recheck2;
	/* ZFS_OBJ_HOLD_TRYENTER() on the znode's object id failed */
	uint64_t zms_obj_held;
	/* mutex_tryenter() on the vnode's v_lock failed */
	uint64_t zms_vnode_locked;
	/* v_count was not 1, or the vnode was not in the DNLC */
	uint64_t zms_not_only_dnlc;
} znode_move_stats;
#endif	/* ZNODE_STATS */
205 
206 static void
207 zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
208 {
209 	vnode_t *vp;
210 
211 	/* Copy fields. */
212 	nzp->z_zfsvfs = ozp->z_zfsvfs;
213 
214 	/* Swap vnodes. */
215 	vp = nzp->z_vnode;
216 	nzp->z_vnode = ozp->z_vnode;
217 	ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
218 	ZTOV(ozp)->v_data = ozp;
219 	ZTOV(nzp)->v_data = nzp;
220 
221 	nzp->z_id = ozp->z_id;
222 	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
223 	nzp->z_unlinked = ozp->z_unlinked;
224 	nzp->z_atime_dirty = ozp->z_atime_dirty;
225 	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
226 	nzp->z_blksz = ozp->z_blksz;
227 	nzp->z_seq = ozp->z_seq;
228 	nzp->z_mapcnt = ozp->z_mapcnt;
229 	nzp->z_gen = ozp->z_gen;
230 	nzp->z_sync_cnt = ozp->z_sync_cnt;
231 	nzp->z_is_sa = ozp->z_is_sa;
232 	nzp->z_sa_hdl = ozp->z_sa_hdl;
233 	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
234 	nzp->z_links = ozp->z_links;
235 	nzp->z_size = ozp->z_size;
236 	nzp->z_pflags = ozp->z_pflags;
237 	nzp->z_uid = ozp->z_uid;
238 	nzp->z_gid = ozp->z_gid;
239 	nzp->z_mode = ozp->z_mode;
240 
241 	/*
242 	 * Since this is just an idle znode and kmem is already dealing with
243 	 * memory pressure, release any cached ACL.
244 	 */
245 	if (ozp->z_acl_cached) {
246 		zfs_acl_free(ozp->z_acl_cached);
247 		ozp->z_acl_cached = NULL;
248 	}
249 
250 	sa_set_userp(nzp->z_sa_hdl, nzp);
251 
252 	/*
253 	 * Invalidate the original znode by clearing fields that provide a
254 	 * pointer back to the znode. Set the low bit of the vfs pointer to
255 	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
256 	 * subsequent callback.
257 	 */
258 	ozp->z_sa_hdl = NULL;
259 	POINTER_INVALIDATE(&ozp->z_zfsvfs);
260 
261 	/*
262 	 * Mark the znode.
263 	 */
264 	nzp->z_moved = 1;
265 	ozp->z_moved = (uint8_t)-1;
266 }
267 
/*
 * kmem move callback for the znode cache (registered in zfs_znode_init()).
 *
 *	buf	- the znode kmem would like to relocate
 *	newbuf	- the constructed buffer it should be moved into
 *	size	- unused
 *	arg	- unused
 *
 * Returns:
 *	KMEM_CBRC_DONT_KNOW	the znode is not (or is no longer) attached to
 *				a mounted file system, so nothing can be said
 *				about it
 *	KMEM_CBRC_LATER		the znode is known but is currently busy: its
 *				object hold or vnode lock could not be taken
 *				without blocking, or it is referenced by
 *				something other than the DNLC
 *	KMEM_CBRC_YES		the znode was moved to newbuf
 *
 * Locks are taken in the order zfsvfs_lock, z_teardown_lock (reader),
 * z_znodes_lock, object hold (try), v_lock (try); every early return releases
 * exactly what has been acquired so far.
 */
/*ARGSUSED*/
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	znode_t *ozp = buf, *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;

	/*
	 * The znode is on the file system's list of known znodes if the vfs
	 * pointer is valid. We set the low bit of the vfs pointer when freeing
	 * the znode to invalidate it, and the memory patterns written by kmem
	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
	 * created znode sets the vfs pointer last of all to indicate that the
	 * znode is known and in a valid state to be moved by this function.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Close a small window in which it's possible that the filesystem could
	 * be unmounted and freed, and zfsvfs, though valid in the previous
	 * statement, could point to unrelated memory by the time we try to
	 * prevent the filesystem from being unmounted.
	 */
	rw_enter(&zfsvfs_lock, RW_WRITER);
	if (zfsvfs != ozp->z_zfsvfs) {
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * If the znode is still valid, then so is the file system. We know that
	 * no valid file system can be freed while we hold zfsvfs_lock, so we
	 * can safely ensure that the filesystem is not and will not be
	 * unmounted. The next statement is equivalent to ZFS_ENTER().
	 */
	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
	if (zfsvfs->z_unmounted) {
		ZFS_EXIT(zfsvfs);
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}
	/* The teardown lock now pins the file system; zfsvfs_lock can go. */
	rw_exit(&zfsvfs_lock);

	mutex_enter(&zfsvfs->z_znodes_lock);
	/*
	 * Recheck the vfs pointer in case the znode was removed just before
	 * acquiring the lock.
	 */
	if (zfsvfs != ozp->z_zfsvfs) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * At this point we know that as long as we hold z_znodes_lock, the
	 * znode cannot be freed and fields within the znode can be safely
	 * accessed. Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		return (KMEM_CBRC_LATER);
	}

	/* Only try for the vnode lock; blocking is not an option here. */
	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		return (KMEM_CBRC_LATER);
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
		mutex_exit(&vp->v_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		return (KMEM_CBRC_LATER);
	}

	/*
	 * The znode is known and in a valid state to move. We're holding the
	 * locks needed to execute the critical section.
	 */
	zfs_znode_move_impl(ozp, nzp);
	/*
	 * vp still names the vnode that was locked above (it now belongs to
	 * nzp), and ozp->z_id is left intact by the move, so both can still be
	 * used to release what was taken.
	 */
	mutex_exit(&vp->v_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

	/* Put nzp in ozp's place on the file system's znode list. */
	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	return (KMEM_CBRC_YES);
}
375 
376 void
377 zfs_znode_init(void)
378 {
379 	/*
380 	 * Initialize zcache
381 	 */
382 	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
383 	ASSERT(znode_cache == NULL);
384 	znode_cache = kmem_cache_create("zfs_znode_cache",
385 	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
386 	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
387 	kmem_cache_set_move(znode_cache, zfs_znode_move);
388 }
389 
390 void
391 zfs_znode_fini(void)
392 {
393 	/*
394 	 * Cleanup vfs & vnode ops
395 	 */
396 	zfs_remove_op_tables();
397 
398 	/*
399 	 * Cleanup zcache
400 	 */
401 	if (znode_cache)
402 		kmem_cache_destroy(znode_cache);
403 	znode_cache = NULL;
404 	rw_destroy(&zfsvfs_lock);
405 }
406 
/*
 * Vnode operation vectors, built by zfs_create_op_tables() and released by
 * zfs_remove_op_tables().  zfs_znode_alloc() picks one per vnode type.
 */
struct vnodeops *zfs_dvnodeops;		/* directories */
struct vnodeops *zfs_fvnodeops;		/* regular and special files */
struct vnodeops *zfs_symvnodeops;	/* symbolic links */
struct vnodeops *zfs_xdvnodeops;	/* extended attribute directories */
struct vnodeops *zfs_evnodeops;		/* any other vnode type */
struct vnodeops *zfs_sharevnodeops;	/* files in the shares directory */
413 
414 void
415 zfs_remove_op_tables()
416 {
417 	/*
418 	 * Remove vfs ops
419 	 */
420 	ASSERT(zfsfstype);
421 	(void) vfs_freevfsops_by_type(zfsfstype);
422 	zfsfstype = 0;
423 
424 	/*
425 	 * Remove vnode ops
426 	 */
427 	if (zfs_dvnodeops)
428 		vn_freevnodeops(zfs_dvnodeops);
429 	if (zfs_fvnodeops)
430 		vn_freevnodeops(zfs_fvnodeops);
431 	if (zfs_symvnodeops)
432 		vn_freevnodeops(zfs_symvnodeops);
433 	if (zfs_xdvnodeops)
434 		vn_freevnodeops(zfs_xdvnodeops);
435 	if (zfs_evnodeops)
436 		vn_freevnodeops(zfs_evnodeops);
437 	if (zfs_sharevnodeops)
438 		vn_freevnodeops(zfs_sharevnodeops);
439 
440 	zfs_dvnodeops = NULL;
441 	zfs_fvnodeops = NULL;
442 	zfs_symvnodeops = NULL;
443 	zfs_xdvnodeops = NULL;
444 	zfs_evnodeops = NULL;
445 	zfs_sharevnodeops = NULL;
446 }
447 
/*
 * Operation templates, defined elsewhere, from which zfs_create_op_tables()
 * builds the corresponding zfs_*vnodeops vectors.
 */
extern const fs_operation_def_t zfs_dvnodeops_template[];
extern const fs_operation_def_t zfs_fvnodeops_template[];
extern const fs_operation_def_t zfs_xdvnodeops_template[];
extern const fs_operation_def_t zfs_symvnodeops_template[];
extern const fs_operation_def_t zfs_evnodeops_template[];
extern const fs_operation_def_t zfs_sharevnodeops_template[];
454 
455 int
456 zfs_create_op_tables()
457 {
458 	int error;
459 
460 	/*
461 	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
462 	 * due to a failure to remove the the 2nd modlinkage (zfs_modldrv).
463 	 * In this case we just return as the ops vectors are already set up.
464 	 */
465 	if (zfs_dvnodeops)
466 		return (0);
467 
468 	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
469 	    &zfs_dvnodeops);
470 	if (error)
471 		return (error);
472 
473 	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
474 	    &zfs_fvnodeops);
475 	if (error)
476 		return (error);
477 
478 	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
479 	    &zfs_symvnodeops);
480 	if (error)
481 		return (error);
482 
483 	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
484 	    &zfs_xdvnodeops);
485 	if (error)
486 		return (error);
487 
488 	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
489 	    &zfs_evnodeops);
490 	if (error)
491 		return (error);
492 
493 	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
494 	    &zfs_sharevnodeops);
495 
496 	return (error);
497 }
498 
/*
 * Create the on-disk shares directory for a file system and record it in
 * the master node.
 *
 *	zfsvfs	- file system to create the directory in
 *	tx	- transaction the object creation and ZAP update belong to
 *
 * A scratch znode is taken from the znode cache and passed to zfs_mknode()
 * with IS_ROOT_NODE, so that it serves as both "parent" and result.  Once the
 * object exists its id is stored under ZFS_SHARES_DIR in MASTER_NODE_OBJ and
 * in zfsvfs->z_shares_dir, and the scratch znode is released again.
 *
 * Returns the result of zap_add().  Note that z_shares_dir is assigned
 * whether or not zap_add() succeeded.
 */
int
zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	zfs_acl_ids_t acl_ids;
	vattr_t vattr;
	znode_t *sharezp;
	vnode_t *vp;
	znode_t *zp;
	int error;

	/* A directory with mode 0555, owned by the kernel credential. */
	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0555;
	vattr.va_uid = crgetuid(kcred);
	vattr.va_gid = crgetgid(kcred);

	/* Hand-initialize just enough of a scratch znode for zfs_mknode(). */
	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
	sharezp->z_moved = 0;
	sharezp->z_unlinked = 0;
	sharezp->z_atime_dirty = 0;
	sharezp->z_zfsvfs = zfsvfs;
	sharezp->z_is_sa = zfsvfs->z_use_sa;
	sharezp->z_pflags = 0;

	vp = ZTOV(sharezp);
	vn_reinit(vp);
	vp->v_type = VDIR;

	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
	    kcred, NULL, &acl_ids));
	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, sharezp);
	ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
	/* Invalidate z_zfsvfs again before the znode goes back to the cache. */
	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
	zfsvfs->z_shares_dir = sharezp->z_id;

	/* Release everything that was only needed to create the object. */
	zfs_acl_ids_free(&acl_ids);
	ZTOV(sharezp)->v_count = 0;
	sa_handle_destroy(sharezp->z_sa_hdl);
	kmem_cache_free(znode_cache, sharezp);

	return (error);
}
545 
/*
 * Define a couple of values we need available for both 64 and 32 bit
 * environments.  Each is defined here only if it has not been defined
 * already.  They are used by zfs_expldev() and zfs_cmpldev() below.
 */
#ifndef NBITSMINOR64
#define	NBITSMINOR64	32
#endif
#ifndef MAXMAJ64
#define	MAXMAJ64	0xffffffffUL
#endif
#ifndef	MAXMIN64
#define	MAXMIN64	0xffffffffUL
#endif
559 
/*
 * ZFS-private replacement for expldev().
 *
 * The standard expldev() takes a dev32_t in LP64 and expands it to a long
 * dev_t, which is not what is needed here: ZFS wants to take a dev32_t in
 * ILP32 and expand it to the long (64-bit) form.
 *
 *	dev	- device number in the native dev_t representation
 *
 * Returns the device number in the long form.  When _LP64 is defined the
 * value is returned unchanged.
 */
static uint64_t
zfs_expldev(dev_t dev)
{
#ifdef _LP64
	return (dev);
#else
	/* Pull the 32-bit major/minor apart and repack them 64-bit style. */
	major_t mjr = ((major_t)dev >> NBITSMINOR32) & MAXMAJ32;
	minor_t mnr = (minor_t)dev & MAXMIN32;

	return (((uint64_t)mjr << NBITSMINOR64) | mnr);
#endif
}
579 
/*
 * ZFS-private replacement for cmpldev().
 *
 * The standard cmpldev() takes a long dev_t and compresses it to a dev32_t
 * in LP64, which is not what is needed here: ZFS wants to compact a long
 * (64-bit) device number to a dev32_t in ILP32.
 *
 *	dev	- device number in the long form
 *
 * Returns the device number as a native dev_t.  When _LP64 is defined the
 * value is returned unchanged.  Otherwise NODEV32 is returned if the major
 * or minor number does not fit the 32-bit encoding.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
#ifdef _LP64
	return (dev);
#else
	major_t mjr = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
	minor_t mnr = (minor_t)dev & MAXMIN64;

	/* Both halves have to fit before they can be packed into 32 bits. */
	if (mjr <= MAXMAJ32 && mnr <= MAXMIN32)
		return (((dev32_t)mjr << NBITSMINOR32) | mnr);

	return (NODEV32);
#endif
}
602 
603 static void
604 zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
605     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
606 {
607 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
608 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
609 
610 	mutex_enter(&zp->z_lock);
611 
612 	ASSERT(zp->z_sa_hdl == NULL);
613 	ASSERT(zp->z_acl_cached == NULL);
614 	if (sa_hdl == NULL) {
615 		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
616 		    SA_HDL_SHARED, &zp->z_sa_hdl));
617 	} else {
618 		zp->z_sa_hdl = sa_hdl;
619 		sa_set_userp(sa_hdl, zp);
620 	}
621 
622 	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
623 
624 	/*
625 	 * Slap on VROOT if we are the root znode
626 	 */
627 	if (zp->z_id == zfsvfs->z_root)
628 		ZTOV(zp)->v_flag |= VROOT;
629 
630 	mutex_exit(&zp->z_lock);
631 	vn_exists(ZTOV(zp));
632 }
633 
/*
 * Detach a znode from the DMU: destroy its SA handle and clear z_sa_hdl.
 *
 * The caller must satisfy at least one of: hold the object mutex for
 * zp->z_id, be operating on an unlinked znode, or hold
 * z_teardown_inactive_lock as writer.
 */
void
zfs_znode_dmu_fini(znode_t *zp)
{
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
	    zp->z_unlinked ||
	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));

	sa_handle_destroy(zp->z_sa_hdl);
	zp->z_sa_hdl = NULL;
}
644 
/*
 * Construct a new znode/vnode and initialize.
 *
 * This does not do a call to dmu_set_user() that is
 * up to the caller to do, in case you don't want to
 * return the znode
 *
 *	zfsvfs	 - file system the object belongs to
 *	db	 - dbuf of the object; supplies the object id
 *	blksz	 - initial value for z_blksz
 *	obj_type - bonus type of the object, passed to zfs_znode_sa_init()
 *	hdl	 - SA handle to adopt, or NULL to have one obtained from db
 *
 * Returns the new znode, already on zfsvfs->z_all_znodes and with a hold
 * taken on zfsvfs->z_vfs.  Returns NULL if the attributes cannot be read, if
 * the generation number read is 0, or if a required project ID cannot be
 * read; in that case the znode has been returned to the cache.
 */
static znode_t *
zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
    dmu_object_type_t obj_type, sa_handle_t *hdl)
{
	znode_t	*zp;
	vnode_t *vp;
	uint64_t mode;
	uint64_t parent;
	uint64_t projid = ZFS_DEFAULT_PROJID;
	sa_bulk_attr_t bulk[11];	/* nine entries are queued below */
	int count = 0;

	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	zp->z_moved = 0;

	/*
	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
	 * the zfs_znode_move() callback.
	 */
	zp->z_sa_hdl = NULL;
	zp->z_unlinked = 0;
	zp->z_atime_dirty = 0;
	zp->z_mapcnt = 0;
	zp->z_id = db->db_object;
	zp->z_blksz = blksz;
	zp->z_seq = 0x7A4653;
	zp->z_sync_cnt = 0;

	vp = ZTOV(zp);
	vn_reinit(vp);

	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);

	/*
	 * Queue the attributes to read.  Most land directly in znode fields;
	 * mode and parent go to locals that are consumed further down.
	 */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, 8);

	/*
	 * The project ID is only looked up when project quotas are enabled
	 * and the object's flags say one is stored; otherwise projid keeps
	 * its default.
	 */
	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0 ||
	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
	    (zp->z_pflags & ZFS_PROJID) &&
	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
		/*
		 * Destroy the SA handle only if it was obtained here; a
		 * handle passed in by the caller is left alone.
		 */
		if (hdl == NULL)
			sa_handle_destroy(zp->z_sa_hdl);
		kmem_cache_free(znode_cache, zp);
		return (NULL);
	}

	zp->z_projid = projid;
	zp->z_mode = mode;
	vp->v_vfsp = zfsvfs->z_parent->z_vfs;

	vp->v_type = IFTOVT((mode_t)mode);

	/* Choose the vnode operations vector that matches the file type. */
	switch (vp->v_type) {
	case VDIR:
		if (zp->z_pflags & ZFS_XATTR) {
			vn_setops(vp, zfs_xdvnodeops);
			vp->v_flag |= V_XATTRDIR;
		} else {
			vn_setops(vp, zfs_dvnodeops);
		}
		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
		break;
	case VBLK:
	case VCHR:
		{
			uint64_t rdev;
			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
			    &rdev, sizeof (rdev)) == 0);

			vp->v_rdev = zfs_cmpldev(rdev);
		}
		/*FALLTHROUGH*/
	case VFIFO:
	case VSOCK:
	case VDOOR:
		vn_setops(vp, zfs_fvnodeops);
		break;
	case VREG:
		vp->v_flag |= VMODSORT;
		/* Regular files in the shares directory get their own ops. */
		if (parent == zfsvfs->z_shares_dir) {
			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
			vn_setops(vp, zfs_sharevnodeops);
		} else {
			vn_setops(vp, zfs_fvnodeops);
		}
		break;
	case VLNK:
		vn_setops(vp, zfs_symvnodeops);
		break;
	default:
		vn_setops(vp, zfs_evnodeops);
		break;
	}

	mutex_enter(&zfsvfs->z_znodes_lock);
	list_insert_tail(&zfsvfs->z_all_znodes, zp);
	membar_producer();
	/*
	 * Everything else must be valid before assigning z_zfsvfs makes the
	 * znode eligible for zfs_znode_move().
	 */
	zp->z_zfsvfs = zfsvfs;
	mutex_exit(&zfsvfs->z_znodes_lock);

	/* Take a hold on the vfs for the znode being returned. */
	VFS_HOLD(zfsvfs->z_vfs);
	return (zp);
}
775 
/*
 * Placeholder values that are never assigned in the code shown here and so
 * keep their static zero initialization.  zfs_mknode() uses them as the
 * source for the XATTR, PAD and ZNODE_ACL attributes when it lays out an
 * object of type DMU_OT_ZNODE.
 */
static uint64_t empty_xattr;
static uint64_t pad[4];
static zfs_acl_phys_t acl_phys;
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode; AT_TYPE and
 *			  AT_MODE must both be set in va_mask
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		acl_ids	- initial ACL, mode and owner/group FUIDs for the
 *			  new object
 *
 *	OUT:	zpp	- allocated znode (dzp itself for IS_ROOT_NODE)
 *
 * The bonus buffer length is derived here from the object type and dnode
 * size; it is not a parameter.  Allocation failures are not reported to the
 * caller: they trip the VERIFY/ASSERT checks below.
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;
	uint64_t	projid = ZFS_DEFAULT_PROJID;
	uint64_t	rdev = 0;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		bonuslen;
	int		dnodesize;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	*sa_attrs;
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/*
	 * During replay the object id, timestamp, generation and dnode size
	 * are taken from fields of vap rather than generated here.
	 */
	if (zfsvfs->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
		dnodesize = vap->va_fsid;	/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
	}

	if (dnodesize == 0)
		dnodesize = DNODE_MIN_SIZE;

	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		if (zfsvfs->z_replay) {
			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, dnodesize, tx));
		} else {
			obj = zap_create_norm_dnsize(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, dnodesize, tx);
		}
	} else {
		if (zfsvfs->z_replay) {
			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, dnodesize, tx));
		} else {
			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, dnodesize, tx);
		}
	}

	/* Held until the end of the function. */
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp->z_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zfsvfs->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (vap->va_type == VDIR) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		rdev = zfs_expldev(vap->va_rdev);
	}

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	if (vap->va_type == VREG || vap->va_type == VDIR) {
		/*
		 * With ZFS_PROJID flag, we can easily know whether there is
		 * project ID stored on disk or not. See zfs_space_delta_cb().
		 */
		if (obj_type != DMU_OT_ZNODE &&
		    dmu_objset_projectquota_enabled(zfsvfs->z_os))
			pflags |= ZFS_PROJID;

		/*
		 * Inherit project ID from parent if required.
		 */
		projid = zfs_inherit_projid(dzp);
		if (dzp->z_pflags & ZFS_PROJINHERIT)
			pflags |= ZFS_PROJINHERIT;
	}

	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	/* Caller-supplied access/modify times win over "now". */
	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file
	 *
	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
	 * in the old znode_phys_t format.  Don't change this ordering
	 */
	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
		    NULL, &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
		    NULL, &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
		    &empty_xattr, 8);
	} else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
	    pflags & ZFS_PROJID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs),
		    NULL, &projid, 8);
	}
	/* Old-format objects always carry rdev; SA objects only for devices. */
	if (obj_type == DMU_OT_ZNODE ||
	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
		    NULL, &rdev, 8);

	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		/*
		 * NOTE(review): mode and pflags were registered above by
		 * address, so presumably the values updated here are what
		 * sa_replace_all_by_template() writes out -- confirm against
		 * SA_ADD_BULK_ATTR().
		 */
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		/*
		 * The result is only checked by ASSERT; it is dereferenced
		 * unconditionally just below.
		 */
		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
		ASSERT(*zpp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	/* Bring the in-core znode in line with what was just written. */
	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;
	(*zpp)->z_dnodesize = dnodesize;
	(*zpp)->z_projid = projid;

	if (vap->va_mask & AT_XVATTR)
		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);

	/*
	 * Old-format objects and pre-FUID ACLs did not have their ACL entries
	 * queued above, so the ACL is set separately here.
	 */
	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
	}
	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
}
1069 
1070 /*
1071  * Update in-core attributes.  It is assumed the caller will be doing an
1072  * sa_bulk_update to push the changes out.
1073  */
1074 void
1075 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
1076 {
1077 	xoptattr_t *xoap;
1078 
1079 	xoap = xva_getxoptattr(xvap);
1080 	ASSERT(xoap);
1081 
1082 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
1083 		uint64_t times[2];
1084 		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
1085 		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
1086 		    &times, sizeof (times), tx);
1087 		XVA_SET_RTN(xvap, XAT_CREATETIME);
1088 	}
1089 	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1090 		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
1091 		    zp->z_pflags, tx);
1092 		XVA_SET_RTN(xvap, XAT_READONLY);
1093 	}
1094 	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1095 		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
1096 		    zp->z_pflags, tx);
1097 		XVA_SET_RTN(xvap, XAT_HIDDEN);
1098 	}
1099 	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1100 		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
1101 		    zp->z_pflags, tx);
1102 		XVA_SET_RTN(xvap, XAT_SYSTEM);
1103 	}
1104 	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1105 		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
1106 		    zp->z_pflags, tx);
1107 		XVA_SET_RTN(xvap, XAT_ARCHIVE);
1108 	}
1109 	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
1110 		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
1111 		    zp->z_pflags, tx);
1112 		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
1113 	}
1114 	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
1115 		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
1116 		    zp->z_pflags, tx);
1117 		XVA_SET_RTN(xvap, XAT_NOUNLINK);
1118 	}
1119 	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
1120 		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
1121 		    zp->z_pflags, tx);
1122 		XVA_SET_RTN(xvap, XAT_APPENDONLY);
1123 	}
1124 	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1125 		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
1126 		    zp->z_pflags, tx);
1127 		XVA_SET_RTN(xvap, XAT_NODUMP);
1128 	}
1129 	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1130 		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
1131 		    zp->z_pflags, tx);
1132 		XVA_SET_RTN(xvap, XAT_OPAQUE);
1133 	}
1134 	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1135 		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
1136 		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
1137 		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1138 	}
1139 	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1140 		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
1141 		    zp->z_pflags, tx);
1142 		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1143 	}
1144 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
1145 		zfs_sa_set_scanstamp(zp, xvap, tx);
1146 		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
1147 	}
1148 	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1149 		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
1150 		    zp->z_pflags, tx);
1151 		XVA_SET_RTN(xvap, XAT_REPARSE);
1152 	}
1153 	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1154 		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
1155 		    zp->z_pflags, tx);
1156 		XVA_SET_RTN(xvap, XAT_OFFLINE);
1157 	}
1158 	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1159 		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
1160 		    zp->z_pflags, tx);
1161 		XVA_SET_RTN(xvap, XAT_SPARSE);
1162 	}
1163 	if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
1164 		ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit,
1165 		    zp->z_pflags, tx);
1166 		XVA_SET_RTN(xvap, XAT_PROJINHERIT);
1167 	}
1168 }
1169 
1170 int
1171 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1172 {
1173 	dmu_object_info_t doi;
1174 	dmu_buf_t	*db;
1175 	znode_t		*zp;
1176 	int err;
1177 	sa_handle_t	*hdl;
1178 
1179 	*zpp = NULL;
1180 
1181 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
1182 
1183 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
1184 	if (err) {
1185 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1186 		return (err);
1187 	}
1188 
1189 	dmu_object_info_from_db(db, &doi);
1190 	if (doi.doi_bonus_type != DMU_OT_SA &&
1191 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
1192 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
1193 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1194 		sa_buf_rele(db, NULL);
1195 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1196 		return (SET_ERROR(EINVAL));
1197 	}
1198 
1199 	hdl = dmu_buf_get_user(db);
1200 	if (hdl != NULL) {
1201 		zp  = sa_get_userdata(hdl);
1202 
1203 
1204 		/*
1205 		 * Since "SA" does immediate eviction we
1206 		 * should never find a sa handle that doesn't
1207 		 * know about the znode.
1208 		 */
1209 
1210 		ASSERT3P(zp, !=, NULL);
1211 
1212 		mutex_enter(&zp->z_lock);
1213 		ASSERT3U(zp->z_id, ==, obj_num);
1214 		if (zp->z_unlinked) {
1215 			err = SET_ERROR(ENOENT);
1216 		} else {
1217 			VN_HOLD(ZTOV(zp));
1218 			*zpp = zp;
1219 			err = 0;
1220 		}
1221 		mutex_exit(&zp->z_lock);
1222 		sa_buf_rele(db, NULL);
1223 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1224 		return (err);
1225 	}
1226 
1227 	/*
1228 	 * Not found create new znode/vnode
1229 	 * but only if file exists.
1230 	 *
1231 	 * There is a small window where zfs_vget() could
1232 	 * find this object while a file create is still in
1233 	 * progress.  This is checked for in zfs_znode_alloc()
1234 	 *
1235 	 * if zfs_znode_alloc() fails it will drop the hold on the
1236 	 * bonus buffer.
1237 	 */
1238 	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
1239 	    doi.doi_bonus_type, NULL);
1240 	if (zp == NULL) {
1241 		err = SET_ERROR(ENOENT);
1242 	} else {
1243 		*zpp = zp;
1244 	}
1245 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1246 	return (err);
1247 }
1248 
1249 int
1250 zfs_rezget(znode_t *zp)
1251 {
1252 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1253 	dmu_object_info_t doi;
1254 	dmu_buf_t *db;
1255 	uint64_t obj_num = zp->z_id;
1256 	uint64_t mode;
1257 	sa_bulk_attr_t bulk[10];
1258 	int err;
1259 	int count = 0;
1260 	uint64_t gen;
1261 	uint64_t projid = ZFS_DEFAULT_PROJID;
1262 
1263 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
1264 
1265 	mutex_enter(&zp->z_acl_lock);
1266 	if (zp->z_acl_cached) {
1267 		zfs_acl_free(zp->z_acl_cached);
1268 		zp->z_acl_cached = NULL;
1269 	}
1270 
1271 	mutex_exit(&zp->z_acl_lock);
1272 	ASSERT(zp->z_sa_hdl == NULL);
1273 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
1274 	if (err) {
1275 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1276 		return (err);
1277 	}
1278 
1279 	dmu_object_info_from_db(db, &doi);
1280 	if (doi.doi_bonus_type != DMU_OT_SA &&
1281 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
1282 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
1283 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1284 		sa_buf_rele(db, NULL);
1285 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1286 		return (SET_ERROR(EINVAL));
1287 	}
1288 
1289 	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
1290 
1291 	/* reload cached values */
1292 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
1293 	    &gen, sizeof (gen));
1294 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
1295 	    &zp->z_size, sizeof (zp->z_size));
1296 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
1297 	    &zp->z_links, sizeof (zp->z_links));
1298 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
1299 	    &zp->z_pflags, sizeof (zp->z_pflags));
1300 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
1301 	    &zp->z_atime, sizeof (zp->z_atime));
1302 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
1303 	    &zp->z_uid, sizeof (zp->z_uid));
1304 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
1305 	    &zp->z_gid, sizeof (zp->z_gid));
1306 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
1307 	    &mode, sizeof (mode));
1308 
1309 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
1310 		zfs_znode_dmu_fini(zp);
1311 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1312 		return (SET_ERROR(EIO));
1313 	}
1314 
1315 	if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) {
1316 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs),
1317 		    &projid, 8);
1318 		if (err != 0 && err != ENOENT) {
1319 			zfs_znode_dmu_fini(zp);
1320 			ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1321 			return (SET_ERROR(err));
1322 		}
1323 	}
1324 
1325 	zp->z_projid = projid;
1326 	zp->z_mode = mode;
1327 
1328 	if (gen != zp->z_gen) {
1329 		zfs_znode_dmu_fini(zp);
1330 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1331 		return (SET_ERROR(EIO));
1332 	}
1333 
1334 	zp->z_blksz = doi.doi_data_block_size;
1335 
1336 	/*
1337 	 * If the file has zero links, then it has been unlinked on the send
1338 	 * side and it must be in the received unlinked set.
1339 	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
1340 	 * stale data and to prevent automatical removal of the file in
1341 	 * zfs_zinactive().  The file will be removed either when it is removed
1342 	 * on the send side and the next incremental stream is received or
1343 	 * when the unlinked set gets processed.
1344 	 */
1345 	zp->z_unlinked = (zp->z_links == 0);
1346 	if (zp->z_unlinked)
1347 		zfs_znode_dmu_fini(zp);
1348 
1349 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1350 
1351 	return (0);
1352 }
1353 
1354 void
1355 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1356 {
1357 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1358 	objset_t *os = zfsvfs->z_os;
1359 	uint64_t obj = zp->z_id;
1360 	uint64_t acl_obj = zfs_external_acl(zp);
1361 
1362 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
1363 	if (acl_obj) {
1364 		VERIFY(!zp->z_is_sa);
1365 		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
1366 	}
1367 	VERIFY(0 == dmu_object_free(os, obj, tx));
1368 	zfs_znode_dmu_fini(zp);
1369 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
1370 	zfs_znode_free(zp);
1371 }
1372 
1373 void
1374 zfs_zinactive(znode_t *zp)
1375 {
1376 	vnode_t	*vp = ZTOV(zp);
1377 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1378 	uint64_t z_id = zp->z_id;
1379 
1380 	ASSERT(zp->z_sa_hdl);
1381 
1382 	/*
1383 	 * Don't allow a zfs_zget() while were trying to release this znode
1384 	 */
1385 	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
1386 
1387 	mutex_enter(&zp->z_lock);
1388 	mutex_enter(&vp->v_lock);
1389 	VN_RELE_LOCKED(vp);
1390 	if (vp->v_count > 0 || vn_has_cached_data(vp)) {
1391 		/*
1392 		 * If the hold count is greater than zero, somebody has
1393 		 * obtained a new reference on this znode while we were
1394 		 * processing it here, so we are done.  If we still have
1395 		 * mapped pages then we are also done, since we don't
1396 		 * want to inactivate the znode until the pages get pushed.
1397 		 *
1398 		 * XXX - if vn_has_cached_data(vp) is true, but count == 0,
1399 		 * this seems like it would leave the znode hanging with
1400 		 * no chance to go inactive...
1401 		 */
1402 		mutex_exit(&vp->v_lock);
1403 		mutex_exit(&zp->z_lock);
1404 		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1405 		return;
1406 	}
1407 	mutex_exit(&vp->v_lock);
1408 
1409 	/*
1410 	 * If this was the last reference to a file with no links, remove
1411 	 * the file from the file system unless the file system is mounted
1412 	 * read-only.  That can happen, for example, if the file system was
1413 	 * originally read-write, the file was opened, then unlinked and
1414 	 * the file system was made read-only before the file was finally
1415 	 * closed.  The file will remain in the unlinked set.
1416 	 */
1417 	if (zp->z_unlinked) {
1418 		ASSERT(!zfsvfs->z_issnap);
1419 		if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) {
1420 			mutex_exit(&zp->z_lock);
1421 			ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1422 			zfs_rmnode(zp);
1423 			return;
1424 		}
1425 	}
1426 
1427 	mutex_exit(&zp->z_lock);
1428 	zfs_znode_dmu_fini(zp);
1429 	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1430 	zfs_znode_free(zp);
1431 }
1432 
1433 void
1434 zfs_znode_free(znode_t *zp)
1435 {
1436 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1437 
1438 	vn_invalid(ZTOV(zp));
1439 
1440 	ASSERT(ZTOV(zp)->v_count == 0);
1441 
1442 	mutex_enter(&zfsvfs->z_znodes_lock);
1443 	POINTER_INVALIDATE(&zp->z_zfsvfs);
1444 	list_remove(&zfsvfs->z_all_znodes, zp);
1445 	mutex_exit(&zfsvfs->z_znodes_lock);
1446 
1447 	if (zp->z_acl_cached) {
1448 		zfs_acl_free(zp->z_acl_cached);
1449 		zp->z_acl_cached = NULL;
1450 	}
1451 
1452 	kmem_cache_free(znode_cache, zp);
1453 
1454 	VFS_RELE(zfsvfs->z_vfs);
1455 }
1456 
1457 void
1458 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
1459     uint64_t ctime[2], boolean_t have_tx)
1460 {
1461 	timestruc_t	now;
1462 
1463 	gethrestime(&now);
1464 
1465 	if (have_tx) {	/* will sa_bulk_update happen really soon? */
1466 		zp->z_atime_dirty = 0;
1467 		zp->z_seq++;
1468 	} else {
1469 		zp->z_atime_dirty = 1;
1470 	}
1471 
1472 	if (flag & AT_ATIME) {
1473 		ZFS_TIME_ENCODE(&now, zp->z_atime);
1474 	}
1475 
1476 	if (flag & AT_MTIME) {
1477 		ZFS_TIME_ENCODE(&now, mtime);
1478 		if (zp->z_zfsvfs->z_use_fuids) {
1479 			zp->z_pflags |= (ZFS_ARCHIVE |
1480 			    ZFS_AV_MODIFIED);
1481 		}
1482 	}
1483 
1484 	if (flag & AT_CTIME) {
1485 		ZFS_TIME_ENCODE(&now, ctime);
1486 		if (zp->z_zfsvfs->z_use_fuids)
1487 			zp->z_pflags |= ZFS_ARCHIVE;
1488 	}
1489 }
1490 
1491 /*
1492  * Grow the block size for a file.
1493  *
1494  *	IN:	zp	- znode of file to free data in.
1495  *		size	- requested block size
1496  *		tx	- open transaction.
1497  *
1498  * NOTE: this function assumes that the znode is write locked.
1499  */
1500 void
1501 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1502 {
1503 	int		error;
1504 	u_longlong_t	dummy;
1505 
1506 	if (size <= zp->z_blksz)
1507 		return;
1508 	/*
1509 	 * If the file size is already greater than the current blocksize,
1510 	 * we will not grow.  If there is more than one block in a file,
1511 	 * the blocksize cannot change.
1512 	 */
1513 	if (zp->z_blksz && zp->z_size > zp->z_blksz)
1514 		return;
1515 
1516 	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
1517 	    size, 0, tx);
1518 
1519 	if (error == ENOTSUP)
1520 		return;
1521 	ASSERT0(error);
1522 
1523 	/* What blocksize did we actually get? */
1524 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
1525 }
1526 
1527 /*
1528  * This is a dummy interface used when pvn_vplist_dirty() should *not*
1529  * be calling back into the fs for a putpage().  E.g.: when truncating
1530  * a file, the pages being "thrown away* don't need to be written out.
1531  */
1532 /* ARGSUSED */
1533 static int
1534 zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1535     int flags, cred_t *cr)
1536 {
1537 	ASSERT(0);
1538 	return (0);
1539 }
1540 
1541 /*
1542  * Increase the file length
1543  *
1544  *	IN:	zp	- znode of file to free data in.
1545  *		end	- new end-of-file
1546  *
1547  *	RETURN:	0 on success, error code on failure
1548  */
1549 static int
1550 zfs_extend(znode_t *zp, uint64_t end)
1551 {
1552 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1553 	dmu_tx_t *tx;
1554 	locked_range_t *lr;
1555 	uint64_t newblksz;
1556 	int error;
1557 
1558 	/*
1559 	 * We will change zp_size, lock the whole file.
1560 	 */
1561 	lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
1562 
1563 	/*
1564 	 * Nothing to do if file already at desired length.
1565 	 */
1566 	if (end <= zp->z_size) {
1567 		rangelock_exit(lr);
1568 		return (0);
1569 	}
1570 	tx = dmu_tx_create(zfsvfs->z_os);
1571 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1572 	zfs_sa_upgrade_txholds(tx, zp);
1573 	if (end > zp->z_blksz &&
1574 	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
1575 		/*
1576 		 * We are growing the file past the current block size.
1577 		 */
1578 		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
1579 			/*
1580 			 * File's blocksize is already larger than the
1581 			 * "recordsize" property.  Only let it grow to
1582 			 * the next power of 2.
1583 			 */
1584 			ASSERT(!ISP2(zp->z_blksz));
1585 			newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
1586 		} else {
1587 			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
1588 		}
1589 		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
1590 	} else {
1591 		newblksz = 0;
1592 	}
1593 
1594 	error = dmu_tx_assign(tx, TXG_WAIT);
1595 	if (error) {
1596 		dmu_tx_abort(tx);
1597 		rangelock_exit(lr);
1598 		return (error);
1599 	}
1600 
1601 	if (newblksz)
1602 		zfs_grow_blocksize(zp, newblksz, tx);
1603 
1604 	zp->z_size = end;
1605 
1606 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
1607 	    &zp->z_size, sizeof (zp->z_size), tx));
1608 
1609 	rangelock_exit(lr);
1610 
1611 	dmu_tx_commit(tx);
1612 
1613 	return (0);
1614 }
1615 
1616 /*
1617  * Free space in a file.
1618  *
1619  *	IN:	zp	- znode of file to free data in.
1620  *		off	- start of section to free.
1621  *		len	- length of section to free.
1622  *
1623  *	RETURN:	0 on success, error code on failure
1624  */
1625 static int
1626 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1627 {
1628 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1629 	locked_range_t *lr;
1630 	int error;
1631 
1632 	/*
1633 	 * Lock the range being freed.
1634 	 */
1635 	lr = rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);
1636 
1637 	/*
1638 	 * Nothing to do if file already at desired length.
1639 	 */
1640 	if (off >= zp->z_size) {
1641 		rangelock_exit(lr);
1642 		return (0);
1643 	}
1644 
1645 	if (off + len > zp->z_size)
1646 		len = zp->z_size - off;
1647 
1648 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
1649 
1650 	rangelock_exit(lr);
1651 
1652 	return (error);
1653 }
1654 
1655 /*
1656  * Truncate a file
1657  *
1658  *	IN:	zp	- znode of file to free data in.
1659  *		end	- new end-of-file.
1660  *
1661  *	RETURN:	0 on success, error code on failure
1662  */
1663 static int
1664 zfs_trunc(znode_t *zp, uint64_t end)
1665 {
1666 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1667 	vnode_t *vp = ZTOV(zp);
1668 	dmu_tx_t *tx;
1669 	locked_range_t *lr;
1670 	int error;
1671 	sa_bulk_attr_t bulk[2];
1672 	int count = 0;
1673 
1674 	/*
1675 	 * We will change zp_size, lock the whole file.
1676 	 */
1677 	lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
1678 
1679 	/*
1680 	 * Nothing to do if file already at desired length.
1681 	 */
1682 	if (end >= zp->z_size) {
1683 		rangelock_exit(lr);
1684 		return (0);
1685 	}
1686 
1687 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
1688 	    DMU_OBJECT_END);
1689 	if (error) {
1690 		rangelock_exit(lr);
1691 		return (error);
1692 	}
1693 	tx = dmu_tx_create(zfsvfs->z_os);
1694 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1695 	zfs_sa_upgrade_txholds(tx, zp);
1696 	dmu_tx_mark_netfree(tx);
1697 	error = dmu_tx_assign(tx, TXG_WAIT);
1698 	if (error) {
1699 		dmu_tx_abort(tx);
1700 		rangelock_exit(lr);
1701 		return (error);
1702 	}
1703 
1704 	zp->z_size = end;
1705 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
1706 	    NULL, &zp->z_size, sizeof (zp->z_size));
1707 
1708 	if (end == 0) {
1709 		zp->z_pflags &= ~ZFS_SPARSE;
1710 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1711 		    NULL, &zp->z_pflags, 8);
1712 	}
1713 	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
1714 
1715 	dmu_tx_commit(tx);
1716 
1717 	/*
1718 	 * Clear any mapped pages in the truncated region.  This has to
1719 	 * happen outside of the transaction to avoid the possibility of
1720 	 * a deadlock with someone trying to push a page that we are
1721 	 * about to invalidate.
1722 	 */
1723 	if (vn_has_cached_data(vp)) {
1724 		page_t *pp;
1725 		uint64_t start = end & PAGEMASK;
1726 		int poff = end & PAGEOFFSET;
1727 
1728 		if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) {
1729 			/*
1730 			 * We need to zero a partial page.
1731 			 */
1732 			pagezero(pp, poff, PAGESIZE - poff);
1733 			start += PAGESIZE;
1734 			page_unlock(pp);
1735 		}
1736 		error = pvn_vplist_dirty(vp, start, zfs_no_putpage,
1737 		    B_INVAL | B_TRUNC, NULL);
1738 		ASSERT(error == 0);
1739 	}
1740 
1741 	rangelock_exit(lr);
1742 
1743 	return (0);
1744 }
1745 
1746 /*
1747  * Free space in a file
1748  *
1749  *	IN:	zp	- znode of file to free data in.
1750  *		off	- start of range
1751  *		len	- end of range (0 => EOF)
1752  *		flag	- current file open mode flags.
1753  *		log	- TRUE if this action should be logged
1754  *
1755  *	RETURN:	0 on success, error code on failure
1756  */
1757 int
1758 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
1759 {
1760 	vnode_t *vp = ZTOV(zp);
1761 	dmu_tx_t *tx;
1762 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1763 	zilog_t *zilog = zfsvfs->z_log;
1764 	uint64_t mode;
1765 	uint64_t mtime[2], ctime[2];
1766 	sa_bulk_attr_t bulk[3];
1767 	int count = 0;
1768 	int error;
1769 
1770 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
1771 	    sizeof (mode))) != 0)
1772 		return (error);
1773 
1774 	if (off > zp->z_size) {
1775 		error =  zfs_extend(zp, off+len);
1776 		if (error == 0 && log)
1777 			goto log;
1778 		else
1779 			return (error);
1780 	}
1781 
1782 	/*
1783 	 * Check for any locks in the region to be freed.
1784 	 */
1785 
1786 	if (MANDLOCK(vp, (mode_t)mode)) {
1787 		uint64_t length = (len ? len : zp->z_size - off);
1788 		if (error = chklock(vp, FWRITE, off, length, flag, NULL))
1789 			return (error);
1790 	}
1791 
1792 	if (len == 0) {
1793 		error = zfs_trunc(zp, off);
1794 	} else {
1795 		if ((error = zfs_free_range(zp, off, len)) == 0 &&
1796 		    off + len > zp->z_size)
1797 			error = zfs_extend(zp, off+len);
1798 	}
1799 	if (error || !log)
1800 		return (error);
1801 log:
1802 	tx = dmu_tx_create(zfsvfs->z_os);
1803 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1804 	zfs_sa_upgrade_txholds(tx, zp);
1805 	error = dmu_tx_assign(tx, TXG_WAIT);
1806 	if (error) {
1807 		dmu_tx_abort(tx);
1808 		return (error);
1809 	}
1810 
1811 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
1812 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
1813 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1814 	    NULL, &zp->z_pflags, 8);
1815 	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
1816 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1817 	ASSERT(error == 0);
1818 
1819 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
1820 
1821 	dmu_tx_commit(tx);
1822 	return (0);
1823 }
1824 
1825 void
1826 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
1827 {
1828 	uint64_t	moid, obj, sa_obj, version;
1829 	uint64_t	sense = ZFS_CASE_SENSITIVE;
1830 	uint64_t	norm = 0;
1831 	nvpair_t	*elem;
1832 	int		error;
1833 	int		i;
1834 	znode_t		*rootzp = NULL;
1835 	zfsvfs_t	*zfsvfs;
1836 	vnode_t		*vp;
1837 	vattr_t		vattr;
1838 	znode_t		*zp;
1839 	zfs_acl_ids_t	acl_ids;
1840 
1841 	/*
1842 	 * First attempt to create master node.
1843 	 */
1844 	/*
1845 	 * In an empty objset, there are no blocks to read and thus
1846 	 * there can be no i/o errors (which we assert below).
1847 	 */
1848 	moid = MASTER_NODE_OBJ;
1849 	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
1850 	    DMU_OT_NONE, 0, tx);
1851 	ASSERT(error == 0);
1852 
1853 	/*
1854 	 * Set starting attributes.
1855 	 */
1856 	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
1857 	elem = NULL;
1858 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
1859 		/* For the moment we expect all zpl props to be uint64_ts */
1860 		uint64_t val;
1861 		char *name;
1862 
1863 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
1864 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
1865 		name = nvpair_name(elem);
1866 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
1867 			if (val < version)
1868 				version = val;
1869 		} else {
1870 			error = zap_update(os, moid, name, 8, 1, &val, tx);
1871 		}
1872 		ASSERT(error == 0);
1873 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
1874 			norm = val;
1875 		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
1876 			sense = val;
1877 	}
1878 	ASSERT(version != 0);
1879 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
1880 
1881 	/*
1882 	 * Create zap object used for SA attribute registration
1883 	 */
1884 
1885 	if (version >= ZPL_VERSION_SA) {
1886 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
1887 		    DMU_OT_NONE, 0, tx);
1888 		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
1889 		ASSERT(error == 0);
1890 	} else {
1891 		sa_obj = 0;
1892 	}
1893 	/*
1894 	 * Create a delete queue.
1895 	 */
1896 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
1897 
1898 	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
1899 	ASSERT(error == 0);
1900 
1901 	/*
1902 	 * Create root znode.  Create minimal znode/vnode/zfsvfs
1903 	 * to allow zfs_mknode to work.
1904 	 */
1905 	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
1906 	vattr.va_type = VDIR;
1907 	vattr.va_mode = S_IFDIR|0755;
1908 	vattr.va_uid = crgetuid(cr);
1909 	vattr.va_gid = crgetgid(cr);
1910 
1911 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
1912 	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
1913 	rootzp->z_moved = 0;
1914 	rootzp->z_unlinked = 0;
1915 	rootzp->z_atime_dirty = 0;
1916 	rootzp->z_is_sa = USE_SA(version, os);
1917 	rootzp->z_pflags = 0;
1918 
1919 	vp = ZTOV(rootzp);
1920 	vn_reinit(vp);
1921 	vp->v_type = VDIR;
1922 
1923 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
1924 	zfsvfs->z_os = os;
1925 	zfsvfs->z_parent = zfsvfs;
1926 	zfsvfs->z_version = version;
1927 	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
1928 	zfsvfs->z_use_sa = USE_SA(version, os);
1929 	zfsvfs->z_norm = norm;
1930 
1931 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
1932 	    &zfsvfs->z_attr_table);
1933 
1934 	ASSERT(error == 0);
1935 
1936 	/*
1937 	 * Fold case on file systems that are always or sometimes case
1938 	 * insensitive.
1939 	 */
1940 	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
1941 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
1942 
1943 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1944 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
1945 	    offsetof(znode_t, z_link_node));
1946 
1947 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1948 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
1949 
1950 	rootzp->z_zfsvfs = zfsvfs;
1951 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
1952 	    cr, NULL, &acl_ids));
1953 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
1954 	ASSERT3P(zp, ==, rootzp);
1955 	ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */
1956 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
1957 	ASSERT(error == 0);
1958 	zfs_acl_ids_free(&acl_ids);
1959 	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
1960 
1961 	ZTOV(rootzp)->v_count = 0;
1962 	sa_handle_destroy(rootzp->z_sa_hdl);
1963 	kmem_cache_free(znode_cache, rootzp);
1964 
1965 	/*
1966 	 * Create shares directory
1967 	 */
1968 
1969 	error = zfs_create_share_dir(zfsvfs, tx);
1970 
1971 	ASSERT(error == 0);
1972 
1973 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1974 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1975 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1976 }
1977 
1978 #endif /* _KERNEL */
1979 
1980 static int
1981 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
1982 {
1983 	uint64_t sa_obj = 0;
1984 	int error;
1985 
1986 	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
1987 	if (error != 0 && error != ENOENT)
1988 		return (error);
1989 
1990 	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
1991 	return (error);
1992 }
1993 
1994 static int
1995 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
1996     dmu_buf_t **db, void *tag)
1997 {
1998 	dmu_object_info_t doi;
1999 	int error;
2000 
2001 	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
2002 		return (error);
2003 
2004 	dmu_object_info_from_db(*db, &doi);
2005 	if ((doi.doi_bonus_type != DMU_OT_SA &&
2006 	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
2007 	    doi.doi_bonus_type == DMU_OT_ZNODE &&
2008 	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
2009 		sa_buf_rele(*db, tag);
2010 		return (SET_ERROR(ENOTSUP));
2011 	}
2012 
2013 	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
2014 	if (error != 0) {
2015 		sa_buf_rele(*db, tag);
2016 		return (error);
2017 	}
2018 
2019 	return (0);
2020 }
2021 
2022 void
2023 zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
2024 {
2025 	sa_handle_destroy(hdl);
2026 	sa_buf_rele(db, tag);
2027 }
2028 
2029 /*
2030  * Given an object number, return its parent object number and whether
2031  * or not the object is an extended attribute directory.
2032  */
2033 static int
2034 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
2035     uint64_t *pobjp, int *is_xattrdir)
2036 {
2037 	uint64_t parent;
2038 	uint64_t pflags;
2039 	uint64_t mode;
2040 	uint64_t parent_mode;
2041 	sa_bulk_attr_t bulk[3];
2042 	sa_handle_t *sa_hdl;
2043 	dmu_buf_t *sa_db;
2044 	int count = 0;
2045 	int error;
2046 
2047 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
2048 	    &parent, sizeof (parent));
2049 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
2050 	    &pflags, sizeof (pflags));
2051 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2052 	    &mode, sizeof (mode));
2053 
2054 	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
2055 		return (error);
2056 
2057 	/*
2058 	 * When a link is removed its parent pointer is not changed and will
2059 	 * be invalid.  There are two cases where a link is removed but the
2060 	 * file stays around, when it goes to the delete queue and when there
2061 	 * are additional links.
2062 	 */
2063 	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
2064 	if (error != 0)
2065 		return (error);
2066 
2067 	error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
2068 	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
2069 	if (error != 0)
2070 		return (error);
2071 
2072 	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
2073 
2074 	/*
2075 	 * Extended attributes can be applied to files, directories, etc.
2076 	 * Otherwise the parent must be a directory.
2077 	 */
2078 	if (!*is_xattrdir && !S_ISDIR(parent_mode))
2079 		return (SET_ERROR(EINVAL));
2080 
2081 	*pobjp = parent;
2082 
2083 	return (0);
2084 }
2085 
2086 /*
2087  * Given an object number, return some zpl level statistics
2088  */
2089 static int
2090 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
2091     zfs_stat_t *sb)
2092 {
2093 	sa_bulk_attr_t bulk[4];
2094 	int count = 0;
2095 
2096 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
2097 	    &sb->zs_mode, sizeof (sb->zs_mode));
2098 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
2099 	    &sb->zs_gen, sizeof (sb->zs_gen));
2100 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
2101 	    &sb->zs_links, sizeof (sb->zs_links));
2102 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
2103 	    &sb->zs_ctime, sizeof (sb->zs_ctime));
2104 
2105 	return (sa_bulk_lookup(hdl, bulk, count));
2106 }
2107 
2108 static int
2109 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
2110     sa_attr_type_t *sa_table, char *buf, int len)
2111 {
2112 	sa_handle_t *sa_hdl;
2113 	sa_handle_t *prevhdl = NULL;
2114 	dmu_buf_t *prevdb = NULL;
2115 	dmu_buf_t *sa_db = NULL;
2116 	char *path = buf + len - 1;
2117 	int error;
2118 
2119 	*path = '\0';
2120 	sa_hdl = hdl;
2121 
2122 	uint64_t deleteq_obj;
2123 	VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
2124 	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
2125 	error = zap_lookup_int(osp, deleteq_obj, obj);
2126 	if (error == 0) {
2127 		return (ESTALE);
2128 	} else if (error != ENOENT) {
2129 		return (error);
2130 	}
2131 	error = 0;
2132 
2133 	for (;;) {
2134 		uint64_t pobj;
2135 		char component[MAXNAMELEN + 2];
2136 		size_t complen;
2137 		int is_xattrdir;
2138 
2139 		if (prevdb)
2140 			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
2141 
2142 		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
2143 		    &is_xattrdir)) != 0)
2144 			break;
2145 
2146 		if (pobj == obj) {
2147 			if (path[0] != '/')
2148 				*--path = '/';
2149 			break;
2150 		}
2151 
2152 		component[0] = '/';
2153 		if (is_xattrdir) {
2154 			(void) sprintf(component + 1, "<xattrdir>");
2155 		} else {
2156 			error = zap_value_search(osp, pobj, obj,
2157 			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
2158 			if (error != 0)
2159 				break;
2160 		}
2161 
2162 		complen = strlen(component);
2163 		path -= complen;
2164 		ASSERT(path >= buf);
2165 		bcopy(component, path, complen);
2166 		obj = pobj;
2167 
2168 		if (sa_hdl != hdl) {
2169 			prevhdl = sa_hdl;
2170 			prevdb = sa_db;
2171 		}
2172 		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
2173 		if (error != 0) {
2174 			sa_hdl = prevhdl;
2175 			sa_db = prevdb;
2176 			break;
2177 		}
2178 	}
2179 
2180 	if (sa_hdl != NULL && sa_hdl != hdl) {
2181 		ASSERT(sa_db != NULL);
2182 		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
2183 	}
2184 
2185 	if (error == 0)
2186 		(void) memmove(buf, path, buf + len - path);
2187 
2188 	return (error);
2189 }
2190 
2191 int
2192 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
2193 {
2194 	sa_attr_type_t *sa_table;
2195 	sa_handle_t *hdl;
2196 	dmu_buf_t *db;
2197 	int error;
2198 
2199 	error = zfs_sa_setup(osp, &sa_table);
2200 	if (error != 0)
2201 		return (error);
2202 
2203 	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
2204 	if (error != 0)
2205 		return (error);
2206 
2207 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2208 
2209 	zfs_release_sa_handle(hdl, db, FTAG);
2210 	return (error);
2211 }
2212 
2213 int
2214 zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
2215     char *buf, int len)
2216 {
2217 	char *path = buf + len - 1;
2218 	sa_attr_type_t *sa_table;
2219 	sa_handle_t *hdl;
2220 	dmu_buf_t *db;
2221 	int error;
2222 
2223 	*path = '\0';
2224 
2225 	error = zfs_sa_setup(osp, &sa_table);
2226 	if (error != 0)
2227 		return (error);
2228 
2229 	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
2230 	if (error != 0)
2231 		return (error);
2232 
2233 	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
2234 	if (error != 0) {
2235 		zfs_release_sa_handle(hdl, db, FTAG);
2236 		return (error);
2237 	}
2238 
2239 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
2240 
2241 	zfs_release_sa_handle(hdl, db, FTAG);
2242 	return (error);
2243 }
2244