// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 * Copyright (c) 2024, Klara, Inc.
 */

31eda14cbcSMatt Macy #include <sys/zio.h>
32eda14cbcSMatt Macy #include <sys/spa.h>
33eda14cbcSMatt Macy #include <sys/dmu.h>
34eda14cbcSMatt Macy #include <sys/zfs_context.h>
35eda14cbcSMatt Macy #include <sys/zap.h>
36eda14cbcSMatt Macy #include <sys/zap_impl.h>
37eda14cbcSMatt Macy #include <sys/zap_leaf.h>
38dbd5678dSMartin Matuska #include <sys/btree.h>
39eda14cbcSMatt Macy #include <sys/arc.h>
40eda14cbcSMatt Macy #include <sys/dmu_objset.h>
417a7741afSMartin Matuska #include <sys/spa_impl.h>
42eda14cbcSMatt Macy 
43eda14cbcSMatt Macy #ifdef _KERNEL
44eda14cbcSMatt Macy #include <sys/sunddi.h>
45eda14cbcSMatt Macy #endif
46eda14cbcSMatt Macy 
477a7741afSMartin Matuska /*
487a7741afSMartin Matuska  * The maximum size (in bytes) of a microzap before it is converted to a
497a7741afSMartin Matuska  * fatzap. It will be rounded up to next multiple of 512 (SPA_MINBLOCKSIZE).
507a7741afSMartin Matuska  *
517a7741afSMartin Matuska  * By definition, a microzap must fit into a single block, so this has
527a7741afSMartin Matuska  * traditionally been SPA_OLD_MAXBLOCKSIZE, and is set to that by default.
537a7741afSMartin Matuska  * Setting this higher requires both the large_blocks feature (to even create
547a7741afSMartin Matuska  * blocks that large) and the large_microzap feature (to enable the stream
557a7741afSMartin Matuska  * machinery to understand not to try to split a microzap block).
567a7741afSMartin Matuska  *
577a7741afSMartin Matuska  * If large_microzap is enabled, this value will be clamped to
58dd215568SMartin Matuska  * spa_maxblocksize(), up to 1M. If not, it will be clamped to
59dd215568SMartin Matuska  * SPA_OLD_MAXBLOCKSIZE.
607a7741afSMartin Matuska  */
617a7741afSMartin Matuska static int zap_micro_max_size = SPA_OLD_MAXBLOCKSIZE;
627a7741afSMartin Matuska 
/*
 * The 1M upper limit is necessary because the count of chunks in a microzap
 * block is stored as a uint16_t (mze_chunkid). Each chunk is 64 bytes, and the
 * first is used to store a header, so there are 32767 usable chunks, which is
 * just under 2M. 1M is the largest power-2-rounded block size under 2M, so we
 * must set the limit there.
 */
#define	MZAP_MAX_SIZE	(1048576)

727a7741afSMartin Matuska uint64_t
zap_get_micro_max_size(spa_t * spa)737a7741afSMartin Matuska zap_get_micro_max_size(spa_t *spa)
747a7741afSMartin Matuska {
75dd215568SMartin Matuska 	uint64_t maxsz = MIN(MZAP_MAX_SIZE,
76dd215568SMartin Matuska 	    P2ROUNDUP(zap_micro_max_size, SPA_MINBLOCKSIZE));
777a7741afSMartin Matuska 	if (maxsz <= SPA_OLD_MAXBLOCKSIZE)
787a7741afSMartin Matuska 		return (maxsz);
797a7741afSMartin Matuska 	if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_MICROZAP))
807a7741afSMartin Matuska 		return (MIN(maxsz, spa_maxblocksize(spa)));
817a7741afSMartin Matuska 	return (SPA_OLD_MAXBLOCKSIZE);
827a7741afSMartin Matuska }
8315f0b8c3SMartin Matuska 
84eda14cbcSMatt Macy static int mzap_upgrade(zap_t **zapp,
85a0b956f5SMartin Matuska     const void *tag, dmu_tx_t *tx, zap_flags_t flags);
86eda14cbcSMatt Macy 
87eda14cbcSMatt Macy uint64_t
zap_getflags(zap_t * zap)88eda14cbcSMatt Macy zap_getflags(zap_t *zap)
89eda14cbcSMatt Macy {
90eda14cbcSMatt Macy 	if (zap->zap_ismicro)
91eda14cbcSMatt Macy 		return (0);
92eda14cbcSMatt Macy 	return (zap_f_phys(zap)->zap_flags);
93eda14cbcSMatt Macy }
94eda14cbcSMatt Macy 
95eda14cbcSMatt Macy int
zap_hashbits(zap_t * zap)96eda14cbcSMatt Macy zap_hashbits(zap_t *zap)
97eda14cbcSMatt Macy {
98eda14cbcSMatt Macy 	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
99eda14cbcSMatt Macy 		return (48);
100eda14cbcSMatt Macy 	else
101eda14cbcSMatt Macy 		return (28);
102eda14cbcSMatt Macy }
103eda14cbcSMatt Macy 
104eda14cbcSMatt Macy uint32_t
zap_maxcd(zap_t * zap)105eda14cbcSMatt Macy zap_maxcd(zap_t *zap)
106eda14cbcSMatt Macy {
107eda14cbcSMatt Macy 	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
108eda14cbcSMatt Macy 		return ((1<<16)-1);
109eda14cbcSMatt Macy 	else
110eda14cbcSMatt Macy 		return (-1U);
111eda14cbcSMatt Macy }
112eda14cbcSMatt Macy 
113eda14cbcSMatt Macy static uint64_t
zap_hash(zap_name_t * zn)114eda14cbcSMatt Macy zap_hash(zap_name_t *zn)
115eda14cbcSMatt Macy {
116eda14cbcSMatt Macy 	zap_t *zap = zn->zn_zap;
117eda14cbcSMatt Macy 	uint64_t h = 0;
118eda14cbcSMatt Macy 
119eda14cbcSMatt Macy 	if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
120eda14cbcSMatt Macy 		ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
121eda14cbcSMatt Macy 		h = *(uint64_t *)zn->zn_key_orig;
122eda14cbcSMatt Macy 	} else {
123eda14cbcSMatt Macy 		h = zap->zap_salt;
124eda14cbcSMatt Macy 		ASSERT(h != 0);
125eda14cbcSMatt Macy 		ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
126eda14cbcSMatt Macy 
127eda14cbcSMatt Macy 		if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
128eda14cbcSMatt Macy 			const uint64_t *wp = zn->zn_key_norm;
129eda14cbcSMatt Macy 
130eda14cbcSMatt Macy 			ASSERT(zn->zn_key_intlen == 8);
131eda14cbcSMatt Macy 			for (int i = 0; i < zn->zn_key_norm_numints;
132eda14cbcSMatt Macy 			    wp++, i++) {
133eda14cbcSMatt Macy 				uint64_t word = *wp;
134eda14cbcSMatt Macy 
135dbd5678dSMartin Matuska 				for (int j = 0; j < 8; j++) {
136eda14cbcSMatt Macy 					h = (h >> 8) ^
137eda14cbcSMatt Macy 					    zfs_crc64_table[(h ^ word) & 0xFF];
138eda14cbcSMatt Macy 					word >>= NBBY;
139eda14cbcSMatt Macy 				}
140eda14cbcSMatt Macy 			}
141eda14cbcSMatt Macy 		} else {
142eda14cbcSMatt Macy 			const uint8_t *cp = zn->zn_key_norm;
143eda14cbcSMatt Macy 
144eda14cbcSMatt Macy 			/*
145eda14cbcSMatt Macy 			 * We previously stored the terminating null on
146eda14cbcSMatt Macy 			 * disk, but didn't hash it, so we need to
147eda14cbcSMatt Macy 			 * continue to not hash it.  (The
148eda14cbcSMatt Macy 			 * zn_key_*_numints includes the terminating
149eda14cbcSMatt Macy 			 * null for non-binary keys.)
150eda14cbcSMatt Macy 			 */
151eda14cbcSMatt Macy 			int len = zn->zn_key_norm_numints - 1;
152eda14cbcSMatt Macy 
153eda14cbcSMatt Macy 			ASSERT(zn->zn_key_intlen == 1);
154eda14cbcSMatt Macy 			for (int i = 0; i < len; cp++, i++) {
155eda14cbcSMatt Macy 				h = (h >> 8) ^
156eda14cbcSMatt Macy 				    zfs_crc64_table[(h ^ *cp) & 0xFF];
157eda14cbcSMatt Macy 			}
158eda14cbcSMatt Macy 		}
159eda14cbcSMatt Macy 	}
160eda14cbcSMatt Macy 	/*
161eda14cbcSMatt Macy 	 * Don't use all 64 bits, since we need some in the cookie for
162eda14cbcSMatt Macy 	 * the collision differentiator.  We MUST use the high bits,
163eda14cbcSMatt Macy 	 * since those are the ones that we first pay attention to when
164eda14cbcSMatt Macy 	 * choosing the bucket.
165eda14cbcSMatt Macy 	 */
166eda14cbcSMatt Macy 	h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
167eda14cbcSMatt Macy 
168eda14cbcSMatt Macy 	return (h);
169eda14cbcSMatt Macy }
170eda14cbcSMatt Macy 
171eda14cbcSMatt Macy static int
zap_normalize(zap_t * zap,const char * name,char * namenorm,int normflags,size_t outlen)1727a7741afSMartin Matuska zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags,
1737a7741afSMartin Matuska     size_t outlen)
174eda14cbcSMatt Macy {
175eda14cbcSMatt Macy 	ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
176eda14cbcSMatt Macy 
177eda14cbcSMatt Macy 	size_t inlen = strlen(name) + 1;
178eda14cbcSMatt Macy 
179eda14cbcSMatt Macy 	int err = 0;
180eda14cbcSMatt Macy 	(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
181eda14cbcSMatt Macy 	    normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID,
182eda14cbcSMatt Macy 	    U8_UNICODE_LATEST, &err);
183eda14cbcSMatt Macy 
184eda14cbcSMatt Macy 	return (err);
185eda14cbcSMatt Macy }
186eda14cbcSMatt Macy 
187eda14cbcSMatt Macy boolean_t
zap_match(zap_name_t * zn,const char * matchname)188eda14cbcSMatt Macy zap_match(zap_name_t *zn, const char *matchname)
189eda14cbcSMatt Macy {
1907a7741afSMartin Matuska 	boolean_t res = B_FALSE;
191eda14cbcSMatt Macy 	ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
192eda14cbcSMatt Macy 
193eda14cbcSMatt Macy 	if (zn->zn_matchtype & MT_NORMALIZE) {
1947a7741afSMartin Matuska 		size_t namelen = zn->zn_normbuf_len;
1957a7741afSMartin Matuska 		char normbuf[ZAP_MAXNAMELEN];
1967a7741afSMartin Matuska 		char *norm = normbuf;
1977a7741afSMartin Matuska 
1987a7741afSMartin Matuska 		/*
1997a7741afSMartin Matuska 		 * Cannot allocate this on-stack as it exceed the stack-limit of
2007a7741afSMartin Matuska 		 * 1024.
2017a7741afSMartin Matuska 		 */
2027a7741afSMartin Matuska 		if (namelen > ZAP_MAXNAMELEN)
2037a7741afSMartin Matuska 			norm = kmem_alloc(namelen, KM_SLEEP);
204eda14cbcSMatt Macy 
205eda14cbcSMatt Macy 		if (zap_normalize(zn->zn_zap, matchname, norm,
2067a7741afSMartin Matuska 		    zn->zn_normflags, namelen) != 0) {
2077a7741afSMartin Matuska 			res = B_FALSE;
208eda14cbcSMatt Macy 		} else {
2097a7741afSMartin Matuska 			res = (strcmp(zn->zn_key_norm, norm) == 0);
210eda14cbcSMatt Macy 		}
2117a7741afSMartin Matuska 		if (norm != normbuf)
2127a7741afSMartin Matuska 			kmem_free(norm, namelen);
2137a7741afSMartin Matuska 	} else {
2147a7741afSMartin Matuska 		res = (strcmp(zn->zn_key_orig, matchname) == 0);
2157a7741afSMartin Matuska 	}
2167a7741afSMartin Matuska 	return (res);
2177a7741afSMartin Matuska }
2187a7741afSMartin Matuska 
2197a7741afSMartin Matuska static kmem_cache_t *zap_name_cache;
2207a7741afSMartin Matuska static kmem_cache_t *zap_attr_cache;
2217a7741afSMartin Matuska static kmem_cache_t *zap_name_long_cache;
2227a7741afSMartin Matuska static kmem_cache_t *zap_attr_long_cache;
2237a7741afSMartin Matuska 
2247a7741afSMartin Matuska void
zap_init(void)2257a7741afSMartin Matuska zap_init(void)
2267a7741afSMartin Matuska {
2277a7741afSMartin Matuska 	zap_name_cache = kmem_cache_create("zap_name",
2287a7741afSMartin Matuska 	    sizeof (zap_name_t) + ZAP_MAXNAMELEN, 0, NULL, NULL,
2297a7741afSMartin Matuska 	    NULL, NULL, NULL, 0);
2307a7741afSMartin Matuska 
2317a7741afSMartin Matuska 	zap_attr_cache = kmem_cache_create("zap_attr_cache",
2327a7741afSMartin Matuska 	    sizeof (zap_attribute_t) + ZAP_MAXNAMELEN,  0, NULL,
2337a7741afSMartin Matuska 	    NULL, NULL, NULL, NULL, 0);
2347a7741afSMartin Matuska 
2357a7741afSMartin Matuska 	zap_name_long_cache = kmem_cache_create("zap_name_long",
2367a7741afSMartin Matuska 	    sizeof (zap_name_t) + ZAP_MAXNAMELEN_NEW, 0, NULL, NULL,
2377a7741afSMartin Matuska 	    NULL, NULL, NULL, 0);
2387a7741afSMartin Matuska 
2397a7741afSMartin Matuska 	zap_attr_long_cache = kmem_cache_create("zap_attr_long_cache",
2407a7741afSMartin Matuska 	    sizeof (zap_attribute_t) + ZAP_MAXNAMELEN_NEW,  0, NULL,
2417a7741afSMartin Matuska 	    NULL, NULL, NULL, NULL, 0);
2427a7741afSMartin Matuska }
2437a7741afSMartin Matuska 
2447a7741afSMartin Matuska void
zap_fini(void)2457a7741afSMartin Matuska zap_fini(void)
2467a7741afSMartin Matuska {
2477a7741afSMartin Matuska 	kmem_cache_destroy(zap_name_cache);
2487a7741afSMartin Matuska 	kmem_cache_destroy(zap_attr_cache);
2497a7741afSMartin Matuska 	kmem_cache_destroy(zap_name_long_cache);
2507a7741afSMartin Matuska 	kmem_cache_destroy(zap_attr_long_cache);
251eda14cbcSMatt Macy }
252eda14cbcSMatt Macy 
253dbd5678dSMartin Matuska static zap_name_t *
zap_name_alloc(zap_t * zap,boolean_t longname)2547a7741afSMartin Matuska zap_name_alloc(zap_t *zap, boolean_t longname)
255dbd5678dSMartin Matuska {
2567a7741afSMartin Matuska 	kmem_cache_t *cache = longname ? zap_name_long_cache : zap_name_cache;
2577a7741afSMartin Matuska 	zap_name_t *zn = kmem_cache_alloc(cache, KM_SLEEP);
2587a7741afSMartin Matuska 
259dbd5678dSMartin Matuska 	zn->zn_zap = zap;
2607a7741afSMartin Matuska 	zn->zn_normbuf_len = longname ? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;
261dbd5678dSMartin Matuska 	return (zn);
262dbd5678dSMartin Matuska }
263dbd5678dSMartin Matuska 
264eda14cbcSMatt Macy void
zap_name_free(zap_name_t * zn)265eda14cbcSMatt Macy zap_name_free(zap_name_t *zn)
266eda14cbcSMatt Macy {
2677a7741afSMartin Matuska 	if (zn->zn_normbuf_len == ZAP_MAXNAMELEN) {
2687a7741afSMartin Matuska 		kmem_cache_free(zap_name_cache, zn);
2697a7741afSMartin Matuska 	} else {
2707a7741afSMartin Matuska 		ASSERT3U(zn->zn_normbuf_len, ==, ZAP_MAXNAMELEN_NEW);
2717a7741afSMartin Matuska 		kmem_cache_free(zap_name_long_cache, zn);
2727a7741afSMartin Matuska 	}
273eda14cbcSMatt Macy }
274eda14cbcSMatt Macy 
275dbd5678dSMartin Matuska static int
zap_name_init_str(zap_name_t * zn,const char * key,matchtype_t mt)276dbd5678dSMartin Matuska zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
277eda14cbcSMatt Macy {
278dbd5678dSMartin Matuska 	zap_t *zap = zn->zn_zap;
2797a7741afSMartin Matuska 	size_t key_len = strlen(key) + 1;
2807a7741afSMartin Matuska 
2817a7741afSMartin Matuska 	/* Make sure zn is allocated for longname if key is long */
2827a7741afSMartin Matuska 	IMPLY(key_len > ZAP_MAXNAMELEN,
2837a7741afSMartin Matuska 	    zn->zn_normbuf_len == ZAP_MAXNAMELEN_NEW);
284eda14cbcSMatt Macy 
285eda14cbcSMatt Macy 	zn->zn_key_intlen = sizeof (*key);
286eda14cbcSMatt Macy 	zn->zn_key_orig = key;
2877a7741afSMartin Matuska 	zn->zn_key_orig_numints = key_len;
288eda14cbcSMatt Macy 	zn->zn_matchtype = mt;
289eda14cbcSMatt Macy 	zn->zn_normflags = zap->zap_normflags;
290eda14cbcSMatt Macy 
291eda14cbcSMatt Macy 	/*
292eda14cbcSMatt Macy 	 * If we're dealing with a case sensitive lookup on a mixed or
293eda14cbcSMatt Macy 	 * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup
294eda14cbcSMatt Macy 	 * will fold case to all caps overriding the lookup request.
295eda14cbcSMatt Macy 	 */
296eda14cbcSMatt Macy 	if (mt & MT_MATCH_CASE)
297eda14cbcSMatt Macy 		zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER;
298eda14cbcSMatt Macy 
299eda14cbcSMatt Macy 	if (zap->zap_normflags) {
300eda14cbcSMatt Macy 		/*
301eda14cbcSMatt Macy 		 * We *must* use zap_normflags because this normalization is
302eda14cbcSMatt Macy 		 * what the hash is computed from.
303eda14cbcSMatt Macy 		 */
304eda14cbcSMatt Macy 		if (zap_normalize(zap, key, zn->zn_normbuf,
3057a7741afSMartin Matuska 		    zap->zap_normflags, zn->zn_normbuf_len) != 0)
306dbd5678dSMartin Matuska 			return (SET_ERROR(ENOTSUP));
307eda14cbcSMatt Macy 		zn->zn_key_norm = zn->zn_normbuf;
308eda14cbcSMatt Macy 		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
309eda14cbcSMatt Macy 	} else {
310dbd5678dSMartin Matuska 		if (mt != 0)
311dbd5678dSMartin Matuska 			return (SET_ERROR(ENOTSUP));
312eda14cbcSMatt Macy 		zn->zn_key_norm = zn->zn_key_orig;
313eda14cbcSMatt Macy 		zn->zn_key_norm_numints = zn->zn_key_orig_numints;
314eda14cbcSMatt Macy 	}
315eda14cbcSMatt Macy 
316eda14cbcSMatt Macy 	zn->zn_hash = zap_hash(zn);
317eda14cbcSMatt Macy 
318eda14cbcSMatt Macy 	if (zap->zap_normflags != zn->zn_normflags) {
319eda14cbcSMatt Macy 		/*
320eda14cbcSMatt Macy 		 * We *must* use zn_normflags because this normalization is
321eda14cbcSMatt Macy 		 * what the matching is based on.  (Not the hash!)
322eda14cbcSMatt Macy 		 */
323eda14cbcSMatt Macy 		if (zap_normalize(zap, key, zn->zn_normbuf,
3247a7741afSMartin Matuska 		    zn->zn_normflags, zn->zn_normbuf_len) != 0)
325dbd5678dSMartin Matuska 			return (SET_ERROR(ENOTSUP));
326eda14cbcSMatt Macy 		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
327eda14cbcSMatt Macy 	}
328eda14cbcSMatt Macy 
329dbd5678dSMartin Matuska 	return (0);
330dbd5678dSMartin Matuska }
331dbd5678dSMartin Matuska 
332dbd5678dSMartin Matuska zap_name_t *
zap_name_alloc_str(zap_t * zap,const char * key,matchtype_t mt)333dbd5678dSMartin Matuska zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
334dbd5678dSMartin Matuska {
3357a7741afSMartin Matuska 	size_t key_len = strlen(key) + 1;
3367a7741afSMartin Matuska 	zap_name_t *zn = zap_name_alloc(zap, (key_len > ZAP_MAXNAMELEN));
337dbd5678dSMartin Matuska 	if (zap_name_init_str(zn, key, mt) != 0) {
338dbd5678dSMartin Matuska 		zap_name_free(zn);
339dbd5678dSMartin Matuska 		return (NULL);
340dbd5678dSMartin Matuska 	}
341eda14cbcSMatt Macy 	return (zn);
342eda14cbcSMatt Macy }
343eda14cbcSMatt Macy 
344eda14cbcSMatt Macy static zap_name_t *
zap_name_alloc_uint64(zap_t * zap,const uint64_t * key,int numints)345eda14cbcSMatt Macy zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
346eda14cbcSMatt Macy {
3477a7741afSMartin Matuska 	zap_name_t *zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP);
348eda14cbcSMatt Macy 
349eda14cbcSMatt Macy 	ASSERT(zap->zap_normflags == 0);
350eda14cbcSMatt Macy 	zn->zn_zap = zap;
351eda14cbcSMatt Macy 	zn->zn_key_intlen = sizeof (*key);
352eda14cbcSMatt Macy 	zn->zn_key_orig = zn->zn_key_norm = key;
353eda14cbcSMatt Macy 	zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
354eda14cbcSMatt Macy 	zn->zn_matchtype = 0;
3557a7741afSMartin Matuska 	zn->zn_normbuf_len = ZAP_MAXNAMELEN;
356eda14cbcSMatt Macy 
357eda14cbcSMatt Macy 	zn->zn_hash = zap_hash(zn);
358eda14cbcSMatt Macy 	return (zn);
359eda14cbcSMatt Macy }
360eda14cbcSMatt Macy 
361eda14cbcSMatt Macy static void
mzap_byteswap(mzap_phys_t * buf,size_t size)362eda14cbcSMatt Macy mzap_byteswap(mzap_phys_t *buf, size_t size)
363eda14cbcSMatt Macy {
364eda14cbcSMatt Macy 	buf->mz_block_type = BSWAP_64(buf->mz_block_type);
365eda14cbcSMatt Macy 	buf->mz_salt = BSWAP_64(buf->mz_salt);
366eda14cbcSMatt Macy 	buf->mz_normflags = BSWAP_64(buf->mz_normflags);
367eda14cbcSMatt Macy 	int max = (size / MZAP_ENT_LEN) - 1;
368eda14cbcSMatt Macy 	for (int i = 0; i < max; i++) {
369eda14cbcSMatt Macy 		buf->mz_chunk[i].mze_value =
370eda14cbcSMatt Macy 		    BSWAP_64(buf->mz_chunk[i].mze_value);
371eda14cbcSMatt Macy 		buf->mz_chunk[i].mze_cd =
372eda14cbcSMatt Macy 		    BSWAP_32(buf->mz_chunk[i].mze_cd);
373eda14cbcSMatt Macy 	}
374eda14cbcSMatt Macy }
375eda14cbcSMatt Macy 
376eda14cbcSMatt Macy void
zap_byteswap(void * buf,size_t size)377eda14cbcSMatt Macy zap_byteswap(void *buf, size_t size)
378eda14cbcSMatt Macy {
379eda14cbcSMatt Macy 	uint64_t block_type = *(uint64_t *)buf;
380eda14cbcSMatt Macy 
381eda14cbcSMatt Macy 	if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
382eda14cbcSMatt Macy 		/* ASSERT(magic == ZAP_LEAF_MAGIC); */
383eda14cbcSMatt Macy 		mzap_byteswap(buf, size);
384eda14cbcSMatt Macy 	} else {
385eda14cbcSMatt Macy 		fzap_byteswap(buf, size);
386eda14cbcSMatt Macy 	}
387eda14cbcSMatt Macy }
388eda14cbcSMatt Macy 
3894e8d558cSMartin Matuska __attribute__((always_inline)) inline
390eda14cbcSMatt Macy static int
mze_compare(const void * arg1,const void * arg2)391eda14cbcSMatt Macy mze_compare(const void *arg1, const void *arg2)
392eda14cbcSMatt Macy {
393eda14cbcSMatt Macy 	const mzap_ent_t *mze1 = arg1;
394eda14cbcSMatt Macy 	const mzap_ent_t *mze2 = arg2;
395eda14cbcSMatt Macy 
396dbd5678dSMartin Matuska 	return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd,
397dbd5678dSMartin Matuska 	    (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
398eda14cbcSMatt Macy }
399eda14cbcSMatt Macy 
ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf,mzap_ent_t,mze_compare)4004e8d558cSMartin Matuska ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t,
4014e8d558cSMartin Matuska     mze_compare)
4024e8d558cSMartin Matuska 
403eda14cbcSMatt Macy static void
404dbd5678dSMartin Matuska mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
405eda14cbcSMatt Macy {
406dbd5678dSMartin Matuska 	mzap_ent_t mze;
407dbd5678dSMartin Matuska 
408eda14cbcSMatt Macy 	ASSERT(zap->zap_ismicro);
409eda14cbcSMatt Macy 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
410eda14cbcSMatt Macy 
411dbd5678dSMartin Matuska 	mze.mze_chunkid = chunkid;
412dbd5678dSMartin Matuska 	ASSERT0(hash & 0xffffffff);
413dbd5678dSMartin Matuska 	mze.mze_hash = hash >> 32;
414dbd5678dSMartin Matuska 	ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff);
415dbd5678dSMartin Matuska 	mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd;
416dbd5678dSMartin Matuska 	ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0);
417dbd5678dSMartin Matuska 	zfs_btree_add(&zap->zap_m.zap_tree, &mze);
418eda14cbcSMatt Macy }
419eda14cbcSMatt Macy 
420eda14cbcSMatt Macy static mzap_ent_t *
mze_find(zap_name_t * zn,zfs_btree_index_t * idx)421dbd5678dSMartin Matuska mze_find(zap_name_t *zn, zfs_btree_index_t *idx)
422eda14cbcSMatt Macy {
423eda14cbcSMatt Macy 	mzap_ent_t mze_tofind;
424eda14cbcSMatt Macy 	mzap_ent_t *mze;
425dbd5678dSMartin Matuska 	zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree;
426eda14cbcSMatt Macy 
427eda14cbcSMatt Macy 	ASSERT(zn->zn_zap->zap_ismicro);
428eda14cbcSMatt Macy 	ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
429eda14cbcSMatt Macy 
430dbd5678dSMartin Matuska 	ASSERT0(zn->zn_hash & 0xffffffff);
431dbd5678dSMartin Matuska 	mze_tofind.mze_hash = zn->zn_hash >> 32;
432eda14cbcSMatt Macy 	mze_tofind.mze_cd = 0;
433eda14cbcSMatt Macy 
434dbd5678dSMartin Matuska 	mze = zfs_btree_find(tree, &mze_tofind, idx);
435eda14cbcSMatt Macy 	if (mze == NULL)
436dbd5678dSMartin Matuska 		mze = zfs_btree_next(tree, idx, idx);
437dbd5678dSMartin Matuska 	for (; mze && mze->mze_hash == mze_tofind.mze_hash;
438dbd5678dSMartin Matuska 	    mze = zfs_btree_next(tree, idx, idx)) {
439eda14cbcSMatt Macy 		ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
440eda14cbcSMatt Macy 		if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
441eda14cbcSMatt Macy 			return (mze);
442eda14cbcSMatt Macy 	}
443eda14cbcSMatt Macy 
444eda14cbcSMatt Macy 	return (NULL);
445eda14cbcSMatt Macy }
446eda14cbcSMatt Macy 
447eda14cbcSMatt Macy static uint32_t
mze_find_unused_cd(zap_t * zap,uint64_t hash)448eda14cbcSMatt Macy mze_find_unused_cd(zap_t *zap, uint64_t hash)
449eda14cbcSMatt Macy {
450eda14cbcSMatt Macy 	mzap_ent_t mze_tofind;
451dbd5678dSMartin Matuska 	zfs_btree_index_t idx;
452dbd5678dSMartin Matuska 	zfs_btree_t *tree = &zap->zap_m.zap_tree;
453eda14cbcSMatt Macy 
454eda14cbcSMatt Macy 	ASSERT(zap->zap_ismicro);
455eda14cbcSMatt Macy 	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
456eda14cbcSMatt Macy 
457dbd5678dSMartin Matuska 	ASSERT0(hash & 0xffffffff);
458dbd5678dSMartin Matuska 	hash >>= 32;
459eda14cbcSMatt Macy 	mze_tofind.mze_hash = hash;
460eda14cbcSMatt Macy 	mze_tofind.mze_cd = 0;
461eda14cbcSMatt Macy 
462eda14cbcSMatt Macy 	uint32_t cd = 0;
463dbd5678dSMartin Matuska 	for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
464dbd5678dSMartin Matuska 	    mze && mze->mze_hash == hash;
465dbd5678dSMartin Matuska 	    mze = zfs_btree_next(tree, &idx, &idx)) {
466eda14cbcSMatt Macy 		if (mze->mze_cd != cd)
467eda14cbcSMatt Macy 			break;
468eda14cbcSMatt Macy 		cd++;
469eda14cbcSMatt Macy 	}
470eda14cbcSMatt Macy 
471eda14cbcSMatt Macy 	return (cd);
472eda14cbcSMatt Macy }
473eda14cbcSMatt Macy 
/*
 * Each mzap entry requires at max : 4 chunks
 * 3 chunks for names + 1 chunk for value.
 */
#define	MZAP_ENT_CHUNKS	(1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
	ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))

481eda14cbcSMatt Macy /*
482eda14cbcSMatt Macy  * Check if the current entry keeps the colliding entries under the fatzap leaf
483eda14cbcSMatt Macy  * size.
484eda14cbcSMatt Macy  */
485eda14cbcSMatt Macy static boolean_t
mze_canfit_fzap_leaf(zap_name_t * zn,uint64_t hash)486eda14cbcSMatt Macy mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
487eda14cbcSMatt Macy {
488eda14cbcSMatt Macy 	zap_t *zap = zn->zn_zap;
489eda14cbcSMatt Macy 	mzap_ent_t mze_tofind;
490dbd5678dSMartin Matuska 	zfs_btree_index_t idx;
491dbd5678dSMartin Matuska 	zfs_btree_t *tree = &zap->zap_m.zap_tree;
492eda14cbcSMatt Macy 	uint32_t mzap_ents = 0;
493eda14cbcSMatt Macy 
494dbd5678dSMartin Matuska 	ASSERT0(hash & 0xffffffff);
495dbd5678dSMartin Matuska 	hash >>= 32;
496eda14cbcSMatt Macy 	mze_tofind.mze_hash = hash;
497eda14cbcSMatt Macy 	mze_tofind.mze_cd = 0;
498eda14cbcSMatt Macy 
499dbd5678dSMartin Matuska 	for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
500dbd5678dSMartin Matuska 	    mze && mze->mze_hash == hash;
501dbd5678dSMartin Matuska 	    mze = zfs_btree_next(tree, &idx, &idx)) {
502eda14cbcSMatt Macy 		mzap_ents++;
503eda14cbcSMatt Macy 	}
504eda14cbcSMatt Macy 
505eda14cbcSMatt Macy 	/* Include the new entry being added */
506eda14cbcSMatt Macy 	mzap_ents++;
507eda14cbcSMatt Macy 
508eda14cbcSMatt Macy 	return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
509eda14cbcSMatt Macy }
510eda14cbcSMatt Macy 
511eda14cbcSMatt Macy static void
mze_destroy(zap_t * zap)512eda14cbcSMatt Macy mze_destroy(zap_t *zap)
513eda14cbcSMatt Macy {
514dbd5678dSMartin Matuska 	zfs_btree_clear(&zap->zap_m.zap_tree);
515dbd5678dSMartin Matuska 	zfs_btree_destroy(&zap->zap_m.zap_tree);
516eda14cbcSMatt Macy }
517eda14cbcSMatt Macy 
518eda14cbcSMatt Macy static zap_t *
mzap_open(dmu_buf_t * db)519783d3ff6SMartin Matuska mzap_open(dmu_buf_t *db)
520eda14cbcSMatt Macy {
521eda14cbcSMatt Macy 	zap_t *winner;
522eda14cbcSMatt Macy 	uint64_t *zap_hdr = (uint64_t *)db->db_data;
523eda14cbcSMatt Macy 	uint64_t zap_block_type = zap_hdr[0];
524eda14cbcSMatt Macy 	uint64_t zap_magic = zap_hdr[1];
525eda14cbcSMatt Macy 
526eda14cbcSMatt Macy 	ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
527eda14cbcSMatt Macy 
528eda14cbcSMatt Macy 	zap_t *zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
529eda14cbcSMatt Macy 	rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL);
530eda14cbcSMatt Macy 	rw_enter(&zap->zap_rwlock, RW_WRITER);
531783d3ff6SMartin Matuska 	zap->zap_objset = dmu_buf_get_objset(db);
532783d3ff6SMartin Matuska 	zap->zap_object = db->db_object;
533eda14cbcSMatt Macy 	zap->zap_dbuf = db;
534eda14cbcSMatt Macy 
535eda14cbcSMatt Macy 	if (zap_block_type != ZBT_MICRO) {
536eda14cbcSMatt Macy 		mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT,
537eda14cbcSMatt Macy 		    0);
538eda14cbcSMatt Macy 		zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
539eda14cbcSMatt Macy 		if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) {
540eda14cbcSMatt Macy 			winner = NULL;	/* No actual winner here... */
541eda14cbcSMatt Macy 			goto handle_winner;
542eda14cbcSMatt Macy 		}
543eda14cbcSMatt Macy 	} else {
544eda14cbcSMatt Macy 		zap->zap_ismicro = TRUE;
545eda14cbcSMatt Macy 	}
546eda14cbcSMatt Macy 
547eda14cbcSMatt Macy 	/*
548eda14cbcSMatt Macy 	 * Make sure that zap_ismicro is set before we let others see
549eda14cbcSMatt Macy 	 * it, because zap_lockdir() checks zap_ismicro without the lock
550eda14cbcSMatt Macy 	 * held.
551eda14cbcSMatt Macy 	 */
552eda14cbcSMatt Macy 	dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
553eda14cbcSMatt Macy 	winner = dmu_buf_set_user(db, &zap->zap_dbu);
554eda14cbcSMatt Macy 
555eda14cbcSMatt Macy 	if (winner != NULL)
556eda14cbcSMatt Macy 		goto handle_winner;
557eda14cbcSMatt Macy 
558eda14cbcSMatt Macy 	if (zap->zap_ismicro) {
559eda14cbcSMatt Macy 		zap->zap_salt = zap_m_phys(zap)->mz_salt;
560eda14cbcSMatt Macy 		zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
561eda14cbcSMatt Macy 		zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
562eda14cbcSMatt Macy 
563dbd5678dSMartin Matuska 		/*
564dbd5678dSMartin Matuska 		 * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove()
565dbd5678dSMartin Matuska 		 * overhead on massive inserts below.  It still allows to store
566dbd5678dSMartin Matuska 		 * 62 entries before we have to add 2KB B-tree core node.
567dbd5678dSMartin Matuska 		 */
568dbd5678dSMartin Matuska 		zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
5694e8d558cSMartin Matuska 		    mze_find_in_buf, sizeof (mzap_ent_t), 512);
570dbd5678dSMartin Matuska 
5717a7741afSMartin Matuska 		zap_name_t *zn = zap_name_alloc(zap, B_FALSE);
572dbd5678dSMartin Matuska 		for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
573eda14cbcSMatt Macy 			mzap_ent_phys_t *mze =
574eda14cbcSMatt Macy 			    &zap_m_phys(zap)->mz_chunk[i];
575eda14cbcSMatt Macy 			if (mze->mze_name[0]) {
576eda14cbcSMatt Macy 				zap->zap_m.zap_num_entries++;
577dbd5678dSMartin Matuska 				zap_name_init_str(zn, mze->mze_name, 0);
578eda14cbcSMatt Macy 				mze_insert(zap, i, zn->zn_hash);
579dbd5678dSMartin Matuska 			}
580dbd5678dSMartin Matuska 		}
581eda14cbcSMatt Macy 		zap_name_free(zn);
582eda14cbcSMatt Macy 	} else {
583eda14cbcSMatt Macy 		zap->zap_salt = zap_f_phys(zap)->zap_salt;
584eda14cbcSMatt Macy 		zap->zap_normflags = zap_f_phys(zap)->zap_normflags;
585eda14cbcSMatt Macy 
586eda14cbcSMatt Macy 		ASSERT3U(sizeof (struct zap_leaf_header), ==,
587eda14cbcSMatt Macy 		    2*ZAP_LEAF_CHUNKSIZE);
588eda14cbcSMatt Macy 
589eda14cbcSMatt Macy 		/*
590eda14cbcSMatt Macy 		 * The embedded pointer table should not overlap the
591eda14cbcSMatt Macy 		 * other members.
592eda14cbcSMatt Macy 		 */
593eda14cbcSMatt Macy 		ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
594eda14cbcSMatt Macy 		    &zap_f_phys(zap)->zap_salt);
595eda14cbcSMatt Macy 
596eda14cbcSMatt Macy 		/*
597eda14cbcSMatt Macy 		 * The embedded pointer table should end at the end of
598eda14cbcSMatt Macy 		 * the block
599eda14cbcSMatt Macy 		 */
600eda14cbcSMatt Macy 		ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
601eda14cbcSMatt Macy 		    1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
602eda14cbcSMatt Macy 		    (uintptr_t)zap_f_phys(zap), ==,
603eda14cbcSMatt Macy 		    zap->zap_dbuf->db_size);
604eda14cbcSMatt Macy 	}
605eda14cbcSMatt Macy 	rw_exit(&zap->zap_rwlock);
606eda14cbcSMatt Macy 	return (zap);
607eda14cbcSMatt Macy 
608eda14cbcSMatt Macy handle_winner:
609eda14cbcSMatt Macy 	rw_exit(&zap->zap_rwlock);
610eda14cbcSMatt Macy 	rw_destroy(&zap->zap_rwlock);
611eda14cbcSMatt Macy 	if (!zap->zap_ismicro)
612eda14cbcSMatt Macy 		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
613eda14cbcSMatt Macy 	kmem_free(zap, sizeof (zap_t));
614eda14cbcSMatt Macy 	return (winner);
615eda14cbcSMatt Macy }
616eda14cbcSMatt Macy 
/*
 * This routine "consumes" the caller's hold on the dbuf, which must
 * have the specified tag.
 */
621eda14cbcSMatt Macy static int
zap_lockdir_impl(dnode_t * dn,dmu_buf_t * db,const void * tag,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,zap_t ** zapp)622783d3ff6SMartin Matuska zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
623eda14cbcSMatt Macy     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
624eda14cbcSMatt Macy {
625eda14cbcSMatt Macy 	ASSERT0(db->db_offset);
626eda14cbcSMatt Macy 	objset_t *os = dmu_buf_get_objset(db);
627eda14cbcSMatt Macy 	uint64_t obj = db->db_object;
628eda14cbcSMatt Macy 	dmu_object_info_t doi;
629eda14cbcSMatt Macy 
630eda14cbcSMatt Macy 	*zapp = NULL;
631eda14cbcSMatt Macy 
632783d3ff6SMartin Matuska 	dmu_object_info_from_dnode(dn, &doi);
633eda14cbcSMatt Macy 	if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
634eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
635eda14cbcSMatt Macy 
636eda14cbcSMatt Macy 	zap_t *zap = dmu_buf_get_user(db);
637eda14cbcSMatt Macy 	if (zap == NULL) {
638783d3ff6SMartin Matuska 		zap = mzap_open(db);
639eda14cbcSMatt Macy 		if (zap == NULL) {
640eda14cbcSMatt Macy 			/*
641eda14cbcSMatt Macy 			 * mzap_open() didn't like what it saw on-disk.
642eda14cbcSMatt Macy 			 * Check for corruption!
643eda14cbcSMatt Macy 			 */
644eda14cbcSMatt Macy 			return (SET_ERROR(EIO));
645eda14cbcSMatt Macy 		}
646eda14cbcSMatt Macy 	}
647eda14cbcSMatt Macy 
648eda14cbcSMatt Macy 	/*
649eda14cbcSMatt Macy 	 * We're checking zap_ismicro without the lock held, in order to
650eda14cbcSMatt Macy 	 * tell what type of lock we want.  Once we have some sort of
651eda14cbcSMatt Macy 	 * lock, see if it really is the right type.  In practice this
652eda14cbcSMatt Macy 	 * can only be different if it was upgraded from micro to fat,
653eda14cbcSMatt Macy 	 * and micro wanted WRITER but fat only needs READER.
654eda14cbcSMatt Macy 	 */
655eda14cbcSMatt Macy 	krw_t lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
656eda14cbcSMatt Macy 	rw_enter(&zap->zap_rwlock, lt);
657eda14cbcSMatt Macy 	if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
658eda14cbcSMatt Macy 		/* it was upgraded, now we only need reader */
659eda14cbcSMatt Macy 		ASSERT(lt == RW_WRITER);
660eda14cbcSMatt Macy 		ASSERT(RW_READER ==
661eda14cbcSMatt Macy 		    ((!zap->zap_ismicro && fatreader) ? RW_READER : lti));
662eda14cbcSMatt Macy 		rw_downgrade(&zap->zap_rwlock);
663eda14cbcSMatt Macy 		lt = RW_READER;
664eda14cbcSMatt Macy 	}
665eda14cbcSMatt Macy 
666eda14cbcSMatt Macy 	zap->zap_objset = os;
667783d3ff6SMartin Matuska 	zap->zap_dnode = dn;
668eda14cbcSMatt Macy 
669eda14cbcSMatt Macy 	if (lt == RW_WRITER)
670eda14cbcSMatt Macy 		dmu_buf_will_dirty(db, tx);
671eda14cbcSMatt Macy 
672eda14cbcSMatt Macy 	ASSERT3P(zap->zap_dbuf, ==, db);
673eda14cbcSMatt Macy 
674eda14cbcSMatt Macy 	ASSERT(!zap->zap_ismicro ||
675eda14cbcSMatt Macy 	    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
676eda14cbcSMatt Macy 	if (zap->zap_ismicro && tx && adding &&
677eda14cbcSMatt Macy 	    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
678eda14cbcSMatt Macy 		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
6797a7741afSMartin Matuska 		if (newsz > zap_get_micro_max_size(dmu_objset_spa(os))) {
680eda14cbcSMatt Macy 			dprintf("upgrading obj %llu: num_entries=%u\n",
68133b8c039SMartin Matuska 			    (u_longlong_t)obj, zap->zap_m.zap_num_entries);
682eda14cbcSMatt Macy 			*zapp = zap;
683eda14cbcSMatt Macy 			int err = mzap_upgrade(zapp, tag, tx, 0);
684eda14cbcSMatt Macy 			if (err != 0)
685eda14cbcSMatt Macy 				rw_exit(&zap->zap_rwlock);
686eda14cbcSMatt Macy 			return (err);
687eda14cbcSMatt Macy 		}
688eda14cbcSMatt Macy 		VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
689eda14cbcSMatt Macy 		zap->zap_m.zap_num_chunks =
690eda14cbcSMatt Macy 		    db->db_size / MZAP_ENT_LEN - 1;
6917a7741afSMartin Matuska 
6927a7741afSMartin Matuska 		if (newsz > SPA_OLD_MAXBLOCKSIZE) {
6937a7741afSMartin Matuska 			dsl_dataset_t *ds = dmu_objset_ds(os);
6947a7741afSMartin Matuska 			if (!dsl_dataset_feature_is_active(ds,
6957a7741afSMartin Matuska 			    SPA_FEATURE_LARGE_MICROZAP)) {
6967a7741afSMartin Matuska 				/*
6977a7741afSMartin Matuska 				 * A microzap just grew beyond the old limit
6987a7741afSMartin Matuska 				 * for the first time, so we have to ensure the
6997a7741afSMartin Matuska 				 * feature flag is activated.
7007a7741afSMartin Matuska 				 * zap_get_micro_max_size() won't let us get
7017a7741afSMartin Matuska 				 * here if the feature is not enabled, so we
7027a7741afSMartin Matuska 				 * don't need any other checks beforehand.
7037a7741afSMartin Matuska 				 *
7047a7741afSMartin Matuska 				 * Since we're in open context, we can't
7057a7741afSMartin Matuska 				 * activate the feature directly, so we instead
7067a7741afSMartin Matuska 				 * flag it on the dataset for next sync.
7077a7741afSMartin Matuska 				 */
7087a7741afSMartin Matuska 				dsl_dataset_dirty(ds, tx);
7097a7741afSMartin Matuska 				mutex_enter(&ds->ds_lock);
7107a7741afSMartin Matuska 				ds->ds_feature_activation
7117a7741afSMartin Matuska 				    [SPA_FEATURE_LARGE_MICROZAP] =
7127a7741afSMartin Matuska 				    (void *)B_TRUE;
7137a7741afSMartin Matuska 				mutex_exit(&ds->ds_lock);
7147a7741afSMartin Matuska 			}
7157a7741afSMartin Matuska 		}
716eda14cbcSMatt Macy 	}
717eda14cbcSMatt Macy 
718eda14cbcSMatt Macy 	*zapp = zap;
719eda14cbcSMatt Macy 	return (0);
720eda14cbcSMatt Macy }
721eda14cbcSMatt Macy 
722eda14cbcSMatt Macy static int
zap_lockdir_by_dnode(dnode_t * dn,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,const void * tag,zap_t ** zapp)723eda14cbcSMatt Macy zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
724a0b956f5SMartin Matuska     krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
725a0b956f5SMartin Matuska     zap_t **zapp)
726eda14cbcSMatt Macy {
727eda14cbcSMatt Macy 	dmu_buf_t *db;
728783d3ff6SMartin Matuska 	int err;
729eda14cbcSMatt Macy 
730783d3ff6SMartin Matuska 	err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
731783d3ff6SMartin Matuska 	if (err != 0)
732eda14cbcSMatt Macy 		return (err);
733783d3ff6SMartin Matuska 	err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
734783d3ff6SMartin Matuska 	if (err != 0)
735eda14cbcSMatt Macy 		dmu_buf_rele(db, tag);
736783d3ff6SMartin Matuska 	else
737783d3ff6SMartin Matuska 		VERIFY(dnode_add_ref(dn, tag));
738eda14cbcSMatt Macy 	return (err);
739eda14cbcSMatt Macy }
740eda14cbcSMatt Macy 
741eda14cbcSMatt Macy int
zap_lockdir(objset_t * os,uint64_t obj,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,const void * tag,zap_t ** zapp)742eda14cbcSMatt Macy zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
743a0b956f5SMartin Matuska     krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
744a0b956f5SMartin Matuska     zap_t **zapp)
745eda14cbcSMatt Macy {
746783d3ff6SMartin Matuska 	dnode_t *dn;
747eda14cbcSMatt Macy 	dmu_buf_t *db;
748783d3ff6SMartin Matuska 	int err;
749eda14cbcSMatt Macy 
750783d3ff6SMartin Matuska 	err = dnode_hold(os, obj, tag, &dn);
751eda14cbcSMatt Macy 	if (err != 0)
752eda14cbcSMatt Macy 		return (err);
753783d3ff6SMartin Matuska 	err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
754783d3ff6SMartin Matuska 	if (err != 0) {
755783d3ff6SMartin Matuska 		dnode_rele(dn, tag);
756783d3ff6SMartin Matuska 		return (err);
757eda14cbcSMatt Macy 	}
758783d3ff6SMartin Matuska 	err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
759783d3ff6SMartin Matuska 	if (err != 0) {
760eda14cbcSMatt Macy 		dmu_buf_rele(db, tag);
761783d3ff6SMartin Matuska 		dnode_rele(dn, tag);
762783d3ff6SMartin Matuska 	}
763eda14cbcSMatt Macy 	return (err);
764eda14cbcSMatt Macy }
765eda14cbcSMatt Macy 
766eda14cbcSMatt Macy void
zap_unlockdir(zap_t * zap,const void * tag)767a0b956f5SMartin Matuska zap_unlockdir(zap_t *zap, const void *tag)
768eda14cbcSMatt Macy {
769eda14cbcSMatt Macy 	rw_exit(&zap->zap_rwlock);
770783d3ff6SMartin Matuska 	dnode_rele(zap->zap_dnode, tag);
771eda14cbcSMatt Macy 	dmu_buf_rele(zap->zap_dbuf, tag);
772eda14cbcSMatt Macy }
773eda14cbcSMatt Macy 
774eda14cbcSMatt Macy static int
mzap_upgrade(zap_t ** zapp,const void * tag,dmu_tx_t * tx,zap_flags_t flags)775a0b956f5SMartin Matuska mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags)
776eda14cbcSMatt Macy {
777eda14cbcSMatt Macy 	int err = 0;
778eda14cbcSMatt Macy 	zap_t *zap = *zapp;
779eda14cbcSMatt Macy 
780eda14cbcSMatt Macy 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
781eda14cbcSMatt Macy 
782eda14cbcSMatt Macy 	int sz = zap->zap_dbuf->db_size;
783eda14cbcSMatt Macy 	mzap_phys_t *mzp = vmem_alloc(sz, KM_SLEEP);
784da5137abSMartin Matuska 	memcpy(mzp, zap->zap_dbuf->db_data, sz);
785eda14cbcSMatt Macy 	int nchunks = zap->zap_m.zap_num_chunks;
786eda14cbcSMatt Macy 
787eda14cbcSMatt Macy 	if (!flags) {
788eda14cbcSMatt Macy 		err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
789eda14cbcSMatt Macy 		    1ULL << fzap_default_block_shift, 0, tx);
790eda14cbcSMatt Macy 		if (err != 0) {
791eda14cbcSMatt Macy 			vmem_free(mzp, sz);
792eda14cbcSMatt Macy 			return (err);
793eda14cbcSMatt Macy 		}
794eda14cbcSMatt Macy 	}
795eda14cbcSMatt Macy 
796eda14cbcSMatt Macy 	dprintf("upgrading obj=%llu with %u chunks\n",
79733b8c039SMartin Matuska 	    (u_longlong_t)zap->zap_object, nchunks);
798dbd5678dSMartin Matuska 	/* XXX destroy the tree later, so we can use the stored hash value */
799eda14cbcSMatt Macy 	mze_destroy(zap);
800eda14cbcSMatt Macy 
801eda14cbcSMatt Macy 	fzap_upgrade(zap, tx, flags);
802eda14cbcSMatt Macy 
8037a7741afSMartin Matuska 	zap_name_t *zn = zap_name_alloc(zap, B_FALSE);
804eda14cbcSMatt Macy 	for (int i = 0; i < nchunks; i++) {
805eda14cbcSMatt Macy 		mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
806eda14cbcSMatt Macy 		if (mze->mze_name[0] == 0)
807eda14cbcSMatt Macy 			continue;
808eda14cbcSMatt Macy 		dprintf("adding %s=%llu\n",
80933b8c039SMartin Matuska 		    mze->mze_name, (u_longlong_t)mze->mze_value);
810dbd5678dSMartin Matuska 		zap_name_init_str(zn, mze->mze_name, 0);
811eda14cbcSMatt Macy 		/* If we fail here, we would end up losing entries */
812eda14cbcSMatt Macy 		VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
813eda14cbcSMatt Macy 		    tag, tx));
814eda14cbcSMatt Macy 		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
815eda14cbcSMatt Macy 	}
816dbd5678dSMartin Matuska 	zap_name_free(zn);
817eda14cbcSMatt Macy 	vmem_free(mzp, sz);
818eda14cbcSMatt Macy 	*zapp = zap;
819eda14cbcSMatt Macy 	return (0);
820eda14cbcSMatt Macy }
821eda14cbcSMatt Macy 
822eda14cbcSMatt Macy /*
823eda14cbcSMatt Macy  * The "normflags" determine the behavior of the matchtype_t which is
824eda14cbcSMatt Macy  * passed to zap_lookup_norm().  Names which have the same normalized
825eda14cbcSMatt Macy  * version will be stored with the same hash value, and therefore we can
826eda14cbcSMatt Macy  * perform normalization-insensitive lookups.  We can be Unicode form-
827eda14cbcSMatt Macy  * insensitive and/or case-insensitive.  The following flags are valid for
828eda14cbcSMatt Macy  * "normflags":
829eda14cbcSMatt Macy  *
830eda14cbcSMatt Macy  * U8_TEXTPREP_NFC
831eda14cbcSMatt Macy  * U8_TEXTPREP_NFD
832eda14cbcSMatt Macy  * U8_TEXTPREP_NFKC
833eda14cbcSMatt Macy  * U8_TEXTPREP_NFKD
834eda14cbcSMatt Macy  * U8_TEXTPREP_TOUPPER
835eda14cbcSMatt Macy  *
836eda14cbcSMatt Macy  * The *_NF* (Normalization Form) flags are mutually exclusive; at most one
837eda14cbcSMatt Macy  * of them may be supplied.
838eda14cbcSMatt Macy  */
839eda14cbcSMatt Macy void
mzap_create_impl(dnode_t * dn,int normflags,zap_flags_t flags,dmu_tx_t * tx)840eda14cbcSMatt Macy mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx)
841eda14cbcSMatt Macy {
842eda14cbcSMatt Macy 	dmu_buf_t *db;
843eda14cbcSMatt Macy 
844eda14cbcSMatt Macy 	VERIFY0(dmu_buf_hold_by_dnode(dn, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
845eda14cbcSMatt Macy 
846eda14cbcSMatt Macy 	dmu_buf_will_dirty(db, tx);
847eda14cbcSMatt Macy 	mzap_phys_t *zp = db->db_data;
848eda14cbcSMatt Macy 	zp->mz_block_type = ZBT_MICRO;
849eda14cbcSMatt Macy 	zp->mz_salt =
850eda14cbcSMatt Macy 	    ((uintptr_t)db ^ (uintptr_t)tx ^ (dn->dn_object << 1)) | 1ULL;
851eda14cbcSMatt Macy 	zp->mz_normflags = normflags;
852eda14cbcSMatt Macy 
853eda14cbcSMatt Macy 	if (flags != 0) {
854eda14cbcSMatt Macy 		zap_t *zap;
855eda14cbcSMatt Macy 		/* Only fat zap supports flags; upgrade immediately. */
856783d3ff6SMartin Matuska 		VERIFY(dnode_add_ref(dn, FTAG));
857783d3ff6SMartin Matuska 		VERIFY0(zap_lockdir_impl(dn, db, FTAG, tx, RW_WRITER,
858eda14cbcSMatt Macy 		    B_FALSE, B_FALSE, &zap));
859eda14cbcSMatt Macy 		VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags));
860eda14cbcSMatt Macy 		zap_unlockdir(zap, FTAG);
861eda14cbcSMatt Macy 	} else {
862eda14cbcSMatt Macy 		dmu_buf_rele(db, FTAG);
863eda14cbcSMatt Macy 	}
864eda14cbcSMatt Macy }
865eda14cbcSMatt Macy 
866eda14cbcSMatt Macy static uint64_t
zap_create_impl(objset_t * os,int normflags,zap_flags_t flags,dmu_object_type_t ot,int leaf_blockshift,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dnode_t ** allocated_dnode,const void * tag,dmu_tx_t * tx)867eda14cbcSMatt Macy zap_create_impl(objset_t *os, int normflags, zap_flags_t flags,
868eda14cbcSMatt Macy     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
869eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize,
870a0b956f5SMartin Matuska     dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
871eda14cbcSMatt Macy {
872eda14cbcSMatt Macy 	uint64_t obj;
873eda14cbcSMatt Macy 
874eda14cbcSMatt Macy 	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP);
875eda14cbcSMatt Macy 
876eda14cbcSMatt Macy 	if (allocated_dnode == NULL) {
877eda14cbcSMatt Macy 		dnode_t *dn;
878eda14cbcSMatt Macy 		obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift,
879eda14cbcSMatt Macy 		    indirect_blockshift, bonustype, bonuslen, dnodesize,
880eda14cbcSMatt Macy 		    &dn, FTAG, tx);
881eda14cbcSMatt Macy 		mzap_create_impl(dn, normflags, flags, tx);
882eda14cbcSMatt Macy 		dnode_rele(dn, FTAG);
883eda14cbcSMatt Macy 	} else {
884eda14cbcSMatt Macy 		obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift,
885eda14cbcSMatt Macy 		    indirect_blockshift, bonustype, bonuslen, dnodesize,
886eda14cbcSMatt Macy 		    allocated_dnode, tag, tx);
887eda14cbcSMatt Macy 		mzap_create_impl(*allocated_dnode, normflags, flags, tx);
888eda14cbcSMatt Macy 	}
889eda14cbcSMatt Macy 
890eda14cbcSMatt Macy 	return (obj);
891eda14cbcSMatt Macy }
892eda14cbcSMatt Macy 
893eda14cbcSMatt Macy int
zap_create_claim(objset_t * os,uint64_t obj,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)894eda14cbcSMatt Macy zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
895eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
896eda14cbcSMatt Macy {
897eda14cbcSMatt Macy 	return (zap_create_claim_dnsize(os, obj, ot, bonustype, bonuslen,
898eda14cbcSMatt Macy 	    0, tx));
899eda14cbcSMatt Macy }
900eda14cbcSMatt Macy 
901eda14cbcSMatt Macy int
zap_create_claim_dnsize(objset_t * os,uint64_t obj,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)902eda14cbcSMatt Macy zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
903eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
904eda14cbcSMatt Macy {
905eda14cbcSMatt Macy 	return (zap_create_claim_norm_dnsize(os, obj,
906eda14cbcSMatt Macy 	    0, ot, bonustype, bonuslen, dnodesize, tx));
907eda14cbcSMatt Macy }
908eda14cbcSMatt Macy 
909eda14cbcSMatt Macy int
zap_create_claim_norm(objset_t * os,uint64_t obj,int normflags,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)910eda14cbcSMatt Macy zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
911eda14cbcSMatt Macy     dmu_object_type_t ot,
912eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
913eda14cbcSMatt Macy {
914eda14cbcSMatt Macy 	return (zap_create_claim_norm_dnsize(os, obj, normflags, ot, bonustype,
915eda14cbcSMatt Macy 	    bonuslen, 0, tx));
916eda14cbcSMatt Macy }
917eda14cbcSMatt Macy 
918eda14cbcSMatt Macy int
zap_create_claim_norm_dnsize(objset_t * os,uint64_t obj,int normflags,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)919eda14cbcSMatt Macy zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags,
920eda14cbcSMatt Macy     dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
921eda14cbcSMatt Macy     int dnodesize, dmu_tx_t *tx)
922eda14cbcSMatt Macy {
923eda14cbcSMatt Macy 	dnode_t *dn;
924eda14cbcSMatt Macy 	int error;
925eda14cbcSMatt Macy 
926eda14cbcSMatt Macy 	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP);
927eda14cbcSMatt Macy 	error = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen,
928eda14cbcSMatt Macy 	    dnodesize, tx);
929eda14cbcSMatt Macy 	if (error != 0)
930eda14cbcSMatt Macy 		return (error);
931eda14cbcSMatt Macy 
932eda14cbcSMatt Macy 	error = dnode_hold(os, obj, FTAG, &dn);
933eda14cbcSMatt Macy 	if (error != 0)
934eda14cbcSMatt Macy 		return (error);
935eda14cbcSMatt Macy 
936eda14cbcSMatt Macy 	mzap_create_impl(dn, normflags, 0, tx);
937eda14cbcSMatt Macy 
938eda14cbcSMatt Macy 	dnode_rele(dn, FTAG);
939eda14cbcSMatt Macy 
940eda14cbcSMatt Macy 	return (0);
941eda14cbcSMatt Macy }
942eda14cbcSMatt Macy 
943eda14cbcSMatt Macy uint64_t
zap_create(objset_t * os,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)944eda14cbcSMatt Macy zap_create(objset_t *os, dmu_object_type_t ot,
945eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
946eda14cbcSMatt Macy {
947eda14cbcSMatt Macy 	return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
948eda14cbcSMatt Macy }
949eda14cbcSMatt Macy 
950eda14cbcSMatt Macy uint64_t
zap_create_dnsize(objset_t * os,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)951eda14cbcSMatt Macy zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
952eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
953eda14cbcSMatt Macy {
954eda14cbcSMatt Macy 	return (zap_create_norm_dnsize(os, 0, ot, bonustype, bonuslen,
955eda14cbcSMatt Macy 	    dnodesize, tx));
956eda14cbcSMatt Macy }
957eda14cbcSMatt Macy 
958eda14cbcSMatt Macy uint64_t
zap_create_norm(objset_t * os,int normflags,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)959eda14cbcSMatt Macy zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
960eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
961eda14cbcSMatt Macy {
962eda14cbcSMatt Macy 	return (zap_create_norm_dnsize(os, normflags, ot, bonustype, bonuslen,
963eda14cbcSMatt Macy 	    0, tx));
964eda14cbcSMatt Macy }
965eda14cbcSMatt Macy 
966eda14cbcSMatt Macy uint64_t
zap_create_norm_dnsize(objset_t * os,int normflags,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)967eda14cbcSMatt Macy zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot,
968eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
969eda14cbcSMatt Macy {
970eda14cbcSMatt Macy 	return (zap_create_impl(os, normflags, 0, ot, 0, 0,
971eda14cbcSMatt Macy 	    bonustype, bonuslen, dnodesize, NULL, NULL, tx));
972eda14cbcSMatt Macy }
973eda14cbcSMatt Macy 
974eda14cbcSMatt Macy uint64_t
zap_create_flags(objset_t * os,int normflags,zap_flags_t flags,dmu_object_type_t ot,int leaf_blockshift,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)975eda14cbcSMatt Macy zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
976eda14cbcSMatt Macy     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
977eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
978eda14cbcSMatt Macy {
979eda14cbcSMatt Macy 	return (zap_create_flags_dnsize(os, normflags, flags, ot,
980eda14cbcSMatt Macy 	    leaf_blockshift, indirect_blockshift, bonustype, bonuslen, 0, tx));
981eda14cbcSMatt Macy }
982eda14cbcSMatt Macy 
983eda14cbcSMatt Macy uint64_t
zap_create_flags_dnsize(objset_t * os,int normflags,zap_flags_t flags,dmu_object_type_t ot,int leaf_blockshift,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dmu_tx_t * tx)984eda14cbcSMatt Macy zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags,
985eda14cbcSMatt Macy     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
986eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
987eda14cbcSMatt Macy {
988eda14cbcSMatt Macy 	return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift,
989eda14cbcSMatt Macy 	    indirect_blockshift, bonustype, bonuslen, dnodesize, NULL, NULL,
990eda14cbcSMatt Macy 	    tx));
991eda14cbcSMatt Macy }
992eda14cbcSMatt Macy 
993eda14cbcSMatt Macy /*
994eda14cbcSMatt Macy  * Create a zap object and return a pointer to the newly allocated dnode via
995eda14cbcSMatt Macy  * the allocated_dnode argument.  The returned dnode will be held and the
996eda14cbcSMatt Macy  * caller is responsible for releasing the hold by calling dnode_rele().
997eda14cbcSMatt Macy  */
998eda14cbcSMatt Macy uint64_t
zap_create_hold(objset_t * os,int normflags,zap_flags_t flags,dmu_object_type_t ot,int leaf_blockshift,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,int dnodesize,dnode_t ** allocated_dnode,const void * tag,dmu_tx_t * tx)999eda14cbcSMatt Macy zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,
1000eda14cbcSMatt Macy     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
1001eda14cbcSMatt Macy     dmu_object_type_t bonustype, int bonuslen, int dnodesize,
1002a0b956f5SMartin Matuska     dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
1003eda14cbcSMatt Macy {
1004eda14cbcSMatt Macy 	return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift,
1005eda14cbcSMatt Macy 	    indirect_blockshift, bonustype, bonuslen, dnodesize,
1006eda14cbcSMatt Macy 	    allocated_dnode, tag, tx));
1007eda14cbcSMatt Macy }
1008eda14cbcSMatt Macy 
1009eda14cbcSMatt Macy int
zap_destroy(objset_t * os,uint64_t zapobj,dmu_tx_t * tx)1010eda14cbcSMatt Macy zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
1011eda14cbcSMatt Macy {
1012eda14cbcSMatt Macy 	/*
1013eda14cbcSMatt Macy 	 * dmu_object_free will free the object number and free the
1014eda14cbcSMatt Macy 	 * data.  Freeing the data will cause our pageout function to be
1015eda14cbcSMatt Macy 	 * called, which will destroy our data (zap_leaf_t's and zap_t).
1016eda14cbcSMatt Macy 	 */
1017eda14cbcSMatt Macy 
1018eda14cbcSMatt Macy 	return (dmu_object_free(os, zapobj, tx));
1019eda14cbcSMatt Macy }
1020eda14cbcSMatt Macy 
1021eda14cbcSMatt Macy void
zap_evict_sync(void * dbu)1022eda14cbcSMatt Macy zap_evict_sync(void *dbu)
1023eda14cbcSMatt Macy {
1024eda14cbcSMatt Macy 	zap_t *zap = dbu;
1025eda14cbcSMatt Macy 
1026eda14cbcSMatt Macy 	rw_destroy(&zap->zap_rwlock);
1027eda14cbcSMatt Macy 
1028eda14cbcSMatt Macy 	if (zap->zap_ismicro)
1029eda14cbcSMatt Macy 		mze_destroy(zap);
1030eda14cbcSMatt Macy 	else
1031eda14cbcSMatt Macy 		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
1032eda14cbcSMatt Macy 
1033eda14cbcSMatt Macy 	kmem_free(zap, sizeof (zap_t));
1034eda14cbcSMatt Macy }
1035eda14cbcSMatt Macy 
1036eda14cbcSMatt Macy int
zap_count(objset_t * os,uint64_t zapobj,uint64_t * count)1037eda14cbcSMatt Macy zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
1038eda14cbcSMatt Macy {
1039eda14cbcSMatt Macy 	zap_t *zap;
1040eda14cbcSMatt Macy 
1041eda14cbcSMatt Macy 	int err =
1042eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1043eda14cbcSMatt Macy 	if (err != 0)
1044eda14cbcSMatt Macy 		return (err);
1045eda14cbcSMatt Macy 	if (!zap->zap_ismicro) {
1046eda14cbcSMatt Macy 		err = fzap_count(zap, count);
1047eda14cbcSMatt Macy 	} else {
1048eda14cbcSMatt Macy 		*count = zap->zap_m.zap_num_entries;
1049eda14cbcSMatt Macy 	}
1050eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1051eda14cbcSMatt Macy 	return (err);
1052eda14cbcSMatt Macy }
1053eda14cbcSMatt Macy 
1054eda14cbcSMatt Macy /*
1055eda14cbcSMatt Macy  * zn may be NULL; if not specified, it will be computed if needed.
1056eda14cbcSMatt Macy  * See also the comment above zap_entry_normalization_conflict().
1057eda14cbcSMatt Macy  */
1058eda14cbcSMatt Macy static boolean_t
mzap_normalization_conflict(zap_t * zap,zap_name_t * zn,mzap_ent_t * mze,zfs_btree_index_t * idx)1059dbd5678dSMartin Matuska mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze,
1060dbd5678dSMartin Matuska     zfs_btree_index_t *idx)
1061eda14cbcSMatt Macy {
1062eda14cbcSMatt Macy 	boolean_t allocdzn = B_FALSE;
1063dbd5678dSMartin Matuska 	mzap_ent_t *other;
1064dbd5678dSMartin Matuska 	zfs_btree_index_t oidx;
1065eda14cbcSMatt Macy 
1066eda14cbcSMatt Macy 	if (zap->zap_normflags == 0)
1067eda14cbcSMatt Macy 		return (B_FALSE);
1068eda14cbcSMatt Macy 
1069dbd5678dSMartin Matuska 	for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx);
1070eda14cbcSMatt Macy 	    other && other->mze_hash == mze->mze_hash;
1071dbd5678dSMartin Matuska 	    other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) {
1072eda14cbcSMatt Macy 
1073eda14cbcSMatt Macy 		if (zn == NULL) {
1074dbd5678dSMartin Matuska 			zn = zap_name_alloc_str(zap,
1075dbd5678dSMartin Matuska 			    MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
1076eda14cbcSMatt Macy 			allocdzn = B_TRUE;
1077eda14cbcSMatt Macy 		}
1078eda14cbcSMatt Macy 		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
1079eda14cbcSMatt Macy 			if (allocdzn)
1080eda14cbcSMatt Macy 				zap_name_free(zn);
1081eda14cbcSMatt Macy 			return (B_TRUE);
1082eda14cbcSMatt Macy 		}
1083eda14cbcSMatt Macy 	}
1084eda14cbcSMatt Macy 
1085dbd5678dSMartin Matuska 	for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx);
1086dbd5678dSMartin Matuska 	    other && other->mze_hash == mze->mze_hash;
1087dbd5678dSMartin Matuska 	    other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) {
1088dbd5678dSMartin Matuska 
1089dbd5678dSMartin Matuska 		if (zn == NULL) {
1090dbd5678dSMartin Matuska 			zn = zap_name_alloc_str(zap,
1091dbd5678dSMartin Matuska 			    MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
1092dbd5678dSMartin Matuska 			allocdzn = B_TRUE;
1093dbd5678dSMartin Matuska 		}
1094dbd5678dSMartin Matuska 		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
1095dbd5678dSMartin Matuska 			if (allocdzn)
1096dbd5678dSMartin Matuska 				zap_name_free(zn);
1097dbd5678dSMartin Matuska 			return (B_TRUE);
1098dbd5678dSMartin Matuska 		}
1099eda14cbcSMatt Macy 	}
1100eda14cbcSMatt Macy 
1101eda14cbcSMatt Macy 	if (allocdzn)
1102eda14cbcSMatt Macy 		zap_name_free(zn);
1103eda14cbcSMatt Macy 	return (B_FALSE);
1104eda14cbcSMatt Macy }
1105eda14cbcSMatt Macy 
1106eda14cbcSMatt Macy /*
1107eda14cbcSMatt Macy  * Routines for manipulating attributes.
1108eda14cbcSMatt Macy  */
1109eda14cbcSMatt Macy 
1110eda14cbcSMatt Macy int
zap_lookup(objset_t * os,uint64_t zapobj,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf)1111eda14cbcSMatt Macy zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
1112eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf)
1113eda14cbcSMatt Macy {
1114eda14cbcSMatt Macy 	return (zap_lookup_norm(os, zapobj, name, integer_size,
1115eda14cbcSMatt Macy 	    num_integers, buf, 0, NULL, 0, NULL));
1116eda14cbcSMatt Macy }
1117eda14cbcSMatt Macy 
1118eda14cbcSMatt Macy static int
zap_lookup_impl(zap_t * zap,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)1119eda14cbcSMatt Macy zap_lookup_impl(zap_t *zap, const char *name,
1120eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf,
1121eda14cbcSMatt Macy     matchtype_t mt, char *realname, int rn_len,
1122eda14cbcSMatt Macy     boolean_t *ncp)
1123eda14cbcSMatt Macy {
1124eda14cbcSMatt Macy 	int err = 0;
1125eda14cbcSMatt Macy 
1126dbd5678dSMartin Matuska 	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
1127eda14cbcSMatt Macy 	if (zn == NULL)
1128eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1129eda14cbcSMatt Macy 
1130eda14cbcSMatt Macy 	if (!zap->zap_ismicro) {
1131eda14cbcSMatt Macy 		err = fzap_lookup(zn, integer_size, num_integers, buf,
1132eda14cbcSMatt Macy 		    realname, rn_len, ncp);
1133eda14cbcSMatt Macy 	} else {
1134dbd5678dSMartin Matuska 		zfs_btree_index_t idx;
1135dbd5678dSMartin Matuska 		mzap_ent_t *mze = mze_find(zn, &idx);
1136eda14cbcSMatt Macy 		if (mze == NULL) {
1137eda14cbcSMatt Macy 			err = SET_ERROR(ENOENT);
1138eda14cbcSMatt Macy 		} else {
1139eda14cbcSMatt Macy 			if (num_integers < 1) {
1140eda14cbcSMatt Macy 				err = SET_ERROR(EOVERFLOW);
1141eda14cbcSMatt Macy 			} else if (integer_size != 8) {
1142eda14cbcSMatt Macy 				err = SET_ERROR(EINVAL);
1143eda14cbcSMatt Macy 			} else {
1144eda14cbcSMatt Macy 				*(uint64_t *)buf =
1145eda14cbcSMatt Macy 				    MZE_PHYS(zap, mze)->mze_value;
1146dbd5678dSMartin Matuska 				if (realname != NULL)
1147eda14cbcSMatt Macy 					(void) strlcpy(realname,
1148dbd5678dSMartin Matuska 					    MZE_PHYS(zap, mze)->mze_name,
1149dbd5678dSMartin Matuska 					    rn_len);
1150eda14cbcSMatt Macy 				if (ncp) {
1151eda14cbcSMatt Macy 					*ncp = mzap_normalization_conflict(zap,
1152dbd5678dSMartin Matuska 					    zn, mze, &idx);
1153eda14cbcSMatt Macy 				}
1154eda14cbcSMatt Macy 			}
1155eda14cbcSMatt Macy 		}
1156eda14cbcSMatt Macy 	}
1157eda14cbcSMatt Macy 	zap_name_free(zn);
1158eda14cbcSMatt Macy 	return (err);
1159eda14cbcSMatt Macy }
1160eda14cbcSMatt Macy 
1161eda14cbcSMatt Macy int
zap_lookup_norm(objset_t * os,uint64_t zapobj,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)1162eda14cbcSMatt Macy zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
1163eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf,
1164eda14cbcSMatt Macy     matchtype_t mt, char *realname, int rn_len,
1165eda14cbcSMatt Macy     boolean_t *ncp)
1166eda14cbcSMatt Macy {
1167eda14cbcSMatt Macy 	zap_t *zap;
1168eda14cbcSMatt Macy 
1169eda14cbcSMatt Macy 	int err =
1170eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1171eda14cbcSMatt Macy 	if (err != 0)
1172eda14cbcSMatt Macy 		return (err);
1173eda14cbcSMatt Macy 	err = zap_lookup_impl(zap, name, integer_size,
1174eda14cbcSMatt Macy 	    num_integers, buf, mt, realname, rn_len, ncp);
1175eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1176eda14cbcSMatt Macy 	return (err);
1177eda14cbcSMatt Macy }
1178eda14cbcSMatt Macy 
1179eda14cbcSMatt Macy int
zap_prefetch(objset_t * os,uint64_t zapobj,const char * name)1180eda14cbcSMatt Macy zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
1181eda14cbcSMatt Macy {
1182eda14cbcSMatt Macy 	zap_t *zap;
1183eda14cbcSMatt Macy 	int err;
1184eda14cbcSMatt Macy 	zap_name_t *zn;
1185eda14cbcSMatt Macy 
1186eda14cbcSMatt Macy 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1187eda14cbcSMatt Macy 	if (err)
1188eda14cbcSMatt Macy 		return (err);
1189dbd5678dSMartin Matuska 	zn = zap_name_alloc_str(zap, name, 0);
1190eda14cbcSMatt Macy 	if (zn == NULL) {
1191eda14cbcSMatt Macy 		zap_unlockdir(zap, FTAG);
1192eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1193eda14cbcSMatt Macy 	}
1194eda14cbcSMatt Macy 
1195eda14cbcSMatt Macy 	fzap_prefetch(zn);
1196eda14cbcSMatt Macy 	zap_name_free(zn);
1197eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1198eda14cbcSMatt Macy 	return (err);
1199eda14cbcSMatt Macy }
1200eda14cbcSMatt Macy 
1201eda14cbcSMatt Macy int
zap_prefetch_object(objset_t * os,uint64_t zapobj)1202ce4dcb97SMartin Matuska zap_prefetch_object(objset_t *os, uint64_t zapobj)
1203ce4dcb97SMartin Matuska {
1204ce4dcb97SMartin Matuska 	int error;
1205ce4dcb97SMartin Matuska 	dmu_object_info_t doi;
1206ce4dcb97SMartin Matuska 
1207ce4dcb97SMartin Matuska 	error = dmu_object_info(os, zapobj, &doi);
1208ce4dcb97SMartin Matuska 	if (error == 0 && DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
1209ce4dcb97SMartin Matuska 		error = SET_ERROR(EINVAL);
1210ce4dcb97SMartin Matuska 	if (error == 0)
1211ce4dcb97SMartin Matuska 		dmu_prefetch_wait(os, zapobj, 0, doi.doi_max_offset);
1212ce4dcb97SMartin Matuska 
1213ce4dcb97SMartin Matuska 	return (error);
1214ce4dcb97SMartin Matuska }
1215ce4dcb97SMartin Matuska 
1216ce4dcb97SMartin Matuska int
zap_lookup_by_dnode(dnode_t * dn,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf)1217eda14cbcSMatt Macy zap_lookup_by_dnode(dnode_t *dn, const char *name,
1218eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf)
1219eda14cbcSMatt Macy {
1220eda14cbcSMatt Macy 	return (zap_lookup_norm_by_dnode(dn, name, integer_size,
1221eda14cbcSMatt Macy 	    num_integers, buf, 0, NULL, 0, NULL));
1222eda14cbcSMatt Macy }
1223eda14cbcSMatt Macy 
1224eda14cbcSMatt Macy int
zap_lookup_norm_by_dnode(dnode_t * dn,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)1225eda14cbcSMatt Macy zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
1226eda14cbcSMatt Macy     uint64_t integer_size, uint64_t num_integers, void *buf,
1227eda14cbcSMatt Macy     matchtype_t mt, char *realname, int rn_len,
1228eda14cbcSMatt Macy     boolean_t *ncp)
1229eda14cbcSMatt Macy {
1230eda14cbcSMatt Macy 	zap_t *zap;
1231eda14cbcSMatt Macy 
1232eda14cbcSMatt Macy 	int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
1233eda14cbcSMatt Macy 	    FTAG, &zap);
1234eda14cbcSMatt Macy 	if (err != 0)
1235eda14cbcSMatt Macy 		return (err);
1236eda14cbcSMatt Macy 	err = zap_lookup_impl(zap, name, integer_size,
1237eda14cbcSMatt Macy 	    num_integers, buf, mt, realname, rn_len, ncp);
1238eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1239eda14cbcSMatt Macy 	return (err);
1240eda14cbcSMatt Macy }
1241eda14cbcSMatt Macy 
1242718519f4SMartin Matuska static int
zap_prefetch_uint64_impl(zap_t * zap,const uint64_t * key,int key_numints)1243718519f4SMartin Matuska zap_prefetch_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints)
1244718519f4SMartin Matuska {
1245718519f4SMartin Matuska 	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
1246718519f4SMartin Matuska 	if (zn == NULL) {
1247718519f4SMartin Matuska 		zap_unlockdir(zap, FTAG);
1248718519f4SMartin Matuska 		return (SET_ERROR(ENOTSUP));
1249718519f4SMartin Matuska 	}
1250718519f4SMartin Matuska 
1251718519f4SMartin Matuska 	fzap_prefetch(zn);
1252718519f4SMartin Matuska 	zap_name_free(zn);
1253718519f4SMartin Matuska 	zap_unlockdir(zap, FTAG);
1254718519f4SMartin Matuska 	return (0);
1255718519f4SMartin Matuska }
1256718519f4SMartin Matuska 
1257eda14cbcSMatt Macy int
zap_prefetch_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints)1258eda14cbcSMatt Macy zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1259eda14cbcSMatt Macy     int key_numints)
1260eda14cbcSMatt Macy {
1261eda14cbcSMatt Macy 	zap_t *zap;
1262eda14cbcSMatt Macy 
1263eda14cbcSMatt Macy 	int err =
1264eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1265eda14cbcSMatt Macy 	if (err != 0)
1266eda14cbcSMatt Macy 		return (err);
1267718519f4SMartin Matuska 	err = zap_prefetch_uint64_impl(zap, key, key_numints);
1268718519f4SMartin Matuska 	/* zap_prefetch_uint64_impl() calls zap_unlockdir() */
1269718519f4SMartin Matuska 	return (err);
1270718519f4SMartin Matuska }
1271718519f4SMartin Matuska 
1272718519f4SMartin Matuska int
zap_prefetch_uint64_by_dnode(dnode_t * dn,const uint64_t * key,int key_numints)1273718519f4SMartin Matuska zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints)
1274718519f4SMartin Matuska {
1275718519f4SMartin Matuska 	zap_t *zap;
1276718519f4SMartin Matuska 
1277718519f4SMartin Matuska 	int err =
1278718519f4SMartin Matuska 	    zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1279718519f4SMartin Matuska 	if (err != 0)
1280718519f4SMartin Matuska 		return (err);
1281718519f4SMartin Matuska 	err = zap_prefetch_uint64_impl(zap, key, key_numints);
1282718519f4SMartin Matuska 	/* zap_prefetch_uint64_impl() calls zap_unlockdir() */
1283718519f4SMartin Matuska 	return (err);
1284718519f4SMartin Matuska }
1285718519f4SMartin Matuska 
1286718519f4SMartin Matuska static int
zap_lookup_uint64_impl(zap_t * zap,const uint64_t * key,int key_numints,uint64_t integer_size,uint64_t num_integers,void * buf)1287718519f4SMartin Matuska zap_lookup_uint64_impl(zap_t *zap, const uint64_t *key,
1288718519f4SMartin Matuska     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
1289718519f4SMartin Matuska {
1290eda14cbcSMatt Macy 	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
1291eda14cbcSMatt Macy 	if (zn == NULL) {
1292eda14cbcSMatt Macy 		zap_unlockdir(zap, FTAG);
1293eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1294eda14cbcSMatt Macy 	}
1295eda14cbcSMatt Macy 
1296718519f4SMartin Matuska 	int err = fzap_lookup(zn, integer_size, num_integers, buf,
1297718519f4SMartin Matuska 	    NULL, 0, NULL);
1298eda14cbcSMatt Macy 	zap_name_free(zn);
1299eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1300eda14cbcSMatt Macy 	return (err);
1301eda14cbcSMatt Macy }
1302eda14cbcSMatt Macy 
1303eda14cbcSMatt Macy int
zap_lookup_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,uint64_t integer_size,uint64_t num_integers,void * buf)1304eda14cbcSMatt Macy zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1305eda14cbcSMatt Macy     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
1306eda14cbcSMatt Macy {
1307eda14cbcSMatt Macy 	zap_t *zap;
1308eda14cbcSMatt Macy 
1309eda14cbcSMatt Macy 	int err =
1310eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1311eda14cbcSMatt Macy 	if (err != 0)
1312eda14cbcSMatt Macy 		return (err);
1313718519f4SMartin Matuska 	err = zap_lookup_uint64_impl(zap, key, key_numints, integer_size,
1314718519f4SMartin Matuska 	    num_integers, buf);
1315718519f4SMartin Matuska 	/* zap_lookup_uint64_impl() calls zap_unlockdir() */
1316718519f4SMartin Matuska 	return (err);
1317eda14cbcSMatt Macy }
1318eda14cbcSMatt Macy 
1319718519f4SMartin Matuska int
zap_lookup_uint64_by_dnode(dnode_t * dn,const uint64_t * key,int key_numints,uint64_t integer_size,uint64_t num_integers,void * buf)1320718519f4SMartin Matuska zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
1321718519f4SMartin Matuska     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
1322718519f4SMartin Matuska {
1323718519f4SMartin Matuska 	zap_t *zap;
1324718519f4SMartin Matuska 
1325718519f4SMartin Matuska 	int err =
1326718519f4SMartin Matuska 	    zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1327718519f4SMartin Matuska 	if (err != 0)
1328718519f4SMartin Matuska 		return (err);
1329718519f4SMartin Matuska 	err = zap_lookup_uint64_impl(zap, key, key_numints, integer_size,
1330718519f4SMartin Matuska 	    num_integers, buf);
1331718519f4SMartin Matuska 	/* zap_lookup_uint64_impl() calls zap_unlockdir() */
1332eda14cbcSMatt Macy 	return (err);
1333eda14cbcSMatt Macy }
1334eda14cbcSMatt Macy 
1335eda14cbcSMatt Macy int
zap_contains(objset_t * os,uint64_t zapobj,const char * name)1336eda14cbcSMatt Macy zap_contains(objset_t *os, uint64_t zapobj, const char *name)
1337eda14cbcSMatt Macy {
1338eda14cbcSMatt Macy 	int err = zap_lookup_norm(os, zapobj, name, 0,
1339eda14cbcSMatt Macy 	    0, NULL, 0, NULL, 0, NULL);
1340eda14cbcSMatt Macy 	if (err == EOVERFLOW || err == EINVAL)
1341eda14cbcSMatt Macy 		err = 0; /* found, but skipped reading the value */
1342eda14cbcSMatt Macy 	return (err);
1343eda14cbcSMatt Macy }
1344eda14cbcSMatt Macy 
1345eda14cbcSMatt Macy int
zap_length(objset_t * os,uint64_t zapobj,const char * name,uint64_t * integer_size,uint64_t * num_integers)1346eda14cbcSMatt Macy zap_length(objset_t *os, uint64_t zapobj, const char *name,
1347eda14cbcSMatt Macy     uint64_t *integer_size, uint64_t *num_integers)
1348eda14cbcSMatt Macy {
1349eda14cbcSMatt Macy 	zap_t *zap;
1350eda14cbcSMatt Macy 
1351eda14cbcSMatt Macy 	int err =
1352eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1353eda14cbcSMatt Macy 	if (err != 0)
1354eda14cbcSMatt Macy 		return (err);
1355dbd5678dSMartin Matuska 	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
1356eda14cbcSMatt Macy 	if (zn == NULL) {
1357eda14cbcSMatt Macy 		zap_unlockdir(zap, FTAG);
1358eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1359eda14cbcSMatt Macy 	}
1360eda14cbcSMatt Macy 	if (!zap->zap_ismicro) {
1361eda14cbcSMatt Macy 		err = fzap_length(zn, integer_size, num_integers);
1362eda14cbcSMatt Macy 	} else {
1363dbd5678dSMartin Matuska 		zfs_btree_index_t idx;
1364dbd5678dSMartin Matuska 		mzap_ent_t *mze = mze_find(zn, &idx);
1365eda14cbcSMatt Macy 		if (mze == NULL) {
1366eda14cbcSMatt Macy 			err = SET_ERROR(ENOENT);
1367eda14cbcSMatt Macy 		} else {
1368eda14cbcSMatt Macy 			if (integer_size)
1369eda14cbcSMatt Macy 				*integer_size = 8;
1370eda14cbcSMatt Macy 			if (num_integers)
1371eda14cbcSMatt Macy 				*num_integers = 1;
1372eda14cbcSMatt Macy 		}
1373eda14cbcSMatt Macy 	}
1374eda14cbcSMatt Macy 	zap_name_free(zn);
1375eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1376eda14cbcSMatt Macy 	return (err);
1377eda14cbcSMatt Macy }
1378eda14cbcSMatt Macy 
1379eda14cbcSMatt Macy int
zap_length_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,uint64_t * integer_size,uint64_t * num_integers)1380eda14cbcSMatt Macy zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1381eda14cbcSMatt Macy     int key_numints, uint64_t *integer_size, uint64_t *num_integers)
1382eda14cbcSMatt Macy {
1383eda14cbcSMatt Macy 	zap_t *zap;
1384eda14cbcSMatt Macy 
1385eda14cbcSMatt Macy 	int err =
1386eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1387eda14cbcSMatt Macy 	if (err != 0)
1388eda14cbcSMatt Macy 		return (err);
1389eda14cbcSMatt Macy 	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
1390eda14cbcSMatt Macy 	if (zn == NULL) {
1391eda14cbcSMatt Macy 		zap_unlockdir(zap, FTAG);
1392eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1393eda14cbcSMatt Macy 	}
1394eda14cbcSMatt Macy 	err = fzap_length(zn, integer_size, num_integers);
1395eda14cbcSMatt Macy 	zap_name_free(zn);
1396eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1397eda14cbcSMatt Macy 	return (err);
1398eda14cbcSMatt Macy }
1399eda14cbcSMatt Macy 
1400eda14cbcSMatt Macy static void
mzap_addent(zap_name_t * zn,uint64_t value)1401eda14cbcSMatt Macy mzap_addent(zap_name_t *zn, uint64_t value)
1402eda14cbcSMatt Macy {
1403eda14cbcSMatt Macy 	zap_t *zap = zn->zn_zap;
1404dbd5678dSMartin Matuska 	uint16_t start = zap->zap_m.zap_alloc_next;
1405eda14cbcSMatt Macy 
1406eda14cbcSMatt Macy 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
1407eda14cbcSMatt Macy 
1408eda14cbcSMatt Macy #ifdef ZFS_DEBUG
1409eda14cbcSMatt Macy 	for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) {
1410eda14cbcSMatt Macy 		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
1411eda14cbcSMatt Macy 		ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
1412eda14cbcSMatt Macy 	}
1413eda14cbcSMatt Macy #endif
1414eda14cbcSMatt Macy 
1415eda14cbcSMatt Macy 	uint32_t cd = mze_find_unused_cd(zap, zn->zn_hash);
1416eda14cbcSMatt Macy 	/* given the limited size of the microzap, this can't happen */
1417eda14cbcSMatt Macy 	ASSERT(cd < zap_maxcd(zap));
1418eda14cbcSMatt Macy 
1419eda14cbcSMatt Macy again:
1420dbd5678dSMartin Matuska 	for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) {
1421eda14cbcSMatt Macy 		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
1422eda14cbcSMatt Macy 		if (mze->mze_name[0] == 0) {
1423eda14cbcSMatt Macy 			mze->mze_value = value;
1424eda14cbcSMatt Macy 			mze->mze_cd = cd;
1425eda14cbcSMatt Macy 			(void) strlcpy(mze->mze_name, zn->zn_key_orig,
1426eda14cbcSMatt Macy 			    sizeof (mze->mze_name));
1427eda14cbcSMatt Macy 			zap->zap_m.zap_num_entries++;
1428eda14cbcSMatt Macy 			zap->zap_m.zap_alloc_next = i+1;
1429eda14cbcSMatt Macy 			if (zap->zap_m.zap_alloc_next ==
1430eda14cbcSMatt Macy 			    zap->zap_m.zap_num_chunks)
1431eda14cbcSMatt Macy 				zap->zap_m.zap_alloc_next = 0;
1432eda14cbcSMatt Macy 			mze_insert(zap, i, zn->zn_hash);
1433eda14cbcSMatt Macy 			return;
1434eda14cbcSMatt Macy 		}
1435eda14cbcSMatt Macy 	}
1436eda14cbcSMatt Macy 	if (start != 0) {
1437eda14cbcSMatt Macy 		start = 0;
1438eda14cbcSMatt Macy 		goto again;
1439eda14cbcSMatt Macy 	}
1440eda14cbcSMatt Macy 	cmn_err(CE_PANIC, "out of entries!");
1441eda14cbcSMatt Macy }
1442eda14cbcSMatt Macy 
1443eda14cbcSMatt Macy static int
zap_add_impl(zap_t * zap,const char * key,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx,const void * tag)1444eda14cbcSMatt Macy zap_add_impl(zap_t *zap, const char *key,
1445eda14cbcSMatt Macy     int integer_size, uint64_t num_integers,
1446a0b956f5SMartin Matuska     const void *val, dmu_tx_t *tx, const void *tag)
1447eda14cbcSMatt Macy {
1448eda14cbcSMatt Macy 	const uint64_t *intval = val;
1449eda14cbcSMatt Macy 	int err = 0;
1450eda14cbcSMatt Macy 
1451dbd5678dSMartin Matuska 	zap_name_t *zn = zap_name_alloc_str(zap, key, 0);
1452eda14cbcSMatt Macy 	if (zn == NULL) {
1453eda14cbcSMatt Macy 		zap_unlockdir(zap, tag);
1454eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1455eda14cbcSMatt Macy 	}
1456eda14cbcSMatt Macy 	if (!zap->zap_ismicro) {
1457eda14cbcSMatt Macy 		err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
1458eda14cbcSMatt Macy 		zap = zn->zn_zap;	/* fzap_add() may change zap */
1459eda14cbcSMatt Macy 	} else if (integer_size != 8 || num_integers != 1 ||
1460eda14cbcSMatt Macy 	    strlen(key) >= MZAP_NAME_LEN ||
1461eda14cbcSMatt Macy 	    !mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
1462eda14cbcSMatt Macy 		err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
1463eda14cbcSMatt Macy 		if (err == 0) {
1464eda14cbcSMatt Macy 			err = fzap_add(zn, integer_size, num_integers, val,
1465eda14cbcSMatt Macy 			    tag, tx);
1466eda14cbcSMatt Macy 		}
1467eda14cbcSMatt Macy 		zap = zn->zn_zap;	/* fzap_add() may change zap */
1468eda14cbcSMatt Macy 	} else {
1469dbd5678dSMartin Matuska 		zfs_btree_index_t idx;
1470dbd5678dSMartin Matuska 		if (mze_find(zn, &idx) != NULL) {
1471eda14cbcSMatt Macy 			err = SET_ERROR(EEXIST);
1472eda14cbcSMatt Macy 		} else {
1473eda14cbcSMatt Macy 			mzap_addent(zn, *intval);
1474eda14cbcSMatt Macy 		}
1475eda14cbcSMatt Macy 	}
1476eda14cbcSMatt Macy 	ASSERT(zap == zn->zn_zap);
1477eda14cbcSMatt Macy 	zap_name_free(zn);
1478eda14cbcSMatt Macy 	if (zap != NULL)	/* may be NULL if fzap_add() failed */
1479eda14cbcSMatt Macy 		zap_unlockdir(zap, tag);
1480eda14cbcSMatt Macy 	return (err);
1481eda14cbcSMatt Macy }
1482eda14cbcSMatt Macy 
1483eda14cbcSMatt Macy int
zap_add(objset_t * os,uint64_t zapobj,const char * key,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1484eda14cbcSMatt Macy zap_add(objset_t *os, uint64_t zapobj, const char *key,
1485eda14cbcSMatt Macy     int integer_size, uint64_t num_integers,
1486eda14cbcSMatt Macy     const void *val, dmu_tx_t *tx)
1487eda14cbcSMatt Macy {
1488eda14cbcSMatt Macy 	zap_t *zap;
1489eda14cbcSMatt Macy 	int err;
1490eda14cbcSMatt Macy 
1491eda14cbcSMatt Macy 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1492eda14cbcSMatt Macy 	if (err != 0)
1493eda14cbcSMatt Macy 		return (err);
1494eda14cbcSMatt Macy 	err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG);
1495eda14cbcSMatt Macy 	/* zap_add_impl() calls zap_unlockdir() */
1496eda14cbcSMatt Macy 	return (err);
1497eda14cbcSMatt Macy }
1498eda14cbcSMatt Macy 
1499eda14cbcSMatt Macy int
zap_add_by_dnode(dnode_t * dn,const char * key,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1500eda14cbcSMatt Macy zap_add_by_dnode(dnode_t *dn, const char *key,
1501eda14cbcSMatt Macy     int integer_size, uint64_t num_integers,
1502eda14cbcSMatt Macy     const void *val, dmu_tx_t *tx)
1503eda14cbcSMatt Macy {
1504eda14cbcSMatt Macy 	zap_t *zap;
1505eda14cbcSMatt Macy 	int err;
1506eda14cbcSMatt Macy 
1507eda14cbcSMatt Macy 	err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1508eda14cbcSMatt Macy 	if (err != 0)
1509eda14cbcSMatt Macy 		return (err);
1510eda14cbcSMatt Macy 	err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG);
1511eda14cbcSMatt Macy 	/* zap_add_impl() calls zap_unlockdir() */
1512eda14cbcSMatt Macy 	return (err);
1513eda14cbcSMatt Macy }
1514eda14cbcSMatt Macy 
1515783d3ff6SMartin Matuska static int
zap_add_uint64_impl(zap_t * zap,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx,const void * tag)1516783d3ff6SMartin Matuska zap_add_uint64_impl(zap_t *zap, const uint64_t *key,
1517783d3ff6SMartin Matuska     int key_numints, int integer_size, uint64_t num_integers,
1518783d3ff6SMartin Matuska     const void *val, dmu_tx_t *tx, const void *tag)
1519783d3ff6SMartin Matuska {
1520783d3ff6SMartin Matuska 	int err;
1521783d3ff6SMartin Matuska 
1522783d3ff6SMartin Matuska 	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
1523783d3ff6SMartin Matuska 	if (zn == NULL) {
1524783d3ff6SMartin Matuska 		zap_unlockdir(zap, tag);
1525783d3ff6SMartin Matuska 		return (SET_ERROR(ENOTSUP));
1526783d3ff6SMartin Matuska 	}
1527783d3ff6SMartin Matuska 	err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
1528783d3ff6SMartin Matuska 	zap = zn->zn_zap;	/* fzap_add() may change zap */
1529783d3ff6SMartin Matuska 	zap_name_free(zn);
1530783d3ff6SMartin Matuska 	if (zap != NULL)	/* may be NULL if fzap_add() failed */
1531783d3ff6SMartin Matuska 		zap_unlockdir(zap, tag);
1532783d3ff6SMartin Matuska 	return (err);
1533783d3ff6SMartin Matuska }
1534783d3ff6SMartin Matuska 
1535eda14cbcSMatt Macy int
zap_add_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1536eda14cbcSMatt Macy zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1537eda14cbcSMatt Macy     int key_numints, int integer_size, uint64_t num_integers,
1538eda14cbcSMatt Macy     const void *val, dmu_tx_t *tx)
1539eda14cbcSMatt Macy {
1540eda14cbcSMatt Macy 	zap_t *zap;
1541eda14cbcSMatt Macy 
1542eda14cbcSMatt Macy 	int err =
1543eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1544eda14cbcSMatt Macy 	if (err != 0)
1545eda14cbcSMatt Macy 		return (err);
1546783d3ff6SMartin Matuska 	err = zap_add_uint64_impl(zap, key, key_numints,
1547783d3ff6SMartin Matuska 	    integer_size, num_integers, val, tx, FTAG);
1548783d3ff6SMartin Matuska 	/* zap_add_uint64_impl() calls zap_unlockdir() */
1549783d3ff6SMartin Matuska 	return (err);
1550eda14cbcSMatt Macy }
1551783d3ff6SMartin Matuska 
1552783d3ff6SMartin Matuska int
zap_add_uint64_by_dnode(dnode_t * dn,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1553783d3ff6SMartin Matuska zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
1554783d3ff6SMartin Matuska     int key_numints, int integer_size, uint64_t num_integers,
1555783d3ff6SMartin Matuska     const void *val, dmu_tx_t *tx)
1556783d3ff6SMartin Matuska {
1557783d3ff6SMartin Matuska 	zap_t *zap;
1558783d3ff6SMartin Matuska 
1559783d3ff6SMartin Matuska 	int err =
1560783d3ff6SMartin Matuska 	    zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1561783d3ff6SMartin Matuska 	if (err != 0)
1562783d3ff6SMartin Matuska 		return (err);
1563783d3ff6SMartin Matuska 	err = zap_add_uint64_impl(zap, key, key_numints,
1564783d3ff6SMartin Matuska 	    integer_size, num_integers, val, tx, FTAG);
1565783d3ff6SMartin Matuska 	/* zap_add_uint64_impl() calls zap_unlockdir() */
1566eda14cbcSMatt Macy 	return (err);
1567eda14cbcSMatt Macy }
1568eda14cbcSMatt Macy 
1569eda14cbcSMatt Macy int
zap_update(objset_t * os,uint64_t zapobj,const char * name,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1570eda14cbcSMatt Macy zap_update(objset_t *os, uint64_t zapobj, const char *name,
1571eda14cbcSMatt Macy     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
1572eda14cbcSMatt Macy {
1573eda14cbcSMatt Macy 	zap_t *zap;
1574eda14cbcSMatt Macy 	const uint64_t *intval = val;
1575eda14cbcSMatt Macy 
1576eda14cbcSMatt Macy 	int err =
1577eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1578eda14cbcSMatt Macy 	if (err != 0)
1579eda14cbcSMatt Macy 		return (err);
1580dbd5678dSMartin Matuska 	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
1581eda14cbcSMatt Macy 	if (zn == NULL) {
1582eda14cbcSMatt Macy 		zap_unlockdir(zap, FTAG);
1583eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1584eda14cbcSMatt Macy 	}
1585eda14cbcSMatt Macy 	if (!zap->zap_ismicro) {
1586eda14cbcSMatt Macy 		err = fzap_update(zn, integer_size, num_integers, val,
1587eda14cbcSMatt Macy 		    FTAG, tx);
1588eda14cbcSMatt Macy 		zap = zn->zn_zap;	/* fzap_update() may change zap */
1589eda14cbcSMatt Macy 	} else if (integer_size != 8 || num_integers != 1 ||
1590eda14cbcSMatt Macy 	    strlen(name) >= MZAP_NAME_LEN) {
1591eda14cbcSMatt Macy 		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
159233b8c039SMartin Matuska 		    (u_longlong_t)zapobj, integer_size,
159333b8c039SMartin Matuska 		    (u_longlong_t)num_integers, name);
1594eda14cbcSMatt Macy 		err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
1595eda14cbcSMatt Macy 		if (err == 0) {
1596eda14cbcSMatt Macy 			err = fzap_update(zn, integer_size, num_integers,
1597eda14cbcSMatt Macy 			    val, FTAG, tx);
1598eda14cbcSMatt Macy 		}
1599eda14cbcSMatt Macy 		zap = zn->zn_zap;	/* fzap_update() may change zap */
1600eda14cbcSMatt Macy 	} else {
1601dbd5678dSMartin Matuska 		zfs_btree_index_t idx;
1602dbd5678dSMartin Matuska 		mzap_ent_t *mze = mze_find(zn, &idx);
1603eda14cbcSMatt Macy 		if (mze != NULL) {
1604eda14cbcSMatt Macy 			MZE_PHYS(zap, mze)->mze_value = *intval;
1605eda14cbcSMatt Macy 		} else {
1606eda14cbcSMatt Macy 			mzap_addent(zn, *intval);
1607eda14cbcSMatt Macy 		}
1608eda14cbcSMatt Macy 	}
1609eda14cbcSMatt Macy 	ASSERT(zap == zn->zn_zap);
1610eda14cbcSMatt Macy 	zap_name_free(zn);
1611eda14cbcSMatt Macy 	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
1612eda14cbcSMatt Macy 		zap_unlockdir(zap, FTAG);
1613eda14cbcSMatt Macy 	return (err);
1614eda14cbcSMatt Macy }
1615eda14cbcSMatt Macy 
1616783d3ff6SMartin Matuska static int
zap_update_uint64_impl(zap_t * zap,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx,const void * tag)1617783d3ff6SMartin Matuska zap_update_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
1618783d3ff6SMartin Matuska     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx,
1619783d3ff6SMartin Matuska     const void *tag)
1620783d3ff6SMartin Matuska {
1621783d3ff6SMartin Matuska 	int err;
1622783d3ff6SMartin Matuska 
1623783d3ff6SMartin Matuska 	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
1624783d3ff6SMartin Matuska 	if (zn == NULL) {
1625783d3ff6SMartin Matuska 		zap_unlockdir(zap, tag);
1626783d3ff6SMartin Matuska 		return (SET_ERROR(ENOTSUP));
1627783d3ff6SMartin Matuska 	}
1628783d3ff6SMartin Matuska 	err = fzap_update(zn, integer_size, num_integers, val, tag, tx);
1629783d3ff6SMartin Matuska 	zap = zn->zn_zap;	/* fzap_update() may change zap */
1630783d3ff6SMartin Matuska 	zap_name_free(zn);
1631783d3ff6SMartin Matuska 	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
1632783d3ff6SMartin Matuska 		zap_unlockdir(zap, tag);
1633783d3ff6SMartin Matuska 	return (err);
1634783d3ff6SMartin Matuska }
1635783d3ff6SMartin Matuska 
1636eda14cbcSMatt Macy int
zap_update_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1637eda14cbcSMatt Macy zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1638783d3ff6SMartin Matuska     int key_numints, int integer_size, uint64_t num_integers, const void *val,
1639783d3ff6SMartin Matuska     dmu_tx_t *tx)
1640eda14cbcSMatt Macy {
1641eda14cbcSMatt Macy 	zap_t *zap;
1642eda14cbcSMatt Macy 
1643eda14cbcSMatt Macy 	int err =
1644eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1645eda14cbcSMatt Macy 	if (err != 0)
1646eda14cbcSMatt Macy 		return (err);
1647783d3ff6SMartin Matuska 	err = zap_update_uint64_impl(zap, key, key_numints,
1648783d3ff6SMartin Matuska 	    integer_size, num_integers, val, tx, FTAG);
1649783d3ff6SMartin Matuska 	/* zap_update_uint64_impl() calls zap_unlockdir() */
1650783d3ff6SMartin Matuska 	return (err);
1651eda14cbcSMatt Macy }
1652783d3ff6SMartin Matuska 
1653783d3ff6SMartin Matuska int
zap_update_uint64_by_dnode(dnode_t * dn,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1654783d3ff6SMartin Matuska zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
1655783d3ff6SMartin Matuska     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
1656783d3ff6SMartin Matuska {
1657783d3ff6SMartin Matuska 	zap_t *zap;
1658783d3ff6SMartin Matuska 
1659783d3ff6SMartin Matuska 	int err =
1660783d3ff6SMartin Matuska 	    zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1661783d3ff6SMartin Matuska 	if (err != 0)
1662783d3ff6SMartin Matuska 		return (err);
1663783d3ff6SMartin Matuska 	err = zap_update_uint64_impl(zap, key, key_numints,
1664783d3ff6SMartin Matuska 	    integer_size, num_integers, val, tx, FTAG);
1665783d3ff6SMartin Matuska 	/* zap_update_uint64_impl() calls zap_unlockdir() */
1666eda14cbcSMatt Macy 	return (err);
1667eda14cbcSMatt Macy }
1668eda14cbcSMatt Macy 
1669eda14cbcSMatt Macy int
zap_remove(objset_t * os,uint64_t zapobj,const char * name,dmu_tx_t * tx)1670eda14cbcSMatt Macy zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
1671eda14cbcSMatt Macy {
1672eda14cbcSMatt Macy 	return (zap_remove_norm(os, zapobj, name, 0, tx));
1673eda14cbcSMatt Macy }
1674eda14cbcSMatt Macy 
1675eda14cbcSMatt Macy static int
zap_remove_impl(zap_t * zap,const char * name,matchtype_t mt,dmu_tx_t * tx)1676eda14cbcSMatt Macy zap_remove_impl(zap_t *zap, const char *name,
1677eda14cbcSMatt Macy     matchtype_t mt, dmu_tx_t *tx)
1678eda14cbcSMatt Macy {
1679eda14cbcSMatt Macy 	int err = 0;
1680eda14cbcSMatt Macy 
1681dbd5678dSMartin Matuska 	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
1682eda14cbcSMatt Macy 	if (zn == NULL)
1683eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1684eda14cbcSMatt Macy 	if (!zap->zap_ismicro) {
1685eda14cbcSMatt Macy 		err = fzap_remove(zn, tx);
1686eda14cbcSMatt Macy 	} else {
1687dbd5678dSMartin Matuska 		zfs_btree_index_t idx;
1688dbd5678dSMartin Matuska 		mzap_ent_t *mze = mze_find(zn, &idx);
1689eda14cbcSMatt Macy 		if (mze == NULL) {
1690eda14cbcSMatt Macy 			err = SET_ERROR(ENOENT);
1691eda14cbcSMatt Macy 		} else {
1692eda14cbcSMatt Macy 			zap->zap_m.zap_num_entries--;
1693dbd5678dSMartin Matuska 			memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t));
1694dbd5678dSMartin Matuska 			zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx);
1695eda14cbcSMatt Macy 		}
1696eda14cbcSMatt Macy 	}
1697eda14cbcSMatt Macy 	zap_name_free(zn);
1698eda14cbcSMatt Macy 	return (err);
1699eda14cbcSMatt Macy }
1700eda14cbcSMatt Macy 
1701eda14cbcSMatt Macy int
zap_remove_norm(objset_t * os,uint64_t zapobj,const char * name,matchtype_t mt,dmu_tx_t * tx)1702eda14cbcSMatt Macy zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
1703eda14cbcSMatt Macy     matchtype_t mt, dmu_tx_t *tx)
1704eda14cbcSMatt Macy {
1705eda14cbcSMatt Macy 	zap_t *zap;
1706eda14cbcSMatt Macy 	int err;
1707eda14cbcSMatt Macy 
1708eda14cbcSMatt Macy 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
1709eda14cbcSMatt Macy 	if (err)
1710eda14cbcSMatt Macy 		return (err);
1711eda14cbcSMatt Macy 	err = zap_remove_impl(zap, name, mt, tx);
1712eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1713eda14cbcSMatt Macy 	return (err);
1714eda14cbcSMatt Macy }
1715eda14cbcSMatt Macy 
1716eda14cbcSMatt Macy int
zap_remove_by_dnode(dnode_t * dn,const char * name,dmu_tx_t * tx)1717eda14cbcSMatt Macy zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx)
1718eda14cbcSMatt Macy {
1719eda14cbcSMatt Macy 	zap_t *zap;
1720eda14cbcSMatt Macy 	int err;
1721eda14cbcSMatt Macy 
1722eda14cbcSMatt Macy 	err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
1723eda14cbcSMatt Macy 	if (err)
1724eda14cbcSMatt Macy 		return (err);
1725eda14cbcSMatt Macy 	err = zap_remove_impl(zap, name, 0, tx);
1726eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1727eda14cbcSMatt Macy 	return (err);
1728eda14cbcSMatt Macy }
1729eda14cbcSMatt Macy 
1730783d3ff6SMartin Matuska static int
zap_remove_uint64_impl(zap_t * zap,const uint64_t * key,int key_numints,dmu_tx_t * tx,const void * tag)1731783d3ff6SMartin Matuska zap_remove_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
1732783d3ff6SMartin Matuska     dmu_tx_t *tx, const void *tag)
1733783d3ff6SMartin Matuska {
1734783d3ff6SMartin Matuska 	int err;
1735783d3ff6SMartin Matuska 
1736783d3ff6SMartin Matuska 	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
1737783d3ff6SMartin Matuska 	if (zn == NULL) {
1738783d3ff6SMartin Matuska 		zap_unlockdir(zap, tag);
1739783d3ff6SMartin Matuska 		return (SET_ERROR(ENOTSUP));
1740783d3ff6SMartin Matuska 	}
1741783d3ff6SMartin Matuska 	err = fzap_remove(zn, tx);
1742783d3ff6SMartin Matuska 	zap_name_free(zn);
1743783d3ff6SMartin Matuska 	zap_unlockdir(zap, tag);
1744783d3ff6SMartin Matuska 	return (err);
1745783d3ff6SMartin Matuska }
1746783d3ff6SMartin Matuska 
1747eda14cbcSMatt Macy int
zap_remove_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,dmu_tx_t * tx)1748eda14cbcSMatt Macy zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1749eda14cbcSMatt Macy     int key_numints, dmu_tx_t *tx)
1750eda14cbcSMatt Macy {
1751eda14cbcSMatt Macy 	zap_t *zap;
1752eda14cbcSMatt Macy 
1753eda14cbcSMatt Macy 	int err =
1754eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
1755eda14cbcSMatt Macy 	if (err != 0)
1756eda14cbcSMatt Macy 		return (err);
1757783d3ff6SMartin Matuska 	err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
1758783d3ff6SMartin Matuska 	/* zap_remove_uint64_impl() calls zap_unlockdir() */
1759783d3ff6SMartin Matuska 	return (err);
1760eda14cbcSMatt Macy }
1761783d3ff6SMartin Matuska 
1762783d3ff6SMartin Matuska int
zap_remove_uint64_by_dnode(dnode_t * dn,const uint64_t * key,int key_numints,dmu_tx_t * tx)1763783d3ff6SMartin Matuska zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
1764783d3ff6SMartin Matuska     dmu_tx_t *tx)
1765783d3ff6SMartin Matuska {
1766783d3ff6SMartin Matuska 	zap_t *zap;
1767783d3ff6SMartin Matuska 
1768783d3ff6SMartin Matuska 	int err =
1769783d3ff6SMartin Matuska 	    zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
1770783d3ff6SMartin Matuska 	if (err != 0)
1771783d3ff6SMartin Matuska 		return (err);
1772783d3ff6SMartin Matuska 	err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
1773783d3ff6SMartin Matuska 	/* zap_remove_uint64_impl() calls zap_unlockdir() */
1774eda14cbcSMatt Macy 	return (err);
1775eda14cbcSMatt Macy }
1776eda14cbcSMatt Macy 
17777a7741afSMartin Matuska 
17787a7741afSMartin Matuska static zap_attribute_t *
zap_attribute_alloc_impl(boolean_t longname)17797a7741afSMartin Matuska zap_attribute_alloc_impl(boolean_t longname)
17807a7741afSMartin Matuska {
17817a7741afSMartin Matuska 	zap_attribute_t *za;
17827a7741afSMartin Matuska 
17837a7741afSMartin Matuska 	za = kmem_cache_alloc((longname)? zap_attr_long_cache : zap_attr_cache,
17847a7741afSMartin Matuska 	    KM_SLEEP);
17857a7741afSMartin Matuska 	za->za_name_len = (longname)? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;
17867a7741afSMartin Matuska 	return (za);
17877a7741afSMartin Matuska }
17887a7741afSMartin Matuska 
17897a7741afSMartin Matuska zap_attribute_t *
zap_attribute_alloc(void)17907a7741afSMartin Matuska zap_attribute_alloc(void)
17917a7741afSMartin Matuska {
17927a7741afSMartin Matuska 	return (zap_attribute_alloc_impl(B_FALSE));
17937a7741afSMartin Matuska }
17947a7741afSMartin Matuska 
17957a7741afSMartin Matuska zap_attribute_t *
zap_attribute_long_alloc(void)17967a7741afSMartin Matuska zap_attribute_long_alloc(void)
17977a7741afSMartin Matuska {
17987a7741afSMartin Matuska 	return (zap_attribute_alloc_impl(B_TRUE));
17997a7741afSMartin Matuska }
18007a7741afSMartin Matuska 
18017a7741afSMartin Matuska void
zap_attribute_free(zap_attribute_t * za)18027a7741afSMartin Matuska zap_attribute_free(zap_attribute_t *za)
18037a7741afSMartin Matuska {
18047a7741afSMartin Matuska 	if (za->za_name_len == ZAP_MAXNAMELEN) {
18057a7741afSMartin Matuska 		kmem_cache_free(zap_attr_cache, za);
18067a7741afSMartin Matuska 	} else {
18077a7741afSMartin Matuska 		ASSERT3U(za->za_name_len, ==, ZAP_MAXNAMELEN_NEW);
18087a7741afSMartin Matuska 		kmem_cache_free(zap_attr_long_cache, za);
18097a7741afSMartin Matuska 	}
18107a7741afSMartin Matuska }
18117a7741afSMartin Matuska 
1812eda14cbcSMatt Macy /*
1813eda14cbcSMatt Macy  * Routines for iterating over the attributes.
1814eda14cbcSMatt Macy  */
1815eda14cbcSMatt Macy 
1816eda14cbcSMatt Macy static void
zap_cursor_init_impl(zap_cursor_t * zc,objset_t * os,uint64_t zapobj,uint64_t serialized,boolean_t prefetch)1817eda14cbcSMatt Macy zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
1818eda14cbcSMatt Macy     uint64_t serialized, boolean_t prefetch)
1819eda14cbcSMatt Macy {
1820eda14cbcSMatt Macy 	zc->zc_objset = os;
1821eda14cbcSMatt Macy 	zc->zc_zap = NULL;
1822eda14cbcSMatt Macy 	zc->zc_leaf = NULL;
1823eda14cbcSMatt Macy 	zc->zc_zapobj = zapobj;
1824eda14cbcSMatt Macy 	zc->zc_serialized = serialized;
1825eda14cbcSMatt Macy 	zc->zc_hash = 0;
1826eda14cbcSMatt Macy 	zc->zc_cd = 0;
1827eda14cbcSMatt Macy 	zc->zc_prefetch = prefetch;
1828eda14cbcSMatt Macy }
1829eda14cbcSMatt Macy void
zap_cursor_init_serialized(zap_cursor_t * zc,objset_t * os,uint64_t zapobj,uint64_t serialized)1830eda14cbcSMatt Macy zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
1831eda14cbcSMatt Macy     uint64_t serialized)
1832eda14cbcSMatt Macy {
1833eda14cbcSMatt Macy 	zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE);
1834eda14cbcSMatt Macy }
1835eda14cbcSMatt Macy 
1836eda14cbcSMatt Macy /*
1837eda14cbcSMatt Macy  * Initialize a cursor at the beginning of the ZAP object.  The entire
1838eda14cbcSMatt Macy  * ZAP object will be prefetched.
1839eda14cbcSMatt Macy  */
1840eda14cbcSMatt Macy void
zap_cursor_init(zap_cursor_t * zc,objset_t * os,uint64_t zapobj)1841eda14cbcSMatt Macy zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
1842eda14cbcSMatt Macy {
1843eda14cbcSMatt Macy 	zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE);
1844eda14cbcSMatt Macy }
1845eda14cbcSMatt Macy 
1846eda14cbcSMatt Macy /*
1847eda14cbcSMatt Macy  * Initialize a cursor at the beginning, but request that we not prefetch
1848eda14cbcSMatt Macy  * the entire ZAP object.
1849eda14cbcSMatt Macy  */
1850eda14cbcSMatt Macy void
zap_cursor_init_noprefetch(zap_cursor_t * zc,objset_t * os,uint64_t zapobj)1851eda14cbcSMatt Macy zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
1852eda14cbcSMatt Macy {
1853eda14cbcSMatt Macy 	zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE);
1854eda14cbcSMatt Macy }
1855eda14cbcSMatt Macy 
1856eda14cbcSMatt Macy void
zap_cursor_fini(zap_cursor_t * zc)1857eda14cbcSMatt Macy zap_cursor_fini(zap_cursor_t *zc)
1858eda14cbcSMatt Macy {
1859eda14cbcSMatt Macy 	if (zc->zc_zap) {
1860eda14cbcSMatt Macy 		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1861eda14cbcSMatt Macy 		zap_unlockdir(zc->zc_zap, NULL);
1862eda14cbcSMatt Macy 		zc->zc_zap = NULL;
1863eda14cbcSMatt Macy 	}
1864eda14cbcSMatt Macy 	if (zc->zc_leaf) {
1865eda14cbcSMatt Macy 		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
1866eda14cbcSMatt Macy 		zap_put_leaf(zc->zc_leaf);
1867eda14cbcSMatt Macy 		zc->zc_leaf = NULL;
1868eda14cbcSMatt Macy 	}
1869eda14cbcSMatt Macy 	zc->zc_objset = NULL;
1870eda14cbcSMatt Macy }
1871eda14cbcSMatt Macy 
1872eda14cbcSMatt Macy uint64_t
zap_cursor_serialize(zap_cursor_t * zc)1873eda14cbcSMatt Macy zap_cursor_serialize(zap_cursor_t *zc)
1874eda14cbcSMatt Macy {
1875eda14cbcSMatt Macy 	if (zc->zc_hash == -1ULL)
1876eda14cbcSMatt Macy 		return (-1ULL);
1877eda14cbcSMatt Macy 	if (zc->zc_zap == NULL)
1878eda14cbcSMatt Macy 		return (zc->zc_serialized);
1879eda14cbcSMatt Macy 	ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
1880eda14cbcSMatt Macy 	ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
1881eda14cbcSMatt Macy 
1882eda14cbcSMatt Macy 	/*
1883eda14cbcSMatt Macy 	 * We want to keep the high 32 bits of the cursor zero if we can, so
1884eda14cbcSMatt Macy 	 * that 32-bit programs can access this.  So usually use a small
1885eda14cbcSMatt Macy 	 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
1886eda14cbcSMatt Macy 	 * of the cursor.
1887eda14cbcSMatt Macy 	 *
1888eda14cbcSMatt Macy 	 * [ collision differentiator | zap_hashbits()-bit hash value ]
1889eda14cbcSMatt Macy 	 */
1890eda14cbcSMatt Macy 	return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
1891eda14cbcSMatt Macy 	    ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
1892eda14cbcSMatt Macy }
1893eda14cbcSMatt Macy 
1894eda14cbcSMatt Macy int
zap_cursor_retrieve(zap_cursor_t * zc,zap_attribute_t * za)1895eda14cbcSMatt Macy zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
1896eda14cbcSMatt Macy {
1897eda14cbcSMatt Macy 	int err;
1898eda14cbcSMatt Macy 
1899eda14cbcSMatt Macy 	if (zc->zc_hash == -1ULL)
1900eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
1901eda14cbcSMatt Macy 
1902eda14cbcSMatt Macy 	if (zc->zc_zap == NULL) {
1903eda14cbcSMatt Macy 		int hb;
1904eda14cbcSMatt Macy 		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
1905eda14cbcSMatt Macy 		    RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
1906eda14cbcSMatt Macy 		if (err != 0)
1907eda14cbcSMatt Macy 			return (err);
1908eda14cbcSMatt Macy 
1909eda14cbcSMatt Macy 		/*
1910eda14cbcSMatt Macy 		 * To support zap_cursor_init_serialized, advance, retrieve,
1911eda14cbcSMatt Macy 		 * we must add to the existing zc_cd, which may already
1912eda14cbcSMatt Macy 		 * be 1 due to the zap_cursor_advance.
1913eda14cbcSMatt Macy 		 */
1914eda14cbcSMatt Macy 		ASSERT(zc->zc_hash == 0);
1915eda14cbcSMatt Macy 		hb = zap_hashbits(zc->zc_zap);
1916eda14cbcSMatt Macy 		zc->zc_hash = zc->zc_serialized << (64 - hb);
1917eda14cbcSMatt Macy 		zc->zc_cd += zc->zc_serialized >> hb;
1918eda14cbcSMatt Macy 		if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
1919eda14cbcSMatt Macy 			zc->zc_cd = 0;
1920eda14cbcSMatt Macy 	} else {
1921eda14cbcSMatt Macy 		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1922eda14cbcSMatt Macy 	}
1923eda14cbcSMatt Macy 	if (!zc->zc_zap->zap_ismicro) {
1924eda14cbcSMatt Macy 		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
1925eda14cbcSMatt Macy 	} else {
1926dbd5678dSMartin Matuska 		zfs_btree_index_t idx;
1927eda14cbcSMatt Macy 		mzap_ent_t mze_tofind;
1928eda14cbcSMatt Macy 
1929dbd5678dSMartin Matuska 		mze_tofind.mze_hash = zc->zc_hash >> 32;
1930eda14cbcSMatt Macy 		mze_tofind.mze_cd = zc->zc_cd;
1931eda14cbcSMatt Macy 
1932dbd5678dSMartin Matuska 		mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree,
1933dbd5678dSMartin Matuska 		    &mze_tofind, &idx);
1934eda14cbcSMatt Macy 		if (mze == NULL) {
1935dbd5678dSMartin Matuska 			mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree,
1936dbd5678dSMartin Matuska 			    &idx, &idx);
1937eda14cbcSMatt Macy 		}
1938eda14cbcSMatt Macy 		if (mze) {
1939eda14cbcSMatt Macy 			mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
1940eda14cbcSMatt Macy 			ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
1941eda14cbcSMatt Macy 			za->za_normalization_conflict =
1942dbd5678dSMartin Matuska 			    mzap_normalization_conflict(zc->zc_zap, NULL,
1943dbd5678dSMartin Matuska 			    mze, &idx);
1944eda14cbcSMatt Macy 			za->za_integer_length = 8;
1945eda14cbcSMatt Macy 			za->za_num_integers = 1;
1946eda14cbcSMatt Macy 			za->za_first_integer = mzep->mze_value;
1947eda14cbcSMatt Macy 			(void) strlcpy(za->za_name, mzep->mze_name,
19487a7741afSMartin Matuska 			    za->za_name_len);
1949dbd5678dSMartin Matuska 			zc->zc_hash = (uint64_t)mze->mze_hash << 32;
1950eda14cbcSMatt Macy 			zc->zc_cd = mze->mze_cd;
1951eda14cbcSMatt Macy 			err = 0;
1952eda14cbcSMatt Macy 		} else {
1953eda14cbcSMatt Macy 			zc->zc_hash = -1ULL;
1954eda14cbcSMatt Macy 			err = SET_ERROR(ENOENT);
1955eda14cbcSMatt Macy 		}
1956eda14cbcSMatt Macy 	}
1957eda14cbcSMatt Macy 	rw_exit(&zc->zc_zap->zap_rwlock);
1958eda14cbcSMatt Macy 	return (err);
1959eda14cbcSMatt Macy }
1960eda14cbcSMatt Macy 
1961eda14cbcSMatt Macy void
zap_cursor_advance(zap_cursor_t * zc)1962eda14cbcSMatt Macy zap_cursor_advance(zap_cursor_t *zc)
1963eda14cbcSMatt Macy {
1964eda14cbcSMatt Macy 	if (zc->zc_hash == -1ULL)
1965eda14cbcSMatt Macy 		return;
1966eda14cbcSMatt Macy 	zc->zc_cd++;
1967eda14cbcSMatt Macy }
1968eda14cbcSMatt Macy 
1969eda14cbcSMatt Macy int
zap_get_stats(objset_t * os,uint64_t zapobj,zap_stats_t * zs)1970eda14cbcSMatt Macy zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
1971eda14cbcSMatt Macy {
1972eda14cbcSMatt Macy 	zap_t *zap;
1973eda14cbcSMatt Macy 
1974eda14cbcSMatt Macy 	int err =
1975eda14cbcSMatt Macy 	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1976eda14cbcSMatt Macy 	if (err != 0)
1977eda14cbcSMatt Macy 		return (err);
1978eda14cbcSMatt Macy 
1979da5137abSMartin Matuska 	memset(zs, 0, sizeof (zap_stats_t));
1980eda14cbcSMatt Macy 
1981eda14cbcSMatt Macy 	if (zap->zap_ismicro) {
1982eda14cbcSMatt Macy 		zs->zs_blocksize = zap->zap_dbuf->db_size;
1983eda14cbcSMatt Macy 		zs->zs_num_entries = zap->zap_m.zap_num_entries;
1984eda14cbcSMatt Macy 		zs->zs_num_blocks = 1;
1985eda14cbcSMatt Macy 	} else {
1986eda14cbcSMatt Macy 		fzap_get_stats(zap, zs);
1987eda14cbcSMatt Macy 	}
1988eda14cbcSMatt Macy 	zap_unlockdir(zap, FTAG);
1989eda14cbcSMatt Macy 	return (0);
1990eda14cbcSMatt Macy }
1991eda14cbcSMatt Macy 
#if defined(_KERNEL)
/* Object creation and destruction. */
EXPORT_SYMBOL(zap_create);
EXPORT_SYMBOL(zap_create_dnsize);
EXPORT_SYMBOL(zap_create_norm);
EXPORT_SYMBOL(zap_create_norm_dnsize);
EXPORT_SYMBOL(zap_create_flags);
EXPORT_SYMBOL(zap_create_flags_dnsize);
EXPORT_SYMBOL(zap_create_claim);
EXPORT_SYMBOL(zap_create_claim_norm);
EXPORT_SYMBOL(zap_create_claim_norm_dnsize);
EXPORT_SYMBOL(zap_create_hold);
EXPORT_SYMBOL(zap_destroy);
/* Lookup and prefetch. */
EXPORT_SYMBOL(zap_lookup);
EXPORT_SYMBOL(zap_lookup_by_dnode);
EXPORT_SYMBOL(zap_lookup_norm);
EXPORT_SYMBOL(zap_lookup_uint64);
EXPORT_SYMBOL(zap_contains);
EXPORT_SYMBOL(zap_prefetch);
EXPORT_SYMBOL(zap_prefetch_uint64);
EXPORT_SYMBOL(zap_prefetch_object);
/* Add, update, length and remove. */
EXPORT_SYMBOL(zap_add);
EXPORT_SYMBOL(zap_add_by_dnode);
EXPORT_SYMBOL(zap_add_uint64);
EXPORT_SYMBOL(zap_add_uint64_by_dnode);
EXPORT_SYMBOL(zap_update);
EXPORT_SYMBOL(zap_update_uint64);
EXPORT_SYMBOL(zap_update_uint64_by_dnode);
EXPORT_SYMBOL(zap_length);
EXPORT_SYMBOL(zap_length_uint64);
EXPORT_SYMBOL(zap_remove);
EXPORT_SYMBOL(zap_remove_by_dnode);
EXPORT_SYMBOL(zap_remove_norm);
EXPORT_SYMBOL(zap_remove_uint64);
EXPORT_SYMBOL(zap_remove_uint64_by_dnode);
/* Counting, searching, joining and integer-keyed helpers. */
EXPORT_SYMBOL(zap_count);
EXPORT_SYMBOL(zap_value_search);
EXPORT_SYMBOL(zap_join);
EXPORT_SYMBOL(zap_join_increment);
EXPORT_SYMBOL(zap_add_int);
EXPORT_SYMBOL(zap_remove_int);
EXPORT_SYMBOL(zap_lookup_int);
EXPORT_SYMBOL(zap_increment_int);
EXPORT_SYMBOL(zap_add_int_key);
EXPORT_SYMBOL(zap_lookup_int_key);
EXPORT_SYMBOL(zap_increment);
/* Cursor iteration and statistics. */
EXPORT_SYMBOL(zap_cursor_init);
EXPORT_SYMBOL(zap_cursor_fini);
EXPORT_SYMBOL(zap_cursor_retrieve);
EXPORT_SYMBOL(zap_cursor_advance);
EXPORT_SYMBOL(zap_cursor_serialize);
EXPORT_SYMBOL(zap_cursor_init_serialized);
EXPORT_SYMBOL(zap_get_stats);

/* Tunable: size limit for a micro ZAP before it is converted to a fat ZAP. */
ZFS_MODULE_PARAM(zfs, , zap_micro_max_size, INT, ZMOD_RW,
	"Maximum micro ZAP size before converting to a fat ZAP, "
	    "in bytes (max 1M)");
#endif
2049