xref: /freebsd/sys/contrib/openzfs/include/sys/zap.h (revision d9497217456002b0ddad3cd319570d0b098daa29)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
26  * Copyright 2017 Nexenta Systems, Inc.
27  * Copyright (c) 2026, TrueNAS.
28  */
29 
30 #ifndef	_SYS_ZAP_H
31 #define	_SYS_ZAP_H
32 
33 /*
34  * ZAP - ZFS Attribute Processor
35  *
36  * The ZAP is a module which sits on top of the DMU (Data Management
37  * Unit) and implements a higher-level storage primitive using DMU
38  * objects.  Its primary consumer is the ZPL (ZFS Posix Layer).
39  *
40  * A "zapobj" is a DMU object which the ZAP uses to store attributes.
41  * Users should use only zap routines to access a zapobj - they should
42  * not access the DMU object directly using DMU routines.
43  *
44  * The attributes stored in a zapobj are name-value pairs.  The name is
45  * a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including
46  * the terminating NUL).  The value is an array of integers, which may be
47  * 1, 2, 4, or 8 bytes long.  The total space used by the array (number
48  * of integers * integer length) can be up to ZAP_MAXVALUELEN bytes.
49  * Note that an 8-byte integer value can be used to store the location
50  * (object number) of another dmu object (which may be itself a zapobj).
51  * Note that you can use a zero-length attribute to store a single bit
52  * of information - the attribute is present or not.
53  *
54  * The ZAP routines are thread-safe.  However, you must observe the
55  * DMU's restriction that a transaction may not be operated on
56  * concurrently.
57  *
58  * Any of the routines that return an int may return an I/O error (EIO
59  * or ECHECKSUM).
60  *
61  *
62  * Implementation / Performance Notes:
63  *
64  * The ZAP is intended to operate most efficiently on attributes with
65  * short (49 bytes or less) names and single 8-byte values, for which
66  * the microzap will be used.  The ZAP should be efficient enough so
67  * that the user does not need to cache these attributes.
68  *
69  * The ZAP's locking scheme makes its routines thread-safe.  Operations
70  * on different zapobjs will be processed concurrently.  Operations on
71  * the same zapobj which only read data will be processed concurrently.
72  * Operations on the same zapobj which modify data will be processed
73  * concurrently when there are many attributes in the zapobj (because
74  * the ZAP uses per-block locking - more than 128 * (number of cpus)
75  * small attributes will suffice).
76  */
77 
78 /*
79  * We're using zero-terminated byte strings (ie. ASCII or UTF-8 C
80  * strings) for the names of attributes, rather than a byte string
81  * bounded by an explicit length.  If some day we want to support names
82  * in character sets which have embedded zeros (eg. UTF-16, UTF-32),
83  * we'll have to add routines for using length-bounded strings.
84  */
85 
86 #include <sys/dmu.h>
87 
88 #ifdef	__cplusplus
89 extern "C" {
90 #endif
91 
92 /*
93  * Specifies matching criteria for ZAP lookups.
94  * MT_NORMALIZE		Use ZAP normalization flags, which can include both
95  *			unicode normalization and case-insensitivity.
96  * MT_MATCH_CASE	Do case-sensitive lookups even if MT_NORMALIZE is
97  *			specified and ZAP normalization flags include
98  *			U8_TEXTPREP_TOUPPER.
99  */
100 typedef enum matchtype {
101 	MT_NORMALIZE = 1 << 0,	/* apply the ZAP's normalization flags */
102 	MT_MATCH_CASE = 1 << 1,	/* stay case-sensitive even with MT_NORMALIZE */
103 } matchtype_t;
104 
105 typedef enum zap_flags {
106 	/* Use 64-bit hash value (serialized cursors will always use 64-bits) */
107 	ZAP_FLAG_HASH64 = 1 << 0,
108 	/* Key is binary, not string (zap_add_uint64() can be used) */
109 	ZAP_FLAG_UINT64_KEY = 1 << 1,
110 	/*
111 	 * First word of key (which must be an array of uint64) is
112 	 * already randomly distributed.
113 	 */
114 	ZAP_FLAG_PRE_HASHED_KEY = 1 << 2,
115 #if defined(__linux__) && defined(_KERNEL)
116 } zfs_zap_flags_t;	/* Linux kernel build: typedef gets a zfs_ prefix */
117 #define	zap_flags_t	zfs_zap_flags_t	/* zap_flags_t maps onto it */
118 #else
119 } zap_flags_t;		/* everywhere else: plain typedef name */
120 #endif	/* __linux__ && _KERNEL */
121 
122 /*
123  * Create a new zapobj with no attributes and return its object number.
124  */
125 uint64_t zap_create(objset_t *os, dmu_object_type_t ot,
126     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
127 uint64_t zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
128     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
129 uint64_t zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
130     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
131 uint64_t zap_create_norm_dnsize(objset_t *os, int normflags,
132     dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
133     int dnodesize, dmu_tx_t *tx);
134 uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
135     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
136     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
137 uint64_t zap_create_flags_dnsize(objset_t *os, int normflags,
138     zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift,
139     int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
140     int dnodesize, dmu_tx_t *tx);
141 
142 /*
143  * Create a zap object and return a pointer to the newly allocated dnode via
144  * the allocated_dnode argument.  The returned dnode will be held and the
145  * caller is responsible for releasing the hold by calling dnode_rele().
146  */
147 uint64_t zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,
148     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
149     dmu_object_type_t bonustype, int bonuslen, int dnodesize,
150     dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx);
151 
152 /*
153  * Create a new zapobj with no attributes, and add an entry to an existing
154  * zapobj with the given name as key and the object number of the new zapobj as
155  * the value. Returns the object number of the new zapobj.
156  */
157 uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
158     uint64_t parent_obj, const char *name, dmu_tx_t *tx);
159 uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot,
160     uint64_t parent_obj, const char *name, int dnodesize, dmu_tx_t *tx);
161 
162 /*
163  * Initialize an already-allocated object.
164  */
165 void mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags,
166     dmu_tx_t *tx);
167 
168 /*
169  * Create a new zapobj with no attributes from the given (unallocated)
170  * object number.
171  */
172 int zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
173     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
174 int zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
175     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
176 int zap_create_claim_norm(objset_t *os, uint64_t obj,
177     int normflags, dmu_object_type_t ot,
178     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
179 int zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj,
180     int normflags, dmu_object_type_t ot,
181     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
182 
183 /*
184  * All operations on a zapobj take either the objset/objectid pair
185  * that "names" the object, or an existing dnode_t for the object. The
186  * zapobj passed in must be a valid ZAP object.
187  */
188 
189 /*
190  * Destroy this zapobj and all its attributes.
191  *
192  * Frees the object number using dmu_object_free.
193  */
194 int zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
195 
196 /*
197  * Manipulate attributes.
198  *
199  * 'integer_size' is in bytes, and must be 1, 2, 4, or 8.
200  */
201 
202 /*
203  * Retrieve the contents of the attribute with the given name.
204  *
205  * If the requested attribute does not exist, the call will fail and
206  * return ENOENT.
207  *
208  * If 'integer_size' is smaller than the attribute's integer size, the
209  * call will fail and return EINVAL.
210  *
211  * If 'integer_size' is equal to or larger than the attribute's integer
212  * size, the call will succeed and return 0.
213  *
214  * When converting to a larger integer size, the integers will be treated as
215  * unsigned (ie. no sign-extension will be performed).
216  *
217  * 'num_integers' is the length (in integers) of 'buf'.
218  *
219  * If the attribute is longer than the buffer, as many integers as will
220  * fit will be transferred to 'buf'.  If the entire attribute was not
221  * transferred, the call will return EOVERFLOW.
222  */
223 int zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
224     uint64_t integer_size, uint64_t num_integers, void *buf);
225 int zap_lookup_by_dnode(dnode_t *dn, const char *name,
226     uint64_t integer_size, uint64_t num_integers, void *buf);
227 
228 /*
229  * If rn_len is nonzero, realname will be set to the name of the found
230  * entry (which may be different from the requested name if matchtype is
231  * not zero).
232  *
233  * If normalization_conflictp (ncp in the _by_dnode variant) is not NULL, it
234  * will be set if another name has the same case/unicode normalized form.
235  */
236 int zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
237     uint64_t integer_size, uint64_t num_integers, void *buf,
238     matchtype_t mt, char *realname, int rn_len,
239     boolean_t *normalization_conflictp);
240 int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
241     uint64_t integer_size, uint64_t num_integers, void *buf,
242     matchtype_t mt, char *realname, int rn_len,
243     boolean_t *ncp);
244 
245 /*
246  * The _uint64 variants take an array of uint64_t as the key. The ZAP must
247  * be created with ZAP_FLAG_UINT64_KEY.
248  */
249 int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
250     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
251 int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
252     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
253 int zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
254     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
255     uint64_t *actual_num_integers);
256 
257 /*
258  * Look up the attribute with the given name. Returns ENOENT if it does not
259  * exist, 0 if it does. This is like zap_lookup(), but may be more efficient.
260  */
261 int zap_contains(objset_t *os, uint64_t zapobj, const char *name);
262 int zap_contains_by_dnode(dnode_t *dn, const char *name);
263 
264 /*
265  * Prefetch the blocks within the ZAP where the given key is stored. The
266  * prefetch IO will occur in the background.
267  */
268 int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
269 
270 /* Prefetch by uint64_t[] key. */
271 int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
272     int key_numints);
273 int zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
274     int key_numints);
275 
276 /*
277  * Prefetch the entire ZAP object. Unlike zap_prefetch(), will block until
278  * the entire object is loaded into the ARC.
279  */
280 int zap_prefetch_object(objset_t *os, uint64_t zapobj);
281 
282 /*
283  * Create an attribute with the given name and value.
284  *
285  * If an attribute with the given name already exists, the call will
286  * fail and return EEXIST.
287  */
288 int zap_add(objset_t *os, uint64_t zapobj, const char *key,
289     int integer_size, uint64_t num_integers,
290     const void *val, dmu_tx_t *tx);
291 int zap_add_by_dnode(dnode_t *dn, const char *key,
292     int integer_size, uint64_t num_integers,
293     const void *val, dmu_tx_t *tx);
294 
295 /* Add by uint64_t[] key. */
296 int zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
297     int key_numints, int integer_size, uint64_t num_integers,
298     const void *val, dmu_tx_t *tx);
299 int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
300     int key_numints, int integer_size, uint64_t num_integers,
301     const void *val, dmu_tx_t *tx);
302 
303 /*
304  * Set the attribute with the given name to the given value.  If an
305  * attribute with the given name does not exist, it will be created.  If
306  * an attribute with the given name already exists, the previous value
307  * will be overwritten.  The integer_size may be different from the
308  * existing attribute's integer size, in which case the attribute's
309  * integer size will be updated to the new value.
310  */
311 int zap_update(objset_t *os, uint64_t zapobj, const char *name,
312     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
313 int zap_update_by_dnode(dnode_t *dn, const char *name, int integer_size,
314     uint64_t num_integers, const void *val, dmu_tx_t *tx);
315 
316 /* Update by uint64_t[] key. */
317 int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
318     int key_numints,
319     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
320 int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
321     int key_numints,
322     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
323 
324 /*
325  * Get the length (in integers) and the integer size of the specified
326  * attribute.
327  *
328  * If the requested attribute does not exist, the call will fail and
329  * return ENOENT.
330  */
331 int zap_length(objset_t *os, uint64_t zapobj, const char *name,
332     uint64_t *integer_size, uint64_t *num_integers);
333 int zap_length_by_dnode(dnode_t *dn, const char *name,
334     uint64_t *integer_size, uint64_t *num_integers);
335 
336 /* Attribute length by uint64_t[] key. */
337 int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
338     int key_numints, uint64_t *integer_size, uint64_t *num_integers);
339 int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
340     int key_numints, uint64_t *integer_size, uint64_t *num_integers);
341 
342 /*
343  * Remove the specified attribute.
344  *
345  * If the specified attribute does not exist, the call will fail and
346  * return ENOENT.
347  */
348 int zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx);
349 int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
350 int zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
351     matchtype_t mt, dmu_tx_t *tx);
352 
353 /* Remove by uint64_t[] key. */
354 int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
355     int key_numints, dmu_tx_t *tx);
356 int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
357     int key_numints, dmu_tx_t *tx);
358 
359 /*
360  * Returns (in *count) the number of attributes in the specified zap
361  * object.
362  */
363 int zap_count(objset_t *os, uint64_t zapobj, uint64_t *count);
364 int zap_count_by_dnode(dnode_t *dn, uint64_t *count);
365 
366 /*
367  * Look up an existing uint64 value, add the delta value to it, and
368  * update it with the new value. If the new value is 0, removes the key
369  * entirely.
370  */
371 int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
372     dmu_tx_t *tx);
373 int zap_increment_by_dnode(dnode_t *dn, const char *name, int64_t delta,
374     dmu_tx_t *tx);
375 
376 /*
377  * Returns (in name) the name of the entry whose first integer
378  * (za_first_integer), ANDed with mask, equals value, or ENOENT if not
379  * found.  The string pointed to by name must be at least 256 bytes long.
380  * If mask==0, the match must be exact (ie, same as mask=-1ULL).
381  */
382 int zap_value_search(objset_t *os, uint64_t zapobj,
383     uint64_t value, uint64_t mask, char *name, uint64_t namelen);
384 int zap_value_search_by_dnode(dnode_t *dn,
385     uint64_t value, uint64_t mask, char *name, uint64_t namelen);
386 
387 /*
388  * Manipulate entries where the name + value are the "same" (the name is
389  * a stringified version of the value).
390  */
391 int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
392 int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
393 int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
394 
395 int zap_add_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx);
396 int zap_remove_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx);
397 int zap_lookup_int_by_dnode(dnode_t *dn, uint64_t value);
398 
399 /* Here the key is an int and the value is a different int. */
400 int zap_add_int_key(objset_t *os, uint64_t obj,
401     uint64_t key, uint64_t value, dmu_tx_t *tx);
402 int zap_update_int_key(objset_t *os, uint64_t obj,
403     uint64_t key, uint64_t value, dmu_tx_t *tx);
404 int zap_lookup_int_key(objset_t *os, uint64_t obj,
405     uint64_t key, uint64_t *valuep);
406 
407 int zap_add_int_key_by_dnode(dnode_t *dn,
408     uint64_t key, uint64_t value, dmu_tx_t *tx);
409 int zap_update_int_key_by_dnode(dnode_t *dn,
410     uint64_t key, uint64_t value, dmu_tx_t *tx);
411 int zap_lookup_int_key_by_dnode(dnode_t *dn,
412     uint64_t key, uint64_t *valuep);
413 
414 /*
415  * The interface for listing all the attributes of a zapobj can be
416  * thought of as a cursor moving down a list of the attributes one by
417  * one.  The cookie returned by the zap_cursor_serialize routine is
418  * persistent across system calls (and across reboot, even).
419  */
420 
421 typedef struct {
422 	int za_integer_length;
423 	/*
424 	 * za_normalization_conflict will be set if there are additional
425 	 * entries with this normalized form (eg, "foo" and "Foo").
426 	 */
427 	boolean_t za_normalization_conflict;
428 	uint64_t za_num_integers;
429 	uint64_t za_first_integer;	/* no sign extension for <8byte ints */
430 	uint32_t za_name_len;
431 	uint32_t za_pad;	/* We want za_name aligned to uint64_t. */
432 	char za_name[];	/* flexible array member: sizeof() excludes it */
433 } zap_attribute_t;
434 
435 /*
436  * Alloc and free zap_attribute_t.
437  */
438 zap_attribute_t *zap_attribute_alloc(void);
439 zap_attribute_t *zap_attribute_long_alloc(void);
440 void zap_attribute_free(zap_attribute_t *attrp);
441 
442 struct zap;
443 struct zap_leaf;
444 
445 typedef struct zap_cursor {
446 	/* This structure is opaque! */
447 	struct zap *zc_zap;
448 	struct zap_leaf *zc_leaf;
449 	uint64_t zc_hash;
450 	uint32_t zc_cd;
451 	boolean_t zc_prefetch;
452 	/*
453 	 * Legacy fields to maintain source compat with Lustre, which accesses
454 	 * them directly. Not to be used in new code!
455 	 */
456 	objset_t *zc_objset;
457 	uint64_t zc_zapobj;
458 } zap_cursor_t;
459 
460 /*
461  * Initialize a zap cursor, pointing to the "first" attribute of the zapobj.
462  * The entire zapobj will be prefetched. You must call zap_cursor_fini() on
463  * the cursor when you are done with it.
464  */
465 int zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);
466 int zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn);
467 void zap_cursor_fini(zap_cursor_t *zc);
468 
469 /*
470  * Initialize a cursor at the beginning, but request that we not prefetch
471  * the entire ZAP object.
472  */
473 int zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
474     uint64_t zapobj);
475 
476 /*
477  * Initialize a zap cursor pointing to the position recorded by
478  * zap_cursor_serialize (in the "serialized" argument).  You can also
479  * use a "serialized" argument of 0 to start at the beginning of the
480  * zapobj (ie.  zap_cursor_init_serialized(..., 0) is equivalent to
481  * zap_cursor_init(...).)
482  */
483 int zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os,
484     uint64_t zapobj, uint64_t serialized);
485 int zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn,
486     uint64_t serialized);
487 
488 /*
489  * Get the attribute currently pointed to by the cursor.  Returns
490  * ENOENT if at the end of the attributes.
491  */
492 int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za);
493 
494 /*
495  * Advance the cursor to the next attribute.
496  */
497 void zap_cursor_advance(zap_cursor_t *zc);
498 
499 /*
500  * Get a persistent cookie pointing to the current position of the zap
501  * cursor.  The low 4 bits in the cookie are always zero, and thus can
502  * be used to differentiate a serialized cookie from a different type
503  * of value.  The cookie will be less than 2^32 as long as there are
504  * fewer than 2^22 (4.2 million) entries in the zap object.
505  */
506 uint64_t zap_cursor_serialize(zap_cursor_t *zc);
507 
508 #define	ZAP_HISTOGRAM_SIZE 10
509 
510 typedef struct zap_stats {
511 	/*
512 	 * Size of the pointer table (in number of entries).
513 	 * This is always a power of 2, or zero if it's a microzap.
514 	 * In general, it should be considerably greater than zs_num_leafs.
515 	 */
516 	uint64_t zs_ptrtbl_len;
517 
518 	uint64_t zs_blocksize;		/* size of zap blocks */
519 
520 	/*
521 	 * The number of blocks used.  Note that some blocks may be
522 	 * wasted because old ptrtbl's and large name/value blocks are
523 	 * not reused.  (Although their space is reclaimed, we don't
524 	 * reuse those offsets in the object.)
525 	 */
526 	uint64_t zs_num_blocks;
527 
528 	/*
529 	 * Pointer table values from zap_ptrtbl in the zap_phys_t
530 	 */
531 	uint64_t zs_ptrtbl_nextblk;	  /* next (larger) copy start block */
532 	uint64_t zs_ptrtbl_blks_copied;   /* number of source blocks copied */
533 	uint64_t zs_ptrtbl_zt_blk;	  /* starting block number */
534 	uint64_t zs_ptrtbl_zt_numblks;    /* number of blocks */
535 	uint64_t zs_ptrtbl_zt_shift;	  /* bits to index it */
536 
537 	/*
538 	 * Values of the other members of the zap_phys_t
539 	 */
540 	uint64_t zs_block_type;		/* ZBT_HEADER */
541 	uint64_t zs_magic;		/* ZAP_MAGIC */
542 	uint64_t zs_num_leafs;		/* The number of leaf blocks */
543 	uint64_t zs_num_entries;	/* The number of zap entries */
544 	uint64_t zs_salt;		/* salt to stir into hash function */
545 
546 	/*
547 	 * Histograms.  For all histograms, the last index
548 	 * (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater
549 	 * than what can be represented.  For example
550 	 * zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number
551 	 * of leafs with 45 or more entries.
552 	 */
553 
554 	/*
555 	 * zs_leafs_with_2n_pointers[n] is the number of leafs with
556 	 * 2^n pointers to it.
557 	 */
558 	uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE];
559 
560 	/*
561 	 * zs_blocks_with_n5_entries[n] is the number of leafs with
562 	 * [n*5, (n+1)*5) entries.  In the current implementation, there
563 	 * can be at most 55 entries in any block, but there may be
564 	 * fewer if the name or value is large, or the block is not
565 	 * completely full.
566 	 */
567 	uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE];
568 
569 	/*
570 	 * zs_blocks_n_tenths_full[n] is the number of leafs whose
571 	 * fullness is in the range [n/10, (n+1)/10).
572 	 */
573 	uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE];
574 
575 	/*
576 	 * zs_entries_using_n_chunks[n] is the number of entries which
577 	 * consume n 24-byte chunks.  (Note, large names/values only use
578 	 * one chunk, but contribute to zs_num_blocks_large.)
579 	 */
580 	uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE];
581 
582 	/*
583 	 * zs_buckets_with_n_entries[n] is the number of buckets (each
584 	 * leaf has 64 buckets) with n entries.
585 	 * zs_buckets_with_n_entries[1] should be very close to
586 	 * zs_num_entries.
587 	 */
588 	uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE];
589 } zap_stats_t;
590 
591 /*
592  * Get statistics about a ZAP object.  Note: you need to be aware of the
593  * internal implementation of the ZAP to correctly interpret some of the
594  * statistics.  This interface shouldn't be relied on unless you really
595  * know what you're doing.
596  */
597 int zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs);
598 int zap_get_stats_by_dnode(dnode_t *dn, zap_stats_t *zs);
599 
600 /* ZAP subsystem setup/teardown */
601 void zap_init(void);
602 void zap_fini(void);
603 
604 #ifdef	__cplusplus
605 }
606 #endif
607 
608 #endif	/* _SYS_ZAP_H */
609