xref: /freebsd/sys/contrib/openzfs/include/sys/zap_impl.h (revision d9497217456002b0ddad3cd319570d0b098daa29)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
26  * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
27  * Copyright 2017 Nexenta Systems, Inc.
28  * Copyright (c) 2024, Klara, Inc.
29  * Copyright (c) 2026, TrueNAS.
30  */
31 
32 #ifndef	_SYS_ZAP_IMPL_H
33 #define	_SYS_ZAP_IMPL_H
34 
35 #include <sys/zap.h>
36 #include <sys/zfs_context.h>
37 
38 #ifdef	__cplusplus
39 extern "C" {
40 #endif
41 
/*
 * Declared here, defined outside this header.
 * NOTE(review): by its name this is the block shift given to newly
 * created fat zaps -- confirm against the definition.
 */
extern int fzap_default_block_shift;

/* Expected content of the zap_magic field of zap_phys_t. */
#define	ZAP_MAGIC 0x2F52AB2ABULL

/* Block shift kept in the fat-zap arm of the in-core zap_t. */
#define	FZAP_BLOCK_SHIFT(zap)	((zap)->zap_f.zap_block_shift)
47 
/* Size in bytes of one microzap entry (see mzap_ent_phys_t). */
#define	MZAP_ENT_LEN		64
/* Bytes left for the name after mze_value (8), mze_cd (4), mze_pad (2). */
#define	MZAP_NAME_LEN		(MZAP_ENT_LEN - 8 - 4 - 2)

/*
 * NOTE(review): presumably a sentinel passed where a collision
 * differentiator has not been chosen yet -- confirm against callers.
 */
#define	ZAP_NEED_CD		(-1U)

/*
 * A single microzap entry.  The field sizes add up to exactly
 * MZAP_ENT_LEN bytes, so entries pack back to back in mz_chunk[].
 */
typedef struct mzap_ent_phys {
	uint64_t mze_value;
	uint32_t mze_cd;
	uint16_t mze_pad;	/* in case we want to chain them someday */
	char mze_name[MZAP_NAME_LEN];
} mzap_ent_phys_t;
59 
60 typedef struct mzap_phys {
61 	uint64_t mz_block_type;	/* ZBT_MICRO */
62 	uint64_t mz_salt;
63 	uint64_t mz_normflags;
64 	uint64_t mz_pad[5];
65 
66 	/* actually variable size depending on block size */
67 	mzap_ent_phys_t mz_chunk[];
68 } mzap_phys_t;
69 
/*
 * Compact in-core record for one microzap entry.  mze_chunkid is the
 * index into mz_chunk[] used by MZE_PHYS() to reach the full entry.
 */
typedef struct mzap_ent {
	uint32_t mze_hash;
	uint16_t mze_cd; /* copy from mze_phys->mze_cd */
	uint16_t mze_chunkid;
} mzap_ent_t;
75 
/*
 * Address of the mz_chunk[] slot that 'mze' refers to, found through
 * zap_m_phys(zap) and indexed by mze->mze_chunkid.
 */
#define	MZE_PHYS(zap, mze) \
	(&zap_m_phys(zap)->mz_chunk[(mze)->mze_chunkid])
78 
79 /*
80  * The (fat) zap is stored in one object. It is an array of
81  * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
82  *
83  * ptrtbl fits in first block:
84  * 	[zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
85  *
86  * ptrtbl too big for first block:
87  * 	[zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
88  *
89  */
90 
/* Only used through pointers in this header, so declarations suffice. */
struct dmu_buf;
struct zap_leaf;

/*
 * Block type tags; each has bit 63 set plus a small discriminator.
 * They appear in zap_block_type (ZBT_HEADER) and mz_block_type
 * (ZBT_MICRO).
 */
#define	ZBT_LEAF		((1ULL << 63) + 0)
#define	ZBT_HEADER		((1ULL << 63) + 1)
#define	ZBT_MICRO		((1ULL << 63) + 3)
/* any other values are ptrtbl blocks */
98 
/*
 * the embedded pointer table takes up half a block:
 * block size / entry size (2^3) / 2
 */
#define	ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)

/*
 * The embedded pointer table starts half-way through the block.  Since
 * the pointer table itself is half the block, it starts at (64-bit)
 * word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
 *
 * The expansion is an array element, so it can be both read and
 * assigned to.
 */
#define	ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
	((uint64_t *)zap_f_phys(zap)) \
	[(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
113 
/*
 * TAKE NOTE:
 * If zap_phys_t is modified, zap_byteswap() must be modified.
 *
 * Header of a fat zap: thirteen 64-bit words, of which five form the
 * nested pointer-table descriptor.
 */
typedef struct zap_phys {
	uint64_t zap_block_type;	/* ZBT_HEADER */
	uint64_t zap_magic;		/* ZAP_MAGIC */

	struct zap_table_phys {
		uint64_t zt_blk;	/* starting block number */
		uint64_t zt_numblks;	/* number of blocks */
		uint64_t zt_shift;	/* bits to index it */
		uint64_t zt_nextblk;	/* next (larger) copy start block */
		uint64_t zt_blks_copied; /* number source blocks copied */
	} zap_ptrtbl;

	uint64_t zap_freeblk;		/* the next free block */
	uint64_t zap_num_leafs;		/* number of leafs */
	uint64_t zap_num_entries;	/* number of entries */
	uint64_t zap_salt;		/* salt to stir into hash function */
	uint64_t zap_normflags;		/* flags for u8_textprep_str() */
	uint64_t zap_flags;		/* zap_flags_t */
	/*
	 * This structure is followed by padding, and then the embedded
	 * pointer table.  The embedded pointer table takes up second
	 * half of the block.  It is accessed using the
	 * ZAP_EMBEDDED_PTRTBL_ENT() macro.
	 */
} zap_phys_t;

/* Short name for the table descriptor nested in zap_phys_t. */
typedef struct zap_table_phys zap_table_phys_t;
145 
146 typedef struct zap {
147 	dmu_buf_user_t zap_dbu;
148 	objset_t *zap_objset;
149 	uint64_t zap_object;
150 	dnode_t *zap_dnode;
151 	struct dmu_buf *zap_dbuf;
152 	krwlock_t zap_rwlock;
153 	boolean_t zap_ismicro;
154 	int zap_normflags;
155 	uint64_t zap_salt;
156 	union {
157 		struct {
158 			/*
159 			 * zap_num_entries_mtx protects
160 			 * zap_num_entries
161 			 */
162 			kmutex_t zap_num_entries_mtx;
163 			int zap_block_shift;
164 		} zap_fat;
165 		struct {
166 			int16_t zap_num_entries;
167 			int16_t zap_num_chunks;
168 			int16_t zap_alloc_next;
169 			zfs_btree_t zap_tree;
170 		} zap_micro;
171 	} zap_u;
172 } zap_t;
173 
174 #define	zap_f	zap_u.zap_fat
175 #define	zap_m	zap_u.zap_micro
176 
177 static inline zap_phys_t *
zap_f_phys(zap_t * zap)178 zap_f_phys(zap_t *zap)
179 {
180 	return (zap->zap_dbuf->db_data);
181 }
182 
183 static inline mzap_phys_t *
zap_m_phys(zap_t * zap)184 zap_m_phys(zap_t *zap)
185 {
186 	return (zap->zap_dbuf->db_data);
187 }
188 
189 /*
190  * zap_name_t carries the original key and whatever we've derived from it
191  * (normalised form, hash, etc) as we work through completing the operation.
192  */
193 typedef struct zap_name {
194 	zap_t *zn_zap;
195 	int zn_key_intlen;
196 	const void *zn_key_orig;
197 	int zn_key_orig_numints;
198 	const void *zn_key_norm;
199 	int zn_key_norm_numints;
200 	uint64_t zn_hash;
201 	matchtype_t zn_matchtype;
202 	int zn_normflags;
203 	int zn_normbuf_len;
204 	char zn_normbuf[];
205 } zap_name_t;
206 
207 /*
208  * Allocate a zap_name_t. The longname flag ensures there is enough room to
209  * hold a long filename when the 'longname' pool feature is active.
210  */
211 zap_name_t *zap_name_alloc(zap_t *zap, boolean_t longname);
212 
213 /*
214  * Allocate a zap_name_t for the given key. zap_name_init_str() will be
215  * called to normalise the key and initialise the struct.
216  */
217 zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);
218 
219 /*
220  * Allocate a zap_name_t for a uint64 array key.
221  */
222 zap_name_t *zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints);
223 
224 /*
225  * Free a zap_name_t.
226  */
227 void zap_name_free(zap_name_t *zn);
228 
229 /*
230  * Initialise an existing zap_name_t with the normalised form of the key,
231  * computed according to the given matchtype.
232  */
233 int zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt);
234 
235 /*
236  * Compare 'matchname' with the name represented by the zap_name_t, applying
237  * the same normalisation method first. Returns true if the normalised forms
238  * match, false otherwise.
239  */
240 boolean_t zap_match(zap_name_t *zn, const char *matchname);
241 
242 /*
243  * Compute and return the 64-bit hash for the name, according to the name
244  * type and hash flags.
245  */
246 uint64_t zap_hash(zap_name_t *zn);
247 
248 /*
249  * Return a zap_t for the given on-disk object, locked and ready for use.
250  * The zap_t will be allocated and loaded from disk if its not already loaded.
251  */
252 int zap_lock(objset_t *os, uint64_t obj, dmu_tx_t *tx,
253     krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
254     zap_t **zapp);
255 int zap_lock_by_dnode(dnode_t *dn, dmu_tx_t *tx,
256     krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
257     zap_t **zapp);
258 
259 /* Unlock and release a zap_t. */
260 void zap_unlock(zap_t *zap, const void *tag);
261 
262 /*
263  * Try to upgrade a zap lock from READER to WRITER. If the upgrade is not
264  * possible without blocking, returns 0. If the upgrade happened, returns 1.
265  */
266 int zap_lock_try_upgrade(zap_t *zap, dmu_tx_t *tx);
267 
268 /*
269  * Upgrade a zap lock from READER to WRITER. If it can't be upgraded
270  * immediately it will block.
271  */
272 void zap_lock_upgrade(zap_t *zap, dmu_tx_t *tx);
273 
274 /* zap_t release function for when associated dbuf is evicted. */
275 void zap_evict_sync(void *dbu);
276 
277 /* Misc internal state & config. */
278 int zap_hashbits(zap_t *zap);
279 uint32_t zap_maxcd(zap_t *zap);
280 uint64_t zap_getflags(zap_t *zap);
281 
282 /* Microzap implementation. */
283 zap_t *mzap_open(dmu_buf_t *db);
284 int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
285 mzap_ent_t *mze_find(zap_name_t *zn, zfs_btree_index_t *idx);
286 boolean_t mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash);
287 void mze_destroy(zap_t *zap);
288 boolean_t mzap_normalization_conflict(zap_t *zap, zap_name_t *zn,
289     mzap_ent_t *mze, zfs_btree_index_t *idx);
290 void mzap_addent(zap_name_t *zn, uint64_t value);
291 void mzap_byteswap(mzap_phys_t *buf, size_t size);
292 uint64_t zap_get_micro_max_size(spa_t *spa);
293 
294 /* Fatzap implementation. */
295 void fzap_byteswap(void *buf, size_t size);
296 int fzap_count(zap_t *zap, uint64_t *count);
297 int fzap_lookup(zap_name_t *zn,
298     uint64_t integer_size, uint64_t num_integers, void *buf,
299     char *realname, int rn_len, boolean_t *normalization_conflictp,
300     uint64_t *actual_num_integers);
301 void fzap_prefetch(zap_name_t *zn);
302 int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
303     const void *val, dmu_tx_t *tx);
304 int fzap_update(zap_name_t *zn, int integer_size, uint64_t num_integers,
305     const void *val, dmu_tx_t *tx);
306 int fzap_length(zap_name_t *zn,
307     uint64_t *integer_size, uint64_t *num_integers);
308 int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
309 int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
310 void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
311 void zap_put_leaf(struct zap_leaf *l);
312 int fzap_add_cd(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
313     const void *val, uint32_t cd, dmu_tx_t *tx);
314 void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags);
315 
316 #ifdef	__cplusplus
317 }
318 #endif
319 
320 #endif /* _SYS_ZAP_IMPL_H */
321