1240afd8cSMark Johnston /*-
24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
3240afd8cSMark Johnston *
4240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation
5240afd8cSMark Johnston *
6240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from
7240afd8cSMark Johnston * the FreeBSD Foundation.
8240afd8cSMark Johnston *
9240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without
10240afd8cSMark Johnston * modification, are permitted provided that the following conditions are
11240afd8cSMark Johnston * met:
12240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright
13240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer.
14240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright
15240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in
16240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution.
17240afd8cSMark Johnston *
18240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28240afd8cSMark Johnston * SUCH DAMAGE.
29240afd8cSMark Johnston */
30240afd8cSMark Johnston
31240afd8cSMark Johnston #include <sys/types.h>
32240afd8cSMark Johnston #include <sys/endian.h>
33240afd8cSMark Johnston
34240afd8cSMark Johnston #include <assert.h>
35240afd8cSMark Johnston #include <stddef.h>
36c6890399SJessica Clarke #include <stdlib.h>
37240afd8cSMark Johnston #include <string.h>
38240afd8cSMark Johnston
39240afd8cSMark Johnston #include <util.h>
40240afd8cSMark Johnston
41240afd8cSMark Johnston #include "makefs.h"
42240afd8cSMark Johnston #include "zfs.h"
43240afd8cSMark Johnston
/*
 * One key-value pair queued for insertion into a ZAP.  The value is an array
 * of "intcnt" integers, each "intsz" bytes wide; the anonymous union offers a
 * pointer to that array typed for each supported element width.
 */
typedef struct zfs_zap_entry {
	char		*name;		/* key; a private copy */
	uint64_t	hash;		/* hash of the key */
	union {				/* the value array, by element width */
		uint8_t		*valp;
		uint16_t	*val16p;
		uint32_t	*val32p;
		uint64_t	*val64p;
	};
	uint64_t	val64;		/* inline storage for a common case */
	size_t		intsz;		/* element width: 1, 2, 4 or 8 bytes */
	size_t		intcnt;		/* number of elements in the array */
	STAILQ_ENTRY(zfs_zap_entry) next;	/* list linkage */
} zfs_zap_entry_t;
58240afd8cSMark Johnston
59240afd8cSMark Johnston struct zfs_zap {
60240afd8cSMark Johnston STAILQ_HEAD(, zfs_zap_entry) kvps;
61240afd8cSMark Johnston uint64_t hashsalt; /* key hash input */
62240afd8cSMark Johnston unsigned long kvpcnt; /* number of key-value pairs */
63240afd8cSMark Johnston unsigned long chunks; /* count of chunks needed for fat ZAP */
64240afd8cSMark Johnston bool micro; /* can this be a micro ZAP? */
65240afd8cSMark Johnston
66240afd8cSMark Johnston dnode_phys_t *dnode; /* backpointer */
67240afd8cSMark Johnston zfs_objset_t *os; /* backpointer */
68240afd8cSMark Johnston };
69240afd8cSMark Johnston
70240afd8cSMark Johnston static uint16_t
zap_entry_chunks(zfs_zap_entry_t * ent)71240afd8cSMark Johnston zap_entry_chunks(zfs_zap_entry_t *ent)
72240afd8cSMark Johnston {
73240afd8cSMark Johnston return (1 + howmany(strlen(ent->name) + 1, ZAP_LEAF_ARRAY_BYTES) +
74240afd8cSMark Johnston howmany(ent->intsz * ent->intcnt, ZAP_LEAF_ARRAY_BYTES));
75240afd8cSMark Johnston }
76240afd8cSMark Johnston
77240afd8cSMark Johnston static uint64_t
zap_hash(uint64_t salt,const char * name)78240afd8cSMark Johnston zap_hash(uint64_t salt, const char *name)
79240afd8cSMark Johnston {
80240afd8cSMark Johnston static uint64_t crc64_table[256];
81240afd8cSMark Johnston const uint64_t crc64_poly = 0xC96C5795D7870F42UL;
82240afd8cSMark Johnston const uint8_t *cp;
83240afd8cSMark Johnston uint64_t crc;
84240afd8cSMark Johnston uint8_t c;
85240afd8cSMark Johnston
86240afd8cSMark Johnston assert(salt != 0);
87240afd8cSMark Johnston if (crc64_table[128] == 0) {
88240afd8cSMark Johnston for (int i = 0; i < 256; i++) {
89240afd8cSMark Johnston uint64_t *t;
90240afd8cSMark Johnston
91240afd8cSMark Johnston t = crc64_table + i;
92240afd8cSMark Johnston *t = i;
93240afd8cSMark Johnston for (int j = 8; j > 0; j--)
94240afd8cSMark Johnston *t = (*t >> 1) ^ (-(*t & 1) & crc64_poly);
95240afd8cSMark Johnston }
96240afd8cSMark Johnston }
97240afd8cSMark Johnston assert(crc64_table[128] == crc64_poly);
98240afd8cSMark Johnston
99240afd8cSMark Johnston for (cp = (const uint8_t *)name, crc = salt; (c = *cp) != '\0'; cp++)
100240afd8cSMark Johnston crc = (crc >> 8) ^ crc64_table[(crc ^ c) & 0xFF];
101240afd8cSMark Johnston
102240afd8cSMark Johnston /*
103240afd8cSMark Johnston * Only use 28 bits, since we need 4 bits in the cookie for the
104240afd8cSMark Johnston * collision differentiator. We MUST use the high bits, since
105240afd8cSMark Johnston * those are the ones that we first pay attention to when
106240afd8cSMark Johnston * choosing the bucket.
107240afd8cSMark Johnston */
108240afd8cSMark Johnston crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
109240afd8cSMark Johnston
110240afd8cSMark Johnston return (crc);
111240afd8cSMark Johnston }
112240afd8cSMark Johnston
113240afd8cSMark Johnston zfs_zap_t *
zap_alloc(zfs_objset_t * os,dnode_phys_t * dnode)114240afd8cSMark Johnston zap_alloc(zfs_objset_t *os, dnode_phys_t *dnode)
115240afd8cSMark Johnston {
116240afd8cSMark Johnston zfs_zap_t *zap;
117240afd8cSMark Johnston
118240afd8cSMark Johnston zap = ecalloc(1, sizeof(*zap));
119240afd8cSMark Johnston STAILQ_INIT(&zap->kvps);
120240afd8cSMark Johnston zap->hashsalt = ((uint64_t)random() << 32) | random();
121240afd8cSMark Johnston zap->micro = true;
122240afd8cSMark Johnston zap->kvpcnt = 0;
123240afd8cSMark Johnston zap->chunks = 0;
124240afd8cSMark Johnston zap->dnode = dnode;
125240afd8cSMark Johnston zap->os = os;
126240afd8cSMark Johnston return (zap);
127240afd8cSMark Johnston }
128240afd8cSMark Johnston
129240afd8cSMark Johnston void
zap_add(zfs_zap_t * zap,const char * name,size_t intsz,size_t intcnt,const uint8_t * val)130240afd8cSMark Johnston zap_add(zfs_zap_t *zap, const char *name, size_t intsz, size_t intcnt,
131240afd8cSMark Johnston const uint8_t *val)
132240afd8cSMark Johnston {
133240afd8cSMark Johnston zfs_zap_entry_t *ent;
134240afd8cSMark Johnston
135240afd8cSMark Johnston assert(intsz == 1 || intsz == 2 || intsz == 4 || intsz == 8);
136240afd8cSMark Johnston assert(strlen(name) + 1 <= ZAP_MAXNAMELEN);
137240afd8cSMark Johnston assert(intcnt <= ZAP_MAXVALUELEN && intcnt * intsz <= ZAP_MAXVALUELEN);
138240afd8cSMark Johnston
139240afd8cSMark Johnston ent = ecalloc(1, sizeof(*ent));
140240afd8cSMark Johnston ent->name = estrdup(name);
141240afd8cSMark Johnston ent->hash = zap_hash(zap->hashsalt, ent->name);
142240afd8cSMark Johnston ent->intsz = intsz;
143240afd8cSMark Johnston ent->intcnt = intcnt;
144240afd8cSMark Johnston if (intsz == sizeof(uint64_t) && intcnt == 1) {
145240afd8cSMark Johnston /*
146240afd8cSMark Johnston * Micro-optimization to elide a memory allocation in that most
147240afd8cSMark Johnston * common case where this is a directory entry.
148240afd8cSMark Johnston */
149240afd8cSMark Johnston ent->val64p = &ent->val64;
150240afd8cSMark Johnston } else {
151240afd8cSMark Johnston ent->valp = ecalloc(intcnt, intsz);
152240afd8cSMark Johnston }
153240afd8cSMark Johnston memcpy(ent->valp, val, intcnt * intsz);
154240afd8cSMark Johnston zap->kvpcnt++;
155240afd8cSMark Johnston zap->chunks += zap_entry_chunks(ent);
156240afd8cSMark Johnston STAILQ_INSERT_TAIL(&zap->kvps, ent, next);
157240afd8cSMark Johnston
158240afd8cSMark Johnston if (zap->micro && (intcnt != 1 || intsz != sizeof(uint64_t) ||
159240afd8cSMark Johnston strlen(name) + 1 > MZAP_NAME_LEN || zap->kvpcnt > MZAP_ENT_MAX))
160240afd8cSMark Johnston zap->micro = false;
161240afd8cSMark Johnston }
162240afd8cSMark Johnston
163240afd8cSMark Johnston void
zap_add_uint64(zfs_zap_t * zap,const char * name,uint64_t val)164240afd8cSMark Johnston zap_add_uint64(zfs_zap_t *zap, const char *name, uint64_t val)
165240afd8cSMark Johnston {
166240afd8cSMark Johnston zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val);
167240afd8cSMark Johnston }
168240afd8cSMark Johnston
169240afd8cSMark Johnston void
zap_add_uint64_self(zfs_zap_t * zap,uint64_t val)170*be2f92a9SMark Johnston zap_add_uint64_self(zfs_zap_t *zap, uint64_t val)
171*be2f92a9SMark Johnston {
172*be2f92a9SMark Johnston char name[32];
173*be2f92a9SMark Johnston
174*be2f92a9SMark Johnston snprintf(name, sizeof(name), "%jx", (uintmax_t)val);
175*be2f92a9SMark Johnston zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val);
176*be2f92a9SMark Johnston }
177*be2f92a9SMark Johnston
178*be2f92a9SMark Johnston void
zap_add_string(zfs_zap_t * zap,const char * name,const char * val)179240afd8cSMark Johnston zap_add_string(zfs_zap_t *zap, const char *name, const char *val)
180240afd8cSMark Johnston {
181240afd8cSMark Johnston zap_add(zap, name, 1, strlen(val) + 1, val);
182240afd8cSMark Johnston }
183240afd8cSMark Johnston
184240afd8cSMark Johnston bool
zap_entry_exists(zfs_zap_t * zap,const char * name)185240afd8cSMark Johnston zap_entry_exists(zfs_zap_t *zap, const char *name)
186240afd8cSMark Johnston {
187240afd8cSMark Johnston zfs_zap_entry_t *ent;
188240afd8cSMark Johnston
189240afd8cSMark Johnston STAILQ_FOREACH(ent, &zap->kvps, next) {
190240afd8cSMark Johnston if (strcmp(ent->name, name) == 0)
191240afd8cSMark Johnston return (true);
192240afd8cSMark Johnston }
193240afd8cSMark Johnston return (false);
194240afd8cSMark Johnston }
195240afd8cSMark Johnston
196240afd8cSMark Johnston static void
zap_micro_write(zfs_opt_t * zfs,zfs_zap_t * zap)197240afd8cSMark Johnston zap_micro_write(zfs_opt_t *zfs, zfs_zap_t *zap)
198240afd8cSMark Johnston {
199240afd8cSMark Johnston dnode_phys_t *dnode;
200240afd8cSMark Johnston zfs_zap_entry_t *ent;
201240afd8cSMark Johnston mzap_phys_t *mzap;
202240afd8cSMark Johnston mzap_ent_phys_t *ment;
203240afd8cSMark Johnston off_t bytes, loc;
204240afd8cSMark Johnston
205240afd8cSMark Johnston memset(zfs->filebuf, 0, sizeof(zfs->filebuf));
206240afd8cSMark Johnston mzap = (mzap_phys_t *)&zfs->filebuf[0];
207240afd8cSMark Johnston mzap->mz_block_type = ZBT_MICRO;
208240afd8cSMark Johnston mzap->mz_salt = zap->hashsalt;
209240afd8cSMark Johnston mzap->mz_normflags = 0;
210240afd8cSMark Johnston
211240afd8cSMark Johnston bytes = sizeof(*mzap) + (zap->kvpcnt - 1) * sizeof(*ment);
212240afd8cSMark Johnston assert(bytes <= (off_t)MZAP_MAX_BLKSZ);
213240afd8cSMark Johnston
214240afd8cSMark Johnston ment = &mzap->mz_chunk[0];
215240afd8cSMark Johnston STAILQ_FOREACH(ent, &zap->kvps, next) {
216240afd8cSMark Johnston memcpy(&ment->mze_value, ent->valp, ent->intsz * ent->intcnt);
217240afd8cSMark Johnston ment->mze_cd = 0; /* XXX-MJ */
218240afd8cSMark Johnston strlcpy(ment->mze_name, ent->name, sizeof(ment->mze_name));
219240afd8cSMark Johnston ment++;
220240afd8cSMark Johnston }
221240afd8cSMark Johnston
222240afd8cSMark Johnston loc = objset_space_alloc(zfs, zap->os, &bytes);
223240afd8cSMark Johnston
224240afd8cSMark Johnston dnode = zap->dnode;
225240afd8cSMark Johnston dnode->dn_maxblkid = 0;
226240afd8cSMark Johnston dnode->dn_datablkszsec = bytes >> MINBLOCKSHIFT;
227240afd8cSMark Johnston
228240afd8cSMark Johnston vdev_pwrite_dnode_data(zfs, dnode, zfs->filebuf, bytes, loc);
229240afd8cSMark Johnston }
230240afd8cSMark Johnston
231240afd8cSMark Johnston /*
232240afd8cSMark Johnston * Write some data to the fat ZAP leaf chunk starting at index "li".
233240afd8cSMark Johnston *
234240afd8cSMark Johnston * Note that individual integers in the value may be split among consecutive
235240afd8cSMark Johnston * leaves.
236240afd8cSMark Johnston */
237240afd8cSMark Johnston static void
zap_fat_write_array_chunk(zap_leaf_t * l,uint16_t li,size_t sz,const uint8_t * val)238240afd8cSMark Johnston zap_fat_write_array_chunk(zap_leaf_t *l, uint16_t li, size_t sz,
239240afd8cSMark Johnston const uint8_t *val)
240240afd8cSMark Johnston {
241240afd8cSMark Johnston struct zap_leaf_array *la;
242240afd8cSMark Johnston
243240afd8cSMark Johnston assert(sz <= ZAP_MAXVALUELEN);
244240afd8cSMark Johnston
245240afd8cSMark Johnston for (uint16_t n, resid = sz; resid > 0; resid -= n, val += n, li++) {
246240afd8cSMark Johnston n = MIN(resid, ZAP_LEAF_ARRAY_BYTES);
247240afd8cSMark Johnston
248240afd8cSMark Johnston la = &ZAP_LEAF_CHUNK(l, li).l_array;
249240afd8cSMark Johnston assert(la->la_type == ZAP_CHUNK_FREE);
250240afd8cSMark Johnston la->la_type = ZAP_CHUNK_ARRAY;
251240afd8cSMark Johnston memcpy(la->la_array, val, n);
252240afd8cSMark Johnston la->la_next = li + 1;
253240afd8cSMark Johnston }
254240afd8cSMark Johnston la->la_next = 0xffff;
255240afd8cSMark Johnston }
256240afd8cSMark Johnston
257240afd8cSMark Johnston /*
258240afd8cSMark Johnston * Find the shortest hash prefix length which lets us distribute keys without
259240afd8cSMark Johnston * overflowing a leaf block. This is not (space) optimal, but is simple, and
260240afd8cSMark Johnston * directories large enough to overflow a single 128KB leaf block are uncommon.
261240afd8cSMark Johnston */
262240afd8cSMark Johnston static unsigned int
zap_fat_write_prefixlen(zfs_zap_t * zap,zap_leaf_t * l)263240afd8cSMark Johnston zap_fat_write_prefixlen(zfs_zap_t *zap, zap_leaf_t *l)
264240afd8cSMark Johnston {
265240afd8cSMark Johnston zfs_zap_entry_t *ent;
266240afd8cSMark Johnston unsigned int prefixlen;
267240afd8cSMark Johnston
268240afd8cSMark Johnston if (zap->chunks <= ZAP_LEAF_NUMCHUNKS(l)) {
269240afd8cSMark Johnston /*
270240afd8cSMark Johnston * All chunks will fit in a single leaf block.
271240afd8cSMark Johnston */
272240afd8cSMark Johnston return (0);
273240afd8cSMark Johnston }
274240afd8cSMark Johnston
275240afd8cSMark Johnston for (prefixlen = 1; prefixlen < (unsigned int)l->l_bs; prefixlen++) {
276240afd8cSMark Johnston uint32_t *leafchunks;
277240afd8cSMark Johnston
278240afd8cSMark Johnston leafchunks = ecalloc(1u << prefixlen, sizeof(*leafchunks));
279240afd8cSMark Johnston STAILQ_FOREACH(ent, &zap->kvps, next) {
280240afd8cSMark Johnston uint64_t li;
281240afd8cSMark Johnston uint16_t chunks;
282240afd8cSMark Johnston
283240afd8cSMark Johnston li = ZAP_HASH_IDX(ent->hash, prefixlen);
284240afd8cSMark Johnston
285240afd8cSMark Johnston chunks = zap_entry_chunks(ent);
286240afd8cSMark Johnston if (ZAP_LEAF_NUMCHUNKS(l) - leafchunks[li] < chunks) {
287240afd8cSMark Johnston /*
288240afd8cSMark Johnston * Not enough space, grow the prefix and retry.
289240afd8cSMark Johnston */
290240afd8cSMark Johnston break;
291240afd8cSMark Johnston }
292240afd8cSMark Johnston leafchunks[li] += chunks;
293240afd8cSMark Johnston }
294240afd8cSMark Johnston free(leafchunks);
295240afd8cSMark Johnston
296240afd8cSMark Johnston if (ent == NULL) {
297240afd8cSMark Johnston /*
298240afd8cSMark Johnston * Everything fits, we're done.
299240afd8cSMark Johnston */
300240afd8cSMark Johnston break;
301240afd8cSMark Johnston }
302240afd8cSMark Johnston }
303240afd8cSMark Johnston
304240afd8cSMark Johnston /*
305240afd8cSMark Johnston * If this fails, then we need to expand the pointer table. For now
306240afd8cSMark Johnston * this situation is unhandled since it is hard to trigger.
307240afd8cSMark Johnston */
308240afd8cSMark Johnston assert(prefixlen < (unsigned int)l->l_bs);
309240afd8cSMark Johnston
310240afd8cSMark Johnston return (prefixlen);
311240afd8cSMark Johnston }
312240afd8cSMark Johnston
313240afd8cSMark Johnston /*
314240afd8cSMark Johnston * Initialize a fat ZAP leaf block.
315240afd8cSMark Johnston */
316240afd8cSMark Johnston static void
zap_fat_write_leaf_init(zap_leaf_t * l,uint64_t prefix,int prefixlen)317240afd8cSMark Johnston zap_fat_write_leaf_init(zap_leaf_t *l, uint64_t prefix, int prefixlen)
318240afd8cSMark Johnston {
319240afd8cSMark Johnston zap_leaf_phys_t *leaf;
320240afd8cSMark Johnston
321240afd8cSMark Johnston leaf = l->l_phys;
322240afd8cSMark Johnston
323240afd8cSMark Johnston leaf->l_hdr.lh_block_type = ZBT_LEAF;
324240afd8cSMark Johnston leaf->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
325240afd8cSMark Johnston leaf->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
326240afd8cSMark Johnston leaf->l_hdr.lh_prefix = prefix;
327240afd8cSMark Johnston leaf->l_hdr.lh_prefix_len = prefixlen;
328240afd8cSMark Johnston
329240afd8cSMark Johnston /* Initialize the leaf hash table. */
330240afd8cSMark Johnston assert(leaf->l_hdr.lh_nfree < 0xffff);
331240afd8cSMark Johnston memset(leaf->l_hash, 0xff,
332240afd8cSMark Johnston ZAP_LEAF_HASH_NUMENTRIES(l) * sizeof(*leaf->l_hash));
333240afd8cSMark Johnston
334240afd8cSMark Johnston /* Initialize the leaf chunks. */
335240afd8cSMark Johnston for (uint16_t i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
336240afd8cSMark Johnston struct zap_leaf_free *lf;
337240afd8cSMark Johnston
338240afd8cSMark Johnston lf = &ZAP_LEAF_CHUNK(l, i).l_free;
339240afd8cSMark Johnston lf->lf_type = ZAP_CHUNK_FREE;
340240afd8cSMark Johnston if (i + 1 == ZAP_LEAF_NUMCHUNKS(l))
341240afd8cSMark Johnston lf->lf_next = 0xffff;
342240afd8cSMark Johnston else
343240afd8cSMark Johnston lf->lf_next = i + 1;
344240afd8cSMark Johnston }
345240afd8cSMark Johnston }
346240afd8cSMark Johnston
347240afd8cSMark Johnston static void
zap_fat_write(zfs_opt_t * zfs,zfs_zap_t * zap)348240afd8cSMark Johnston zap_fat_write(zfs_opt_t *zfs, zfs_zap_t *zap)
349240afd8cSMark Johnston {
350240afd8cSMark Johnston struct dnode_cursor *c;
351240afd8cSMark Johnston zap_leaf_t l;
352240afd8cSMark Johnston zap_phys_t *zaphdr;
353240afd8cSMark Johnston struct zap_table_phys *zt;
354240afd8cSMark Johnston zfs_zap_entry_t *ent;
355240afd8cSMark Johnston dnode_phys_t *dnode;
356240afd8cSMark Johnston uint8_t *leafblks;
357240afd8cSMark Johnston uint64_t lblkcnt, *ptrhasht;
358240afd8cSMark Johnston off_t loc, blksz;
359240afd8cSMark Johnston size_t blkshift;
360240afd8cSMark Johnston unsigned int prefixlen;
361240afd8cSMark Johnston int ptrcnt;
362240afd8cSMark Johnston
363240afd8cSMark Johnston /*
364240afd8cSMark Johnston * For simplicity, always use the largest block size. This should be ok
365240afd8cSMark Johnston * since most directories will be micro ZAPs, but it's space inefficient
366240afd8cSMark Johnston * for small ZAPs and might need to be revisited.
367240afd8cSMark Johnston */
368240afd8cSMark Johnston blkshift = MAXBLOCKSHIFT;
369240afd8cSMark Johnston blksz = (off_t)1 << blkshift;
370240afd8cSMark Johnston
371240afd8cSMark Johnston /*
372240afd8cSMark Johnston * Embedded pointer tables give up to 8192 entries. This ought to be
373240afd8cSMark Johnston * enough for anything except massive directories.
374240afd8cSMark Johnston */
375240afd8cSMark Johnston ptrcnt = (blksz / 2) / sizeof(uint64_t);
376240afd8cSMark Johnston
377240afd8cSMark Johnston memset(zfs->filebuf, 0, sizeof(zfs->filebuf));
378240afd8cSMark Johnston zaphdr = (zap_phys_t *)&zfs->filebuf[0];
379240afd8cSMark Johnston zaphdr->zap_block_type = ZBT_HEADER;
380240afd8cSMark Johnston zaphdr->zap_magic = ZAP_MAGIC;
381240afd8cSMark Johnston zaphdr->zap_num_entries = zap->kvpcnt;
382240afd8cSMark Johnston zaphdr->zap_salt = zap->hashsalt;
383240afd8cSMark Johnston
384240afd8cSMark Johnston l.l_bs = blkshift;
385240afd8cSMark Johnston l.l_phys = NULL;
386240afd8cSMark Johnston
387240afd8cSMark Johnston zt = &zaphdr->zap_ptrtbl;
388240afd8cSMark Johnston zt->zt_blk = 0;
389240afd8cSMark Johnston zt->zt_numblks = 0;
390240afd8cSMark Johnston zt->zt_shift = flsll(ptrcnt) - 1;
391240afd8cSMark Johnston zt->zt_nextblk = 0;
392240afd8cSMark Johnston zt->zt_blks_copied = 0;
393240afd8cSMark Johnston
394240afd8cSMark Johnston /*
395240afd8cSMark Johnston * How many leaf blocks do we need? Initialize them and update the
396240afd8cSMark Johnston * header.
397240afd8cSMark Johnston */
398240afd8cSMark Johnston prefixlen = zap_fat_write_prefixlen(zap, &l);
399cba2fa7cSMark Johnston lblkcnt = (uint64_t)1 << prefixlen;
400240afd8cSMark Johnston leafblks = ecalloc(lblkcnt, blksz);
401240afd8cSMark Johnston for (unsigned int li = 0; li < lblkcnt; li++) {
402240afd8cSMark Johnston l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz);
403240afd8cSMark Johnston zap_fat_write_leaf_init(&l, li, prefixlen);
404240afd8cSMark Johnston }
405240afd8cSMark Johnston zaphdr->zap_num_leafs = lblkcnt;
406240afd8cSMark Johnston zaphdr->zap_freeblk = lblkcnt + 1;
407240afd8cSMark Johnston
408240afd8cSMark Johnston /*
409240afd8cSMark Johnston * For each entry, figure out which leaf block it belongs to based on
410240afd8cSMark Johnston * the upper bits of its hash, allocate chunks from that leaf, and fill
411240afd8cSMark Johnston * them out.
412240afd8cSMark Johnston */
413240afd8cSMark Johnston ptrhasht = (uint64_t *)(&zfs->filebuf[0] + blksz / 2);
414240afd8cSMark Johnston STAILQ_FOREACH(ent, &zap->kvps, next) {
415240afd8cSMark Johnston struct zap_leaf_entry *le;
416240afd8cSMark Johnston uint16_t *lptr;
417240afd8cSMark Johnston uint64_t hi, li;
418240afd8cSMark Johnston uint16_t namelen, nchunks, nnamechunks, nvalchunks;
419240afd8cSMark Johnston
420240afd8cSMark Johnston hi = ZAP_HASH_IDX(ent->hash, zt->zt_shift);
421240afd8cSMark Johnston li = ZAP_HASH_IDX(ent->hash, prefixlen);
422240afd8cSMark Johnston assert(ptrhasht[hi] == 0 || ptrhasht[hi] == li + 1);
423240afd8cSMark Johnston ptrhasht[hi] = li + 1;
424240afd8cSMark Johnston l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz);
425240afd8cSMark Johnston
426240afd8cSMark Johnston namelen = strlen(ent->name) + 1;
427240afd8cSMark Johnston
428240afd8cSMark Johnston /*
429240afd8cSMark Johnston * How many leaf chunks do we need for this entry?
430240afd8cSMark Johnston */
431240afd8cSMark Johnston nnamechunks = howmany(namelen, ZAP_LEAF_ARRAY_BYTES);
432240afd8cSMark Johnston nvalchunks = howmany(ent->intcnt,
433240afd8cSMark Johnston ZAP_LEAF_ARRAY_BYTES / ent->intsz);
434240afd8cSMark Johnston nchunks = 1 + nnamechunks + nvalchunks;
435240afd8cSMark Johnston
436240afd8cSMark Johnston /*
437240afd8cSMark Johnston * Allocate a run of free leaf chunks for this entry,
438240afd8cSMark Johnston * potentially extending a hash chain.
439240afd8cSMark Johnston */
440240afd8cSMark Johnston assert(l.l_phys->l_hdr.lh_nfree >= nchunks);
441240afd8cSMark Johnston l.l_phys->l_hdr.lh_nfree -= nchunks;
442240afd8cSMark Johnston l.l_phys->l_hdr.lh_nentries++;
443240afd8cSMark Johnston lptr = ZAP_LEAF_HASH_ENTPTR(&l, ent->hash);
444240afd8cSMark Johnston while (*lptr != 0xffff) {
445240afd8cSMark Johnston assert(*lptr < ZAP_LEAF_NUMCHUNKS(&l));
446240afd8cSMark Johnston le = ZAP_LEAF_ENTRY(&l, *lptr);
447240afd8cSMark Johnston assert(le->le_type == ZAP_CHUNK_ENTRY);
448240afd8cSMark Johnston le->le_cd++;
449240afd8cSMark Johnston lptr = &le->le_next;
450240afd8cSMark Johnston }
451240afd8cSMark Johnston *lptr = l.l_phys->l_hdr.lh_freelist;
452240afd8cSMark Johnston l.l_phys->l_hdr.lh_freelist += nchunks;
453240afd8cSMark Johnston assert(l.l_phys->l_hdr.lh_freelist <=
454240afd8cSMark Johnston ZAP_LEAF_NUMCHUNKS(&l));
455240afd8cSMark Johnston if (l.l_phys->l_hdr.lh_freelist ==
456240afd8cSMark Johnston ZAP_LEAF_NUMCHUNKS(&l))
457240afd8cSMark Johnston l.l_phys->l_hdr.lh_freelist = 0xffff;
458240afd8cSMark Johnston
459240afd8cSMark Johnston /*
460240afd8cSMark Johnston * Integer values must be stored in big-endian format.
461240afd8cSMark Johnston */
462240afd8cSMark Johnston switch (ent->intsz) {
463240afd8cSMark Johnston case 1:
464240afd8cSMark Johnston break;
465240afd8cSMark Johnston case 2:
466240afd8cSMark Johnston for (uint16_t *v = ent->val16p;
467240afd8cSMark Johnston v - ent->val16p < (ptrdiff_t)ent->intcnt;
468240afd8cSMark Johnston v++)
469240afd8cSMark Johnston *v = htobe16(*v);
470240afd8cSMark Johnston break;
471240afd8cSMark Johnston case 4:
472240afd8cSMark Johnston for (uint32_t *v = ent->val32p;
473240afd8cSMark Johnston v - ent->val32p < (ptrdiff_t)ent->intcnt;
474240afd8cSMark Johnston v++)
475240afd8cSMark Johnston *v = htobe32(*v);
476240afd8cSMark Johnston break;
477240afd8cSMark Johnston case 8:
478240afd8cSMark Johnston for (uint64_t *v = ent->val64p;
479240afd8cSMark Johnston v - ent->val64p < (ptrdiff_t)ent->intcnt;
480240afd8cSMark Johnston v++)
481240afd8cSMark Johnston *v = htobe64(*v);
482240afd8cSMark Johnston break;
483240afd8cSMark Johnston default:
484240afd8cSMark Johnston assert(0);
485240afd8cSMark Johnston }
486240afd8cSMark Johnston
487240afd8cSMark Johnston /*
488240afd8cSMark Johnston * Finally, write out the leaf chunks for this entry.
489240afd8cSMark Johnston */
490240afd8cSMark Johnston le = ZAP_LEAF_ENTRY(&l, *lptr);
491240afd8cSMark Johnston assert(le->le_type == ZAP_CHUNK_FREE);
492240afd8cSMark Johnston le->le_type = ZAP_CHUNK_ENTRY;
493240afd8cSMark Johnston le->le_next = 0xffff;
494240afd8cSMark Johnston le->le_name_chunk = *lptr + 1;
495240afd8cSMark Johnston le->le_name_numints = namelen;
496240afd8cSMark Johnston le->le_value_chunk = *lptr + 1 + nnamechunks;
497240afd8cSMark Johnston le->le_value_intlen = ent->intsz;
498240afd8cSMark Johnston le->le_value_numints = ent->intcnt;
499240afd8cSMark Johnston le->le_hash = ent->hash;
500240afd8cSMark Johnston zap_fat_write_array_chunk(&l, *lptr + 1, namelen, ent->name);
501240afd8cSMark Johnston zap_fat_write_array_chunk(&l, *lptr + 1 + nnamechunks,
502240afd8cSMark Johnston ent->intcnt * ent->intsz, ent->valp);
503240afd8cSMark Johnston }
504240afd8cSMark Johnston
505240afd8cSMark Johnston /*
506240afd8cSMark Johnston * Initialize unused slots of the pointer table.
507240afd8cSMark Johnston */
508240afd8cSMark Johnston for (int i = 0; i < ptrcnt; i++)
509240afd8cSMark Johnston if (ptrhasht[i] == 0)
510240afd8cSMark Johnston ptrhasht[i] = (i >> (zt->zt_shift - prefixlen)) + 1;
511240afd8cSMark Johnston
512240afd8cSMark Johnston /*
513240afd8cSMark Johnston * Write the whole thing to disk.
514240afd8cSMark Johnston */
515240afd8cSMark Johnston dnode = zap->dnode;
516240afd8cSMark Johnston dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT;
517240afd8cSMark Johnston dnode->dn_maxblkid = lblkcnt + 1;
518240afd8cSMark Johnston
519240afd8cSMark Johnston c = dnode_cursor_init(zfs, zap->os, zap->dnode,
520240afd8cSMark Johnston (lblkcnt + 1) * blksz, blksz);
521240afd8cSMark Johnston
522240afd8cSMark Johnston loc = objset_space_alloc(zfs, zap->os, &blksz);
523240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, zfs->filebuf, blksz, loc,
524240afd8cSMark Johnston dnode_cursor_next(zfs, c, 0));
525240afd8cSMark Johnston
526240afd8cSMark Johnston for (uint64_t i = 0; i < lblkcnt; i++) {
527240afd8cSMark Johnston loc = objset_space_alloc(zfs, zap->os, &blksz);
528240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, leafblks + i * blksz,
529240afd8cSMark Johnston blksz, loc, dnode_cursor_next(zfs, c, (i + 1) * blksz));
530240afd8cSMark Johnston }
531240afd8cSMark Johnston
532240afd8cSMark Johnston dnode_cursor_finish(zfs, c);
533240afd8cSMark Johnston
534240afd8cSMark Johnston free(leafblks);
535240afd8cSMark Johnston }
536240afd8cSMark Johnston
537240afd8cSMark Johnston void
zap_write(zfs_opt_t * zfs,zfs_zap_t * zap)538240afd8cSMark Johnston zap_write(zfs_opt_t *zfs, zfs_zap_t *zap)
539240afd8cSMark Johnston {
540240afd8cSMark Johnston zfs_zap_entry_t *ent;
541240afd8cSMark Johnston
542240afd8cSMark Johnston if (zap->micro) {
543240afd8cSMark Johnston zap_micro_write(zfs, zap);
544240afd8cSMark Johnston } else {
545240afd8cSMark Johnston assert(!STAILQ_EMPTY(&zap->kvps));
546240afd8cSMark Johnston assert(zap->kvpcnt > 0);
547240afd8cSMark Johnston zap_fat_write(zfs, zap);
548240afd8cSMark Johnston }
549240afd8cSMark Johnston
550240afd8cSMark Johnston while ((ent = STAILQ_FIRST(&zap->kvps)) != NULL) {
551240afd8cSMark Johnston STAILQ_REMOVE_HEAD(&zap->kvps, next);
552240afd8cSMark Johnston if (ent->val64p != &ent->val64)
553240afd8cSMark Johnston free(ent->valp);
554240afd8cSMark Johnston free(ent->name);
555240afd8cSMark Johnston free(ent);
556240afd8cSMark Johnston }
557240afd8cSMark Johnston free(zap);
558240afd8cSMark Johnston }
559