1240afd8cSMark Johnston /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3240afd8cSMark Johnston * 4240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation 5240afd8cSMark Johnston * 6240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from 7240afd8cSMark Johnston * the FreeBSD Foundation. 8240afd8cSMark Johnston * 9240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without 10240afd8cSMark Johnston * modification, are permitted provided that the following conditions are 11240afd8cSMark Johnston * met: 12240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright 13240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer. 14240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright 15240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in 16240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution. 17240afd8cSMark Johnston * 18240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28240afd8cSMark Johnston * SUCH DAMAGE. 29240afd8cSMark Johnston */ 30240afd8cSMark Johnston 31240afd8cSMark Johnston #include <sys/types.h> 32240afd8cSMark Johnston #include <sys/endian.h> 33240afd8cSMark Johnston 34240afd8cSMark Johnston #include <assert.h> 35240afd8cSMark Johnston #include <stddef.h> 36c6890399SJessica Clarke #include <stdlib.h> 37240afd8cSMark Johnston #include <string.h> 38240afd8cSMark Johnston 39240afd8cSMark Johnston #include <util.h> 40240afd8cSMark Johnston 41240afd8cSMark Johnston #include "makefs.h" 42240afd8cSMark Johnston #include "zfs.h" 43240afd8cSMark Johnston 44240afd8cSMark Johnston typedef struct zfs_zap_entry { 45240afd8cSMark Johnston char *name; /* entry key, private copy */ 46240afd8cSMark Johnston uint64_t hash; /* key hash */ 47240afd8cSMark Johnston union { 48240afd8cSMark Johnston uint8_t *valp; 49240afd8cSMark Johnston uint16_t *val16p; 50240afd8cSMark Johnston uint32_t *val32p; 51240afd8cSMark Johnston uint64_t *val64p; 52240afd8cSMark Johnston }; /* entry value, an integer array */ 53240afd8cSMark Johnston uint64_t val64; /* embedded value for a common case */ 54240afd8cSMark Johnston size_t intsz; /* array element size; 1, 2, 4 or 8 */ 55240afd8cSMark Johnston size_t intcnt; /* array size */ 56240afd8cSMark Johnston STAILQ_ENTRY(zfs_zap_entry) next; 57240afd8cSMark Johnston } zfs_zap_entry_t; 58240afd8cSMark Johnston 59240afd8cSMark Johnston struct zfs_zap { 60240afd8cSMark Johnston STAILQ_HEAD(, zfs_zap_entry) kvps; 61240afd8cSMark Johnston uint64_t hashsalt; /* key hash input */ 62240afd8cSMark Johnston unsigned long kvpcnt; /* number of key-value pairs */ 63240afd8cSMark Johnston unsigned long chunks; /* count of chunks needed for fat ZAP */ 64240afd8cSMark Johnston bool micro; /* can this be a micro ZAP? */ 65240afd8cSMark Johnston 66240afd8cSMark Johnston dnode_phys_t *dnode; /* backpointer */ 67240afd8cSMark Johnston zfs_objset_t *os; /* backpointer */ 68240afd8cSMark Johnston }; 69240afd8cSMark Johnston 70240afd8cSMark Johnston static uint16_t 71240afd8cSMark Johnston zap_entry_chunks(zfs_zap_entry_t *ent) 72240afd8cSMark Johnston { 73240afd8cSMark Johnston return (1 + howmany(strlen(ent->name) + 1, ZAP_LEAF_ARRAY_BYTES) + 74240afd8cSMark Johnston howmany(ent->intsz * ent->intcnt, ZAP_LEAF_ARRAY_BYTES)); 75240afd8cSMark Johnston } 76240afd8cSMark Johnston 77240afd8cSMark Johnston static uint64_t 78240afd8cSMark Johnston zap_hash(uint64_t salt, const char *name) 79240afd8cSMark Johnston { 80240afd8cSMark Johnston static uint64_t crc64_table[256]; 81240afd8cSMark Johnston const uint64_t crc64_poly = 0xC96C5795D7870F42UL; 82240afd8cSMark Johnston const uint8_t *cp; 83240afd8cSMark Johnston uint64_t crc; 84240afd8cSMark Johnston uint8_t c; 85240afd8cSMark Johnston 86240afd8cSMark Johnston assert(salt != 0); 87240afd8cSMark Johnston if (crc64_table[128] == 0) { 88240afd8cSMark Johnston for (int i = 0; i < 256; i++) { 89240afd8cSMark Johnston uint64_t *t; 90240afd8cSMark Johnston 91240afd8cSMark Johnston t = crc64_table + i; 92240afd8cSMark Johnston *t = i; 93240afd8cSMark Johnston for (int j = 8; j > 0; j--) 94240afd8cSMark Johnston *t = (*t >> 1) ^ (-(*t & 1) & crc64_poly); 95240afd8cSMark Johnston } 96240afd8cSMark Johnston } 97240afd8cSMark Johnston assert(crc64_table[128] == crc64_poly); 98240afd8cSMark Johnston 99240afd8cSMark Johnston for (cp = (const uint8_t *)name, crc = salt; (c = *cp) != '\0'; cp++) 100240afd8cSMark Johnston crc = (crc >> 8) ^ crc64_table[(crc ^ c) & 0xFF]; 101240afd8cSMark Johnston 102240afd8cSMark Johnston /* 103240afd8cSMark Johnston * Only use 28 bits, since we need 4 bits in the cookie for the 104240afd8cSMark Johnston * collision differentiator. We MUST use the high bits, since 105240afd8cSMark Johnston * those are the ones that we first pay attention to when 106240afd8cSMark Johnston * choosing the bucket. 107240afd8cSMark Johnston */ 108240afd8cSMark Johnston crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); 109240afd8cSMark Johnston 110240afd8cSMark Johnston return (crc); 111240afd8cSMark Johnston } 112240afd8cSMark Johnston 113240afd8cSMark Johnston zfs_zap_t * 114240afd8cSMark Johnston zap_alloc(zfs_objset_t *os, dnode_phys_t *dnode) 115240afd8cSMark Johnston { 116240afd8cSMark Johnston zfs_zap_t *zap; 117240afd8cSMark Johnston 118240afd8cSMark Johnston zap = ecalloc(1, sizeof(*zap)); 119240afd8cSMark Johnston STAILQ_INIT(&zap->kvps); 120240afd8cSMark Johnston zap->hashsalt = ((uint64_t)random() << 32) | random(); 121240afd8cSMark Johnston zap->micro = true; 122240afd8cSMark Johnston zap->kvpcnt = 0; 123240afd8cSMark Johnston zap->chunks = 0; 124240afd8cSMark Johnston zap->dnode = dnode; 125240afd8cSMark Johnston zap->os = os; 126240afd8cSMark Johnston return (zap); 127240afd8cSMark Johnston } 128240afd8cSMark Johnston 129240afd8cSMark Johnston void 130240afd8cSMark Johnston zap_add(zfs_zap_t *zap, const char *name, size_t intsz, size_t intcnt, 131240afd8cSMark Johnston const uint8_t *val) 132240afd8cSMark Johnston { 133240afd8cSMark Johnston zfs_zap_entry_t *ent; 134240afd8cSMark Johnston 135240afd8cSMark Johnston assert(intsz == 1 || intsz == 2 || intsz == 4 || intsz == 8); 136240afd8cSMark Johnston assert(strlen(name) + 1 <= ZAP_MAXNAMELEN); 137240afd8cSMark Johnston assert(intcnt <= ZAP_MAXVALUELEN && intcnt * intsz <= ZAP_MAXVALUELEN); 138240afd8cSMark Johnston 139240afd8cSMark Johnston ent = ecalloc(1, sizeof(*ent)); 140240afd8cSMark Johnston ent->name = estrdup(name); 141240afd8cSMark Johnston ent->hash = zap_hash(zap->hashsalt, ent->name); 142240afd8cSMark Johnston ent->intsz = intsz; 143240afd8cSMark Johnston ent->intcnt = intcnt; 144240afd8cSMark Johnston if (intsz == sizeof(uint64_t) && intcnt == 1) { 145240afd8cSMark Johnston /* 146240afd8cSMark Johnston * Micro-optimization to elide a memory allocation in that most 147240afd8cSMark Johnston * common case where this is a directory entry. 148240afd8cSMark Johnston */ 149240afd8cSMark Johnston ent->val64p = &ent->val64; 150240afd8cSMark Johnston } else { 151240afd8cSMark Johnston ent->valp = ecalloc(intcnt, intsz); 152240afd8cSMark Johnston } 153240afd8cSMark Johnston memcpy(ent->valp, val, intcnt * intsz); 154240afd8cSMark Johnston zap->kvpcnt++; 155240afd8cSMark Johnston zap->chunks += zap_entry_chunks(ent); 156240afd8cSMark Johnston STAILQ_INSERT_TAIL(&zap->kvps, ent, next); 157240afd8cSMark Johnston 158240afd8cSMark Johnston if (zap->micro && (intcnt != 1 || intsz != sizeof(uint64_t) || 159240afd8cSMark Johnston strlen(name) + 1 > MZAP_NAME_LEN || zap->kvpcnt > MZAP_ENT_MAX)) 160240afd8cSMark Johnston zap->micro = false; 161240afd8cSMark Johnston } 162240afd8cSMark Johnston 163240afd8cSMark Johnston void 164240afd8cSMark Johnston zap_add_uint64(zfs_zap_t *zap, const char *name, uint64_t val) 165240afd8cSMark Johnston { 166240afd8cSMark Johnston zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val); 167240afd8cSMark Johnston } 168240afd8cSMark Johnston 169240afd8cSMark Johnston void 170*be2f92a9SMark Johnston zap_add_uint64_self(zfs_zap_t *zap, uint64_t val) 171*be2f92a9SMark Johnston { 172*be2f92a9SMark Johnston char name[32]; 173*be2f92a9SMark Johnston 174*be2f92a9SMark Johnston snprintf(name, sizeof(name), "%jx", (uintmax_t)val); 175*be2f92a9SMark Johnston zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val); 176*be2f92a9SMark Johnston } 177*be2f92a9SMark Johnston 178*be2f92a9SMark Johnston void 179240afd8cSMark Johnston zap_add_string(zfs_zap_t *zap, const char *name, const char *val) 180240afd8cSMark Johnston { 181240afd8cSMark Johnston zap_add(zap, name, 1, strlen(val) + 1, val); 182240afd8cSMark Johnston } 183240afd8cSMark Johnston 184240afd8cSMark Johnston bool 185240afd8cSMark Johnston zap_entry_exists(zfs_zap_t *zap, const char *name) 186240afd8cSMark Johnston { 187240afd8cSMark Johnston zfs_zap_entry_t *ent; 188240afd8cSMark Johnston 189240afd8cSMark Johnston STAILQ_FOREACH(ent, &zap->kvps, next) { 190240afd8cSMark Johnston if (strcmp(ent->name, name) == 0) 191240afd8cSMark Johnston return (true); 192240afd8cSMark Johnston } 193240afd8cSMark Johnston return (false); 194240afd8cSMark Johnston } 195240afd8cSMark Johnston 196240afd8cSMark Johnston static void 197240afd8cSMark Johnston zap_micro_write(zfs_opt_t *zfs, zfs_zap_t *zap) 198240afd8cSMark Johnston { 199240afd8cSMark Johnston dnode_phys_t *dnode; 200240afd8cSMark Johnston zfs_zap_entry_t *ent; 201240afd8cSMark Johnston mzap_phys_t *mzap; 202240afd8cSMark Johnston mzap_ent_phys_t *ment; 203240afd8cSMark Johnston off_t bytes, loc; 204240afd8cSMark Johnston 205240afd8cSMark Johnston memset(zfs->filebuf, 0, sizeof(zfs->filebuf)); 206240afd8cSMark Johnston mzap = (mzap_phys_t *)&zfs->filebuf[0]; 207240afd8cSMark Johnston mzap->mz_block_type = ZBT_MICRO; 208240afd8cSMark Johnston mzap->mz_salt = zap->hashsalt; 209240afd8cSMark Johnston mzap->mz_normflags = 0; 210240afd8cSMark Johnston 211240afd8cSMark Johnston bytes = sizeof(*mzap) + (zap->kvpcnt - 1) * sizeof(*ment); 212240afd8cSMark Johnston assert(bytes <= (off_t)MZAP_MAX_BLKSZ); 213240afd8cSMark Johnston 214240afd8cSMark Johnston ment = &mzap->mz_chunk[0]; 215240afd8cSMark Johnston STAILQ_FOREACH(ent, &zap->kvps, next) { 216240afd8cSMark Johnston memcpy(&ment->mze_value, ent->valp, ent->intsz * ent->intcnt); 217240afd8cSMark Johnston ment->mze_cd = 0; /* XXX-MJ */ 218240afd8cSMark Johnston strlcpy(ment->mze_name, ent->name, sizeof(ment->mze_name)); 219240afd8cSMark Johnston ment++; 220240afd8cSMark Johnston } 221240afd8cSMark Johnston 222240afd8cSMark Johnston loc = objset_space_alloc(zfs, zap->os, &bytes); 223240afd8cSMark Johnston 224240afd8cSMark Johnston dnode = zap->dnode; 225240afd8cSMark Johnston dnode->dn_maxblkid = 0; 226240afd8cSMark Johnston dnode->dn_datablkszsec = bytes >> MINBLOCKSHIFT; 227240afd8cSMark Johnston 228240afd8cSMark Johnston vdev_pwrite_dnode_data(zfs, dnode, zfs->filebuf, bytes, loc); 229240afd8cSMark Johnston } 230240afd8cSMark Johnston 231240afd8cSMark Johnston /* 232240afd8cSMark Johnston * Write some data to the fat ZAP leaf chunk starting at index "li". 233240afd8cSMark Johnston * 234240afd8cSMark Johnston * Note that individual integers in the value may be split among consecutive 235240afd8cSMark Johnston * leaves. 236240afd8cSMark Johnston */ 237240afd8cSMark Johnston static void 238240afd8cSMark Johnston zap_fat_write_array_chunk(zap_leaf_t *l, uint16_t li, size_t sz, 239240afd8cSMark Johnston const uint8_t *val) 240240afd8cSMark Johnston { 241240afd8cSMark Johnston struct zap_leaf_array *la; 242240afd8cSMark Johnston 243240afd8cSMark Johnston assert(sz <= ZAP_MAXVALUELEN); 244240afd8cSMark Johnston 245240afd8cSMark Johnston for (uint16_t n, resid = sz; resid > 0; resid -= n, val += n, li++) { 246240afd8cSMark Johnston n = MIN(resid, ZAP_LEAF_ARRAY_BYTES); 247240afd8cSMark Johnston 248240afd8cSMark Johnston la = &ZAP_LEAF_CHUNK(l, li).l_array; 249240afd8cSMark Johnston assert(la->la_type == ZAP_CHUNK_FREE); 250240afd8cSMark Johnston la->la_type = ZAP_CHUNK_ARRAY; 251240afd8cSMark Johnston memcpy(la->la_array, val, n); 252240afd8cSMark Johnston la->la_next = li + 1; 253240afd8cSMark Johnston } 254240afd8cSMark Johnston la->la_next = 0xffff; 255240afd8cSMark Johnston } 256240afd8cSMark Johnston 257240afd8cSMark Johnston /* 258240afd8cSMark Johnston * Find the shortest hash prefix length which lets us distribute keys without 259240afd8cSMark Johnston * overflowing a leaf block. This is not (space) optimal, but is simple, and 260240afd8cSMark Johnston * directories large enough to overflow a single 128KB leaf block are uncommon. 261240afd8cSMark Johnston */ 262240afd8cSMark Johnston static unsigned int 263240afd8cSMark Johnston zap_fat_write_prefixlen(zfs_zap_t *zap, zap_leaf_t *l) 264240afd8cSMark Johnston { 265240afd8cSMark Johnston zfs_zap_entry_t *ent; 266240afd8cSMark Johnston unsigned int prefixlen; 267240afd8cSMark Johnston 268240afd8cSMark Johnston if (zap->chunks <= ZAP_LEAF_NUMCHUNKS(l)) { 269240afd8cSMark Johnston /* 270240afd8cSMark Johnston * All chunks will fit in a single leaf block. 271240afd8cSMark Johnston */ 272240afd8cSMark Johnston return (0); 273240afd8cSMark Johnston } 274240afd8cSMark Johnston 275240afd8cSMark Johnston for (prefixlen = 1; prefixlen < (unsigned int)l->l_bs; prefixlen++) { 276240afd8cSMark Johnston uint32_t *leafchunks; 277240afd8cSMark Johnston 278240afd8cSMark Johnston leafchunks = ecalloc(1u << prefixlen, sizeof(*leafchunks)); 279240afd8cSMark Johnston STAILQ_FOREACH(ent, &zap->kvps, next) { 280240afd8cSMark Johnston uint64_t li; 281240afd8cSMark Johnston uint16_t chunks; 282240afd8cSMark Johnston 283240afd8cSMark Johnston li = ZAP_HASH_IDX(ent->hash, prefixlen); 284240afd8cSMark Johnston 285240afd8cSMark Johnston chunks = zap_entry_chunks(ent); 286240afd8cSMark Johnston if (ZAP_LEAF_NUMCHUNKS(l) - leafchunks[li] < chunks) { 287240afd8cSMark Johnston /* 288240afd8cSMark Johnston * Not enough space, grow the prefix and retry. 289240afd8cSMark Johnston */ 290240afd8cSMark Johnston break; 291240afd8cSMark Johnston } 292240afd8cSMark Johnston leafchunks[li] += chunks; 293240afd8cSMark Johnston } 294240afd8cSMark Johnston free(leafchunks); 295240afd8cSMark Johnston 296240afd8cSMark Johnston if (ent == NULL) { 297240afd8cSMark Johnston /* 298240afd8cSMark Johnston * Everything fits, we're done. 299240afd8cSMark Johnston */ 300240afd8cSMark Johnston break; 301240afd8cSMark Johnston } 302240afd8cSMark Johnston } 303240afd8cSMark Johnston 304240afd8cSMark Johnston /* 305240afd8cSMark Johnston * If this fails, then we need to expand the pointer table. For now 306240afd8cSMark Johnston * this situation is unhandled since it is hard to trigger. 307240afd8cSMark Johnston */ 308240afd8cSMark Johnston assert(prefixlen < (unsigned int)l->l_bs); 309240afd8cSMark Johnston 310240afd8cSMark Johnston return (prefixlen); 311240afd8cSMark Johnston } 312240afd8cSMark Johnston 313240afd8cSMark Johnston /* 314240afd8cSMark Johnston * Initialize a fat ZAP leaf block. 315240afd8cSMark Johnston */ 316240afd8cSMark Johnston static void 317240afd8cSMark Johnston zap_fat_write_leaf_init(zap_leaf_t *l, uint64_t prefix, int prefixlen) 318240afd8cSMark Johnston { 319240afd8cSMark Johnston zap_leaf_phys_t *leaf; 320240afd8cSMark Johnston 321240afd8cSMark Johnston leaf = l->l_phys; 322240afd8cSMark Johnston 323240afd8cSMark Johnston leaf->l_hdr.lh_block_type = ZBT_LEAF; 324240afd8cSMark Johnston leaf->l_hdr.lh_magic = ZAP_LEAF_MAGIC; 325240afd8cSMark Johnston leaf->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); 326240afd8cSMark Johnston leaf->l_hdr.lh_prefix = prefix; 327240afd8cSMark Johnston leaf->l_hdr.lh_prefix_len = prefixlen; 328240afd8cSMark Johnston 329240afd8cSMark Johnston /* Initialize the leaf hash table. */ 330240afd8cSMark Johnston assert(leaf->l_hdr.lh_nfree < 0xffff); 331240afd8cSMark Johnston memset(leaf->l_hash, 0xff, 332240afd8cSMark Johnston ZAP_LEAF_HASH_NUMENTRIES(l) * sizeof(*leaf->l_hash)); 333240afd8cSMark Johnston 334240afd8cSMark Johnston /* Initialize the leaf chunks. */ 335240afd8cSMark Johnston for (uint16_t i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { 336240afd8cSMark Johnston struct zap_leaf_free *lf; 337240afd8cSMark Johnston 338240afd8cSMark Johnston lf = &ZAP_LEAF_CHUNK(l, i).l_free; 339240afd8cSMark Johnston lf->lf_type = ZAP_CHUNK_FREE; 340240afd8cSMark Johnston if (i + 1 == ZAP_LEAF_NUMCHUNKS(l)) 341240afd8cSMark Johnston lf->lf_next = 0xffff; 342240afd8cSMark Johnston else 343240afd8cSMark Johnston lf->lf_next = i + 1; 344240afd8cSMark Johnston } 345240afd8cSMark Johnston } 346240afd8cSMark Johnston 347240afd8cSMark Johnston static void 348240afd8cSMark Johnston zap_fat_write(zfs_opt_t *zfs, zfs_zap_t *zap) 349240afd8cSMark Johnston { 350240afd8cSMark Johnston struct dnode_cursor *c; 351240afd8cSMark Johnston zap_leaf_t l; 352240afd8cSMark Johnston zap_phys_t *zaphdr; 353240afd8cSMark Johnston struct zap_table_phys *zt; 354240afd8cSMark Johnston zfs_zap_entry_t *ent; 355240afd8cSMark Johnston dnode_phys_t *dnode; 356240afd8cSMark Johnston uint8_t *leafblks; 357240afd8cSMark Johnston uint64_t lblkcnt, *ptrhasht; 358240afd8cSMark Johnston off_t loc, blksz; 359240afd8cSMark Johnston size_t blkshift; 360240afd8cSMark Johnston unsigned int prefixlen; 361240afd8cSMark Johnston int ptrcnt; 362240afd8cSMark Johnston 363240afd8cSMark Johnston /* 364240afd8cSMark Johnston * For simplicity, always use the largest block size. This should be ok 365240afd8cSMark Johnston * since most directories will be micro ZAPs, but it's space inefficient 366240afd8cSMark Johnston * for small ZAPs and might need to be revisited. 367240afd8cSMark Johnston */ 368240afd8cSMark Johnston blkshift = MAXBLOCKSHIFT; 369240afd8cSMark Johnston blksz = (off_t)1 << blkshift; 370240afd8cSMark Johnston 371240afd8cSMark Johnston /* 372240afd8cSMark Johnston * Embedded pointer tables give up to 8192 entries. This ought to be 373240afd8cSMark Johnston * enough for anything except massive directories. 374240afd8cSMark Johnston */ 375240afd8cSMark Johnston ptrcnt = (blksz / 2) / sizeof(uint64_t); 376240afd8cSMark Johnston 377240afd8cSMark Johnston memset(zfs->filebuf, 0, sizeof(zfs->filebuf)); 378240afd8cSMark Johnston zaphdr = (zap_phys_t *)&zfs->filebuf[0]; 379240afd8cSMark Johnston zaphdr->zap_block_type = ZBT_HEADER; 380240afd8cSMark Johnston zaphdr->zap_magic = ZAP_MAGIC; 381240afd8cSMark Johnston zaphdr->zap_num_entries = zap->kvpcnt; 382240afd8cSMark Johnston zaphdr->zap_salt = zap->hashsalt; 383240afd8cSMark Johnston 384240afd8cSMark Johnston l.l_bs = blkshift; 385240afd8cSMark Johnston l.l_phys = NULL; 386240afd8cSMark Johnston 387240afd8cSMark Johnston zt = &zaphdr->zap_ptrtbl; 388240afd8cSMark Johnston zt->zt_blk = 0; 389240afd8cSMark Johnston zt->zt_numblks = 0; 390240afd8cSMark Johnston zt->zt_shift = flsll(ptrcnt) - 1; 391240afd8cSMark Johnston zt->zt_nextblk = 0; 392240afd8cSMark Johnston zt->zt_blks_copied = 0; 393240afd8cSMark Johnston 394240afd8cSMark Johnston /* 395240afd8cSMark Johnston * How many leaf blocks do we need? Initialize them and update the 396240afd8cSMark Johnston * header. 397240afd8cSMark Johnston */ 398240afd8cSMark Johnston prefixlen = zap_fat_write_prefixlen(zap, &l); 399cba2fa7cSMark Johnston lblkcnt = (uint64_t)1 << prefixlen; 400240afd8cSMark Johnston leafblks = ecalloc(lblkcnt, blksz); 401240afd8cSMark Johnston for (unsigned int li = 0; li < lblkcnt; li++) { 402240afd8cSMark Johnston l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz); 403240afd8cSMark Johnston zap_fat_write_leaf_init(&l, li, prefixlen); 404240afd8cSMark Johnston } 405240afd8cSMark Johnston zaphdr->zap_num_leafs = lblkcnt; 406240afd8cSMark Johnston zaphdr->zap_freeblk = lblkcnt + 1; 407240afd8cSMark Johnston 408240afd8cSMark Johnston /* 409240afd8cSMark Johnston * For each entry, figure out which leaf block it belongs to based on 410240afd8cSMark Johnston * the upper bits of its hash, allocate chunks from that leaf, and fill 411240afd8cSMark Johnston * them out. 412240afd8cSMark Johnston */ 413240afd8cSMark Johnston ptrhasht = (uint64_t *)(&zfs->filebuf[0] + blksz / 2); 414240afd8cSMark Johnston STAILQ_FOREACH(ent, &zap->kvps, next) { 415240afd8cSMark Johnston struct zap_leaf_entry *le; 416240afd8cSMark Johnston uint16_t *lptr; 417240afd8cSMark Johnston uint64_t hi, li; 418240afd8cSMark Johnston uint16_t namelen, nchunks, nnamechunks, nvalchunks; 419240afd8cSMark Johnston 420240afd8cSMark Johnston hi = ZAP_HASH_IDX(ent->hash, zt->zt_shift); 421240afd8cSMark Johnston li = ZAP_HASH_IDX(ent->hash, prefixlen); 422240afd8cSMark Johnston assert(ptrhasht[hi] == 0 || ptrhasht[hi] == li + 1); 423240afd8cSMark Johnston ptrhasht[hi] = li + 1; 424240afd8cSMark Johnston l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz); 425240afd8cSMark Johnston 426240afd8cSMark Johnston namelen = strlen(ent->name) + 1; 427240afd8cSMark Johnston 428240afd8cSMark Johnston /* 429240afd8cSMark Johnston * How many leaf chunks do we need for this entry? 430240afd8cSMark Johnston */ 431240afd8cSMark Johnston nnamechunks = howmany(namelen, ZAP_LEAF_ARRAY_BYTES); 432240afd8cSMark Johnston nvalchunks = howmany(ent->intcnt, 433240afd8cSMark Johnston ZAP_LEAF_ARRAY_BYTES / ent->intsz); 434240afd8cSMark Johnston nchunks = 1 + nnamechunks + nvalchunks; 435240afd8cSMark Johnston 436240afd8cSMark Johnston /* 437240afd8cSMark Johnston * Allocate a run of free leaf chunks for this entry, 438240afd8cSMark Johnston * potentially extending a hash chain. 439240afd8cSMark Johnston */ 440240afd8cSMark Johnston assert(l.l_phys->l_hdr.lh_nfree >= nchunks); 441240afd8cSMark Johnston l.l_phys->l_hdr.lh_nfree -= nchunks; 442240afd8cSMark Johnston l.l_phys->l_hdr.lh_nentries++; 443240afd8cSMark Johnston lptr = ZAP_LEAF_HASH_ENTPTR(&l, ent->hash); 444240afd8cSMark Johnston while (*lptr != 0xffff) { 445240afd8cSMark Johnston assert(*lptr < ZAP_LEAF_NUMCHUNKS(&l)); 446240afd8cSMark Johnston le = ZAP_LEAF_ENTRY(&l, *lptr); 447240afd8cSMark Johnston assert(le->le_type == ZAP_CHUNK_ENTRY); 448240afd8cSMark Johnston le->le_cd++; 449240afd8cSMark Johnston lptr = &le->le_next; 450240afd8cSMark Johnston } 451240afd8cSMark Johnston *lptr = l.l_phys->l_hdr.lh_freelist; 452240afd8cSMark Johnston l.l_phys->l_hdr.lh_freelist += nchunks; 453240afd8cSMark Johnston assert(l.l_phys->l_hdr.lh_freelist <= 454240afd8cSMark Johnston ZAP_LEAF_NUMCHUNKS(&l)); 455240afd8cSMark Johnston if (l.l_phys->l_hdr.lh_freelist == 456240afd8cSMark Johnston ZAP_LEAF_NUMCHUNKS(&l)) 457240afd8cSMark Johnston l.l_phys->l_hdr.lh_freelist = 0xffff; 458240afd8cSMark Johnston 459240afd8cSMark Johnston /* 460240afd8cSMark Johnston * Integer values must be stored in big-endian format. 461240afd8cSMark Johnston */ 462240afd8cSMark Johnston switch (ent->intsz) { 463240afd8cSMark Johnston case 1: 464240afd8cSMark Johnston break; 465240afd8cSMark Johnston case 2: 466240afd8cSMark Johnston for (uint16_t *v = ent->val16p; 467240afd8cSMark Johnston v - ent->val16p < (ptrdiff_t)ent->intcnt; 468240afd8cSMark Johnston v++) 469240afd8cSMark Johnston *v = htobe16(*v); 470240afd8cSMark Johnston break; 471240afd8cSMark Johnston case 4: 472240afd8cSMark Johnston for (uint32_t *v = ent->val32p; 473240afd8cSMark Johnston v - ent->val32p < (ptrdiff_t)ent->intcnt; 474240afd8cSMark Johnston v++) 475240afd8cSMark Johnston *v = htobe32(*v); 476240afd8cSMark Johnston break; 477240afd8cSMark Johnston case 8: 478240afd8cSMark Johnston for (uint64_t *v = ent->val64p; 479240afd8cSMark Johnston v - ent->val64p < (ptrdiff_t)ent->intcnt; 480240afd8cSMark Johnston v++) 481240afd8cSMark Johnston *v = htobe64(*v); 482240afd8cSMark Johnston break; 483240afd8cSMark Johnston default: 484240afd8cSMark Johnston assert(0); 485240afd8cSMark Johnston } 486240afd8cSMark Johnston 487240afd8cSMark Johnston /* 488240afd8cSMark Johnston * Finally, write out the leaf chunks for this entry. 489240afd8cSMark Johnston */ 490240afd8cSMark Johnston le = ZAP_LEAF_ENTRY(&l, *lptr); 491240afd8cSMark Johnston assert(le->le_type == ZAP_CHUNK_FREE); 492240afd8cSMark Johnston le->le_type = ZAP_CHUNK_ENTRY; 493240afd8cSMark Johnston le->le_next = 0xffff; 494240afd8cSMark Johnston le->le_name_chunk = *lptr + 1; 495240afd8cSMark Johnston le->le_name_numints = namelen; 496240afd8cSMark Johnston le->le_value_chunk = *lptr + 1 + nnamechunks; 497240afd8cSMark Johnston le->le_value_intlen = ent->intsz; 498240afd8cSMark Johnston le->le_value_numints = ent->intcnt; 499240afd8cSMark Johnston le->le_hash = ent->hash; 500240afd8cSMark Johnston zap_fat_write_array_chunk(&l, *lptr + 1, namelen, ent->name); 501240afd8cSMark Johnston zap_fat_write_array_chunk(&l, *lptr + 1 + nnamechunks, 502240afd8cSMark Johnston ent->intcnt * ent->intsz, ent->valp); 503240afd8cSMark Johnston } 504240afd8cSMark Johnston 505240afd8cSMark Johnston /* 506240afd8cSMark Johnston * Initialize unused slots of the pointer table. 507240afd8cSMark Johnston */ 508240afd8cSMark Johnston for (int i = 0; i < ptrcnt; i++) 509240afd8cSMark Johnston if (ptrhasht[i] == 0) 510240afd8cSMark Johnston ptrhasht[i] = (i >> (zt->zt_shift - prefixlen)) + 1; 511240afd8cSMark Johnston 512240afd8cSMark Johnston /* 513240afd8cSMark Johnston * Write the whole thing to disk. 514240afd8cSMark Johnston */ 515240afd8cSMark Johnston dnode = zap->dnode; 516240afd8cSMark Johnston dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT; 517240afd8cSMark Johnston dnode->dn_maxblkid = lblkcnt + 1; 518240afd8cSMark Johnston 519240afd8cSMark Johnston c = dnode_cursor_init(zfs, zap->os, zap->dnode, 520240afd8cSMark Johnston (lblkcnt + 1) * blksz, blksz); 521240afd8cSMark Johnston 522240afd8cSMark Johnston loc = objset_space_alloc(zfs, zap->os, &blksz); 523240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, zfs->filebuf, blksz, loc, 524240afd8cSMark Johnston dnode_cursor_next(zfs, c, 0)); 525240afd8cSMark Johnston 526240afd8cSMark Johnston for (uint64_t i = 0; i < lblkcnt; i++) { 527240afd8cSMark Johnston loc = objset_space_alloc(zfs, zap->os, &blksz); 528240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, leafblks + i * blksz, 529240afd8cSMark Johnston blksz, loc, dnode_cursor_next(zfs, c, (i + 1) * blksz)); 530240afd8cSMark Johnston } 531240afd8cSMark Johnston 532240afd8cSMark Johnston dnode_cursor_finish(zfs, c); 533240afd8cSMark Johnston 534240afd8cSMark Johnston free(leafblks); 535240afd8cSMark Johnston } 536240afd8cSMark Johnston 537240afd8cSMark Johnston void 538240afd8cSMark Johnston zap_write(zfs_opt_t *zfs, zfs_zap_t *zap) 539240afd8cSMark Johnston { 540240afd8cSMark Johnston zfs_zap_entry_t *ent; 541240afd8cSMark Johnston 542240afd8cSMark Johnston if (zap->micro) { 543240afd8cSMark Johnston zap_micro_write(zfs, zap); 544240afd8cSMark Johnston } else { 545240afd8cSMark Johnston assert(!STAILQ_EMPTY(&zap->kvps)); 546240afd8cSMark Johnston assert(zap->kvpcnt > 0); 547240afd8cSMark Johnston zap_fat_write(zfs, zap); 548240afd8cSMark Johnston } 549240afd8cSMark Johnston 550240afd8cSMark Johnston while ((ent = STAILQ_FIRST(&zap->kvps)) != NULL) { 551240afd8cSMark Johnston STAILQ_REMOVE_HEAD(&zap->kvps, next); 552240afd8cSMark Johnston if (ent->val64p != &ent->val64) 553240afd8cSMark Johnston free(ent->valp); 554240afd8cSMark Johnston free(ent->name); 555240afd8cSMark Johnston free(ent); 556240afd8cSMark Johnston } 557240afd8cSMark Johnston free(zap); 558240afd8cSMark Johnston } 559